def signin(self, user, password, data):
    """Log in to 1fichier.com with the given credentials.

    Sets the Referer on the underlying curl handle, posts the login form,
    and calls ``self.fail_login()`` when the returned page contains one of
    the known rejection messages or when the server answers with HTTP 403.
    Any other ``BadHeader`` is re-raised unchanged.

    :param user: sent as the ``mail`` form field
    :param password: sent as the ``pass`` form field
    :param data: not read by this method
    """
    login_url = "https://1fichier.com/login.pl?lg=en"
    rejection_markers = (
        '>Invalid username or Password',
        '>Invalid email address',
        '>Invalid password',
    )
    form = {
        'mail': user,
        'pass': password,
        'It': "on",
        'purge': "off",
        'valider': "Send",
    }

    self.req.http.c.setopt(pycurl.REFERER, login_url)

    try:
        html = self.load(login_url, post=form)
        for marker in rejection_markers:
            if marker in html:
                self.fail_login()
                break
    except BadHeader as e:
        # Only a 403 is treated as a login failure; anything else is a
        # genuine transport/server error for the caller to handle.
        if e.code != 403:
            raise
        self.fail_login()
def _curl_fetch_once(url, headers):
    """Perform a single GET of ``url`` and return the response body."""
    handle = pycurl.Curl()
    try:
        body = StringIO.StringIO()
        handle.setopt(pycurl.REFERER, 'http://weixin.sogou.com/')
        handle.setopt(pycurl.FOLLOWLOCATION, True)
        handle.setopt(pycurl.MAXREDIRS, 5)
        handle.setopt(pycurl.CONNECTTIMEOUT, 60)
        handle.setopt(pycurl.TIMEOUT, 120)
        handle.setopt(pycurl.ENCODING, 'gzip,deflate')
        handle.setopt(pycurl.URL, url)
        handle.setopt(pycurl.HTTPHEADER, headers)
        handle.setopt(pycurl.WRITEFUNCTION, body.write)
        handle.perform()
        return body.getvalue()
    finally:
        # Release the handle on success and on failure; the caller retries
        # without bound, so handles must not pile up.
        handle.close()


def Curl(url, headers):
    """Fetch ``url`` with pycurl, retrying until a usable page comes back.

    Each attempt sends ``headers`` plus a fixed Referer, follows up to five
    redirects and accepts gzip/deflate. If the page contains the marker
    string, the function sleeps 600 seconds and tries again. Any exception
    raised by an attempt is printed and the request is retried immediately.

    :param url: address to request
    :param headers: list of header strings passed to ``pycurl.HTTPHEADER``
    :return: the response body of the first attempt that does not contain
        the marker string
    """
    # NOTE(review): both '?' literals below look like non-ASCII text whose
    # encoding was lost; they are runtime strings and are kept as found.
    # Confirm the intended marker against the original source.
    # NOTE(review): failures are retried forever with no delay or attempt
    # limit; a permanently failing URL will spin.
    while 1:
        try:
            html = _curl_fetch_once(url, headers)
            if '??????' in html:
                print(u'??????,??10??')
                time.sleep(600)
            else:
                return html
        except Exception as e:
            print('%s curl(url) %s' % (url, e))
            continue
def curl(url, debug=False, **kwargs):
    """Fetch ``url`` with pycurl, retrying until a request succeeds.

    :param url: address to request; also sent as the Referer header
    :param debug: when true, the first failure is re-raised instead of
        being retried
    :param kwargs: extra curl options keyed by the name of a ``pycurl``
        constant (for example ``COOKIE='a=b'``); applied after the defaults
        so they can override them
    :return: the response body
    :raises KeyError: if a keyword is not a name defined by ``pycurl``
    """
    # Resolve option names before the retry loop: an unknown name can never
    # succeed, so retrying it would spin forever.
    known = vars(pycurl)
    extra_options = [(known[name], value) for name, value in kwargs.items()]

    while 1:
        handle = None
        try:
            body = StringIO.StringIO()
            handle = pycurl.Curl()
            handle.setopt(pycurl.URL, url)
            handle.setopt(pycurl.REFERER, url)
            handle.setopt(pycurl.FOLLOWLOCATION, True)
            handle.setopt(pycurl.TIMEOUT, 60)
            handle.setopt(pycurl.ENCODING, 'gzip')
            handle.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')
            handle.setopt(pycurl.NOSIGNAL, True)
            handle.setopt(pycurl.WRITEFUNCTION, body.write)
            for option, value in extra_options:
                handle.setopt(option, value)
            handle.perform()
            return body.getvalue()
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must be able to break out of the retry loop.
            if debug:
                raise
        finally:
            if handle is not None:
                handle.close()
def ccurl(url, value):
    """Fetch ``url`` with pycurl and return the decoded response.

    :param url: address to request; converted with ``str()`` before use
    :param value: selects the request mode:
        ``"no_redir"`` - plain request that does not follow redirects;
        ``""`` - plain request that follows redirects;
        anything else - follows redirects and sends ``id=<value>`` as
        URL-encoded POST fields
    :return: the response bytes passed through ``getContentUnicode``
    """
    hdr = "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:45.0) Gecko/20100101 Firefox/45.0"
    storage = BytesIO()
    c = pycurl.Curl()
    try:
        if value == "no_redir":
            print("no redirect")
        else:
            c.setopt(c.FOLLOWLOCATION, True)
        c.setopt(c.USERAGENT, hdr)
        if value != "" and value != "no_redir":
            c.setopt(c.POSTFIELDS, urllib.parse.urlencode({'id': value}))
        c.setopt(c.URL, str(url))
        c.setopt(c.WRITEDATA, storage)
        c.perform()
    finally:
        # Close the handle even when setopt/perform raises.
        c.close()
    return getContentUnicode(storage.getvalue())
def get_html(url, user_agent, refer_url):
    """Download ``url`` with pycurl and return the response body.

    :param url: address to request
    :param user_agent: value sent as the User-Agent header
    :param refer_url: value sent as the Referer header
    :return: everything curl wrote for the response
    """
    sink = StringIO()
    handle = pycurl.Curl()
    request_options = (
        (pycurl.USERAGENT, user_agent),
        (pycurl.REFERER, refer_url),
        (pycurl.URL, url),
        (pycurl.WRITEDATA, sink),
    )
    for option, setting in request_options:
        handle.setopt(option, setting)

    handle.perform()

    html = sink.getvalue()
    sink.close()
    handle.close()
    return html
def get(url, user_agent=UA, referrer=None):
    """GET ``url`` with pycurl and return the response data.

    Redirects are followed; the connect timeout is 5 seconds and the total
    timeout 8 seconds. Failures are swallowed on purpose: the function is
    best-effort and signals an unsuccessful request by returning ``None``.

    :param url: address to request
    :param user_agent: User-Agent header; skipped when falsy
    :param referrer: Referer header; skipped when ``None``
    :return: the response data, or ``None`` if the request raised
    """
    databuffer = StringIO()
    curl = pycurl.Curl()

    options = [
        (pycurl.URL, url),
        (pycurl.FOLLOWLOCATION, 1),
        (pycurl.CONNECTTIMEOUT, 5),
        (pycurl.TIMEOUT, 8),
        (pycurl.WRITEFUNCTION, databuffer.write),
        (pycurl.COOKIEFILE, ''),
    ]
    if user_agent:
        options.append((pycurl.USERAGENT, user_agent))
    if referrer is not None:
        options.append((pycurl.REFERER, referrer))
    for option, setting in options:
        curl.setopt(option, setting)

    result = None
    try:
        curl.perform()
        result = databuffer.getvalue()
    except Exception:
        pass

    curl.close()
    return result
def get_download_link(fs_id):
    """Request the download link for one file from the pan API.

    Calls ``const.PAN_API_URL + 'download?...'`` with cookies read from
    ``cookie.txt`` and parses the JSON reply.

    :param fs_id: file id, sent as the single element of ``fidlist``
    :return: ``data['dlink'][0]['dlink']`` from the reply, or ``None`` when
        the reply's ``errno`` is truthy
    """
    signature = decode_sign(parse_sign2('d76e889b6aafd3087ac3bd56f4d4053a',
                                        '3545d271c5d07ba27355d39da0c62a4ee06d2d25'))
    # Key order is kept as-is: the encoded query order follows the dict.
    query = {
        'channel': 'chunlei',
        'timestamp': '1473685224',
        'fidlist': [fs_id],
        'type': 'dlink',
        'web': 1,
        'clienttype': 0,
        'bdstoken': 'e0e895bb3ef7b0cb70899ee66b74e809',
        'sign': signature
    }
    target_url = const.PAN_API_URL + 'download?' + urllib.urlencode(query)

    sink = StringIO()
    handle = pycurl.Curl()
    handle.setopt(pycurl.USERAGENT, const.USER_AGENT)
    handle.setopt(pycurl.REFERER, const.PAN_REFER_URL)
    handle.setopt(pycurl.URL, target_url)
    handle.setopt(pycurl.WRITEDATA, sink)
    handle.setopt(pycurl.COOKIEFILE, "cookie.txt")
    handle.perform()

    raw_reply = sink.getvalue()
    sink.close()
    handle.close()

    reply = json.loads(raw_reply)
    return None if reply['errno'] else reply['dlink'][0]['dlink']
def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
    """Configure the curl handle ``self.c`` for the next request.

    :param url: request address; passed through ``myquote``
    :param get: query parameters; when truthy they are url-encoded and
        appended to the address after a ``?``
    :param post: POST payload; when falsy the handle is switched out of
        POST mode
    :param referer: when truthy and ``self.lastURL`` is set, ``self.lastURL``
        is sent as the Referer
    :param cookies: when truthy, COOKIEFILE and COOKIEJAR are set to the
        empty string and ``self.getCookies()`` is called
    :param multipart: send ``post`` as a multipart form (``HTTPPOST``)
        instead of url-encoded ``POSTFIELDS``
    """
    target = myquote(url)
    if get:
        target = "%s?%s" % (target, urlencode(get))

    self.c.setopt(pycurl.URL, target)
    self.c.lastUrl = target

    if not post:
        self.c.setopt(pycurl.POST, 0)
    elif multipart:
        self.c.setopt(pycurl.POST, 1)
        fields = []
        for name, value in post.iteritems():
            if type(value) == unicode:
                value = value.encode('utf8')
            fields.append((name, value))
        self.c.setopt(pycurl.HTTPPOST, fields)
    else:
        self.c.setopt(pycurl.POST, 1)
        kind = type(post)
        if kind == unicode:
            # pycurl does not accept unicode here
            payload = str(post)
        elif kind == str:
            payload = post
        else:
            payload = myurlencode(post)
        self.c.setopt(pycurl.POSTFIELDS, payload)

    if referer and self.lastURL:
        self.c.setopt(pycurl.REFERER, str(self.lastURL))

    if cookies:
        for cookie_option in (pycurl.COOKIEFILE, pycurl.COOKIEJAR):
            self.c.setopt(cookie_option, "")
        self.getCookies()
def curl_get(self, url, refUrl=None):
    """GET ``url`` with pycurl and return the body of a 200 response.

    Cookies are read from and written to ``/htdocs/logs/py_cookie.txt``.

    :param url: address to request
    :param refUrl: sent as the Referer header when truthy
    :return: the response body if the response code is 200, otherwise ``''``
    :raises pycurl.error: propagated from ``perform()``
    """
    cookie_path = '/htdocs/logs/py_cookie.txt'
    buf = cStringIO.StringIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(curl.URL, url)
        curl.setopt(curl.WRITEFUNCTION, buf.write)
        # NOTE(review): peer certificate verification is switched off, so
        # HTTPS responses are not authenticated. Confirm this is intended
        # (e.g. self-signed internal host) before relying on the content.
        curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        curl.setopt(pycurl.VERBOSE, 0)
        curl.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0')
        curl.setopt(pycurl.COOKIEJAR, cookie_path)
        curl.setopt(pycurl.COOKIEFILE, cookie_path)
        if refUrl:
            curl.setopt(pycurl.REFERER, refUrl)
        # NOTE(review): no CONNECTTIMEOUT/TIMEOUT is set, so perform() can
        # block for as long as the server keeps the connection open.
        curl.perform()
        if curl.getinfo(pycurl.RESPONSE_CODE) == 200:
            return buf.getvalue()
        return ''
    finally:
        # Release the handle even when perform() raises.
        curl.close()
def handle_request(self):
    """Send ``self.request_body`` to ``self.request_url`` and store the reply.

    Reads ``self.request_url``, ``self.useragent``, ``self.curlopts``
    (``TIMEOUT`` and ``CONNECTTIMEOUT``), ``self.request_headers`` and
    ``self.request_body``. On success it sets ``self.response_code``,
    ``self.response_headers`` and ``self.response_body``.

    :raises ChannelException: with code 5 when pycurl fails to perform the
        request
    """
    curl_handle = pycurl.Curl()
    response = StringIO.StringIO()
    try:
        # set default options.
        curl_handle.setopt(pycurl.URL, self.request_url)
        curl_handle.setopt(pycurl.REFERER, self.request_url)
        curl_handle.setopt(pycurl.USERAGENT, self.useragent)
        curl_handle.setopt(pycurl.TIMEOUT, self.curlopts['TIMEOUT'])
        curl_handle.setopt(pycurl.CONNECTTIMEOUT, self.curlopts['CONNECTTIMEOUT'])
        # Headers are written into the same buffer as the body and split
        # off below using HEADER_SIZE.
        curl_handle.setopt(pycurl.HEADER, True)
        curl_handle.setopt(pycurl.FOLLOWLOCATION, 1)
        curl_handle.setopt(pycurl.MAXREDIRS, 5)

        if self.request_headers:
            header_lines = [key + ':' + value
                            for key, value in self.request_headers.items()]
            curl_handle.setopt(pycurl.HTTPHEADER, header_lines)

        # Setting POSTFIELDS makes this a POST request.
        curl_handle.setopt(pycurl.HTTPPROXYTUNNEL, 1)
        curl_handle.setopt(pycurl.POSTFIELDS, self.request_body)
        curl_handle.setopt(pycurl.WRITEFUNCTION, response.write)

        try:
            curl_handle.perform()
        except pycurl.error as error:
            raise ChannelException(error, 5)

        self.response_code = curl_handle.getinfo(curl_handle.HTTP_CODE)
        header_size = curl_handle.getinfo(curl_handle.HEADER_SIZE)
        resp_str = response.getvalue()
        self.response_headers = resp_str[:header_size]
        self.response_body = resp_str[header_size:]
    finally:
        # Release the buffer and the handle on the error path as well.
        response.close()
        curl_handle.close()
def ccurlPost(url, value):
    """Fetch ``url`` with pycurl, optionally posting ``value`` as ``id``.

    :param url: address to request; converted with ``str()`` before use
    :param value: ``"no_redir"`` requests the page without following
        redirects; ``""`` requests it following redirects; any other value
        follows redirects and is sent as the URL-encoded POST field ``id``
    :return: the response bytes passed through ``getContentUnicode``
    """
    hdr = "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:45.0) Gecko/20100101 Firefox/45.0"
    follow_redirects = value != "no_redir"
    send_post = value != "" and value != "no_redir"

    storage = BytesIO()
    c = pycurl.Curl()
    try:
        if follow_redirects:
            c.setopt(c.FOLLOWLOCATION, True)
        else:
            print("no redirect")
        c.setopt(c.USERAGENT, hdr)
        if send_post:
            post_d = urllib.parse.urlencode({'id': value})
            c.setopt(c.POSTFIELDS, post_d)
        c.setopt(c.URL, str(url))
        c.setopt(c.WRITEDATA, storage)
        c.perform()
    finally:
        # The handle must be released whether or not the transfer worked.
        c.close()

    content = getContentUnicode(storage.getvalue())
    return content
def list_dir(dir_name):
    """List a pan directory and resolve a download link for each entry.

    Requests ``const.PAN_API_URL + 'list?...'`` with cookies read from
    ``cookie.txt``, prints the raw reply, and calls ``get_download_link``
    for every item of the reply's ``list``.

    :param dir_name: directory path, sent as the ``dir`` query parameter
    :return: list of download links with backslashes removed; empty when
        the reply's ``errno`` is not 0
    """
    # Key order is kept as-is: the encoded query order follows the dict.
    query = {
        'channel': 'chunlei',
        'clienttype': 0,
        'showempty': 0,
        'web': 1,
        'order': 'time',
        'desc': 1,
        'page': 1,
        'num': 100,
        'dir': dir_name,
        'bdstoken': 'e0e895bb3ef7b0cb70899ee66b74e809'
    }
    target_url = const.PAN_API_URL + 'list?' + urllib.urlencode(query)

    sink = StringIO()
    handle = pycurl.Curl()
    handle.setopt(pycurl.USERAGENT, const.USER_AGENT)
    handle.setopt(pycurl.REFERER, const.PAN_REFER_URL)
    handle.setopt(pycurl.URL, target_url)
    handle.setopt(pycurl.WRITEDATA, sink)
    handle.setopt(pycurl.COOKIEFILE, "cookie.txt")
    handle.perform()

    raw_reply = sink.getvalue()
    print(raw_reply)
    sink.close()
    handle.close()

    links = list()
    reply = json.loads(raw_reply)
    if reply['errno'] != 0:
        return links

    for entry in reply['list']:
        link = get_download_link(entry['fs_id'])
        if not link:
            continue
        links.append(link.replace('\\', ''))
    return links
def get_dlinks(search_target, get_dlinks_only=True):
    """Crawl the image-search API page by page and collect image URLs.

    Paging starts at the cursor returned by
    ``get_record_start_cursor(const.CURSOR_FILE)`` (0 when that is falsy)
    and advances 30 records per request until a page has no ``data`` key or
    yields no ``objURL``. A non-zero final cursor is written back with
    ``set_record_start_cursor`` and every collected URL is passed through
    ``decode_url``.

    :param search_target: search keyword substituted into
        ``const.REFER_URL`` and ``const.API_URL``
    :param get_dlinks_only: when false, the decoded URLs are also saved to
        ``<search_target>.txt`` under ``const.BASE_FOLDER``
    :return: list of decoded image URLs
    """
    refer_url = const.REFER_URL % search_target
    result = []
    ll = 0
    record_start_cursor = get_record_start_cursor(const.CURSOR_FILE)
    if record_start_cursor:
        ll = int(record_start_cursor)

    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.USERAGENT, const.USER_AGENT)
        curl.setopt(pycurl.REFERER, refer_url)
        print('start')
        while True:
            print('crawler pictures of page %d' % (ll // 30 + 1))
            # A fresh buffer per page so responses do not accumulate.
            buffers = StringIO()
            target_url = const.API_URL % (search_target, search_target, ll)
            curl.setopt(pycurl.URL, target_url)
            curl.setopt(pycurl.WRITEDATA, buffers)
            curl.perform()
            body = buffers.getvalue()
            # SECURITY(review): eval() executes whatever the remote server
            # returns. Kept because it is unclear whether the payload is
            # strict JSON (only ``null`` is translated here); replace with
            # json.loads or ast.literal_eval once the format is confirmed.
            body = body.replace('null', 'None')
            data = eval(body)
            if 'data' not in data:
                print('no more pic')
                break
            page_urls = [item['objURL'] for item in data['data']
                         if 'objURL' in item and item['objURL']]
            if not page_urls:
                print('no more pic')
                break
            result.extend(page_urls)
            ll += 30
        print('done')
    finally:
        # Release the handle even when a request or the parse step fails.
        curl.close()

    # Remember where to resume on the next run.
    if ll:
        set_record_start_cursor(str(ll), const.CURSOR_FILE)
    result = [decode_url(encoded) for encoded in result]
    if not get_dlinks_only:
        save_to_file(result, search_target + '.txt', const.BASE_FOLDER)
    # The collected links were previously built but never handed back.
    return result