我们从Python开源项目中,提取了以下18个代码示例,用于说明如何使用pycurl.ENCODING。
def get_connection(): # pycurl initialization h = pycurl.Curl() # follow redirects h.setopt(pycurl.FOLLOWLOCATION, False) # enable compression h.setopt(pycurl.ENCODING, 'gzip, deflate') # certifi h.setopt(pycurl.CAINFO, certifi.where()) # no signal h.setopt(pycurl.NOSIGNAL, 1) # certificate informations h.setopt(pycurl.OPT_CERTINFO, 1) return h
def test_post(self): curl = CurlStub(b"result") result = fetch("http://example.com", post=True, curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.POST: True, pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"})
def test_post_data(self): curl = CurlStub(b"result") result = fetch("http://example.com", post=True, data="data", curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options[pycurl.READFUNCTION](), b"data") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.POST: True, pycurl.POSTFIELDSIZE: 4, pycurl.READFUNCTION: Any(), pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"})
def test_cainfo(self): curl = CurlStub(b"result") result = fetch("https://example.com", cainfo="cainfo", curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"https://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.CAINFO: b"cainfo", pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"})
def test_headers(self): curl = CurlStub(b"result") result = fetch("http://example.com", headers={"a": "1", "b": "2"}, curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.HTTPHEADER: ["a: 1", "b: 2"], pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"})
def test_pycurl_insecure(self): curl = CurlStub(b"result") result = fetch("http://example.com/get-ca-cert", curl=curl, insecure=True) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com/get-ca-cert", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.SSL_VERIFYPEER: False, pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"})
def initHandle(self): """ sets common options to curl handle """ self.c.setopt(pycurl.FOLLOWLOCATION, 1) self.c.setopt(pycurl.MAXREDIRS, 5) self.c.setopt(pycurl.CONNECTTIMEOUT, 30) self.c.setopt(pycurl.NOSIGNAL, 1) self.c.setopt(pycurl.NOPROGRESS, 1) if hasattr(pycurl, "AUTOREFERER"): self.c.setopt(pycurl.AUTOREFERER, 1) self.c.setopt(pycurl.SSL_VERIFYPEER, 0) self.c.setopt(pycurl.LOW_SPEED_TIME, 30) self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5) #self.c.setopt(pycurl.VERBOSE, 1) self.c.setopt(pycurl.USERAGENT, "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") if pycurl.version_info()[7]: self.c.setopt(pycurl.ENCODING, "gzip, deflate") self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*", "Accept-Language: en-US,en", "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", "Connection: keep-alive", "Keep-Alive: 300", "Expect:"])
def Curl(url,headers): while 1: try: c = pycurl.Curl() c.setopt(pycurl.REFERER, 'http://weixin.sogou.com/') c.setopt(pycurl.FOLLOWLOCATION, True) c.setopt(pycurl.MAXREDIRS,5) c.setopt(pycurl.CONNECTTIMEOUT, 60) c.setopt(pycurl.TIMEOUT,120) c.setopt(pycurl.ENCODING, 'gzip,deflate') c.fp = StringIO.StringIO() c.setopt(pycurl.URL, url) c.setopt(pycurl.HTTPHEADER,headers) c.setopt(c.WRITEFUNCTION, c.fp.write) c.perform() html = c.fp.getvalue() if '??????' in html: print u'??????,??10??' time.sleep(600) else: return html except Exception, e: print url,'curl(url)',e continue #????????
def getHtml(url,headers): c = pycurl.Curl() #??curl???????? c.setopt(pycurl.URL, url) #??????URL c.setopt(pycurl.FOLLOWLOCATION, True) #???????? c.setopt(pycurl.MAXREDIRS,5) #????????? c.setopt(pycurl.CONNECTTIMEOUT, 60) #?????? c.setopt(pycurl.TIMEOUT,120) #???? c.setopt(pycurl.ENCODING, 'gzip,deflate') #??gzip???????????????????gzip?????????gzip?????? c.fp = StringIO.StringIO() #??StringIO?? c.setopt(pycurl.HTTPHEADER,headers) #????? c.setopt(pycurl.POST, 1) #??get c.setopt(pycurl.POSTFIELDS, data) #??POST?? c.setopt(c.WRITEFUNCTION, c.fp.write) #??????? c.perform() #?? html = c.fp.getvalue() #????? return html
def curl(url, debug=False, **kwargs): while 1: try: s = StringIO.StringIO() c = pycurl.Curl() c.setopt(pycurl.URL, url) c.setopt(pycurl.REFERER, url) c.setopt(pycurl.FOLLOWLOCATION, True) c.setopt(pycurl.TIMEOUT, 60) c.setopt(pycurl.ENCODING, 'gzip') c.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36') c.setopt(pycurl.NOSIGNAL, True) c.setopt(pycurl.WRITEFUNCTION, s.write) for k, v in kwargs.iteritems(): c.setopt(vars(pycurl)[k], v) c.perform() c.close() return s.getvalue() except: if debug: raise continue
def test_gzip(url): t = Test() c = pycurl.Curl() c.setopt(pycurl.WRITEFUNCTION,t.callback) c.setopt(pycurl.ENCODING, 'gzip') c.setopt(pycurl.URL,url) c.setopt(pycurl.USERAGENT,"User-Agent':'EMAO_OPS_MONITOR) Gecko/20091201 Firefox/3.5.6)") c.perform() TOTAL_TIME = c.getinfo(c.TOTAL_TIME) #print "????????%.2f ms" %(TOTAL_TIME*1000) return TOTAL_TIME * 1000
def test_basic(self): curl = CurlStub(b"result") result = fetch("http://example.com", curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"})
def test_create_curl(self): curls = [] def pycurl_Curl(): curl = CurlStub(b"result") curls.append(curl) return curl Curl = pycurl.Curl try: pycurl.Curl = pycurl_Curl result = fetch("http://example.com") curl = curls[0] self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"}) finally: pycurl.Curl = Curl
def getPage (self, url, requestHeader = []) : resultFormate = StringIO.StringIO() fakeIp = self.fakeIp() requestHeader.append('CLIENT-IP:' + fakeIp) requestHeader.append('X-FORWARDED-FOR:' + fakeIp) try: curl = pycurl.Curl() curl.setopt(pycurl.URL, url.strip()) curl.setopt(pycurl.ENCODING, 'gzip,deflate') curl.setopt(pycurl.HEADER, 1) curl.setopt(pycurl.TIMEOUT, 120) curl.setopt(pycurl.SSL_VERIFYPEER, 0) curl.setopt(pycurl.SSL_VERIFYHOST, 0) curl.setopt(pycurl.HTTPHEADER, requestHeader) curl.setopt(pycurl.WRITEFUNCTION, resultFormate.write) curl.perform() headerSize = curl.getinfo(pycurl.HEADER_SIZE) curl.close() header = resultFormate.getvalue()[0 : headerSize].split('\r\n') body = resultFormate.getvalue()[headerSize : ] except Exception, e: header = '' body = '' return header, body
def getKeyword(i):#??json try: time.sleep(1) headers = [ 'Host:fengchao.baidu.com', 'User-Agent: %s' %getUA(), 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3', 'Accept-Encoding: gzip, deflate', 'Referer: http://fengchao.baidu.com/nirvana/main.html?userid=8048066', 'Connection: keep-alive', 'COOKIE:%s' %COOKIE, ] post = urllib.urlencode({ 'params': '{"entry":"kr_station","query":"%s","querytype":1,"pageNo":1,"pageSize":300}' % keyword_list[i], 'path': 'jupiter/GET/kr/word', 'token': TOKEN, 'userid': USERID, }) url = 'http://fengchao.baidu.com/nirvana/request.ajax?path=jupiter/GET/kr/word' c = pycurl.Curl() # c.setopt(pycurl.PROXY, getRandomAlbIp()) c.setopt(pycurl.URL, url) c.setopt(pycurl.FOLLOWLOCATION, True) c.setopt(pycurl.MAXREDIRS,5) c.setopt(pycurl.CONNECTTIMEOUT, 20) c.setopt(pycurl.TIMEOUT,20) c.setopt(pycurl.ENCODING, 'gzip,deflate') c.fp = StringIO.StringIO() c.setopt(pycurl.HTTPHEADER,headers) c.setopt(pycurl.POST, 1) c.setopt(pycurl.POSTFIELDS, post) c.setopt(c.WRITEFUNCTION, c.fp.write) c.perform() # mutex.acquire()#?? jsonData = c.fp.getvalue() analyseJsonData(i,jsonData) # mutex.release()#?? except Exception,e: print e pass
def init_handle(self): """ Sets common options to curl handle. """ self.setopt(pycurl.FOLLOWLOCATION, 1) self.setopt(pycurl.MAXREDIRS, 5) self.setopt(pycurl.CONNECTTIMEOUT, 30) self.setopt(pycurl.NOSIGNAL, 1) self.setopt(pycurl.NOPROGRESS, 1) if hasattr(pycurl, "AUTOREFERER"): self.setopt(pycurl.AUTOREFERER, 1) self.setopt(pycurl.SSL_VERIFYPEER, 0) # Interval for low speed, detects connection loss, but can abort dl if # hoster stalls the download self.setopt(pycurl.LOW_SPEED_TIME, 45) self.setopt(pycurl.LOW_SPEED_LIMIT, 5) # do not save the cookies self.setopt(pycurl.COOKIEFILE, '') self.setopt(pycurl.COOKIEJAR, '') # self.setopt(pycurl.VERBOSE, 1) self.setopt( pycurl.USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; rv:53.0) ' 'Gecko/20100101 Firefox/53.0') if pycurl.version_info()[7]: self.setopt(pycurl.ENCODING, 'gzip,deflate') self.headers.update( {'Accept': "*/*", 'Accept-Language': "en-US,en", 'Accept-Charset': "ISO-8859-1,utf-8;q=0.7,*;q=0.7", 'Connection': "keep-alive", 'Keep-Alive': "300", 'Expect': ""})
def _create_handle(self): c = pycurl.Curl() # c.setopt(pycurl.VERBOSE, 1) c.setopt(pycurl.CONNECTTIMEOUT, CONNECT_TIMEOUT) c.setopt(pycurl.TIMEOUT, TIMEOUT) c.setopt(pycurl.ENCODING, 'gzip, deflate') return c
def pidePOST(self,url,data,compressed = False,cookie=False, contador_curl = 0, debug=False): time.sleep(3) Scrape.contador+=1 print ("\n"+url) print ("\n\t.l."+str(Scrape.contador)) c = pycurl.Curl() if cookie: c.setopt(pycurl.COOKIEJAR, 'cookie.txt') c.setopt(pycurl.COOKIEFILE, 'cookie.txt') c.setopt(pycurl.URL, url) c.setopt(pycurl.CONNECTTIMEOUT, 15) c.setopt(pycurl.TIMEOUT, 25) c.setopt(pycurl.HTTPHEADER, self.headers) if compressed: c.setopt(pycurl.ENCODING, 'gzip,deflate') c.setopt(c.POSTFIELDS, data) if debug: c.setopt(c.VERBOSE, True) c.setopt( pycurl.PROXY, '127.0.0.1' ) c.setopt( pycurl.PROXYPORT, 9050 ) c.setopt( pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5_HOSTNAME ) b = BytesIO() BytesIO c.setopt(pycurl.WRITEFUNCTION, b.write) self.url = url try: c.perform() self.response_string = b.getvalue() #print (self.response_string) b.close() except Exception as e: #print ('Razon:',e) self.response_string = None