The following 19 code examples, extracted from open-source Python projects, illustrate how to use urllib.error.ContentTooShortError().
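Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the URL and filenames are placeholders) of the standard-library behaviour most of them rely on: urllib.request.urlretrieve raises ContentTooShortError when it receives fewer bytes than the size announced in the Content-Length header.

from urllib.error import ContentTooShortError
from urllib.request import urlretrieve

try:
    # Placeholder URL: urlretrieve compares the bytes actually read against
    # the server's Content-Length header and raises if the body is truncated.
    filename, headers = urlretrieve('http://example.com/archive.zip', 'archive.zip')
except ContentTooShortError as e:
    # ContentTooShortError subclasses URLError, so the message is available
    # as e.reason; whatever partial result was produced is attached as e.content.
    print('Truncated download:', e.reason)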
def _get(url):
    LOG.debug('GET {u!r}'.format(u=url))
    # TODO proper error handling - or none
    try:
        response = urlopen(url)
    except HTTPError:
        raise
    except ContentTooShortError:
        raise
    except URLError:
        raise
    except Exception:
        raise
    LOG.debug('{} {}'.format(response.status, response.reason))
    if response.status not in (200,):
        raise ValueError('{} {}'.format(response.status, response.reason))
    return response
def test_short_content_raises_ContentTooShortError(self):
    self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

    def _reporthook(par1, par2, par3):
        pass

    with self.assertRaises(urllib_error.ContentTooShortError):
        try:
            urllib_request.urlretrieve('http://example.com/',
                                       reporthook=_reporthook)
        finally:
            self.unfakehttp()
def download(url, num_retries=2, user_agent='wswp', charset='utf-8'):
    print('Downloading:', url)
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agent)
    try:
        resp = urllib.request.urlopen(request)
        cs = resp.headers.get_content_charset()
        if not cs:
            cs = charset
        html = resp.read().decode(cs)
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # recursively retry 5xx HTTP errors
                return download(url, num_retries - 1)
    return html
def download(url, local, **kwargs):
    if not local:
        raise ValueError('local filepath is empty')
    try:
        if not os.path.exists(os.path.dirname(local)):
            os.makedirs(os.path.dirname(local))
        res = Request(url, **kwargs)
        read_size = 0
        real_size = int(res.header['content-length'])
        with open(local, 'wb') as f:
            while True:
                block = res.response.read(1024 * 8)
                if not block:
                    break
                f.write(block)
                read_size += len(block)
        if read_size < real_size:
            raise ContentTooShortError(
                'retrieval incomplete: got only {} out of {} bytes'.format(read_size, real_size),
                None
            )
    except Exception as e:
        raise e
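The two-argument constructor used above mirrors how urllib.request.urlretrieve raises the exception itself: the first argument is the error message (exposed as .reason, since ContentTooShortError subclasses URLError), and the second is stored on the exception as .content; this example simply passes None for it.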
def download(url, user_agent='wswp', num_retries=2, charset='utf-8'):
    print('Downloading:', url)
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agent)
    try:
        resp = urllib.request.urlopen(request)
        cs = resp.headers.get_content_charset()
        if not cs:
            cs = charset
        html = resp.read().decode(cs)
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # recursively retry 5xx HTTP errors
                return download(url, num_retries - 1)
    return html
def test_short_content_raises_ContentTooShortError_without_reporthook(self):
    self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
    with self.assertRaises(urllib_error.ContentTooShortError):
        try:
            urllib_request.urlretrieve('http://example.com/')
        finally:
            self.unfakehttp()
def download(url, num_retries=2, user_agent='wswp', charset='utf-8', proxy=None):
    """ Download a given URL and return the page content
        args:
            url (str): URL
        kwargs:
            user_agent (str): user agent (default: wswp)
            charset (str): charset if website does not include one in headers
            proxy (str): proxy url, ex 'http://IP' (default: None)
            num_retries (int): number of retries if a 5xx error is seen (default: 2)
    """
    print('Downloading:', url)
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agent)
    try:
        if proxy:
            proxy_support = urllib.request.ProxyHandler({'http': proxy})
            opener = urllib.request.build_opener(proxy_support)
            urllib.request.install_opener(opener)
        resp = urllib.request.urlopen(request)
        cs = resp.headers.get_content_charset()
        if not cs:
            cs = charset
        html = resp.read().decode(cs)
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # recursively retry 5xx HTTP errors
                return download(url, num_retries - 1)
    return html
def download(url, num_retries=2):
    print('Downloading:', url)
    try:
        html = urllib.request.urlopen(url).read()
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # recursively retry 5xx HTTP errors
                return download(url, num_retries - 1)
    return html
def download(url):
    print('Downloading:', url)
    try:
        html = urllib.request.urlopen(url).read()
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e.reason)
        html = None
    return html
def download(url, num_retries=2, user_agent='wswp', charset='utf-8', proxy=None):
    """ Download a given URL and return the page content
        args:
            url (str): URL
        kwargs:
            user_agent (str): user agent (default: wswp)
            charset (str): charset if website does not include one in headers
            proxy (str): proxy url, ex 'http://IP' (default: None)
            num_retries (int): number of retries if a 5xx error is seen (default: 2)
    """
    print('Downloading:', url)
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agent)
    try:
        if proxy:
            proxy_support = urllib.request.ProxyHandler({'http': proxy})
            opener = urllib.request.build_opener(proxy_support)
            urllib.request.install_opener(opener)
        resp = urllib.request.urlopen(request)
        cs = resp.headers.get_content_charset()
        if not cs:
            cs = charset
        html = resp.read().decode(cs)
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # recursively retry 5xx HTTP errors
                return download(url, num_retries - 1)
    return html
def download(url, num_retries=2, user_agent='wswp'):
    print('Downloading:', url)
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agent)
    try:
        html = urllib.request.urlopen(request).read()
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # recursively retry 5xx HTTP errors
                return download(url, num_retries - 1)
    return html
def download(url, user_agent='wswp', num_retries=2, charset='utf-8', proxy=None):
    """ Download a given URL and return the page content
        args:
            url (str): URL
        kwargs:
            user_agent (str): user agent (default: wswp)
            charset (str): charset if website does not include one in headers
            proxy (str): proxy url, ex 'http://IP' (default: None)
            num_retries (int): number of retries if a 5xx error is seen (default: 2)
    """
    print('Downloading:', url)
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agent)
    try:
        if proxy:
            proxy_support = urllib.request.ProxyHandler({'http': proxy})
            opener = urllib.request.build_opener(proxy_support)
            urllib.request.install_opener(opener)
        resp = urllib.request.urlopen(request)
        cs = resp.headers.get_content_charset()
        if not cs:
            cs = charset
        html = resp.read().decode(cs)
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # recursively retry 5xx HTTP errors
                return download(url, num_retries - 1)
    return html
def download(url, user_agent='wswp', num_retries=2, charset='utf-8', proxy=None):
    """ Download a given URL and return the page content
        args:
            url (str): URL
        kwargs:
            user_agent (str): user agent (default: wswp)
            charset (str): charset if website does not include one in headers
            proxy (str): proxy url, ex 'http://IP' (default: None)
            num_retries (int): number of retries if a 5xx error is seen (default: 2)
    """
    print('Downloading:', url)
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agent)
    try:
        if proxy:
            proxy_support = urllib.request.ProxyHandler({'http': proxy})
            opener = urllib.request.build_opener(proxy_support)
            urllib.request.install_opener(opener)
        resp = urllib.request.urlopen(request)
        cs = resp.headers.get_content_charset()
        if not cs:
            cs = charset
        html = resp.read().decode(cs)
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # recursively retry 5xx HTTP errors
                return download(url, num_retries - 1)
    return html
def download(url, user_agent='wswp', num_retries=2):
    print('Downloading:', url)
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agent)
    try:
        html = urllib.request.urlopen(request).read()
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e.reason)
        html = None
        if num_retries > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                # recursively retry 5xx HTTP errors
                return download(url, num_retries - 1)
    return html
def get_zhaopin_html(jobarea_name, job_type):
    url_temp = "http://sou.zhaopin.com/jobs/searchresult.ashx?jl={jobarea_name}&kw={job_type}&sm=0&p=1&source=1"
    url = url_temp.format(jobarea_name=urllib.request.quote(jobarea_name),
                          job_type=urllib.request.quote(job_type))
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36'
    }
    logger.warning("-------->15-->02")
    try:
        time.sleep(2)
        req = urllib.request.Request(url, None, headers)
        response = urlopen(req)
        # response = urllib.request.urlopen(url)
    except HTTPError as e1:
        print("The (www.python.org)server couldn't fulfill the request.")
        logger.error('-------->15-->02->HTTPError-> %s' % (e1.msg))
    except URLError as e2:
        print('We failed to reach a server.')
        logger.error('-------->15-->03->URLError->%s' % (e2.reason))
    except ContentTooShortError as e3:
        print('Reason: ', e3.reason)
        logger.error('-------->15-->04->ContentTooShortError-> %s' % (e3.reason))
    else:
        html = ""
        logger.warning("-------->15-->05")
        try:
            if response:
                logger.warning("-------->15-->05-->01a")
                html = response.read()
                logger.warning("-------->15-->05-->01b")
            else:
                logger.warning("-------->15-->05-->02")
        except Exception as e4:
            logger.error('-------->15-->06->URLError->%s' % (str(e4)))
        finally:
            logger.warning("-------->15-->07-->finally")
            response.close()
        logger.warning("-------->15-->08")
        if html:
            return html.decode('UTF-8')
        else:
            logger.error("-------->15-->09-> html is None")
            return ""