The following eight code examples, extracted from open-source Python projects, illustrate how to use urllib.error.URLError().
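For orientation, here is a minimal sketch of the pattern all eight examples share (the function name and the print-based error handling are illustrative, not taken from any of the projects below). Because HTTPError is a subclass of URLError, it must be caught first; otherwise the URLError clause would swallow it.

import urllib.request
import urllib.error

def fetch(url):
    """Return the response body as bytes, or None if the request fails."""
    try:
        resp = urllib.request.urlopen(url, timeout=3)
    except urllib.error.HTTPError as e:
        # The server responded, but with an error status code.
        print("HTTP error:", e.code, e.reason)
        return None
    except urllib.error.URLError as e:
        # The server could not be reached at all (DNS failure, refused connection, ...).
        print("URL error:", e.reason)
        return None
    return resp.read()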
Example 1

from urllib import request, error
from bs4 import BeautifulSoup

def rank_checker(url, hatebu_url):
    # Look up the bookmark rank of a URL on a Hatena Bookmark ranking page.
    try:
        html = request.urlopen(hatebu_url)
    except error.HTTPError as e:
        print(e.reason)
        return None
    except error.URLError as e:
        print(e.reason)
        return None
    soup = BeautifulSoup(html, "lxml")
    a = soup.find("a", href=url)
    if a is None:
        rank = None
    else:
        rank = a.get("data-entryrank")
    return rank
Example 2

import http.cookiejar
import logging
import urllib.request
import urllib.error

def prepare(self, first_url):
    # Open the logout URL once to collect the session cookies.
    tmp_cookie = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(tmp_cookie))
    try:
        response = opener.open(self.logout_url, timeout=3)
    except urllib.error.HTTPError as e:
        logging.warning("server process request error: err_code=%s", e.code)
        return -5, None
    except urllib.error.URLError as e:
        logging.warning("reach server error: reason=%s", e.reason)
        return -10, None
    except Exception as e:
        logging.warning("other exception: msg=%s", e)
        return -100, None
    for item in tmp_cookie:
        self.my_cookie += item.name + "=" + item.value + ";"
    # htm = response.read()
    return 0, None
Example 3

from urllib import request, error
from bs4 import BeautifulSoup

def get_category(url):
    # Return the Hatena Bookmark category of the given URL.
    try:
        html = request.urlopen("http://b.hatena.ne.jp/entry/{}".format(url))
        soup = BeautifulSoup(html, "lxml")
        return soup.find("html").get("data-category-name")
    except error.HTTPError as e:
        print(e.reason)
    except error.URLError as e:
        print(e.reason)
Example 4

from urllib import request, error
from bs4 import BeautifulSoup

def is_hatenatop(url):
    # Check whether the given URL appears on the Hatena Blog top page.
    try:
        html = request.urlopen("http://hatenablog.com/")
    except error.HTTPError as e:
        print(e.reason)
        return False
    except error.URLError as e:
        print(e.reason)
        return False
    soup = BeautifulSoup(html, "lxml")
    a = soup.find("a", href=url)
    if a is None:
        return False
    return url == a.get("href")
Example 5

import logging
import traceback
import urllib.request
import urllib.error

def crawl_detail(self):
    # Fetch the detail page of every IPO in the list, retrying up to 3 times.
    # attr_report and user_agent are defined elsewhere in the project.
    for ipo in self.ipo_list:
        attr_report(446076)
        url = self.detail_url % ipo["code"]
        req = urllib.request.Request(url=url)
        req.add_header('User-agent', user_agent)
        retry_cnt = 0
        while retry_cnt < 3:
            try:
                try:
                    resp = urllib.request.urlopen(req, timeout=3)
                except urllib.error.HTTPError as e:
                    logging.warning("server process request error: err_code=%s", e.code)
                    return -5, None
                except urllib.error.URLError as e:
                    logging.warning("reach server error: reason=%s", e.reason)
                    return -10, None
                except Exception as e:
                    logging.warning("other exception: msg=%s", e)
                    return -100, None
                html_text = resp.read().decode("gbk")
                resp.close()
                if self._parse_detail(html_text, ipo):
                    break
                else:
                    retry_cnt += 1
            except Exception as e:
                retry_cnt += 1
                logging.warning('cn craw {0} detail ex:{1}, {2}'.format(ipo["code"], e, traceback.format_exc()))
        if retry_cnt >= 3:
            attr_report(441846)
            logging.info("cn craw {0} detail fail".format(ipo["code"]))
        else:
            attr_report(441845)
Example 6

import logging
import urllib.request
import urllib.error
from urllib.parse import urlencode

def post_to_url(self, request_url, post_data):
    # POST form data to request_url, sending the stored session cookie.
    post_encode = urlencode(post_data).encode()
    req = urllib.request.Request(url=request_url, data=post_encode)
    req.add_header('Cookie', self.my_cookie)
    try:
        resp = urllib.request.urlopen(req, timeout=3)
    except urllib.error.HTTPError as e:
        logging.warning("server process request error: err_code=%s", e.code)
        return -5, None
    except urllib.error.URLError as e:
        logging.warning("reach server error: reason=%s", e.reason)
        return -10, None
    except Exception as e:
        logging.warning("other exception: msg=%s", e)
        return -100, None
    htm = resp.read()
    return 0, htm
Example 7

import logging
import urllib.request
import urllib.error

def get_to_url(self, request_url, get_data):
    # GET request_url with an optional query string, sending the stored session cookie.
    if get_data == "":
        tmp_url = request_url
    else:
        tmp_url = request_url + "?" + get_data
    req = urllib.request.Request(url=tmp_url)
    req.add_header('Cookie', self.my_cookie)
    try:
        resp = urllib.request.urlopen(req, timeout=3)
    except urllib.error.HTTPError as e:
        logging.warning("server process request error: err_code=%s", e.code)
        return -5, None
    except urllib.error.URLError as e:
        logging.warning("reach server error: reason=%s", e.reason)
        return -10, None
    except Exception as e:
        logging.warning("other exception: msg=%s", e)
        return -100, None
    htm = resp.read()
    return 0, htm
Example 8 (the snippet is truncated in the source: the urlopen call is incomplete, so the target-URL attribute below is an assumption):

import urllib.request
import urllib.error

def _fetch_url(self):
    # Download the resource and buffer it in memory; on connection
    # failure, fall back to the next mirror server.
    try:
        r = urllib.request.urlopen(self.url)  # self.url is assumed; the original call was cut off
        data = r.read()
        self.f = xio.StringIO(data)  # xio: the project's StringIO module alias
        self._fetched = True
    except urllib.error.URLError:
        return self.fetch_next_server()