The following 7 code examples, extracted from open-source Python projects, illustrate how to use requests.TooManyRedirects().
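Before the project examples, a minimal, self-contained sketch of the basic pattern may help (the function name fetch and the redirect limit of 5 are illustrative choices, not taken from any project below). requests raises requests.TooManyRedirects when a redirect chain exceeds Session.max_redirects (30 by default); because it is a subclass of requests.RequestException, it should be caught before the more general class.

import requests

def fetch(url):
    """Fetch a URL, treating an excessive redirect chain as a failure."""
    session = requests.Session()
    session.max_redirects = 5  # lower the default limit of 30 so redirect loops fail faster
    try:
        response = session.get(url, timeout=5)
        response.raise_for_status()
        return response.text
    except requests.TooManyRedirects as error:
        # raised when the redirect chain exceeds session.max_redirects
        print('too many redirects for %s: %s' % (url, error))
    except requests.RequestException as error:
        # catch-all for other requests failures (connection errors, timeouts, HTTP errors)
        print('request failed for %s: %s' % (url, error))
    return None

The seven project examples below follow the same idea, catching TooManyRedirects either on its own or in a tuple alongside ConnectionError, Timeout and HTTPError.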
def can_access(url):
    mod = 'can_access'
    answer = "U"
    response = None
    try:
        response = requests.get(url, timeout=5)
        current_page = BeautifulSoup(response.text, 'lxml')
        answer = "SL"
    except requests.exceptions.ConnectionError:
        print("ERROR: Page is inaccessible, return U and move to next case.")
    except requests.exceptions.Timeout as e:
        print(e)
    except requests.TooManyRedirects as e:
        print(e)
    except requests.exceptions.ChunkedEncodingError as e:
        print(e)
    except socket.error as e:
        print(e)
    return answer, response, mod
def can_access(url):
    mod = 'can_access'
    answer = "U"
    response = None
    try:
        response = requests.get(url, timeout=5)
        answer = "SL"
    except:
        print(sys.exc_info()[0])
    # disabled alternative: catch each exception type separately
    """
    except requests.exceptions.ConnectionError as e:
        print(e)
    except requests.exceptions.Timeout as e:
        print(e)
    except requests.TooManyRedirects as e:
        print(e)
    except requests.exceptions.ChunkedEncodingError as e:
        print(e)
    except requests.exceptions.ContentDecodingError as e:
        print(e)
    except socket.error as e:
        print(e)
    """
    return answer, response, mod
def movie_spider(movie_tag):
    page_num = 0
    movie_list = list()
    try_times = 0
    while True:
        url = 'https://www.douban.com/tag/' + urllib.request.quote(movie_tag) + '/movie?start=' + str(page_num * 15)
        time.sleep(numpy.random.rand() * 5)  # hang up the thread to avoid requesting too frequently
        try:
            req = requests.get(url, headers=User_Agents[page_num % len(User_Agents)], timeout=50)
            req.raise_for_status()
            req.encoding = req.apparent_encoding
            source_code = req.text
            plain_text = str(source_code)
        except (requests.HTTPError, requests.URLRequired,
                requests.Timeout, requests.TooManyRedirects) as error:
            print(error)
            continue
        soup = BeautifulSoup(plain_text, 'lxml')
        list_soup = soup.find('div', attrs={'class': 'mod movie-list'})
        try_times += 1
        if list_soup == None and try_times < 200:
            continue
        elif list_soup == None or len(list_soup) <= 1:
            break  # give up: no information returned after 200 attempts
        for movie_info in list_soup.findAll('dd'):
            page_parser(movie_info, movie_list)
        try_times = 0  # reset to 0 once valid information is received
        page_num += 1
        print("Downloading Information From Tag: {1} Page: {0} ".format(page_num, movie_tag))
    print('Finish Catching Tag -> {0}'.format(movie_tag))
    return movie_list
def get_latest_version_infos(url, filename='data/.editolido.cfg.json'):
    infos = infos_from_giturl(url)
    jsonurl = raw_content_url(url, filename, branch_or_tag=infos['branch'])
    logger.info('downloading %s' % jsonurl)
    try:
        r = requests.get(jsonurl, verify=True, timeout=(3.1, 27))
        r.raise_for_status()
        data = r.json()
        r.close()
    except requests.HTTPError:
        # noinspection PyUnboundLocalVariable
        logger.error('status code %s' % r.status_code)
        raise
    except requests.Timeout:  # pragma no cover
        logger.error('download timeout... aborting update')
        raise
    except requests.ConnectionError:  # pragma no cover
        logger.error('download connection error... aborting update')
        raise
    except requests.TooManyRedirects:  # pragma no cover
        logger.error('too many redirects... aborting update')
        raise
    except requests.exceptions.RequestException:  # pragma no cover
        logger.error('download fail... aborting update')
        raise
    return data
def url_request(self, method_, url_, payloads_, headers_):
    """
    @params:
        method, HTTP method: 'GET', 'POST', 'PUT', 'DELETE' or 'HEAD'
        url, string, absolute URL of the resource on the target web server
        payloads, dict or None, extra data to send when visiting the resource
        headers, dict or None, custom headers
    @return:
        if the HTTP status is 200, returns [[data], url, payloads, headers]
        if a connection error occurs or the HTTP status is not 200, returns None
    """
    try:
        header = self.default_headers if headers_ is None else headers_
        payloads = urllib.urlencode(payloads_) if payloads_ is not None else None
        if payloads is not None:
            pass
        rqst = self.session.request(method=method_, url=url_, params=payloads,
                                    headers=header, timeout=10)
        if 'Set-Cookie' in rqst.headers or 'Set-Cookie2' in rqst.headers:
            self.session.cookies.save(ignore_discard=True)
        if rqst.status_code != 200:
            rqst = self.session.request(method=method_, url=url_, params=payloads,
                                        headers=header, timeout=10)
            if rqst.status_code != 200:
                gl.g_fail_url.warning('%s %s' % (url_, str(payloads_)))
                return None
        return HttpQuint(url_, headers_, payloads_, [rqst.content], rqst.headers)
        # return [[rqst.content], method_, url_, payloads_, headers_]
    except (requests.HTTPError, requests.Timeout, requests.ConnectionError,
            requests.TooManyRedirects) as e:
        tips = '%s when visit %s ' % (e, url_) if payloads_ is None else \
            '%s when visit %s with data %s' % (e, url_, str(payloads_).decode('unicode_escape'))
        self.logger.error(tips)
        gl.g_fail_url.warning('%s %s' % (url_, str(payloads_)))
        return None
def book_spider(book_tag):
    page_num = 0
    book_list = list()
    try_times = 0
    while True:
        url = 'https://www.douban.com/tag/' + urllib.request.quote(book_tag) + '/book?start=' + str(page_num * 15)
        time.sleep(numpy.random.rand() * 5)  # hang up the thread to avoid requesting too frequently
        try:
            source_code = requests.get(url, headers=User_Agents[page_num % len(User_Agents)], timeout=50).text
            plain_text = str(source_code)
        except (requests.HTTPError, requests.URLRequired,
                requests.Timeout, requests.TooManyRedirects) as error:
            print(error)
            continue
        soup = BeautifulSoup(plain_text, 'lxml')
        list_soup = soup.find('div', attrs={'class': 'mod book-list'})
        try_times += 1
        if list_soup == None and try_times < 200:
            continue
        elif list_soup == None or len(list_soup) <= 1:
            break  # give up: no information returned after 200 attempts
        for book_info in list_soup.findAll('dd'):
            title = book_info.find('a', attrs={'class': 'title'}).string.strip()
            desc = book_info.find('div', attrs={'class': 'desc'}).string.strip()
            desc_list = desc.split('/')
            book_url = book_info.find('a', attrs={'class': 'title'}).get('href')
            try:
                author_info = '/'.join(desc_list[0:-3])
            except:
                author_info = ' ??'
            try:
                pub_info = '/'.join(desc_list[-3:])
            except:
                pub_info = ' ??'
            try:
                rating = book_info.find('span', {'class': 'rating_nums'}).string.strip()
            except:
                rating = '0.0'
            book_list.append([title, rating, author_info, pub_info])
        try_times = 0  # reset to 0 once valid information is received
        page_num += 1
        print("Downloading Information From Tag: {1} Page: {0} ".format(page_num, book_tag))
    print('Finish Catching Tag -> {0}'.format(book_tag))
    return book_list
def deco_log(self, log_name, fun_name, check_error=False):
    """
    :param fun_name: name of the decorated function, used to identify it in the log
    :param log_name: log file name; typically sys.argv[0][0:-3] + '.log', i.e. the current .py file name with a .log suffix
    :param check_error: whether to catch and log exceptions, True or False; defaults to False
    :return: a decorator that wraps the function with logging
    """
    # initialize the log file before any logging is done
    self.init(log_name)
    # Python 2 has no nonlocal, so one-element lists hold the mutable status and message
    status = [1]
    msg = [1]
    if check_error:
        def log(func):
            def record(*args, **kwargs):
                try:
                    t0 = time.time()
                    back = func(*args, **kwargs)  # run the wrapped function
                    run_time = time.time() - t0   # elapsed time; successful runs are logged at info level
                    status[0] = 2
                    msg[0] = "%s ran successfully in %s seconds" % (fun_name, run_time)
                    return back  # sys._getframe().f_code.co_name would give the current function name
                except IndexError as e:
                    status[0] = 3
                    msg[0] = "function %s raised an IndexError\nerror: %s\ntraceback:\n%s" % (fun_name, e, traceback.format_exc())
                except requests.ConnectionError as e:
                    status[0] = 4
                    msg[0] = "function %s hit a connection error (DNS failure or refused connection)\nerror: %s\ntraceback:\n%s" % (fun_name, e, traceback.format_exc())
                except requests.TooManyRedirects as e:
                    status[0] = 4
                    msg[0] = "function %s exceeded the maximum number of redirects\nerror: %s\ntraceback:\n%s" % (fun_name, e, traceback.format_exc())
                except requests.HTTPError as e:
                    status[0] = 4
                    msg[0] = "function %s got a non-200 HTTP status\nerror: %s\ntraceback:\n%s" % (fun_name, e, traceback.format_exc())
                except requests.RequestException as e:
                    status[0] = 4
                    msg[0] = "function %s raised a requests exception\nerror: %s\ntraceback:\n%s" % (fun_name, e, traceback.format_exc())
                except Exception as e:
                    status[0] = 5
                    msg[0] = "function %s raised an unexpected error\nerror: %s\ntraceback:\n%s" % (fun_name, e, traceback.format_exc())
                finally:
                    self.fun_log_type(status[0], msg[0])
            return record
    else:
        def log(func):
            return func
    return log