我们从 Python 开源项目中,提取了以下 9 个代码示例,用于说明如何使用 requests.utils.get_encoding_from_headers()。
def procdata_getencoding(seed, headers, content):
    """Guess the charset of a fetched page, preferring Chinese encodings.

    Tries the HTTP headers first, then falls back to scanning the page
    content (meta tags). 'gbk' and 'gb2312' are normalized to 'gbk'.

    :param seed: identifier of the page (used only for the debug print)
    :param headers: response headers passed to get_encoding_from_headers
    :param content: raw response body scanned by get_encodings_from_content
    :returns: 'gbk', 'utf-8', a content-detected encoding, or None
    """
    code = utils.get_encoding_from_headers(headers)
    if code:
        if code.lower() in ('gbk', 'gb2312'):
            code = 'gbk'
        elif code.lower() == 'utf-8':
            code = 'utf-8'
        else:
            # e.g. the ISO-8859-1 default: treat as unknown and re-detect.
            code = None
    if code is None:  # fixed: `== None` -> `is None`
        code = utils.get_encodings_from_content(content)
        # fixed: Python 2 print statement -> print() function
        print("content", seed, code)
        if code:
            code = code[0]
            if code.lower() in ('gbk', 'gb2312'):
                code = 'gbk'
    return code
def make_response(status_code: int = 200,
                  content: bytes = b'',
                  headers: dict = None,
                  reason: str = None,
                  encoding: str = None,
                  ) -> Response:
    """Construct a fully populated ``requests.Response`` for testing.

    The body is attached directly and marked consumed, so no network or
    raw stream is involved. When *encoding* is not given, it is derived
    from the supplied headers via ``get_encoding_from_headers``.
    """
    resp = Response()
    resp.status_code = status_code
    resp._content = content
    resp._content_consumed = True
    resp.headers = CaseInsensitiveDict(headers or {})
    if encoding:
        resp.encoding = encoding
    else:
        resp.encoding = get_encoding_from_headers(headers or {})
    resp.reason = reason
    return resp
def encoding(self):
    """Best-effort encoding of ``Response.content``.

    Resolution order: cached value, 'unicode' for already-decoded
    content, the content-type header (ignoring the ISO-8859-1 default),
    charset declarations inside the content, then chardet detection.
    'gb2312' is widened to 'gb18030'; the final fallback is 'utf-8'.
    The result is cached on ``self._encoding``.
    """
    if hasattr(self, '_encoding'):
        return self._encoding

    # Already text — nothing to decode.
    if isinstance(self.content, six.text_type):
        return 'unicode'

    # 1) charset from the content-type header.
    guess = get_encoding_from_headers(self.headers)
    if guess == 'ISO-8859-1':
        # requests' default when no charset is declared — not trustworthy.
        guess = None

    # 2) charset declared inside the content itself.
    if not guess and get_encodings_from_content:
        if six.PY3:
            candidates = get_encodings_from_content(
                utils.pretty_unicode(self.content[:100]))
        else:
            candidates = get_encodings_from_content(self.content)
        guess = candidates and candidates[0] or None

    # 3) statistical detection as a last resort.
    if not guess and chardet is not None:
        guess = chardet.detect(self.content)['encoding']

    # gb18030 is a superset of gb2312; prefer the wider codec.
    if guess and guess.lower() == 'gb2312':
        guess = 'gb18030'

    self._encoding = guess or 'utf-8'
    return self._encoding
def filter_encoding(self, seed, headers, content):
    """Return True if the page looks like a Chinese-language page.

    GBK/GB2312 (from headers or content meta tags) is accepted
    immediately; UTF-8 content is additionally scanned for at least one
    CJK character in the U+4E00..U+9FA5 range.

    :param seed: page identifier, used only in the error print
    :param headers: response headers for get_encoding_from_headers
    :param content: raw body (bytes) for content detection / decoding
    """
    code = utils.get_encoding_from_headers(headers)
    if code:
        if code.lower() in ('gbk', 'gb2312'):
            return True
        elif code.lower() in ('utf-8', 'utf8'):
            # UTF-8 alone is inconclusive; must scan content below.
            code = 'utf8'
        else:
            # 'ISO-8859-1' and similar defaults: treat as unknown.
            code = None
    # Chinese sites often omit the charset header, so inspect content.
    if code is None:
        codes = utils.get_encodings_from_content(content)
        if codes:
            for code in codes:
                if code.lower() in ('gbk', 'gb2312'):
                    return True
                elif code.lower() in ('utf8', 'utf-8'):
                    code = 'utf8'
                    break
    if code != 'utf8':
        return False
    # UTF-8: accept only if any CJK ideograph is present.
    try:
        ucon = content.decode('utf8')
        for uchar in ucon:
            if 0x4e00 <= ord(uchar) <= 0x9fa5:
                return True
    except Exception as e:  # fixed: Python 2 `except Exception, e`
        # BUG FIX: original printed undefined name `url`; use `seed`.
        print(seed, e)
    return False
def get_unicode_from_response(response):
    """Return the requested content back in unicode.

    This will first attempt to retrieve the encoding from the response
    headers. If that fails, it will use
    :func:`requests_toolbelt.utils.deprecated.get_encodings_from_content`
    to determine encodings from HTML elements.

    .. code-block:: python

        import requests
        from requests_toolbelt.utils import deprecated

        r = requests.get(url)
        text = deprecated.get_unicode_from_response(r)

    :param response: Response object to get unicode content from.
    :type response: requests.models.Response
    """
    attempted = set()

    # First choice: the charset declared in the content-type header.
    header_encoding = utils.get_encoding_from_headers(response.headers)
    if header_encoding:
        try:
            return str(response.content, header_encoding)
        except UnicodeError:
            attempted.add(header_encoding.lower())

    # Next: charsets declared inside the HTML content itself.
    for candidate in get_encodings_from_content(response.content):
        candidate = candidate.lower()
        if candidate in attempted:
            continue
        try:
            return str(response.content, candidate)
        except UnicodeError:
            attempted.add(candidate)

    # Last resort: lossy decode with the header charset.
    if header_encoding:
        try:
            return str(response.content, header_encoding, errors='replace')
        except TypeError:
            pass
    return response.text