我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用urllib.parse.unquote_to_bytes()。
def compat_urllib_parse_unquote_to_bytes(string): """unquote_to_bytes('abc%20def') -> b'abc def'.""" # Note: strings are encoded as UTF-8. This is only an issue if it contains # unescaped non-ASCII characters, which URIs should not. if not string: # Is it a string-like object? string.split return b'' if isinstance(string, compat_str): string = string.encode('utf-8') bits = string.split(b'%') if len(bits) == 1: return string res = [bits[0]] append = res.append for item in bits[1:]: try: append(compat_urllib_parse._hextochr[item[:2]]) append(item[2:]) except KeyError: append(b'%') append(item) return b''.join(res)
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): """Replace %xx escapes by their single-character equivalent. The optional encoding and errors parameters specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. By default, percent-encoded sequences are decoded with UTF-8, and invalid sequences are replaced by a placeholder character. unquote('abc%20def') -> 'abc def'. """ if '%' not in string: string.split return string if encoding is None: encoding = 'utf-8' if errors is None: errors = 'replace' bits = _asciire.split(string) res = [bits[0]] append = res.append for i in range(1, len(bits), 2): append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors)) append(bits[i + 1]) return ''.join(res)
def uri_to_iri(uri): """ Converts a Uniform Resource Identifier(URI) into an Internationalized Resource Identifier(IRI). This is the algorithm from section 3.2 of RFC 3987. Takes an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns unicode containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/'). """ if uri is None: return uri uri = force_bytes(uri) iri = unquote_to_bytes(uri) if six.PY3 else unquote(uri) return repercent_broken_unicode(iri).decode('utf-8')
def data_open(self, req): # data URLs as specified in RFC 2397. # # ignores POSTed data # # syntax: # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data # mediatype := [ type "/" subtype ] *( ";" parameter ) # data := *urlchar # parameter := attribute "=" value url = req.get_full_url() scheme, data = url.split(":", 1) mediatype, data = data.split(",", 1) # even base64 encoded data URLs might be quoted so unquote in any case: data = compat_urllib_parse_unquote_to_bytes(data) if mediatype.endswith(";base64"): data = binascii.a2b_base64(data) mediatype = mediatype[:-7] if not mediatype: mediatype = "text/plain;charset=US-ASCII" headers = email.message_from_string( "Content-type: %s\nContent-length: %d\n" % (mediatype, len(data))) return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
def data_open(self, req): # data URLs as specified in RFC 2397. # # ignores POSTed data # # syntax: # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data # mediatype := [ type "/" subtype ] *( ";" parameter ) # data := *urlchar # parameter := attribute "=" value url = req.get_full_url() scheme, data = url.split(':', 1) mediatype, data = data.split(',', 1) # even base64 encoded data URLs might be quoted so unquote in any case: data = compat_urllib_parse_unquote_to_bytes(data) if mediatype.endswith(';base64'): data = binascii.a2b_base64(data) mediatype = mediatype[:-7] if not mediatype: mediatype = 'text/plain;charset=US-ASCII' headers = email.message_from_string( 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data))) return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
def test_unquoting_badpercent(self): # Test unquoting on bad percent-escapes given = '%xab' expect = given result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) given = '%x' expect = given result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) given = '%' expect = given result = urllib_parse.unquote(given) self.assertEqual(expect, result, "using unquote(): %r != %r" % (expect, result)) # unquote_to_bytes given = '%xab' expect = bytes(given, 'ascii') result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) given = '%x' expect = bytes(given, 'ascii') result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) given = '%' expect = bytes(given, 'ascii') result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) self.assertRaises((TypeError, AttributeError), urllib_parse.unquote_to_bytes, None) self.assertRaises((TypeError, AttributeError), urllib_parse.unquote_to_bytes, ())
def test_unquoting_mixed_case(self): # Test unquoting on mixed-case hex digits in the percent-escapes given = '%Ab%eA' expect = b'\xab\xea' result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result))
def test_unquote_to_bytes(self): given = 'br%C3%BCckner_sapporo_20050930.doc' expect = b'br\xc3\xbcckner_sapporo_20050930.doc' result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) # Test on a string with unescaped non-ASCII characters # (Technically an invalid URI; expect those characters to be UTF-8 # encoded). result = urllib_parse.unquote_to_bytes("\u6f22%C3%BC") expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc" self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) # Test with a bytes as input given = b'%A2%D8ab%FF' expect = b'\xa2\xd8ab\xff' result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result)) # Test with a bytes as input, with unescaped non-ASCII bytes # (Technically an invalid URI; expect those bytes to be preserved) given = b'%A2\xd8ab%FF' expect = b'\xa2\xd8ab\xff' result = urllib_parse.unquote_to_bytes(given) self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" % (expect, result))
def url_unescape(value, encoding='utf-8', plus=True): """Decodes the given value from a URL. The argument may be either a byte or unicode string. If encoding is None, the result will be a byte string. Otherwise, the result is a unicode string in the specified encoding. If ``plus`` is true (the default), plus signs will be interpreted as spaces (literal plus signs must be represented as "%2B"). This is appropriate for query strings and form-encoded values but not for the path component of a URL. Note that this default is the reverse of Python's urllib module. .. versionadded:: 3.1 The ``plus`` argument """ if encoding is None: if plus: # unquote_to_bytes doesn't have a _plus variant value = to_basestring(value).replace('+', ' ') return urllib_parse.unquote_to_bytes(value) else: unquote = (urllib_parse.unquote_plus if plus else urllib_parse.unquote) return unquote(to_basestring(value), encoding=encoding)
def parse_options_header(value, multiple=False): """Parse a ``Content-Type`` like header into a tuple with the content type and the options: >>> parse_options_header('text/html; charset=utf8') ('text/html', {'charset': 'utf8'}) This should not be used to parse ``Cache-Control`` like headers that use a slightly different format. For these headers use the :func:`parse_dict_header` function. .. versionadded:: 0.5 :param value: the header to parse. :param multiple: Whether try to parse and return multiple MIME types :return: (mimetype, options) or (mimetype, options, mimetype, options, …) if multiple=True """ if not value: return '', {} result = [] value = "," + value.replace("\n", ",") while value: match = _option_header_start_mime_type.match(value) if not match: break result.append(match.group(1)) # mimetype options = {} # Parse options rest = match.group(2) while rest: optmatch = _option_header_piece_re.match(rest) if not optmatch: break option, encoding, _, option_value = optmatch.groups() option = unquote_header_value(option) if option_value is not None: option_value = unquote_header_value( option_value, option == 'filename') if encoding is not None: option_value = _unquote(option_value).decode(encoding) options[option] = option_value rest = rest[optmatch.end():] result.append(options) if multiple is False: return tuple(result) value = rest return tuple(result) if result else ('', {})