Python urllib.parse 模块,unquote_to_bytes() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用urllib.parse.unquote_to_bytes()

项目:Qyoutube-dl    作者:lzambella    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:Qyoutube-dl    作者:lzambella    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:youtube_downloader    作者:aksinghdce    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:youtube_downloader    作者:aksinghdce    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:optimalvibes    作者:littlemika    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:optimalvibes    作者:littlemika    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:tvalacarta    作者:tvalacarta    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:tvalacarta    作者:tvalacarta    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:kodi-plugin.video.ted-talks-chinese    作者:daineseh    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:kodi-plugin.video.ted-talks-chinese    作者:daineseh    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:GUIYoutube    作者:coltking    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:GUIYoutube    作者:coltking    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:Yv-dl    作者:r0oth3x49    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:Yv-dl    作者:r0oth3x49    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:anime-dl    作者:Xonshiz    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:anime-dl    作者:Xonshiz    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:get-youtube-subtitle-url-node    作者:joegesualdo    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:get-youtube-subtitle-url-node    作者:joegesualdo    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:ngDownloaderAPI    作者:manishbisht    | 项目源码 | 文件源码
def compat_urllib_parse_unquote_to_bytes(string):
        """unquote_to_bytes('abc%20def') -> b'abc def'."""
        # Note: strings are encoded as UTF-8. This is only an issue if it contains
        # unescaped non-ASCII characters, which URIs should not.
        if not string:
            # Is it a string-like object?
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        bits = string.split(b'%')
        if len(bits) == 1:
            return string
        res = [bits[0]]
        append = res.append
        for item in bits[1:]:
            try:
                append(compat_urllib_parse._hextochr[item[:2]])
                append(item[2:])
            except KeyError:
                append(b'%')
                append(item)
        return b''.join(res)
项目:ngDownloaderAPI    作者:manishbisht    | 项目源码 | 文件源码
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes by their single-character equivalent. The optional
        encoding and errors parameters specify how to decode percent-encoded
        sequences into Unicode characters, as accepted by the bytes.decode()
        method.
        By default, percent-encoded sequences are decoded with UTF-8, and invalid
        sequences are replaced by a placeholder character.

        unquote('abc%20def') -> 'abc def'.
        """
        if '%' not in string:
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        bits = _asciire.split(string)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
            append(bits[i + 1])
        return ''.join(res)
项目:CodingDojo    作者:ComputerSocietyUNB    | 项目源码 | 文件源码
def uri_to_iri(uri):
    """
    Converts a Uniform Resource Identifier(URI) into an Internationalized
    Resource Identifier(IRI).

    This is the algorithm from section 3.2 of RFC 3987.

    Takes an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
    unicode containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/').
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    iri = unquote_to_bytes(uri) if six.PY3 else unquote(uri)
    return repercent_broken_unicode(iri).decode('utf-8')
项目:Qyoutube-dl    作者:lzambella    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(":", 1)
            mediatype, data = data.split(",", 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(";base64"):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = "text/plain;charset=US-ASCII"

            headers = email.message_from_string(
                "Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:youtube_downloader    作者:aksinghdce    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:optimalvibes    作者:littlemika    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:tvalacarta    作者:tvalacarta    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:packaging    作者:blockstack    | 项目源码 | 文件源码
def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes
        given = '%xab'
        expect = given
        result = urllib_parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = given
        result = urllib_parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = given
        result = urllib_parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        # unquote_to_bytes
        given = '%xab'
        expect = bytes(given, 'ascii')
        result = urllib_parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = bytes(given, 'ascii')
        result = urllib_parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = bytes(given, 'ascii')
        result = urllib_parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        self.assertRaises((TypeError, AttributeError), urllib_parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError), urllib_parse.unquote_to_bytes, ())
项目:packaging    作者:blockstack    | 项目源码 | 文件源码
def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib_parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
项目:packaging    作者:blockstack    | 项目源码 | 文件源码
def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib_parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib_parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib_parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib_parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
项目:kodi-plugin.video.ted-talks-chinese    作者:daineseh    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:GUIYoutube    作者:coltking    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:lifesoundtrack    作者:MTG    | 项目源码 | 文件源码
def uri_to_iri(uri):
    """
    Converts a Uniform Resource Identifier(URI) into an Internationalized
    Resource Identifier(IRI).

    This is the algorithm from section 3.2 of RFC 3987.

    Takes an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
    unicode containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/').
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    iri = unquote_to_bytes(uri) if six.PY3 else unquote(uri)
    return repercent_broken_unicode(iri).decode('utf-8')
项目:Yv-dl    作者:r0oth3x49    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:liberator    作者:libscie    | 项目源码 | 文件源码
def uri_to_iri(uri):
    """
    Converts a Uniform Resource Identifier(URI) into an Internationalized
    Resource Identifier(IRI).

    This is the algorithm from section 3.2 of RFC 3987.

    Takes an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
    unicode containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/').
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    iri = unquote_to_bytes(uri) if six.PY3 else unquote(uri)
    return repercent_broken_unicode(iri).decode('utf-8')
项目:djanoDoc    作者:JustinChavez    | 项目源码 | 文件源码
def uri_to_iri(uri):
    """
    Converts a Uniform Resource Identifier(URI) into an Internationalized
    Resource Identifier(IRI).

    This is the algorithm from section 3.2 of RFC 3987.

    Takes an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
    unicode containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/').
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    iri = unquote_to_bytes(uri) if six.PY3 else unquote(uri)
    return repercent_broken_unicode(iri).decode('utf-8')
项目:anime-dl    作者:Xonshiz    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:aweasome_learning    作者:Knight-ZXW    | 项目源码 | 文件源码
def url_unescape(value, encoding='utf-8', plus=True):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string.  Otherwise,
        the result is a unicode string in the specified encoding.

        If ``plus`` is true (the default), plus signs will be interpreted
        as spaces (literal plus signs must be represented as "%2B").  This
        is appropriate for query strings and form-encoded values but not
        for the path component of a URL.  Note that this default is the
        reverse of Python's urllib module.

        .. versionadded:: 3.1
           The ``plus`` argument
        """
        if encoding is None:
            if plus:
                # unquote_to_bytes doesn't have a _plus variant
                value = to_basestring(value).replace('+', ' ')
            return urllib_parse.unquote_to_bytes(value)
        else:
            unquote = (urllib_parse.unquote_plus if plus
                       else urllib_parse.unquote)
            return unquote(to_basestring(value), encoding=encoding)
项目:zenchmarks    作者:squeaky-pl    | 项目源码 | 文件源码
def url_unescape(value, encoding='utf-8', plus=True):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string.  Otherwise,
        the result is a unicode string in the specified encoding.

        If ``plus`` is true (the default), plus signs will be interpreted
        as spaces (literal plus signs must be represented as "%2B").  This
        is appropriate for query strings and form-encoded values but not
        for the path component of a URL.  Note that this default is the
        reverse of Python's urllib module.

        .. versionadded:: 3.1
           The ``plus`` argument
        """
        if encoding is None:
            if plus:
                # unquote_to_bytes doesn't have a _plus variant
                value = to_basestring(value).replace('+', ' ')
            return urllib_parse.unquote_to_bytes(value)
        else:
            unquote = (urllib_parse.unquote_plus if plus
                       else urllib_parse.unquote)
            return unquote(to_basestring(value), encoding=encoding)
项目:get-youtube-subtitle-url-node    作者:joegesualdo    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:ngDownloaderAPI    作者:manishbisht    | 项目源码 | 文件源码
def data_open(self, req):
            # data URLs as specified in RFC 2397.
            #
            # ignores POSTed data
            #
            # syntax:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            url = req.get_full_url()

            scheme, data = url.split(':', 1)
            mediatype, data = data.split(',', 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            data = compat_urllib_parse_unquote_to_bytes(data)
            if mediatype.endswith(';base64'):
                data = binascii.a2b_base64(data)
                mediatype = mediatype[:-7]

            if not mediatype:
                mediatype = 'text/plain;charset=US-ASCII'

            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
项目:browser_vuln_check    作者:lcatro    | 项目源码 | 文件源码
def url_unescape(value, encoding='utf-8', plus=True):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string.  Otherwise,
        the result is a unicode string in the specified encoding.

        If ``plus`` is true (the default), plus signs will be interpreted
        as spaces (literal plus signs must be represented as "%2B").  This
        is appropriate for query strings and form-encoded values but not
        for the path component of a URL.  Note that this default is the
        reverse of Python's urllib module.

        .. versionadded:: 3.1
           The ``plus`` argument
        """
        if encoding is None:
            if plus:
                # unquote_to_bytes doesn't have a _plus variant
                value = to_basestring(value).replace('+', ' ')
            return urllib_parse.unquote_to_bytes(value)
        else:
            unquote = (urllib_parse.unquote_plus if plus
                       else urllib_parse.unquote)
            return unquote(to_basestring(value), encoding=encoding)
项目:TornadoWeb    作者:VxCoder    | 项目源码 | 文件源码
def url_unescape(value, encoding='utf-8', plus=True):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string.  Otherwise,
        the result is a unicode string in the specified encoding.

        If ``plus`` is true (the default), plus signs will be interpreted
        as spaces (literal plus signs must be represented as "%2B").  This
        is appropriate for query strings and form-encoded values but not
        for the path component of a URL.  Note that this default is the
        reverse of Python's urllib module.

        .. versionadded:: 3.1
           The ``plus`` argument
        """
        if encoding is None:
            if plus:
                # unquote_to_bytes doesn't have a _plus variant
                value = to_basestring(value).replace('+', ' ')
            return urllib_parse.unquote_to_bytes(value)
        else:
            unquote = (urllib_parse.unquote_plus if plus
                       else urllib_parse.unquote)
            return unquote(to_basestring(value), encoding=encoding)
项目:django-next-train    作者:bitpixdigital    | 项目源码 | 文件源码
def uri_to_iri(uri):
    """
    Converts a Uniform Resource Identifier(URI) into an Internationalized
    Resource Identifier(IRI).

    This is the algorithm from section 3.2 of RFC 3987.

    Takes an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
    unicode containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/').
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    iri = unquote_to_bytes(uri) if six.PY3 else unquote(uri)
    return repercent_broken_unicode(iri).decode('utf-8')
项目:django-wechat-api    作者:crazy-canux    | 项目源码 | 文件源码
def uri_to_iri(uri):
    """
    Converts a Uniform Resource Identifier(URI) into an Internationalized
    Resource Identifier(IRI).

    This is the algorithm from section 3.2 of RFC 3987.

    Takes an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
    unicode containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/').
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    iri = unquote_to_bytes(uri) if six.PY3 else unquote(uri)
    return repercent_broken_unicode(iri).decode('utf-8')
项目:ProgrameFacil    作者:Gpzim98    | 项目源码 | 文件源码
def url_unescape(value, encoding='utf-8', plus=True):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string.  Otherwise,
        the result is a unicode string in the specified encoding.

        If ``plus`` is true (the default), plus signs will be interpreted
        as spaces (literal plus signs must be represented as "%2B").  This
        is appropriate for query strings and form-encoded values but not
        for the path component of a URL.  Note that this default is the
        reverse of Python's urllib module.

        .. versionadded:: 3.1
           The ``plus`` argument
        """
        if encoding is None:
            if plus:
                # unquote_to_bytes doesn't have a _plus variant
                value = to_basestring(value).replace('+', ' ')
            return urllib_parse.unquote_to_bytes(value)
        else:
            unquote = (urllib_parse.unquote_plus if plus
                       else urllib_parse.unquote)
            return unquote(to_basestring(value), encoding=encoding)
项目:ProgrameFacil    作者:Gpzim98    | 项目源码 | 文件源码
def url_unescape(value, encoding='utf-8', plus=True):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string.  Otherwise,
        the result is a unicode string in the specified encoding.

        If ``plus`` is true (the default), plus signs will be interpreted
        as spaces (literal plus signs must be represented as "%2B").  This
        is appropriate for query strings and form-encoded values but not
        for the path component of a URL.  Note that this default is the
        reverse of Python's urllib module.

        .. versionadded:: 3.1
           The ``plus`` argument
        """
        if encoding is None:
            if plus:
                # unquote_to_bytes doesn't have a _plus variant
                value = to_basestring(value).replace('+', ' ')
            return urllib_parse.unquote_to_bytes(value)
        else:
            unquote = (urllib_parse.unquote_plus if plus
                       else urllib_parse.unquote)
            return unquote(to_basestring(value), encoding=encoding)
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def parse_options_header(value, multiple=False):
    """Parse a ``Content-Type`` like header into a tuple with the content
    type and the options:

    >>> parse_options_header('text/html; charset=utf8')
    ('text/html', {'charset': 'utf8'})

    This should not be used to parse ``Cache-Control`` like headers that use
    a slightly different format.  For these headers use the
    :func:`parse_dict_header` function.

    .. versionadded:: 0.5

    :param value: the header to parse.
    :param multiple: Whether try to parse and return multiple MIME types
    :return: (mimetype, options) or (mimetype, options, mimetype, options, …)
             if multiple=True
    """
    if not value:
        return '', {}

    result = []

    value = "," + value.replace("\n", ",")
    while value:
        match = _option_header_start_mime_type.match(value)
        if not match:
            break
        result.append(match.group(1))  # mimetype
        options = {}
        # Parse options
        rest = match.group(2)
        while rest:
            optmatch = _option_header_piece_re.match(rest)
            if not optmatch:
                break
            option, encoding, _, option_value = optmatch.groups()
            option = unquote_header_value(option)
            if option_value is not None:
                option_value = unquote_header_value(
                    option_value,
                    option == 'filename')
                if encoding is not None:
                    option_value = _unquote(option_value).decode(encoding)
            options[option] = option_value
            rest = rest[optmatch.end():]
        result.append(options)
        if multiple is False:
            return tuple(result)
        value = rest

    return tuple(result) if result else ('', {})
项目:Sci-Finder    作者:snverse    | 项目源码 | 文件源码
def parse_options_header(value, multiple=False):
    """Parse a ``Content-Type`` like header into a tuple with the content
    type and the options:

    >>> parse_options_header('text/html; charset=utf8')
    ('text/html', {'charset': 'utf8'})

    This should not be used to parse ``Cache-Control`` like headers that use
    a slightly different format.  For these headers use the
    :func:`parse_dict_header` function.

    .. versionadded:: 0.5

    :param value: the header to parse.
    :param multiple: Whether try to parse and return multiple MIME types
    :return: (mimetype, options) or (mimetype, options, mimetype, options, …)
             if multiple=True
    """
    if not value:
        return '', {}

    result = []

    value = "," + value.replace("\n", ",")
    while value:
        match = _option_header_start_mime_type.match(value)
        if not match:
            break
        result.append(match.group(1))  # mimetype
        options = {}
        # Parse options
        rest = match.group(2)
        while rest:
            optmatch = _option_header_piece_re.match(rest)
            if not optmatch:
                break
            option, encoding, _, option_value = optmatch.groups()
            option = unquote_header_value(option)
            if option_value is not None:
                option_value = unquote_header_value(
                    option_value,
                    option == 'filename')
                if encoding is not None:
                    option_value = _unquote(option_value).decode(encoding)
            options[option] = option_value
            rest = rest[optmatch.end():]
        result.append(options)
        if multiple is False:
            return tuple(result)
        value = rest

    return tuple(result) if result else ('', {})
项目:RPoint    作者:george17-meet    | 项目源码 | 文件源码
def parse_options_header(value, multiple=False):
    """Parse a ``Content-Type`` like header into a tuple with the content
    type and the options:

    >>> parse_options_header('text/html; charset=utf8')
    ('text/html', {'charset': 'utf8'})

    This should not be used to parse ``Cache-Control`` like headers that use
    a slightly different format.  For these headers use the
    :func:`parse_dict_header` function.

    .. versionadded:: 0.5

    :param value: the header to parse.
    :param multiple: Whether try to parse and return multiple MIME types
    :return: (mimetype, options) or (mimetype, options, mimetype, options, …)
             if multiple=True
    """
    if not value:
        return '', {}

    result = []

    value = "," + value.replace("\n", ",")
    while value:
        match = _option_header_start_mime_type.match(value)
        if not match:
            break
        result.append(match.group(1))  # mimetype
        options = {}
        # Parse options
        rest = match.group(2)
        while rest:
            optmatch = _option_header_piece_re.match(rest)
            if not optmatch:
                break
            option, encoding, _, option_value = optmatch.groups()
            option = unquote_header_value(option)
            if option_value is not None:
                option_value = unquote_header_value(
                    option_value,
                    option == 'filename')
                if encoding is not None:
                    option_value = _unquote(option_value).decode(encoding)
            options[option] = option_value
            rest = rest[optmatch.end():]
        result.append(options)
        if multiple is False:
            return tuple(result)
        value = rest

    return tuple(result) if result else ('', {})
项目:Indushell    作者:SecarmaLabs    | 项目源码 | 文件源码
def parse_options_header(value, multiple=False):
    """Parse a ``Content-Type`` like header into a tuple with the content
    type and the options:

    >>> parse_options_header('text/html; charset=utf8')
    ('text/html', {'charset': 'utf8'})

    This should not be used to parse ``Cache-Control`` like headers that use
    a slightly different format.  For these headers use the
    :func:`parse_dict_header` function.

    .. versionadded:: 0.5

    :param value: the header to parse.
    :param multiple: Whether try to parse and return multiple MIME types
    :return: (mimetype, options) or (mimetype, options, mimetype, options, …)
             if multiple=True
    """
    if not value:
        return '', {}

    result = []

    value = "," + value.replace("\n", ",")
    while value:
        match = _option_header_start_mime_type.match(value)
        if not match:
            break
        result.append(match.group(1))  # mimetype
        options = {}
        # Parse options
        rest = match.group(2)
        while rest:
            optmatch = _option_header_piece_re.match(rest)
            if not optmatch:
                break
            option, encoding, _, option_value = optmatch.groups()
            option = unquote_header_value(option)
            if option_value is not None:
                option_value = unquote_header_value(
                    option_value,
                    option == 'filename')
                if encoding is not None:
                    option_value = _unquote(option_value).decode(encoding)
            options[option] = option_value
            rest = rest[optmatch.end():]
        result.append(options)
        if multiple is False:
            return tuple(result)
        value = rest

    return tuple(result) if result else ('', {})
项目:Liljimbo-Chatbot    作者:chrisjim316    | 项目源码 | 文件源码
def parse_options_header(value, multiple=False):
    """Parse a ``Content-Type`` like header into a tuple with the content
    type and the options:

    >>> parse_options_header('text/html; charset=utf8')
    ('text/html', {'charset': 'utf8'})

    This should not be used to parse ``Cache-Control`` like headers that use
    a slightly different format.  For these headers use the
    :func:`parse_dict_header` function.

    .. versionadded:: 0.5

    :param value: the header to parse.
    :param multiple: Whether try to parse and return multiple MIME types
    :return: (mimetype, options) or (mimetype, options, mimetype, options, …)
             if multiple=True
    """
    if not value:
        return '', {}

    result = []

    value = "," + value.replace("\n", ",")
    while value:
        match = _option_header_start_mime_type.match(value)
        if not match:
            break
        result.append(match.group(1))  # mimetype
        options = {}
        # Parse options
        rest = match.group(2)
        while rest:
            optmatch = _option_header_piece_re.match(rest)
            if not optmatch:
                break
            option, encoding, _, option_value = optmatch.groups()
            option = unquote_header_value(option)
            if option_value is not None:
                option_value = unquote_header_value(
                    option_value,
                    option == 'filename')
                if encoding is not None:
                    option_value = _unquote(option_value).decode(encoding)
            options[option] = option_value
            rest = rest[optmatch.end():]
        result.append(options)
        if multiple is False:
            return tuple(result)
        value = rest

    return tuple(result) if result else ('', {})