The following 49 code examples, extracted from open-source Python projects, demonstrate how to use codecs.getdecoder().
def test_all(self):
    """The documented public API must match ``codecs.__all__`` exactly,
    and every exported name must actually exist on the module."""
    expected = (
        "encode", "decode", "register", "CodecInfo", "Codec",
        "IncrementalEncoder", "IncrementalDecoder", "StreamReader",
        "StreamWriter", "lookup", "getencoder", "getdecoder",
        "getincrementalencoder", "getincrementaldecoder", "getreader",
        "getwriter", "register_error", "lookup_error", "strict_errors",
        "replace_errors", "ignore_errors", "xmlcharrefreplace_errors",
        "backslashreplace_errors", "open", "EncodedFile", "iterencode",
        "iterdecode", "BOM", "BOM_BE", "BOM_LE", "BOM_UTF8", "BOM_UTF16",
        "BOM_UTF16_BE", "BOM_UTF16_LE", "BOM_UTF32", "BOM_UTF32_BE",
        "BOM_UTF32_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
        # Undocumented
        "StreamReaderWriter", "StreamRecoder",
    )
    self.assertEqual(sorted(expected), sorted(codecs.__all__))
    # Every advertised name must resolve; getattr raises if one is missing.
    for name in codecs.__all__:
        getattr(codecs, name)
def _unescape_unicode_in_token(self, token_value):
    """Convert unicode escapes in *token_value* into real characters.

    XXX HACK XXX -- the regexp engine only supports literal unicode
    characters, so escapes must be decoded.  However, a plain
    'unicode_escape' decode would also collapse whitespace escapes
    (bad: our regexps are whitespace agnostic) and double backslashes
    (which would corrupt the regexp), so both are escaped once more
    before decoding.
    """
    # Protect literal double backslashes from being collapsed.
    protected = token_value.replace('\\\\', '\\\\\\\\')
    # Protect \n, \r and \f in one pass.  Equivalent to three
    # str.replace calls, kept as a single regexp for speed.
    protected = re.sub(r'(\\[nrf])', r'\\\1', protected)
    decoded, _consumed = codecs.getdecoder('unicode_escape')(protected)
    return decoded
def test_all(self):
    """``codecs.__all__`` must contain exactly the documented API names,
    each of which must resolve on the module."""
    expected = (
        "encode", "decode", "register", "CodecInfo", "Codec",
        "IncrementalEncoder", "IncrementalDecoder", "StreamReader",
        "StreamWriter", "lookup", "getencoder", "getdecoder",
        "getincrementalencoder", "getincrementaldecoder", "getreader",
        "getwriter", "register_error", "lookup_error", "strict_errors",
        "replace_errors", "ignore_errors", "xmlcharrefreplace_errors",
        "backslashreplace_errors", "open", "EncodedFile", "iterencode",
        "iterdecode", "BOM", "BOM_BE", "BOM_LE", "BOM_UTF8", "BOM_UTF16",
        "BOM_UTF16_BE", "BOM_UTF16_LE", "BOM_UTF32", "BOM_UTF32_BE",
        "BOM_UTF32_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
        # Undocumented
        "StreamReaderWriter", "StreamRecoder",
    )
    # Order-insensitive comparison of the two name lists.
    self.assertCountEqual(expected, codecs.__all__)
    for name in codecs.__all__:
        getattr(codecs, name)
def generate_password(password, cook_method, cmd):
    """Build the binary password packet for a lock-control command.

    Parameters:
        password: raw password bytes.
        cook_method: callable applied to the password before packing.
        cmd: one of 'unlock', 'set' or 'unset'.  For 'set'/'unset' the
            cooked password is sent twice (old + new slot).

    Returns:
        8-byte header (0x45 packet, command field, length 0x20) followed
        by the cooked password payload.

    Raises:
        ValueError: if *cmd* is not a recognised command.  (The original
        code left ``field`` unbound and crashed with NameError instead.)
    """
    password = cook_method(password)
    if cmd == 'unlock':
        field = b'00'
    elif cmd == 'unset':
        password = password + password
        field = b'10'
    elif cmd == 'set':
        password = password + password
        field = b'01'
    else:
        raise ValueError("unknown command: %r" % (cmd,))
    # Header is assembled as a hex string, then decoded to raw bytes.
    header = codecs.getdecoder('hex')(b'450000' + field + b'00000020')[0]
    return header + password
def uppercase_escape(s):
    """Replace ``\\Uxxxxxxxx`` escape sequences in *s* with the
    characters they denote; everything else is left untouched."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, bytes_consumed); keep the text.
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
def lowercase_escape(s):
    """Replace ``\\uxxxx`` escape sequences in *s* with the characters
    they denote; everything else is left untouched."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, bytes_consumed); keep the text.
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
def __init__(self, unicode_csvfile, *args, **kwargs):
    # Python 2-era shim: the csv module cannot parse unicode directly,
    # so the unicode input stream is re-encoded to UTF-8 bytes for
    # parsing, and each parsed field is decoded back to unicode.
    decoder = codecs.getdecoder('utf-8')
    # Keep only the decoded text, dropping the bytes-consumed count.
    self.decoder = lambda v: decoder(v)[0]
    utf8_csvfile = codecs.iterencode(unicode_csvfile, encoding='utf-8')
    # Direct base-class call: csv.DictReader is an old-style class in
    # Python 2, so super() is not available here.
    csv.DictReader.__init__(self, utf8_csvfile, *args, **kwargs)
    # Accessing .fieldnames reads the header row; decode it eagerly so
    # dict keys are unicode like the values.
    self.fieldnames = [self.decoder(f) for f in self.fieldnames]
def clean_path(path):
    """Expand a leading '~' in *path* and decode any backslash escape
    sequences (e.g. ``\\t``, ``\\n``) it contains."""
    expanded = os.path.expanduser(path)
    decoded, _consumed = codecs.getdecoder("unicode_escape")(expanded)
    return decoded
def authCheck(self, username, password, req):
    """Authenticate *req* and return the user's roles, or None on failure.

    Two schemes are supported:
      * token auth: request carries 't' and 's' params, where
        t == md5(stored_password + s);
      * password auth: *password* compared to the stored one, after
        hex-decoding if it carries the "enc:" prefix.

    On success the matched user is stashed on ``req.authed_user``.
    """
    with Session() as session:
        user = models.getUserByName(session, username)
        # `not user or user and not user.password` simplified: reject
        # unknown users and users without a stored password.
        if not user or not user.password:
            return
        if ("t" in list(req.params.keys()) and
                "s" in list(req.params.keys())):
            digest = hashlib.md5()
            digest.update(user.password.encode("utf-8"))
            digest.update(req.params["s"].encode("utf-8"))
            # NOTE(review): not a constant-time comparison; consider
            # hmac.compare_digest if timing attacks are a concern.
            if digest.hexdigest() == req.params["t"]:
                # Stash the user for easy access
                req.authed_user = user
                return req.authed_user.roles
            else:
                return
        elif password.startswith("enc:"):
            try:
                decode_hex = codecs.getdecoder("hex_codec")
                password = decode_hex(password[4:])[0]
                password = password.decode("utf-8")
            except ValueError:
                # binascii.Error (bad hex) and UnicodeDecodeError (bad
                # UTF-8) both subclass ValueError.  The original bare
                # `except:` also swallowed KeyboardInterrupt/SystemExit
                # and hid genuine bugs.
                return
        if user and password == user.password:
            # Stash the user for easy access
            req.authed_user = user
            return req.authed_user.roles
def test_decode_callback(self):
    """unicode_internal decoding must honour a registered ignore handler."""
    if sys.maxunicode <= 0xffff:
        return  # needs a wide (UCS-4) build to exercise this path
    codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
    decoder = codecs.getdecoder("unicode_internal")
    ab = "ab".encode("unicode_internal").decode()
    # Inject four stray 0x22 bytes between the encoded characters; the
    # ignore handler should drop them.
    payload = bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]), "ascii")
    decoded = decoder(payload, "UnicodeInternalTest")
    self.assertEqual(("ab", 12), decoded)
def test_getdecoder(self):
    """getdecoder() must reject missing and unknown encoding names."""
    # Calling with no encoding at all is a TypeError ...
    self.assertRaises(TypeError, codecs.getdecoder)
    # ... and an unregistered encoding name raises LookupError.
    self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
def test_bad_decode_args(self):
    """Every decoder must reject a call with no arguments, and all but
    idna/punycode must reject a non-bytes argument."""
    for encoding in all_unicode_encodings:
        decoder = codecs.getdecoder(encoding)
        self.assertRaises(TypeError, decoder)
        # idna and punycode are deliberately excluded here -- they do
        # not raise TypeError for this input.
        if encoding not in ("idna", "punycode"):
            self.assertRaises(TypeError, decoder, 42)
def test_basics(self):
    """Round-trip all 256 byte values through each transform codec."""
    payload = bytes(range(256))
    for encoding in bytes_transform_encodings:
        # generic codecs interface
        encoded, consumed = codecs.getencoder(encoding)(payload)
        self.assertEqual(consumed, len(payload))
        decoded, consumed = codecs.getdecoder(encoding)(encoded)
        self.assertEqual(consumed, len(encoded))
        self.assertEqual(decoded, payload)
def test_errorcallback_longindex(self):
    """An error handler that asks the decoder to resume past the end of
    the input must trigger IndexError, not crash or loop."""
    decode = codecs.getdecoder('euc-kr')

    def bad_resume(exc):
        # Claim an absurd resume position beyond any possible input.
        return ('', sys.maxsize + 1)

    codecs.register_error('test.cjktest', bad_resume)
    self.assertRaises(IndexError, decode,
                      b'apple\x92ham\x93spam', 'test.cjktest')
def __init__(self, output_encoding='utf-8', input_encoding='utf-8',
             fallback_encoding='iso-8859-1'):
    """Set up the codec callables and empty byte buffers.

    output_encoding   -- encoding used when emitting bytes
    input_encoding    -- primary encoding for decoding incoming bytes
    fallback_encoding -- secondary decoder (presumably tried when the
                         primary decode fails -- confirm with callers)
    """
    self.output_codec = codecs.getencoder(output_encoding)
    self.input_decoder = codecs.getdecoder(input_encoding)
    self.fallback_decoder = codecs.getdecoder(fallback_encoding)
    # Buffers for bytes not yet decoded / not yet flushed out.
    self._input_buffer = bytearray()
    self._output_buffer = bytearray()
    self._closed = False
def test_decode_callback(self):
    """unicode_internal decoding must honour a registered ignore handler
    (Python 2 variant: decoder input is a str)."""
    if sys.maxunicode <= 0xffff:
        return  # needs a wide (UCS-4) build to exercise this path
    codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
    decoder = codecs.getdecoder("unicode_internal")
    ab = u"ab".encode("unicode_internal")
    # Four stray 0x22 bytes injected mid-stream should be ignored.
    corrupted = "%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:])
    self.assertEqual((u"ab", 12),
                     decoder(corrupted, "UnicodeInternalTest"))
def test_basics(self):
    """Encode/decode round trip of an ASCII sample for every codec."""
    sample = "abc123"
    for encoding in all_string_encodings:
        # Renamed locals: the original shadowed the `bytes` builtin.
        encoded, consumed = codecs.getencoder(encoding)(sample)
        self.assertEqual(consumed, len(sample))
        decoded, consumed = codecs.getdecoder(encoding)(encoded)
        self.assertEqual(decoded, sample,
                         "%r != %r (encoding=%r)" % (decoded, sample,
                                                     encoding))
def test_decode_callback(self):
    """The deprecated unicode_internal codec must still honour a
    registered ignore handler (and emit its DeprecationWarning)."""
    codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
    decoder = codecs.getdecoder("unicode_internal")
    expected_warning = ('unicode_internal codec has been '
                        'deprecated', DeprecationWarning)
    with support.check_warnings(expected_warning):
        ab = "ab".encode("unicode_internal").decode()
        # Four stray 0x22 bytes injected mid-stream should be ignored.
        payload = bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
                        "ascii")
        decoded = decoder(payload, "UnicodeInternalTest")
        self.assertEqual(("ab", 12), decoded)
def _unescape(text):
    """Unescape unicode character codes within a string.

    Matches ``\\uXXXX`` sequences preceded by one or two backslashes
    and decodes each match with the 'unicode_escape' codec.
    """
    decoder = codecs.getdecoder('unicode_escape')

    def _decode_match(match):
        # Decoder returns (text, bytes_consumed); keep the text.
        return decoder(match.group())[0]

    return re.sub(r'\\{1,2}u[0-9a-fA-F]{4}', _decode_match, text)
def codepage(self, codepage):
    """Switch the table to *codepage*, updating the on-disk header and
    the table's encoder/decoder pair.

    Raises TypeError for a non-CodePage argument and DbfError when the
    table is not open in read/write mode.
    """
    if not isinstance(codepage, CodePage):
        raise TypeError("codepage should be a CodePage, not a %r" % type(codepage))
    current = self._meta
    if current.status != READ_WRITE:
        raise DbfError('%s not in read/write mode, unable to change codepage' % current.filename)
    current.header.codepage(codepage.code)
    # Swap both halves of the codec pair so encode/decode stay in sync.
    current.decoder = codecs.getdecoder(codepage.name)
    current.encoder = codecs.getencoder(codepage.name)
    self._update_disk(headeronly=True)
def test_nonascii_text_cptrans(self):
    "check non-ascii text to unicode"
    table = Table(':memory:', 'data C(50); memo M', codepage='cp437',
                  dbf_type='vfp', on_disk=False)
    table.open()
    decoder = codecs.getdecoder('cp437')
    # Build 50 characters from the cp437 high range, using whichever
    # decode path matches the running interpreter.
    if py_ver < (3, 0):
        high_ascii = decoder(''.join(chr(c)
                                     for c in range(128, 128 + 50)))[0]
    else:
        high_ascii = bytes(range(128, 128 + 50)).decode('cp437')
    table.append(dict(data=high_ascii, memo=high_ascii))
    # Both the character field and the memo field must round-trip.
    record = table[0]
    self.assertEqual(record.data, high_ascii)
    self.assertEqual(record.memo, high_ascii)
    table.close()