我们从 Python 开源项目中提取了以下 50 个代码示例,用于说明与 codecs.c 相关的用法。
def test_xmlcharnamereplace(self): # This time use a named character entity for unencodable # characters, if one is available. def xmlcharnamereplace(exc): if not isinstance(exc, UnicodeEncodeError): raise TypeError("don't know how to handle %r" % exc) l = [] for c in exc.object[exc.start:exc.end]: try: l.append("&%s;" % html.entities.codepoint2name[ord(c)]) except KeyError: l.append("&#%d;" % ord(c)) return ("".join(l), exc.end) codecs.register_error( "test.xmlcharnamereplace", xmlcharnamereplace) sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a" sout = b"«ℜ» = ⟨ሴ€⟩" self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout) sout = b"\xabℜ\xbb = ⟨ሴ€⟩" self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout) sout = b"\xabℜ\xbb = ⟨ሴ\xa4⟩" self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
def test_charmapencode(self): # For charmap encodings the replacement string will be # mapped through the encoding again. This means, that # to be able to use e.g. the "replace" handler, the # charmap has to have a mapping for "?". charmap = dict((ord(c), bytes(2*c.upper(), 'ascii')) for c in "abcdefgh") sin = "abc" sout = b"AABBCC" self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout) sin = "abcA" self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) charmap[ord("?")] = b"XYZ" sin = "abcDEF" sout = b"AABBCCXYZXYZXYZ" self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout) charmap[ord("?")] = "XYZ" # wrong type in mapping self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
def test_xmlcharnamereplace(self):
    # This time use a named character entity for unencodable
    # characters, if one is available.
    def xmlcharnamereplace(exc):
        # The handler only knows how to deal with encoding errors.
        if not isinstance(exc, UnicodeEncodeError):
            raise TypeError("don't know how to handle %r" % exc)
        l = []
        for c in exc.object[exc.start:exc.end]:
            try:
                l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
            except KeyError:
                # No named entity: fall back to a decimal character reference.
                l.append(u"&#%d;" % ord(c))
        # Return the replacement text and the position to resume encoding at.
        return (u"".join(l), exc.end)

    codecs.register_error(
        "test.xmlcharnamereplace", xmlcharnamereplace)

    sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
    # The expected values spell the entities out as plain ASCII text.
    sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
    self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
    # latin-1 encodes U+00AB and U+00BB directly, so only the rest is replaced.
    sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
    self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
    # iso-8859-15 additionally encodes the euro sign as 0xA4.
    sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
    self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
def test_charmapencode(self):
    # For charmap encodings the replacement string will be
    # mapped through the encoding again. This means, that
    # to be able to use e.g. the "replace" handler, the
    # charmap has to have a mapping for "?".
    charmap = dict([(ord(c), 2*c.upper()) for c in "abcdefgh"])
    sin = u"abc"
    sout = "AABBCC"
    self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout)

    # "A" has no entry in the charmap, so strict encoding has to fail.
    sin = u"abcA"
    self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)

    # With "?" mapped, each unmapped character becomes the "?" replacement.
    charmap[ord("?")] = "XYZ"
    sin = u"abcDEF"
    sout = "AABBCCXYZXYZXYZ"
    self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout)

    # A unicode value is the wrong type for a mapping entry.  (This check
    # used to be repeated verbatim; one copy is enough.)
    charmap[ord("?")] = u"XYZ"
    self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
def replace_surrogate_encode(mystring):
    """
    Returns a (unicode) string, not the more logical bytes, because the codecs
    register_error functionality expects this.

    Every character of ``mystring`` must lie in U+DC00..U+DCFF; it is
    replaced by the character whose code point is ``code - 0xDC00``.
    Raises NotASurrogateError for any character outside that range.
    """
    decoded = []
    for ch in mystring:
        code = ord(ch)

        # The range check is modelled on Py3.3's Python/codecs.c file.
        # Anything outside U+DC00..U+DCFF cannot be mapped back: signal that
        # with NotASurrogateError (there is no ``exc`` in scope here to
        # re-raise) instead of passing a negative offset to _unichr.
        if not 0xDC00 <= code <= 0xDCFF:
            raise NotASurrogateError
        # Note that U+DC00..U+DC7F is accepted as well as U+DC80..U+DCFF.
        decoded.append(_unichr(code - 0xDC00))
    return str().join(decoded)
def replace_surrogate_encode(mystring):
    """
    Returns a (unicode) string, not the more logical bytes, because the codecs
    register_error functionality expects this.

    Every character of ``mystring`` must lie in U+DC00..U+DCFF; it is
    replaced by the character whose code point is ``code - 0xDC00``.
    Raises NotASurrogateError for any character outside that range.
    """
    decoded = []
    for ch in mystring:
        code = ord(ch)

        # The range check is modelled on Py3.3's Python/codecs.c file.
        # Anything outside U+DC00..U+DCFF cannot be mapped back: signal that
        # with NotASurrogateError (there is no ``exc`` in scope here to
        # re-raise) instead of passing a negative offset to _unichr.
        if not 0xDC00 <= code <= 0xDCFF:
            raise NotASurrogateError
        # Note that U+DC00..U+DC7F is accepted as well as U+DC80..U+DCFF.
        decoded.append(_unichr(code - 0xDC00))
    return str().join(decoded)
def test_uninamereplace(self): # We're using the names from the unicode database this time, # and we're doing "syntax highlighting" here, i.e. we include # the replaced text in ANSI escape sequences. For this it is # useful that the error handler is not called for every single # unencodable character, but for a complete sequence of # unencodable characters, otherwise we would output many # unnecessary escape sequences. def uninamereplace(exc): if not isinstance(exc, UnicodeEncodeError): raise TypeError("don't know how to handle %r" % exc) l = [] for c in exc.object[exc.start:exc.end]: l.append(unicodedata.name(c, "0x%x" % ord(c))) return ("\033[1m%s\033[0m" % ", ".join(l), exc.end) codecs.register_error( "test.uninamereplace", uninamereplace) sin = "\xac\u1234\u20ac\u8000" sout = b"\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
def test_badandgoodxmlcharrefreplaceexceptions(self): # "xmlcharrefreplace" complains about a non-exception passed in self.assertRaises( TypeError, codecs.xmlcharrefreplace_errors, 42 ) # "xmlcharrefreplace" complains about the wrong exception types self.assertRaises( TypeError, codecs.xmlcharrefreplace_errors, UnicodeError("ouch") ) # "xmlcharrefreplace" can only be used for encoding self.assertRaises( TypeError, codecs.xmlcharrefreplace_errors, UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") ) self.assertRaises( TypeError, codecs.xmlcharrefreplace_errors, UnicodeTranslateError("\u3042", 0, 1, "ouch") ) # Use the correct exception cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042) s = "".join(chr(c) for c in cs) self.assertEqual( codecs.xmlcharrefreplace_errors( UnicodeEncodeError("ascii", s, 0, len(s), "ouch") ), ("".join("&#%d;" % ord(c) for c in s), len(s)) )
def test_badregistercall(self): # enhance coverage of: # Modules/_codecsmodule.c::register_error() # Python/codecs.c::PyCodec_RegisterError() self.assertRaises(TypeError, codecs.register_error, 42) self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
def test_unknownhandler(self): # enhance coverage of: # Modules/_codecsmodule.c::lookup_error() self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
def test_xmlcharrefvalues(self): # enhance coverage of: # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors() # and inline implementations v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000) if sys.maxunicode>=100000: v += (100000, 500000, 1000000) s = "".join([chr(x) for x in v]) codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) for enc in ("ascii", "iso-8859-15"): for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): s.encode(enc, err)
def test_translatehelper(self): # enhance coverage of: # Objects/unicodeobject.c::unicode_encode_call_errorhandler() # and callers # (Unfortunately the errors argument is not directly accessible # from Python, so we can't test that much) class D(dict): def __getitem__(self, key): raise ValueError #self.assertRaises(ValueError, "\xff".translate, D()) self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1}) self.assertRaises(TypeError, "\xff".translate, {0xff: ()})
def test_uninamereplace(self): # We're using the names from the unicode database this time, # and we're doing "syntax highlighting" here, i.e. we include # the replaced text in ANSI escape sequences. For this it is # useful that the error handler is not called for every single # unencodable character, but for a complete sequence of # unencodable characters, otherwise we would output many # unnecessary escape sequences. def uninamereplace(exc): if not isinstance(exc, UnicodeEncodeError): raise TypeError("don't know how to handle %r" % exc) l = [] for c in exc.object[exc.start:exc.end]: l.append(unicodedata.name(c, u"0x%x" % ord(c))) return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end) codecs.register_error( "test.uninamereplace", uninamereplace) sin = u"\xac\u1234\u20ac\u8000" sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
def test_badandgoodxmlcharrefreplaceexceptions(self):
    handler = codecs.xmlcharrefreplace_errors

    # "xmlcharrefreplace" must reject, with TypeError, each of:
    # a non-exception, the wrong exception type, and the decode and
    # translate errors (it can only be used for encoding).
    rejected = (
        42,
        UnicodeError("ouch"),
        UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch"),
        UnicodeTranslateError(u"\u3042", 0, 1, "ouch"),
    )
    for bad in rejected:
        self.assertRaises(TypeError, handler, bad)

    # Use the correct exception.  The first group (which includes two
    # lone surrogates) is built with unichr(); the second group is
    # appended as \U escapes for the same code points listed below.
    first_points = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0xdfff, 0xd800)
    second_points = (99999, 100000, 999999, 1000000)
    text = u"".join([unichr(point) for point in first_points])
    text = text + u"\U0001869f\U000186a0\U000f423f\U000f4240"
    all_points = first_points + second_points

    # The error range covers only `text`, not the surrounding "a" and "b".
    stop = 1 + len(text)
    error = UnicodeEncodeError("ascii", u"a" + text + u"b", 1, stop, "ouch")
    expected = (u"".join([u"&#%d;" % point for point in all_points]), stop)
    self.assertEqual(handler(error), expected)
def test_xmlcharrefvalues(self):
    # enhance coverage of:
    # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
    # and inline implementations
    code_points = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
    # Only add the larger values when the build can represent them.
    if sys.maxunicode >= 100000:
        code_points = code_points + (100000, 500000, 1000000)
    text = u"".join(map(unichr, code_points))
    # Register the builtin handler under a second name so it is also
    # exercised through the registered-handler lookup.
    codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
    for encoding in ("ascii", "iso-8859-15"):
        # Only checks that encoding completes without raising.
        text.encode(encoding, "xmlcharrefreplace")
        text.encode(encoding, "test.xmlcharrefreplace")
def test_translatehelper(self):
    # enhance coverage of:
    # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
    # and callers
    # (Unfortunately the errors argument is not directly accessible
    # from Python, so we can't test that much)
    class RaisingDict(dict):
        # Every lookup fails with ValueError.
        def __getitem__(self, key):
            raise ValueError

    translate = u"\xff".translate
    # The ValueError raised by the mapping lookup must propagate.
    self.assertRaises(ValueError, translate, RaisingDict())
    # Mapping values that are out of range or of the wrong type.
    self.assertRaises(TypeError, translate, {0xff: sys.maxunicode+1})
    self.assertRaises(TypeError, translate, {0xff: ()})
def test_xmlcharrefvalues(self):
    # enhance coverage of:
    # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
    # and inline implementations
    code_points = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
    # SIZEOF_WCHAR_T is defined outside this block; the larger values are
    # only added when it equals 4.
    if SIZEOF_WCHAR_T == 4:
        code_points = code_points + (100000, 500000, 1000000)
    text = "".join(map(chr, code_points))
    # Register the builtin handler under a second name so it is also
    # exercised through the registered-handler lookup.
    codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
    for encoding in ("ascii", "iso-8859-15"):
        # Only checks that encoding completes without raising.
        text.encode(encoding, "xmlcharrefreplace")
        text.encode(encoding, "test.xmlcharrefreplace")
def test_badandgoodxmlcharrefreplaceexceptions(self):
    handler = codecs.xmlcharrefreplace_errors

    # "xmlcharrefreplace" must reject, with TypeError, each of:
    # a non-exception, the wrong exception type, and the decode and
    # translate errors (it can only be used for encoding).
    rejected = (
        42,
        UnicodeError("ouch"),
        UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch"),
        UnicodeTranslateError(u"\u3042", 0, 1, "ouch"),
    )
    for bad in rejected:
        self.assertRaises(TypeError, handler, bad)

    # Use the correct exception: every character of the marked range is
    # turned into a decimal character reference.
    cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
    s = "".join([unichr(point) for point in cs])
    error = UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
    expected = (u"".join([u"&#%d;" % ord(ch) for ch in s]), len(s))
    self.assertEqual(handler(error), expected)