我们从Python开源项目中，提取了以下7个代码示例，用于说明如何使用codecs.html（Python标准库codecs模块的文档页面）。
def get_special_case_params():
    """Return one randomly chosen command-line argument list.

    The candidates are a fixed table of hand-written argument lists: each
    starts with two file paths and may be followed by option flags and
    their values.

    :return: one of the argument lists from the table below
    :rtype: list
    """
    # The windows and unix specific tests should be tested on both unix and
    # Windows to detect crashes.
    special_cases = [
        [u"noexist", u"noexist"],
        [u"tests/ascii/ex1", u"noexist"],
        [u"noexist", u"tests/ascii/ex1"],
        [u"tests/ascii/ex1", u"tests/ascii/ex1", "--outfile", "/dev/null"],
        [u"tests/ascii/ex1", u"tests/ascii/ex2"],
        [u"tests/utf_8/ex3", u"tests/utf_8/ex4"],
        [
            u"tests/utf_8/ex3", u"tests/utf_8/ex4",
            u"--oldfile-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
        ],
        [
            u"tests/utf_8/ex3", u"tests/utf_8/ex4",
            u"--oldfile-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
            u"--output-encoding", u"\"utf-8\"",
        ],
        [u"tests/ascii/ex5", u"tests/ascii/ex6"],
        [u"tests/ascii/ex7", u"tests/ascii/ex8"],
        [u"tests/ascii/a.json", u"tests/ascii/b.json"],
        [
            u"tests/ascii/a.json", u"tests/ascii/b.json",
            u"--push-delimiters", u"\"{\"", u"\"[\"",
            u"--pop-delimiters", u"\"}\"", u"\"]\"",
            u"--include-delimiters",
        ],
        # NOTE(review): the "???" delimiter looks like non-ASCII text that was
        # lost to an encoding problem (the next case spells the delimiter out
        # as \u escapes) -- confirm against the upstream test file.
        [
            u"tests/utf_8/fancy1", u"tests/utf_8/fancy2",
            u"--delimiters", u"???",
            u"--include-delimiters",
            u"--parameters-encoding", u"\"utf-8\"",
            u"--output-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
            u"--oldfile-encoding", u"\"utf-8\"",
        ],
        [
            u"tests/utf_8/fancy1", u"tests/utf_8/fancy2",
            u"--delimiters", u"\"\\u65e5\\u672c\\u56fd\"",
            u"--include-delimiters",
            u"--parameters-encoding", u"\"utf-8\"",
            u"--output-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
            u"--oldfile-encoding", u"\"utf-8\"",
        ],
        [
            u"tests/utf_8/this-is-encoded-in-utf-8",
            u"tests/utf_16/this-is-encoded-in-utf-16",
            u"--output-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-16\"",
            u"--oldfile-encoding", u"\"utf-8\"",
            u"--enable-mark",
        ],
        [u"tests/ascii/a.html", u"tests/ascii/b.html", u"-m", u"html"],
    ]
    # random.choice picks uniformly without hand-rolled index arithmetic.
    return random.choice(special_cases)
def error_handler(error):
    """Error handler for surrogateescape decoding.

    Should be used with an ASCII-compatible encoding (e.g., 'latin-1' or
    'utf-8'). Replaces any invalid byte sequences with surrogate code points.

    As specified in
    https://docs.python.org/2/library/codecs.html#codecs.register_error.

    :param error: the exception instance passed in by the codec machinery
    :return: ``(replacement_text, resume_position)`` where each offending
        byte ``b`` is mapped to the code point ``0xDC00 + b`` and the resume
        position is ``error.end``
    :raises: re-raises ``error`` itself when it is not a
        ``UnicodeDecodeError`` or when any offending byte is below 128
    """
    # We can't use this with UnicodeEncodeError; the UTF-8 encoder doesn't raise
    # an error for surrogates. Instead, use encode.
    if not isinstance(error, UnicodeDecodeError):
        raise error

    # bytearray yields integers on both Python 2 (where indexing a str gives a
    # 1-char str) and Python 3 (where indexing bytes already gives an int).
    bad_bytes = bytearray(error.object[error.start:error.end])

    # Only bytes >= 128 are mapped to surrogates; anything lower is rejected.
    if any(byte < 128 for byte in bad_bytes):
        raise error

    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3

    replacement = u''.join(to_char(0xdc00 + byte) for byte in bad_bytes)
    return replacement, error.end
def from_path(path, manager=None, allow_nested=False, citation_clearing=True, encoding='utf-8', **kwargs):
    """Load a BEL graph from a file on disk.

    Opens the file and hands its lines to :func:`from_lines`; all parsing is
    delegated there.

    :param str path: A file path; a leading ``~`` is expanded to the user's home directory
    :param manager: database connection string to cache, pre-built :class:`Manager`, or None to use default cache
    :type manager: None or str or pybel.manager.Manager
    :param bool allow_nested: if true, turn off nested statement failures
    :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
                                   Delegated to :class:`pybel.parser.ControlParser`
    :param str encoding: the encoding to use when reading this file. Is passed to :code:`codecs.open`.
                         See the python `docs <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ for a
                         list of standard encodings. For example, files starting with a UTF-8 BOM should use
                         :code:`utf_8_sig`
    :param dict kwargs: keyword arguments to :func:`pybel.io.line_utils.parse_lines`
    :rtype: BELGraph
    """
    log.info('Loading from path: %s', path)

    expanded_path = os.path.expanduser(path)
    with codecs.open(expanded_path, encoding=encoding) as bel_file:
        graph = from_lines(
            lines=bel_file,
            manager=manager,
            allow_nested=allow_nested,
            citation_clearing=citation_clearing,
            **kwargs
        )
    return graph
def run(self, edit, encoding, file_name, need_codecs):
    """Fill this command's view with instructions for a failed conversion.

    The view is renamed, marked as scratch, word-wrapped, filled with a
    message describing the problem, made read-only and then focused.

    :param edit: edit token forwarded to ``self.view.insert``
    :param encoding: encoding name shown in the message
    :param file_name: file name shown in the message
    :param need_codecs: when true, the message reports missing codecs and
        either points to a Codecs plugin download or asks for debug
        information; when false, the encoding is reported as unsupported
    """
    view = self.view
    view.set_name('ConvertToUTF8 Instructions')
    view.set_scratch(True)
    view.settings().set("word_wrap", True)

    pieces = ['File: {0}\nEncoding: {1}\nError: '.format(file_name, encoding)]
    if not need_codecs:
        pieces.append('Unsupported encoding, see http://docs.python.org/library/codecs.html#standard-encodings\n\nPlease try other tools such as iconv.\n')
    else:
        pieces.append('Codecs missing\n\n')
        branch = self.get_branch(sublime.platform(), sublime.arch())
        if not branch:
            # No branch for this platform/arch pair: ask the user to report
            # the environment instead.
            import platform
            pieces.append(
                'Please send the following information to sunlxy (at) yahoo.com:\n====== Debug Information ======\nVersion: {0}-{1}\nPlatform: {2}\nPath: {3}\nEncoding: {4}\n'.format(
                    sublime.version(), sublime.arch(), platform.platform(), sys.path, encoding
                )
            )
        else:
            ver = '33' if ST3 else '26'
            pieces.append(
                'Please install Codecs{0} plugin (https://github.com/seanliang/Codecs{0}/tree/{1}).\n'.format(ver, branch)
            )

    view.insert(edit, 0, ''.join(pieces))
    view.set_read_only(True)
    view.window().focus_view(view)
def encode(self, input, errors='strict'):
    """Encode ``input`` with ``self.base_encoding``, patching failures by hand.

    Whenever the base encoding cannot encode a character, ``self.error`` is
    called with the ``UnicodeEncodeError`` and its replacement is spliced
    directly into the output.

    :param input: the text to encode
    :param errors: must be ``'strict'``; anything else trips the assertion
    :return: ``(encoded_output, len(input))``
    """
    # Kept as an assert so callers see the same exception type as before.
    assert errors == 'strict'

    # Ideally this would be a single call,
    #     codecs.encode(input, self.base_encoding, self.name)
    # relying on the error handling to replace the unencodable Unicode
    # characters with our extended byte sequences.
    #
    # However, there seems to be a design bug in Python (probably intentional):
    # the error handler for encoding is supposed to return a **Unicode** character,
    # that then needs to be encodable itself... Ugh.
    #
    # So we implement what codecs.encode() should have been doing: which is expect
    # error handler to return bytes() to be added to the output.
    #
    # This seems to have been fixed in Python 3.3. We should try using that and
    # use fallback only if that failed.
    # https://docs.python.org/3.3/library/codecs.html#codecs.register_error
    length = len(input)

    # Collect pieces and join once instead of growing a bytes object with +=.
    chunks = []
    remaining = input
    while remaining:
        try:
            chunks.append(codecs.encode(remaining, self.base_encoding))
        except UnicodeEncodeError as e:
            # Convert the correct part
            chunks.append(codecs.encode(remaining[:e.start], self.base_encoding))
            # e.start and the returned position are relative to ``remaining``,
            # because that is the string handed to codecs.encode above.
            # NOTE(review): assumes self.error returns a position past
            # e.start; otherwise this loop makes no progress -- confirm.
            replacement, pos = self.error(e)
            chunks.append(replacement)
            remaining = remaining[pos:]
        else:
            # All converted
            break
    return b''.join(chunks), length
def pocketsphinx(self):
    """Get pocketsphinx speech to text settings."""
    # Look up the stored section, falling back to an empty dict when the
    # 'pocketsphinx' key is absent.
    settings = self._data.get('pocketsphinx', {})
    # Return a shallow copy rather than the stored object itself, so adding or
    # removing keys on the result does not touch the stored settings.
    # (Presumably the stored value is a plain dict -- verify against the loader.)
    return settings.copy()