我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用chardet.universaldetector.UniversalDetector()。
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    Each item of ``lines`` is fed to a ``UniversalDetector``. Feeding stops
    as soon as the detector reports that it is done, so the remainder of
    ``lines`` is not consumed in that case.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when an
        encoding was detected, otherwise ``'<name>: no result'``.
    :rtype: str
    """
    u = UniversalDetector()
    for line in lines:
        u.feed(line)
        # The detector has already reached a conclusion (e.g. it saw a BOM);
        # reading further input would only waste time.
        if u.done:
            break
    u.close()
    result = u.result
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)
def detect_convert(filename):
    """
    Detect the encoding of ``filename``, decode it and convert its text.

    The file is read in binary mode. Lines are fed to a ``UniversalDetector``
    until it reports that it is done (or the file ends); the rest of the
    file is then read in one go. The complete byte content is decoded once
    with the detected encoding, falling back to ``args.fallback_enc`` when
    nothing was detected. Undecodable bytes are ignored.

    Relies on the module-level names ``args`` (``fallback_enc``, ``locale``,
    ``locale_only``) and ``convertfunc``.

    :param filename: Path of the file to read.
    :returns: The result of calling the function returned by
        ``convertfunc(text, args.locale, args.locale_only)`` on the decoded
        text.
    """
    detector = UniversalDetector()
    detector.reset()
    # Collect the raw chunks and join once; repeated ``bytes +=`` is quadratic.
    chunks = []
    with open(filename, 'rb') as f:
        for line in f:
            detector.feed(line)
            chunks.append(line)
            if detector.done:
                break
        detector.close()
        # Whatever the detector did not need is still required for conversion.
        chunks.append(f.read())

    encoding = detector.result['encoding'] or args.fallback_enc
    # Decode the whole content in a single call. Decoding the detected part
    # and the remainder separately can split a multi-byte sequence at the
    # boundary (binary line splitting is not aligned to code units in e.g.
    # UTF-16), and ``errors='ignore'`` would then silently drop or garble
    # characters.
    text = b''.join(chunks).decode(encoding, errors='ignore')

    cf = convertfunc(text, args.locale, args.locale_only)
    return cf(text)
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    Each item of ``lines`` is fed to a ``UniversalDetector``. Feeding stops
    as soon as the detector reports that it is done, so the remainder of
    ``lines`` is not consumed in that case.

    When the module-level flag ``PY2`` is true, ``name`` is decoded with the
    file system encoding (undecodable bytes ignored) before it is formatted
    into the result.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when an
        encoding was detected, otherwise ``'<name>: no result'``.
    :rtype: str
    """
    u = UniversalDetector()
    for line in lines:
        u.feed(line)
        # The detector has already reached a conclusion (e.g. it saw a BOM);
        # reading further input would only waste time.
        if u.done:
            break
    u.close()
    result = u.result
    if PY2:
        name = name.decode(sys.getfilesystemencoding(), 'ignore')
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)
def detect(view, file_name, cnt):
    """
    Detect the encoding of ``file_name`` and report it for ``view``.

    Nothing happens when ``file_name`` is empty, does not exist or is an
    empty file. If ``encoding_cache`` yields an encoding for the file,
    ``init_encoding_vars`` is scheduled with it and detection is skipped.
    Otherwise a status message is set on the view, the file is fed line by
    line (with ``\\r`` bytes removed) to a ``UniversalDetector`` and
    ``check_encoding`` is scheduled with the upper-cased encoding (or
    ``None``) and the confidence. All calls touching the view are deferred
    through ``sublime.set_timeout``.

    :param view: The view the detection result is reported for.
    :param file_name: Path of the file to inspect.
    :param cnt: Maximum number of lines to feed to the detector. The counter
        is decremented per line and compared against 0, so a value of 0 or
        less never stops the loop.
    """
    if not file_name or not os.path.exists(file_name) or os.path.getsize(file_name) == 0:
        return
    # NOTE(review): presumably ``encoding_cache.pop`` returns a falsy value
    # when the file is not cached -- confirm against its definition.
    encoding = encoding_cache.pop(file_name)
    if encoding:
        sublime.set_timeout(lambda: init_encoding_vars(view, encoding, detect_on_fail=True), 0)
        return
    sublime.set_timeout(lambda: view.set_status('origin_encoding', 'Detecting encoding, please wait...'), 0)
    detector = UniversalDetector()
    # The context manager guarantees the file is closed even if reading or
    # feeding raises.
    with open(file_name, 'rb') as fp:
        for line in fp:
            # cut MS-Windows CR code
            line = line.replace(b'\r', b'')
            detector.feed(line)
            cnt -= 1
            if detector.done or cnt == 0:
                break
    detector.close()
    encoding = detector.result['encoding']
    if encoding:
        encoding = encoding.upper()
    confidence = detector.result['confidence']
    sublime.set_timeout(lambda: check_encoding(view, encoding, confidence), 0)
def detect(self, begin_line, end_line):
    """
    Detect the encoding of a range of lines and convert it to UTF-8.

    The text between the start of line ``begin_line + 1`` and the character
    before the start of line ``end_line`` is fetched through
    ``self.get_text`` and passed to a ``UniversalDetector``. The
    ``convert_text_to_utf8`` command is run on the view only when an
    encoding was detected, its confidence is at least
    ``SETTINGS['confidence']`` and the encoding is not in
    ``SKIP_ENCODINGS``.

    :param begin_line: First line of the range (anything ``int()`` accepts).
    :param end_line: Last line of the range (anything ``int()`` accepts).
    """
    begin_line = int(begin_line)
    end_line = int(end_line)
    begin_point = self.view.text_point(begin_line + 1, 0)
    end_point = self.view.text_point(end_line, 0) - 1
    region = sublime.Region(begin_point, end_point)
    content = self.get_text(region)
    if not content:
        return
    detector = UniversalDetector()
    detector.feed(content)
    detector.close()
    encoding = detector.result['encoding']
    confidence = detector.result['confidence']
    # The detector reports ``None`` when it cannot determine an encoding;
    # there is nothing to convert then, and ``None.upper()`` would raise.
    if not encoding:
        return
    encoding = encoding.upper()
    if confidence < SETTINGS['confidence'] or encoding in SKIP_ENCODINGS:
        return
    self.view.run_command('convert_text_to_utf8', {'begin_line': begin_line, 'end_line': end_line, 'encoding': encoding})