我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用charade.universaldetector.UniversalDetector()。
def detect(aBuf): if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or (version_info >= (3, 0) and not isinstance(aBuf, bytes))): raise ValueError('Expected a bytes object, not a unicode object') from . import universaldetector u = universaldetector.UniversalDetector() u.reset() u.feed(aBuf) u.close() return u.result
def _description_of(path): """Return a string describing the probable encoding of a file.""" from charade.universaldetector import UniversalDetector u = UniversalDetector() for line in open(path, 'rb'): u.feed(line) u.close() result = u.result if result['encoding']: return '%s: %s with confidence %s' % (path, result['encoding'], result['confidence']) else: return '%s: no result' % path
def detectEncoding(self, parseMeta=True, chardet=True): # First look for a BOM # This will also read past the BOM if present encoding = self.detectBOM() confidence = "certain" # If there is no BOM need to look for meta elements with encoding # information if encoding is None and parseMeta: encoding = self.detectEncodingMeta() confidence = "tentative" # Guess with chardet, if avaliable if encoding is None and chardet: confidence = "tentative" try: try: from charade.universaldetector import UniversalDetector except ImportError: from chardet.universaldetector import UniversalDetector buffers = [] detector = UniversalDetector() while not detector.done: buffer = self.rawStream.read(self.numBytesChardet) assert isinstance(buffer, bytes) if not buffer: break buffers.append(buffer) detector.feed(buffer) detector.close() encoding = detector.result['encoding'] self.rawStream.seek(0) except ImportError: pass # If all else fails use the default encoding if encoding is None: confidence = "tentative" encoding = self.defaultEncoding # Substitute for equivalent encodings: encodingSub = {"iso-8859-1": "windows-1252"} if encoding.lower() in encodingSub: encoding = encodingSub[encoding.lower()] return encoding, confidence