我们从 Python 开源项目中提取了以下 50 个代码示例，用于说明如何使用 codecs.utf_8_decode()。
def _buffer_decode(self, input, errors, final):
    """Decode a chunk of UTF-8 bytes, skipping a BOM at the very start.

    ``self.first`` is truthy until the beginning of the data has been
    classified as "starts with a BOM" or "does not".

    Returns a ``(text, consumed)`` tuple, where ``consumed`` counts the
    BOM bytes when one was stripped.
    """
    if not self.first:
        # Start of data already classified: plain UTF-8 from here on.
        return codecs.utf_8_decode(input, errors, final)

    if len(input) < 3:
        if codecs.BOM_UTF8.startswith(input):
            # Could still turn out to be a BOM once more bytes arrive;
            # consume nothing so the same bytes are offered again.
            return ("", 0)
        self.first = 0
        return codecs.utf_8_decode(input, errors, final)

    self.first = 0
    if input[:3] != codecs.BOM_UTF8:
        return codecs.utf_8_decode(input, errors, final)

    text, used = codecs.utf_8_decode(input[3:], errors, final)
    return (text, used + 3)
def __init__(self, stream):
    """Set up reader state for *stream* and prime the first character.

    The reader is configured for UTF-8 (``raw_decode`` is
    ``codecs.utf_8_decode``).  ``name`` is taken from ``stream.name`` when
    the stream has one, otherwise ``'<file>'``.  Raw data is pulled with
    ``self.update_raw()`` until at least two items are available or EOF
    is reached, then ``self.update(1)`` is called.
    """
    # Position bookkeeping.
    self.stream_pointer = 0
    self.pointer = 0
    self.full_pointer = 0
    self.index = 0
    self.line = 0
    self.column = 0

    # Buffers.  The u'' literal replaces unicode(''), which only exists on
    # Python 2; the literal is valid on both Python 2 and Python 3.3+.
    self.buffer = ''
    self.full_buffer = u''
    self.raw_buffer = None

    # Decoding configuration.
    self.raw_decode = codecs.utf_8_decode
    self.encoding = 'utf-8'

    # Source stream.
    self.stream = stream
    self.name = getattr(stream, 'name', '<file>')
    self.eof = False

    while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
        self.update_raw()
    self.update(1)
def test_decode_unicode(self): # Most decoders don't accept unicode input decoders = [ codecs.utf_7_decode, codecs.utf_8_decode, codecs.utf_16_le_decode, codecs.utf_16_be_decode, codecs.utf_16_ex_decode, codecs.utf_32_decode, codecs.utf_32_le_decode, codecs.utf_32_be_decode, codecs.utf_32_ex_decode, codecs.latin_1_decode, codecs.ascii_decode, codecs.charmap_decode, ] if hasattr(codecs, "mbcs_decode"): decoders.append(codecs.mbcs_decode) for decoder in decoders: self.assertRaises(TypeError, decoder, "xxx")
def determine_encoding(self):
    # type: () -> None
    """Pick the raw decoder from a leading UTF-16 BOM, defaulting to UTF-8.

    Raw data is pulled with ``self.update_raw()`` until at least two items
    are buffered or EOF is hit.  The decoder and encoding are only set when
    the raw buffer is a ``binary_type`` instance.  ``self.update(1)`` is
    always called at the end.
    """
    while not (self.eof or (self.raw_buffer is not None and len(self.raw_buffer) >= 2)):
        self.update_raw()

    if isinstance(self.raw_buffer, binary_type):
        bom_table = (
            (codecs.BOM_UTF16_LE, codecs.utf_16_le_decode, 'utf-16-le'),
            (codecs.BOM_UTF16_BE, codecs.utf_16_be_decode, 'utf-16-be'),
        )
        for bom, decoder, label in bom_table:
            if self.raw_buffer.startswith(bom):
                break
        else:
            # No UTF-16 BOM matched: fall back to UTF-8.
            decoder, label = codecs.utf_8_decode, 'utf-8'
        self.raw_decode = decoder  # type: ignore
        self.encoding = label

    self.update(1)

# 4 if 32 bit unicode supported, 2 e.g. on MacOS (issue 56)
def send_mail(json_string):
    """Mail the rendered report back to the sender named in *json_string*.

    *json_string* is parsed as JSON.  The recipient address is taken from
    ``headers['From']`` by stripping everything up to the last ``<`` and
    everything from the first ``>`` onward.  The subject comes from
    ``common_functions.extract_subject`` and the body is
    ``json2string(json_string)`` sent as UTF-8 text.

    Progress is traced to syslog when ``BB_DEBUG`` is truthy.  Exceptions
    from JSON parsing or from SMTP propagate to the caller; the SMTP
    session is closed either way.
    """
    # Extract sender and subject
    json_blob = json.loads(json_string)
    sender = json_blob['headers']['From']
    # Raw strings: '\<' and '\>' are invalid escapes in a normal literal
    # (a warning on current Python); the pattern text itself is unchanged.
    sender = re.sub(r'^.*\<', '', sender)
    recipient = re.sub(r'\>.*$', '', sender)
    if BB_DEBUG:
        syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) for '+recipient)

    subj = common_functions.extract_subject(json_blob['headers'])
    if BB_DEBUG:
        syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) subject '+subj)

    # The subject is round-tripped through UTF-8 before being stringified.
    # NOTE(review): on Python 2, str() of non-ASCII text would raise --
    # confirm which interpreter this runs under.
    subject = 'Extracted IOCs for: '+str(codecs.utf_8_decode(subj.encode('utf8'))[0])
    if BB_DEBUG:
        syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) subject '+subject)

    msg = MIMEText(json2string(json_string), _charset='utf-8')
    msg['Subject'] = subject
    msg['From'] = EMAIL_FROM
    msg['To'] = recipient
    if BB_DEBUG:
        syslog.syslog(syslog.LOG_ERR, 'Invoked send_mail(json_string) msg composed ')

    server = smtplib.SMTP(EMAIL_SERVER)
    try:
        server.sendmail(EMAIL_FROM, recipient, msg.as_string())
    finally:
        # Always end the session so the connection is not leaked.  A
        # server that already dropped the link must not mask the original
        # error from sendmail().
        try:
            server.quit()
        except smtplib.SMTPServerDisconnected:
            pass
    if BB_DEBUG:
        syslog.syslog(syslog.LOG_ERR, 'Finished')
def decode(input, errors='strict'):
    """Decode *input* as one complete UTF-8 byte string.

    The data is passed to ``codecs.utf_8_decode`` with ``final=True``.
    Returns the ``(text, consumed)`` pair.
    """
    is_final_chunk = True
    text, consumed = codecs.utf_8_decode(input, errors, is_final_chunk)
    return (text, consumed)
def decode(input, errors='strict'):
    """Decode a complete UTF-8 byte string, dropping a leading BOM if present.

    Returns ``(text, consumed)``; ``consumed`` includes the three BOM bytes
    when a BOM was stripped.
    """
    has_bom = input[:3] == codecs.BOM_UTF8
    payload = input[3:] if has_bom else input
    text, used = codecs.utf_8_decode(payload, errors, True)
    if has_bom:
        return (text, used + 3)
    return (text, used)
def decode(self, input, errors='strict'):
    """Decode the start of a UTF-8 stream, skipping a leading BOM.

    When fewer than three bytes are available and they could still be the
    start of a BOM, nothing is consumed.  Otherwise ``self.decode`` is
    rebound to ``codecs.utf_8_decode``, so this check only runs until the
    start of the stream has been classified.

    Returns ``(text, consumed)``.
    """
    if len(input) >= 3:
        if input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            text, used = codecs.utf_8_decode(input[3:], errors)
            return (text, used + 3)
    elif codecs.BOM_UTF8.startswith(input):
        # not enough data to decide if this is a BOM
        # => try again on the next call
        return ("", 0)

    # no BOM present
    self.decode = codecs.utf_8_decode
    return codecs.utf_8_decode(input, errors)

### encodings module API
def _buffer_decode(self, input, errors, final):
    """Decode buffered UTF-8 bytes, discarding a BOM at the start of the data.

    ``self.first`` stays truthy until enough bytes have been seen to tell
    whether the data opens with a BOM; it is then set to ``None``.

    Returns ``(text, consumed)``, counting stripped BOM bytes as consumed.
    """
    if self.first:
        undecided = len(input) < 3 and codecs.BOM_UTF8.startswith(input)
        if undecided:
            # Too few bytes to rule a BOM in or out: consume nothing and
            # look at the same bytes again on the next call.
            return (u"", 0)
        self.first = None
        if input[:3] == codecs.BOM_UTF8:
            text, used = codecs.utf_8_decode(input[3:], errors, final)
            return (text, used + 3)
    return codecs.utf_8_decode(input, errors, final)
def decode(self, input, errors='strict'):
    """Decode the opening bytes of a UTF-8 stream, ignoring a leading BOM.

    If the input is shorter than three bytes but could still be a BOM
    prefix, nothing is consumed.  Otherwise ``self.decode`` is rebound to
    ``codecs.utf_8_decode`` before decoding.

    Returns ``(text, consumed)``.
    """
    if len(input) < 3:
        if codecs.BOM_UTF8.startswith(input):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            return (u"", 0)
        starts_with_bom = False
    else:
        starts_with_bom = input[:3] == codecs.BOM_UTF8

    # From here on the BOM question is settled.
    self.decode = codecs.utf_8_decode
    if not starts_with_bom:
        return codecs.utf_8_decode(input, errors)
    text, used = codecs.utf_8_decode(input[3:], errors)
    return (text, used + 3)

### encodings module API
def determine_encoding(self):
    """Choose the raw decoder from a leading UTF-16 BOM, defaulting to UTF-8.

    Raw data is pulled with ``self.update_raw()`` until at least two items
    are buffered or EOF is reached.  When the raw buffer is not text,
    ``self.raw_decode`` and ``self.encoding`` are set from the BOM.
    ``self.update(1)`` is always called at the end.
    """
    # `unicode` only exists on Python 2.  Resolve the text type once so
    # the check below behaves the same on Python 2 and works on Python 3.
    try:
        text_type = unicode  # noqa: F821 -- Python 2 builtin
    except NameError:
        text_type = str

    # A None buffer means "nothing read yet": keep reading instead of
    # failing in len().
    while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
        self.update_raw()

    raw = self.raw_buffer
    if raw is not None and not isinstance(raw, text_type):
        if raw.startswith(codecs.BOM_UTF16_LE):
            self.raw_decode = codecs.utf_16_le_decode
            self.encoding = 'utf-16-le'
        elif raw.startswith(codecs.BOM_UTF16_BE):
            self.raw_decode = codecs.utf_16_be_decode
            self.encoding = 'utf-16-be'
        else:
            self.raw_decode = codecs.utf_8_decode
            self.encoding = 'utf-8'
    self.update(1)
def determine_encoding(self):
    """Select the raw decoder from a UTF-16 BOM, falling back to UTF-8.

    Raw data is read via ``self.update_raw()`` until two or more items are
    buffered or EOF is reached.  The decoder and encoding are only set when
    the raw buffer is ``bytes``.  ``self.update(1)`` is always called last.
    """
    while not self.eof:
        if self.raw_buffer is not None and len(self.raw_buffer) >= 2:
            break
        self.update_raw()

    if isinstance(self.raw_buffer, bytes):
        head = self.raw_buffer
        if head.startswith(codecs.BOM_UTF16_LE):
            codec = (codecs.utf_16_le_decode, 'utf-16-le')
        elif head.startswith(codecs.BOM_UTF16_BE):
            codec = (codecs.utf_16_be_decode, 'utf-16-be')
        else:
            codec = (codecs.utf_8_decode, 'utf-8')
        self.raw_decode, self.encoding = codec

    self.update(1)