我们从 Python 开源项目中提取了以下 2 个代码示例,用于说明如何使用 `codecs.BOM_UTF32`。
def remove_bom(filename):
    """Open *filename* in binary mode and skip past any leading Unicode BOM.

    Args:
        filename: Path of the file to open.

    Returns:
        A binary file object positioned just after the byte order mark (or
        at offset 0 when the file has none), or ``None`` when *filename* is
        not an existing regular file. The caller must close the file.
    """
    if not os.path.isfile(filename):
        return None

    # Both byte orders are listed explicitly: codecs.BOM_UTF16 / BOM_UTF32
    # only alias the *native* order, so a file produced on a machine of the
    # other endianness would otherwise keep its BOM.  UTF-32-LE has to be
    # tested before UTF-16-LE because FF FE is a prefix of FF FE 00 00.
    boms = (
        codecs.BOM_UTF32_LE,
        codecs.BOM_UTF32_BE,
        codecs.BOM_UTF8,
        codecs.BOM_UTF16_LE,
        codecs.BOM_UTF16_BE,
    )

    f = open(filename, 'rb')
    try:
        header = f.read(4)  # the longest BOM is 4 bytes
        bom_len = next(
            (len(bom) for bom in boms if header.startswith(bom)), 0
        )
        # Position the stream on the first byte after the BOM.
        f.seek(bom_len)
    except Exception:
        # Do not leak the handle if reading/seeking fails.
        f.close()
        raise
    return f
def get_decoded_header(header, value):
    """Decode the first chunk of an encoded mail header value to text.

    Only the first ``(data, charset)`` pair returned by ``decode_header``
    is considered (presumably ``email.header.decode_header`` -- confirm
    against the file's imports).

    Args:
        header: Header name. Not used by this function; kept so existing
            callers keep working.
        value: Raw header value handed to ``decode_header``.

    Returns:
        * the decoded ``str`` when the first chunk is bytes and its charset
          can be applied (a leading byte order mark is dropped for the UTF
          encodings);
        * ``str(chunk)`` -- i.e. the Python bytes repr -- when the charset
          is ``None``, or is ``'unknown-8bit'`` and none of the UTF
          fallbacks decode it, so the data is not mangled;
        * ``None`` when the first chunk is not bytes.

    Raises:
        UnicodeDecodeError: the chunk is not valid in its declared charset.
        LookupError: the declared charset is unknown to Python.
    """
    subject, encoding = decode_header(value)[0]
    subject = subject.strip()  # extra whitespace will mess up encoding

    if not isinstance(subject, bytes):
        # NOTE(review): non-bytes chunks have always fallen through to an
        # implicit None here; kept as-is -- confirm callers rely on that.
        return None

    # A byte order mark is only meaningful at the very start of the data.
    # Removing the BOM byte sequence wherever it occurs corrupts payloads
    # (e.g. FF FE straddling two UTF-16 code units), so rely on the codecs
    # instead: 'utf-8-sig' drops a leading UTF-8 BOM, and the 'utf-16' /
    # 'utf-32' codecs consume a leading BOM of either byte order.
    if encoding == 'utf-8':
        return subject.decode('utf-8-sig')
    if encoding in ('utf-16', 'utf-32'):
        return subject.decode(encoding)

    if encoding == 'unknown-8bit':
        # Try the UTF family in turn; 32 before 16 so it raises errors.
        for candidate in ('utf-8-sig', 'utf-32', 'utf-16'):
            try:
                return subject.decode(candidate)
            except UnicodeDecodeError:
                continue
        # None of the candidates worked: hand back the undecoded bytes.
        return str(subject)

    # Better to have the analyst decode themselves than to provide a
    # mangled string.
    if encoding is None:
        return str(subject)

    return subject.decode(encoding)