Python codecs 模块,BOM_UTF32 实例源码

我们从 Python 开源项目中提取了以下 2 个代码示例,用于说明如何使用 codecs.BOM_UTF32。

项目:pmi_sprint_reporter    作者:cumc-dbmi    | 项目源码 | 文件源码
def remove_bom(filename):
    """Open *filename* in binary mode and skip any leading Unicode BOM.

    Args:
        filename: Path of the file to open.

    Returns:
        A binary file object positioned immediately after the byte order
        mark (or at offset 0 when the file has none).  The caller owns the
        handle and must close it.  ``None`` is returned when *filename* is
        not an existing regular file.
    """
    if not os.path.isfile(filename):
        return None

    # Both byte orders are listed explicitly: codecs.BOM_UTF16 / BOM_UTF32
    # are aliases for the *native* order only, so a file written on a
    # machine with the other endianness would otherwise go undetected.
    # UTF-32 must come before UTF-16 because the UTF-32-LE BOM
    # (FF FE 00 00) starts with the UTF-16-LE BOM (FF FE).
    boms = (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE,
            codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE,
            codecs.BOM_UTF8)

    f = open(filename, 'rb')
    try:
        # The longest BOM is 4 bytes, so 4 bytes are enough to identify any.
        header = f.read(4)
        bom_len = next((len(bom) for bom in boms if header.startswith(bom)), 0)
        f.seek(bom_len)
    except Exception:
        # Do not leak the handle when it cannot be handed to the caller.
        f.close()
        raise
    return f
项目:misp-modules    作者:MISP    | 项目源码 | 文件源码
def get_decoded_header(header, value):
    """Decode the first fragment of an RFC 2047 encoded header value.

    Only the first ``(data, charset)`` pair returned by
    ``email.header.decode_header`` is used; later fragments are ignored.

    Args:
        header: Name of the header.  Not used by this function.
        value: Raw header value handed to ``decode_header``.

    Returns:
        The text of the first fragment with surrounding whitespace and a
        leading byte order mark removed.  A fragment that was never encoded
        (``decode_header`` yields ``str``) is returned as-is after stripping.
        When the fragment is bytes with no charset, or is labelled
        ``unknown-8bit`` and none of the UTF codecs can decode it,
        ``str()`` of the raw bytes is returned so the analyst can decode it
        by hand rather than receive a mangled string.

    Raises:
        UnicodeDecodeError: The bytes are not valid in the declared charset.
        LookupError: The declared charset is not known to Python.
    """
    subject, encoding = decode_header(value)[0]
    subject = subject.strip()  # extra whitespace will mess up encoding
    if not isinstance(subject, bytes):
        # Header had no encoded-word; it is already text.
        return subject

    # Codecs that consume a *leading* BOM themselves.  Deleting the BOM byte
    # pattern wherever it occurs is unsafe: in UTF-16/32 those bytes can
    # straddle two adjacent characters, and removing them corrupts the text.
    bom_aware = {'utf-8': 'utf-8-sig', 'utf-16': 'utf-16', 'utf-32': 'utf-32'}
    if encoding in bom_aware:
        return subject.decode(bom_aware[encoding])

    # Try various UTF decodings for any unknown 8bit encodings
    if encoding == 'unknown-8bit':
        # 32 before 16: the UTF-32 decoder rejects input that is not UTF-32,
        # while the UTF-16 decoder accepts almost any even-length input.
        for codec in ('utf-8-sig', 'utf-32', 'utf-16'):
            try:
                return subject.decode(codec)
            except UnicodeDecodeError:
                continue
        # None of the codecs worked: hand back the raw bytes as text.
        return str(subject)

    # No charset information at all: better to let the analyst decode the
    # raw bytes than to provide a mangled string.
    if encoding is None:
        return str(subject)

    return subject.decode(encoding)