我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用email.errors.HeaderParseError()。
def parse_docstring(docstring): """ Parse out the parts of a docstring. Return (title, body, metadata). """ docstring = trim_docstring(docstring) parts = re.split(r'\n{2,}', docstring) title = parts[0] if len(parts) == 1: body = '' metadata = {} else: parser = HeaderParser() try: metadata = parser.parsestr(parts[-1]) except HeaderParseError: metadata = {} body = "\n\n".join(parts[1:]) else: metadata = dict(metadata.items()) if metadata: body = "\n\n".join(parts[1:-1]) else: body = "\n\n".join(parts[1:]) return title, body, metadata
def test_set_boundary(self): eq = self.assertEqual # This one has no existing boundary parameter, but the Content-Type: # header appears fifth. msg = self._msgobj('msg_01.txt') msg.set_boundary('BOUNDARY') header, value = msg.items()[4] eq(header.lower(), 'content-type') eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"') # This one has a Content-Type: header, with a boundary, stuck in the # middle of its headers. Make sure the order is preserved; it should # be fifth. msg = self._msgobj('msg_04.txt') msg.set_boundary('BOUNDARY') header, value = msg.items()[4] eq(header.lower(), 'content-type') eq(value, 'multipart/mixed; boundary="BOUNDARY"') # And this one has no Content-Type: header at all. msg = self._msgobj('msg_03.txt') self.assertRaises(errors.HeaderParseError, msg.set_boundary, 'BOUNDARY')
def get_subject_from_headers(headers): """ Get the subject of an email :param `Message` headers: The SMTP headers of the email :rtype: str :return: The subject of the email """ subject = '' subject_part = [] if 'Subject' in headers and headers['Subject'] is not None: try: decodefrag = decode_header(headers['Subject']) except HeaderParseError: return subject for line, encoding in decodefrag: enc = 'utf-8' if encoding is None or encoding == 'unknown' else encoding subject_part.append(utils.decode_every_charset_in_the_world(line, enc)) subject = ''.join(subject_part)[:1023] return subject
def get_comment(value): """comment = "(" *([FWS] ccontent) [FWS] ")" ccontent = ctext / quoted-pair / comment We handle nested comments here, and quoted-pair in our qp-ctext routine. """ if value and value[0] != '(': raise errors.HeaderParseError( "expected '(' but found '{}'".format(value)) comment = Comment() value = value[1:] while value and value[0] != ")": if value[0] in WSP: token, value = get_fws(value) elif value[0] == '(': token, value = get_comment(value) else: token, value = get_qp_ctext(value) comment.append(token) if not value: comment.defects.append(errors.InvalidHeaderDefect( "end of header inside comment")) return comment, value return comment, value[1:]
def get_dot_atom_text(value): """ dot-text = 1*atext *("." 1*atext) """ dot_atom_text = DotAtomText() if not value or value[0] in ATOM_ENDS: raise errors.HeaderParseError("expected atom at a start of " "dot-atom-text but found '{}'".format(value)) while value and value[0] not in ATOM_ENDS: token, value = get_atext(value) dot_atom_text.append(token) if value and value[0] == '.': dot_atom_text.append(DOT) value = value[1:] if dot_atom_text[-1] is DOT: raise errors.HeaderParseError("expected atom at end of dot-atom-text " "but found '{}'".format('.'+value)) return dot_atom_text, value
def get_dot_atom(value): """ dot-atom = [CFWS] dot-atom-text [CFWS] Any place we can have a dot atom, we could instead have an rfc2047 encoded word. """ dot_atom = DotAtom() if value[0] in CFWS_LEADER: token, value = get_cfws(value) dot_atom.append(token) if value.startswith('=?'): try: token, value = get_encoded_word(value) except errors.HeaderParseError: # XXX: need to figure out how to register defects when # appropriate here. token, value = get_dot_atom_text(value) else: token, value = get_dot_atom_text(value) dot_atom.append(token) if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) dot_atom.append(token) return dot_atom, value
def get_mailbox(value): """ mailbox = name-addr / addr-spec """ # The only way to figure out if we are dealing with a name-addr or an # addr-spec is to try parsing each one. mailbox = Mailbox() try: token, value = get_name_addr(value) except errors.HeaderParseError: try: token, value = get_addr_spec(value) except errors.HeaderParseError: raise errors.HeaderParseError( "expected mailbox but found '{}'".format(value)) if any(isinstance(x, errors.InvalidHeaderDefect) for x in token.all_defects): mailbox.token_type = 'invalid-mailbox' mailbox.append(token) return mailbox, value
def get_ttext(value): """ttext = <matches _ttext_matcher> We allow any non-TOKEN_ENDS in ttext, but add defects to the token's defects list if we find non-ttext characters. We also register defects for *any* non-printables even though the RFC doesn't exclude all of them, because we follow the spirit of RFC 5322. """ m = _non_token_end_matcher(value) if not m: raise errors.HeaderParseError( "expected ttext but found '{}'".format(value)) ttext = m.group() value = value[len(ttext):] ttext = ValueTerminal(ttext, 'ttext') _validate_xtext(ttext) return ttext, value
def get_token(value): """token = [CFWS] 1*ttext [CFWS] The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or tspecials. We also exclude tabs even though the RFC doesn't. The RFC implies the CFWS but is not explicit about it in the BNF. """ mtoken = Token() if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) mtoken.append(token) if value and value[0] in TOKEN_ENDS: raise errors.HeaderParseError( "expected token but found '{}'".format(value)) token, value = get_ttext(value) mtoken.append(token) if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) mtoken.append(token) return mtoken, value
def get_attrtext(value): """attrtext = 1*(any non-ATTRIBUTE_ENDS character) We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the token's defects list if we find non-attrtext characters. We also register defects for *any* non-printables even though the RFC doesn't exclude all of them, because we follow the spirit of RFC 5322. """ m = _non_attribute_end_matcher(value) if not m: raise errors.HeaderParseError( "expected attrtext but found {!r}".format(value)) attrtext = m.group() value = value[len(attrtext):] attrtext = ValueTerminal(attrtext, 'attrtext') _validate_xtext(attrtext) return attrtext, value
def get_attribute(value): """ [CFWS] 1*attrtext [CFWS] This version of the BNF makes the CFWS explicit, and as usual we use a value terminal for the actual run of characters. The RFC equivalent of attrtext is the token characters, with the subtraction of '*', "'", and '%'. We include tab in the excluded set just as we do for token. """ attribute = Attribute() if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) attribute.append(token) if value and value[0] in ATTRIBUTE_ENDS: raise errors.HeaderParseError( "expected token but found '{}'".format(value)) token, value = get_attrtext(value) attribute.append(token) if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) attribute.append(token) return attribute, value
def get_extended_attrtext(value): """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%') This is a special parsing routine so that we get a value that includes % escapes as a single string (which we decode as a single string later). """ m = _non_extended_attribute_end_matcher(value) if not m: raise errors.HeaderParseError( "expected extended attrtext but found {!r}".format(value)) attrtext = m.group() value = value[len(attrtext):] attrtext = ValueTerminal(attrtext, 'extended-attrtext') _validate_xtext(attrtext) return attrtext, value
def get_value(value): """ quoted-string / attribute """ v = Value() if not value: raise errors.HeaderParseError("Expected value but found end of string") leader = None if value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: raise errors.HeaderParseError("Expected value but found " "only {}".format(leader)) if value[0] == '"': token, value = get_quoted_string(value) else: token, value = get_extended_attribute(value) if leader is not None: token[:0] = [leader] v.append(token) return v, value
def smtp_VRFY(self, arg): if arg: try: address, params = self._getaddr(arg) except HeaderParseError: address = None if address is None: await self.push('502 Could not VRFY %s' % arg) else: status = await self._call_handler_hook('VRFY', address) await self.push( '252 Cannot VRFY user, but will accept message ' 'and attempt delivery' if status is MISSING else status) else: await self.push('501 Syntax: VRFY <address>')
def parse_docstring(docstring): """ Parse out the parts of a docstring. Returns (title, body, metadata). """ docstring = trim_docstring(docstring) parts = re.split(r'\n{2,}', docstring) title = parts[0] if len(parts) == 1: body = '' metadata = {} else: parser = HeaderParser() try: metadata = parser.parsestr(parts[-1]) except HeaderParseError: metadata = {} body = "\n\n".join(parts[1:]) else: metadata = dict(metadata.items()) if metadata: body = "\n\n".join(parts[1:-1]) else: body = "\n\n".join(parts[1:]) return title, body, metadata
def encode(self, splitchars=';, '): """Encode a message header into an RFC-compliant format. There are many issues involved in converting a given string for use in an email header. Only certain character sets are readable in most email clients, and as header strings can only contain a subset of 7-bit ASCII, care must be taken to properly convert and encode (with Base64 or quoted-printable) header strings. In addition, there is a 75-character length limit on any given encoded header field, so line-wrapping must be performed, even with double-byte character sets. This method will do its best to convert the string to the correct character set used in email, and encode and line wrap it safely with the appropriate scheme for that character set. If the given charset is not known or an error occurs during conversion, this function will return the header untouched. Optional splitchars is a string containing characters to split long ASCII lines on, in rough support of RFC 2822's `highest level syntactic breaks'. This doesn't affect RFC 2047 encoded lines. """ newchunks = [] maxlinelen = self._firstlinelen lastlen = 0 for s, charset in self._chunks: # The first bit of the next chunk should be just long enough to # fill the next line. Don't forget the space separating the # encoded words. targetlen = maxlinelen - lastlen - 1 if targetlen < charset.encoded_header_len(''): # Stick it on the next line targetlen = maxlinelen newchunks += self._split(s, charset, targetlen, splitchars) lastchunk, lastcharset = newchunks[-1] lastlen = lastcharset.encoded_header_len(lastchunk) value = self._encode_chunks(newchunks, maxlinelen) if _embeded_header.search(value): raise HeaderParseError("header value appears to contain " "an embedded header: {!r}".format(value)) return value
def test_broken_base64_header(self): raises = self.assertRaises s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?=' raises(errors.HeaderParseError, decode_header, s) # Test RFC 2231 header parameters (en/de)coding
def test_broken_base64_header(self): raises = self.assertRaises s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' raises(errors.HeaderParseError, decode_header, s) # Test RFC 2231 header parameters (en/de)coding