commit 4a53c1f65beb951587ed0664a107e5339b3f8c22
parent 60bfa2edb213e82738037473393e80c144982b31
Author: nibble <unknown>
Date: Mon, 9 Nov 2009 19:28:21 +0100
* Added missing RFCs
* Minor optimization in dmc-filter when using -v
Diffstat:
6 files changed, 7913 insertions(+), 6 deletions(-)
diff --git a/doc/imf-rfc5322.txt b/doc/imf-rfc5322.txt
@@ -0,0 +1,3195 @@
+
+
+
+
+
+
+Network Working Group P. Resnick, Ed.
+Request for Comments: 5322 Qualcomm Incorporated
+Obsoletes: 2822 October 2008
+Updates: 4021
+Category: Standards Track
+
+
+ Internet Message Format
+
+Status of This Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Abstract
+
+ This document specifies the Internet Message Format (IMF), a syntax
+ for text messages that are sent between computer users, within the
+ framework of "electronic mail" messages. This specification is a
+ revision of Request For Comments (RFC) 2822, which itself superseded
+ Request For Comments (RFC) 822, "Standard for the Format of ARPA
+ Internet Text Messages", updating it to reflect current practice and
+ incorporating incremental changes that were specified in other RFCs.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 1]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Table of Contents
+
+ 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4
+ 1.1. Scope . . . . . . . . . . . . . . . . . . . . . . . . . . 4
+ 1.2. Notational Conventions . . . . . . . . . . . . . . . . . . 5
+ 1.2.1. Requirements Notation . . . . . . . . . . . . . . . . 5
+ 1.2.2. Syntactic Notation . . . . . . . . . . . . . . . . . . 5
+ 1.2.3. Structure of This Document . . . . . . . . . . . . . . 5
+ 2. Lexical Analysis of Messages . . . . . . . . . . . . . . . . . 6
+ 2.1. General Description . . . . . . . . . . . . . . . . . . . 6
+ 2.1.1. Line Length Limits . . . . . . . . . . . . . . . . . . 7
+ 2.2. Header Fields . . . . . . . . . . . . . . . . . . . . . . 8
+ 2.2.1. Unstructured Header Field Bodies . . . . . . . . . . . 8
+ 2.2.2. Structured Header Field Bodies . . . . . . . . . . . . 8
+ 2.2.3. Long Header Fields . . . . . . . . . . . . . . . . . . 8
+ 2.3. Body . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
+ 3. Syntax . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10
+ 3.1. Introduction . . . . . . . . . . . . . . . . . . . . . . . 10
+ 3.2. Lexical Tokens . . . . . . . . . . . . . . . . . . . . . . 10
+ 3.2.1. Quoted characters . . . . . . . . . . . . . . . . . . 10
+ 3.2.2. Folding White Space and Comments . . . . . . . . . . . 11
+ 3.2.3. Atom . . . . . . . . . . . . . . . . . . . . . . . . . 12
+ 3.2.4. Quoted Strings . . . . . . . . . . . . . . . . . . . . 13
+ 3.2.5. Miscellaneous Tokens . . . . . . . . . . . . . . . . . 14
+ 3.3. Date and Time Specification . . . . . . . . . . . . . . . 14
+ 3.4. Address Specification . . . . . . . . . . . . . . . . . . 16
+ 3.4.1. Addr-Spec Specification . . . . . . . . . . . . . . . 17
+ 3.5. Overall Message Syntax . . . . . . . . . . . . . . . . . . 18
+ 3.6. Field Definitions . . . . . . . . . . . . . . . . . . . . 19
+ 3.6.1. The Origination Date Field . . . . . . . . . . . . . . 22
+ 3.6.2. Originator Fields . . . . . . . . . . . . . . . . . . 22
+ 3.6.3. Destination Address Fields . . . . . . . . . . . . . . 23
+ 3.6.4. Identification Fields . . . . . . . . . . . . . . . . 25
+ 3.6.5. Informational Fields . . . . . . . . . . . . . . . . . 27
+ 3.6.6. Resent Fields . . . . . . . . . . . . . . . . . . . . 28
+ 3.6.7. Trace Fields . . . . . . . . . . . . . . . . . . . . . 30
+ 3.6.8. Optional Fields . . . . . . . . . . . . . . . . . . . 30
+ 4. Obsolete Syntax . . . . . . . . . . . . . . . . . . . . . . . 31
+ 4.1. Miscellaneous Obsolete Tokens . . . . . . . . . . . . . . 32
+ 4.2. Obsolete Folding White Space . . . . . . . . . . . . . . . 33
+ 4.3. Obsolete Date and Time . . . . . . . . . . . . . . . . . . 33
+ 4.4. Obsolete Addressing . . . . . . . . . . . . . . . . . . . 35
+ 4.5. Obsolete Header Fields . . . . . . . . . . . . . . . . . . 35
+ 4.5.1. Obsolete Origination Date Field . . . . . . . . . . . 36
+ 4.5.2. Obsolete Originator Fields . . . . . . . . . . . . . . 36
+ 4.5.3. Obsolete Destination Address Fields . . . . . . . . . 37
+ 4.5.4. Obsolete Identification Fields . . . . . . . . . . . . 37
+ 4.5.5. Obsolete Informational Fields . . . . . . . . . . . . 37
+
+
+
+Resnick Standards Track [Page 2]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ 4.5.6. Obsolete Resent Fields . . . . . . . . . . . . . . . . 38
+ 4.5.7. Obsolete Trace Fields . . . . . . . . . . . . . . . . 38
+ 4.5.8. Obsolete optional fields . . . . . . . . . . . . . . . 38
+ 5. Security Considerations . . . . . . . . . . . . . . . . . . . 38
+ 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 39
+ Appendix A. Example Messages . . . . . . . . . . . . . . . . . 43
+ Appendix A.1. Addressing Examples . . . . . . . . . . . . . . . 44
+ Appendix A.1.1. A Message from One Person to Another with
+ Simple Addressing . . . . . . . . . . . . . . . . 44
+ Appendix A.1.2. Different Types of Mailboxes . . . . . . . . . . . 45
+ Appendix A.1.3. Group Addresses . . . . . . . . . . . . . . . . . 45
+ Appendix A.2. Reply Messages . . . . . . . . . . . . . . . . . . 46
+ Appendix A.3. Resent Messages . . . . . . . . . . . . . . . . . 47
+ Appendix A.4. Messages with Trace Fields . . . . . . . . . . . . 48
+ Appendix A.5. White Space, Comments, and Other Oddities . . . . 49
+ Appendix A.6. Obsoleted Forms . . . . . . . . . . . . . . . . . 50
+ Appendix A.6.1. Obsolete Addressing . . . . . . . . . . . . . . . 50
+ Appendix A.6.2. Obsolete Dates . . . . . . . . . . . . . . . . . . 50
+ Appendix A.6.3. Obsolete White Space and Comments . . . . . . . . 51
+ Appendix B. Differences from Earlier Specifications . . . . . 52
+ Appendix C. Acknowledgements . . . . . . . . . . . . . . . . . 53
+ 7. References . . . . . . . . . . . . . . . . . . . . . . . . . . 55
+ 7.1. Normative References . . . . . . . . . . . . . . . . . . . 55
+ 7.2. Informative References . . . . . . . . . . . . . . . . . . 55
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 3]
+
+RFC 5322 Internet Message Format October 2008
+
+
+1. Introduction
+
+1.1. Scope
+
+ This document specifies the Internet Message Format (IMF), a syntax
+ for text messages that are sent between computer users, within the
+ framework of "electronic mail" messages. This specification is an
+ update to [RFC2822], which itself superseded [RFC0822], updating it
+ to reflect current practice and incorporating incremental changes
+ that were specified in other RFCs such as [RFC1123].
+
+ This document specifies a syntax only for text messages. In
+ particular, it makes no provision for the transmission of images,
+ audio, or other sorts of structured data in electronic mail messages.
+ There are several extensions published, such as the MIME document
+ series ([RFC2045], [RFC2046], [RFC2049]), which describe mechanisms
+ for the transmission of such data through electronic mail, either by
+ extending the syntax provided here or by structuring such messages to
+ conform to this syntax. Those mechanisms are outside of the scope of
+ this specification.
+
+ In the context of electronic mail, messages are viewed as having an
+ envelope and contents. The envelope contains whatever information is
+ needed to accomplish transmission and delivery. (See [RFC5321] for a
+ discussion of the envelope.) The contents comprise the object to be
+ delivered to the recipient. This specification applies only to the
+ format and some of the semantics of message contents. It contains no
+ specification of the information in the envelope.
+
+ However, some message systems may use information from the contents
+ to create the envelope. It is intended that this specification
+ facilitate the acquisition of such information by programs.
+
+ This specification is intended as a definition of what message
+ content format is to be passed between systems. Though some message
+ systems locally store messages in this format (which eliminates the
+ need for translation between formats) and others use formats that
+ differ from the one specified in this specification, local storage is
+ outside of the scope of this specification.
+
+ Note: This specification is not intended to dictate the internal
+ formats used by sites, the specific message system features that
+ they are expected to support, or any of the characteristics of
+ user interface programs that create or read messages. In
+ addition, this document does not specify an encoding of the
+ characters for either transport or storage; that is, it does not
+ specify the number of bits used or how those bits are specifically
+ transferred over the wire or stored on disk.
+
+
+
+Resnick Standards Track [Page 4]
+
+RFC 5322 Internet Message Format October 2008
+
+
+1.2. Notational Conventions
+
+1.2.1. Requirements Notation
+
+ This document occasionally uses terms that appear in capital letters.
+ When the terms "MUST", "SHOULD", "RECOMMENDED", "MUST NOT", "SHOULD
+ NOT", and "MAY" appear capitalized, they are being used to indicate
+ particular requirements of this specification. A discussion of the
+ meanings of these terms appears in [RFC2119].
+
+1.2.2. Syntactic Notation
+
+ This specification uses the Augmented Backus-Naur Form (ABNF)
+ [RFC5234] notation for the formal definitions of the syntax of
+ messages. Characters will be specified either by a decimal value
+ (e.g., the value %d65 for uppercase A and %d97 for lowercase A) or by
+ a case-insensitive literal value enclosed in quotation marks (e.g.,
+ "A" for either uppercase or lowercase A).
+
+1.2.3. Structure of This Document
+
+ This document is divided into several sections.
+
+ This section, section 1, is a short introduction to the document.
+
+ Section 2 lays out the general description of a message and its
+ constituent parts. This is an overview to help the reader understand
+ some of the general principles used in the later portions of this
+ document. Any examples in this section MUST NOT be taken as
+ specification of the formal syntax of any part of a message.
+
+ Section 3 specifies formal ABNF rules for the structure of each part
+ of a message (the syntax) and describes the relationship between
+ those parts and their meaning in the context of a message (the
+ semantics). That is, it lays out the actual rules for the structure
+ of each part of a message (the syntax) as well as a description of
+ the parts and instructions for their interpretation (the semantics).
+ This includes analysis of the syntax and semantics of subparts of
+ messages that have specific structure. The syntax included in
+ section 3 represents messages as they MUST be created. There are
+ also notes in section 3 to indicate if any of the options specified
+ in the syntax SHOULD be used over any of the others.
+
+ Both sections 2 and 3 describe messages that are legal to generate
+ for purposes of this specification.
+
+
+
+
+
+
+Resnick Standards Track [Page 5]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ Section 4 of this document specifies an "obsolete" syntax. There are
+ references in section 3 to these obsolete syntactic elements. The
+ rules of the obsolete syntax are elements that have appeared in
+ earlier versions of this specification or have previously been widely
+ used in Internet messages. As such, these elements MUST be
+ interpreted by parsers of messages in order to be conformant to this
+ specification. However, since items in this syntax have been
+ determined to be non-interoperable or to cause significant problems
+ for recipients of messages, they MUST NOT be generated by creators of
+ conformant messages.
+
+ Section 5 details security considerations to take into account when
+ implementing this specification.
+
+ Appendix A lists examples of different sorts of messages. These
+ examples are not exhaustive of the types of messages that appear on
+ the Internet, but give a broad overview of certain syntactic forms.
+
+ Appendix B lists the differences between this specification and
+ earlier specifications for Internet messages.
+
+ Appendix C contains acknowledgements.
+
+2. Lexical Analysis of Messages
+
+2.1. General Description
+
+ At the most basic level, a message is a series of characters. A
+ message that is conformant with this specification is composed of
+ characters with values in the range of 1 through 127 and interpreted
+ as US-ASCII [ANSI.X3-4.1986] characters. For brevity, this document
+ sometimes refers to this range of characters as simply "US-ASCII
+ characters".
+
+ Note: This document specifies that messages are made up of
+ characters in the US-ASCII range of 1 through 127. There are
+ other documents, specifically the MIME document series ([RFC2045],
+ [RFC2046], [RFC2047], [RFC2049], [RFC4288], [RFC4289]), that
+ extend this specification to allow for values outside of that
+ range. Discussion of those mechanisms is not within the scope of
+ this specification.
+
+ Messages are divided into lines of characters. A line is a series of
+ characters that is delimited with the two characters carriage-return
+ and line-feed; that is, the carriage return (CR) character (ASCII
+ value 13) followed immediately by the line feed (LF) character (ASCII
+ value 10). (The carriage return/line feed pair is usually written in
+ this document as "CRLF".)
+
+
+
+Resnick Standards Track [Page 6]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ A message consists of header fields (collectively called "the header
+ section of the message") followed, optionally, by a body. The header
+ section is a sequence of lines of characters with special syntax as
+ defined in this specification. The body is simply a sequence of
+ characters that follows the header section and is separated from the
+ header section by an empty line (i.e., a line with nothing preceding
+ the CRLF).
+
+ Note: Common parlance and earlier versions of this specification
+ use the term "header" to either refer to the entire header section
+ or to refer to an individual header field. To avoid ambiguity,
+ this document does not use the terms "header" or "headers" in
+ isolation, but instead always uses "header field" to refer to the
+ individual field and "header section" to refer to the entire
+ collection.
+
+2.1.1. Line Length Limits
+
+ There are two limits that this specification places on the number of
+ characters in a line. Each line of characters MUST be no more than
+ 998 characters, and SHOULD be no more than 78 characters, excluding
+ the CRLF.
+
+ The 998 character limit is due to limitations in many implementations
+ that send, receive, or store IMF messages which simply cannot handle
+ more than 998 characters on a line. Receiving implementations would
+ do well to handle an arbitrarily large number of characters in a line
+ for robustness' sake. However, there are so many implementations that
+ (in compliance with the transport requirements of [RFC5321]) do not
+ accept messages containing more than 1000 characters including the CR
+ and LF per line that it is important for implementations not to
+ create such messages.
+
+ The more conservative 78 character recommendation is to accommodate
+ the many implementations of user interfaces that display these
+ messages which may truncate, or disastrously wrap, the display of
+ more than 78 characters per line, in spite of the fact that such
+ implementations are non-conformant to the intent of this
+ specification (and that of [RFC5321] if they actually cause
+ information to be lost). Again, even though this limitation is put
+ on messages, it is incumbent upon implementations that display
+ messages to handle an arbitrarily large number of characters in a
+ line (certainly at least up to the 998 character limit) for the sake
+ of robustness.
+
+
+
+
+
+
+
+Resnick Standards Track [Page 7]
+
+RFC 5322 Internet Message Format October 2008
+
+
+2.2. Header Fields
+
+ Header fields are lines beginning with a field name, followed by a
+ colon (":"), followed by a field body, and terminated by CRLF. A
+ field name MUST be composed of printable US-ASCII characters (i.e.,
+ characters that have values between 33 and 126, inclusive), except
+ colon. A field body may be composed of printable US-ASCII characters
+ as well as the space (SP, ASCII value 32) and horizontal tab (HTAB,
+ ASCII value 9) characters (together known as the white space
+ characters, WSP). A field body MUST NOT include CR and LF except
+ when used in "folding" and "unfolding", as described in section
+ 2.2.3. All field bodies MUST conform to the syntax described in
+ sections 3 and 4 of this specification.
+
+2.2.1. Unstructured Header Field Bodies
+
+ Some field bodies in this specification are defined simply as
+ "unstructured" (which is specified in section 3.2.5 as any printable
+ US-ASCII characters plus white space characters) with no further
+ restrictions. These are referred to as unstructured field bodies.
+ Semantically, unstructured field bodies are simply to be treated as a
+ single line of characters with no further processing (except for
+ "folding" and "unfolding" as described in section 2.2.3).
+
+2.2.2. Structured Header Field Bodies
+
+ Some field bodies in this specification have a syntax that is more
+ restrictive than the unstructured field bodies described above.
+ These are referred to as "structured" field bodies. Structured field
+ bodies are sequences of specific lexical tokens as described in
+ sections 3 and 4 of this specification. Many of these tokens are
+ allowed (according to their syntax) to be introduced or end with
+ comments (as described in section 3.2.2) as well as the white space
+ characters, and those white space characters are subject to "folding"
+ and "unfolding" as described in section 2.2.3. Semantic analysis of
+ structured field bodies is given along with their syntax.
+
+2.2.3. Long Header Fields
+
+ Each header field is logically a single line of characters comprising
+ the field name, the colon, and the field body. For convenience
+ however, and to deal with the 998/78 character limitations per line,
+ the field body portion of a header field can be split into a
+ multiple-line representation; this is called "folding". The general
+ rule is that wherever this specification allows for folding white
+ space (not simply WSP characters), a CRLF may be inserted before any
+ WSP.
+
+
+
+
+Resnick Standards Track [Page 8]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ For example, the header field:
+
+ Subject: This is a test
+
+ can be represented as:
+
+ Subject: This
+ is a test
+
+ Note: Though structured field bodies are defined in such a way
+ that folding can take place between many of the lexical tokens
+ (and even within some of the lexical tokens), folding SHOULD be
+ limited to placing the CRLF at higher-level syntactic breaks. For
+ instance, if a field body is defined as comma-separated values, it
+ is recommended that folding occur after the comma separating the
+ structured items in preference to other places where the field
+ could be folded, even if it is allowed elsewhere.
+
+ The process of moving from this folded multiple-line representation
+ of a header field to its single line representation is called
+ "unfolding". Unfolding is accomplished by simply removing any CRLF
+ that is immediately followed by WSP. Each header field should be
+ treated in its unfolded form for further syntactic and semantic
+ evaluation. An unfolded header field has no length restriction and
+ therefore may be indeterminately long.
+
+2.3. Body
+
+ The body of a message is simply lines of US-ASCII characters. The
+ only two limitations on the body are as follows:
+
+ o CR and LF MUST only occur together as CRLF; they MUST NOT appear
+ independently in the body.
+ o Lines of characters in the body MUST be limited to 998 characters,
+ and SHOULD be limited to 78 characters, excluding the CRLF.
+
+ Note: As was stated earlier, there are other documents,
+ specifically the MIME documents ([RFC2045], [RFC2046], [RFC2049],
+ [RFC4288], [RFC4289]), that extend (and limit) this specification
+ to allow for different sorts of message bodies. Again, these
+ mechanisms are beyond the scope of this document.
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 9]
+
+RFC 5322 Internet Message Format October 2008
+
+
+3. Syntax
+
+3.1. Introduction
+
+ The syntax as given in this section defines the legal syntax of
+ Internet messages. Messages that are conformant to this
+ specification MUST conform to the syntax in this section. If there
+ are options in this section where one option SHOULD be generated,
+ that is indicated either in the prose or in a comment next to the
+ syntax.
+
+ For the defined expressions, a short description of the syntax and
+ use is given, followed by the syntax in ABNF, followed by a semantic
+ analysis. The following primitive tokens that are used but otherwise
+ unspecified are taken from the "Core Rules" of [RFC5234], Appendix
+ B.1: CR, LF, CRLF, HTAB, SP, WSP, DQUOTE, DIGIT, ALPHA, and VCHAR.
+
+ In some of the definitions, there will be non-terminals whose names
+ start with "obs-". These "obs-" elements refer to tokens defined in
+ the obsolete syntax in section 4. In all cases, these productions
+ are to be ignored for the purposes of generating legal Internet
+ messages and MUST NOT be used as part of such a message. However,
+ when interpreting messages, these tokens MUST be honored as part of
+ the legal syntax. In this sense, section 3 defines a grammar for the
+ generation of messages, with "obs-" elements that are to be ignored,
+ while section 4 adds grammar for the interpretation of messages.
+
+3.2. Lexical Tokens
+
+ The following rules are used to define an underlying lexical
+ analyzer, which feeds tokens to the higher-level parsers. This
+ section defines the tokens used in structured header field bodies.
+
+ Note: Readers of this specification need to pay special attention
+ to how these lexical tokens are used in both the lower-level and
+ higher-level syntax later in the document. Particularly, the
+ white space tokens and the comment tokens defined in section 3.2.2
+ get used in the lower-level tokens defined here, and those lower-
+ level tokens are in turn used as parts of the higher-level tokens
+ defined later. Therefore, white space and comments may be allowed
+ in the higher-level tokens even though they may not explicitly
+ appear in a particular definition.
+
+3.2.1. Quoted characters
+
+ Some characters are reserved for special interpretation, such as
+ delimiting lexical tokens. To permit use of these characters as
+ uninterpreted data, a quoting mechanism is provided.
+
+
+
+Resnick Standards Track [Page 10]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
+
+ Where any quoted-pair appears, it is to be interpreted as the
+ character alone. That is to say, the "\" character that appears as
+ part of a quoted-pair is semantically "invisible".
+
+ Note: The "\" character may appear in a message where it is not
+ part of a quoted-pair. A "\" character that does not appear in a
+ quoted-pair is not semantically invisible. The only places in
+ this specification where quoted-pair currently appears are
+ ccontent, qcontent, and in obs-dtext in section 4.
+
+3.2.2. Folding White Space and Comments
+
+ White space characters, including white space used in folding
+ (described in section 2.2.3), may appear between many elements in
+ header field bodies. Also, strings of characters that are treated as
+ comments may be included in structured field bodies as characters
+ enclosed in parentheses. The following defines the folding white
+ space (FWS) and comment constructs.
+
+ Strings of characters enclosed in parentheses are considered comments
+ so long as they do not appear within a "quoted-string", as defined in
+ section 3.2.4. Comments may nest.
+
+ There are several places in this specification where comments and FWS
+ may be freely inserted. To accommodate that syntax, an additional
+ token for "CFWS" is defined for places where comments and/or FWS can
+ occur. However, where CFWS occurs in this specification, it MUST NOT
+ be inserted in such a way that any line of a folded header field is
+ made up entirely of WSP characters and nothing else.
+
+ FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
+ ; Folding white space
+
+ ctext = %d33-39 / ; Printable US-ASCII
+ %d42-91 / ; characters not including
+ %d93-126 / ; "(", ")", or "\"
+ obs-ctext
+
+ ccontent = ctext / quoted-pair / comment
+
+ comment = "(" *([FWS] ccontent) [FWS] ")"
+
+ CFWS = (1*([FWS] comment) [FWS]) / FWS
+
+
+
+
+
+
+Resnick Standards Track [Page 11]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ Throughout this specification, where FWS (the folding white space
+ token) appears, it indicates a place where folding, as discussed in
+ section 2.2.3, may take place. Wherever folding appears in a message
+ (that is, a header field body containing a CRLF followed by any WSP),
+ unfolding (removal of the CRLF) is performed before any further
+ semantic analysis is performed on that header field according to this
+ specification. That is to say, any CRLF that appears in FWS is
+ semantically "invisible".
+
+ A comment is normally used in a structured field body to provide some
+ human-readable informational text. Since a comment is allowed to
+ contain FWS, folding is permitted within the comment. Also note that
+ since quoted-pair is allowed in a comment, the parentheses and
+ backslash characters may appear in a comment, so long as they appear
+ as a quoted-pair. Semantically, the enclosing parentheses are not
+ part of the comment; the comment is what is contained between the two
+ parentheses. As stated earlier, the "\" in any quoted-pair and the
+ CRLF in any FWS that appears within the comment are semantically
+ "invisible" and therefore not part of the comment either.
+
+ Runs of FWS, comment, or CFWS that occur between lexical tokens in a
+ structured header field are semantically interpreted as a single
+ space character.
+
+3.2.3. Atom
+
+ Several productions in structured header field bodies are simply
+ strings of certain basic characters. Such productions are called
+ atoms.
+
+ Some of the structured header field bodies also allow the period
+ character (".", ASCII value 46) within runs of atext. An additional
+ "dot-atom" token is defined for those purposes.
+
+ Note: The "specials" token does not appear anywhere else in this
+ specification. It is simply the visible (i.e., non-control, non-
+ white space) characters that do not appear in atext. It is
+ provided only because it is useful for implementers who use tools
+ that lexically analyze messages. Each of the characters in
+ specials can be used to indicate a tokenization point in lexical
+ analysis.
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 12]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ atext = ALPHA / DIGIT / ; Printable US-ASCII
+ "!" / "#" / ; characters not including
+ "$" / "%" / ; specials. Used for atoms.
+ "&" / "'" /
+ "*" / "+" /
+ "-" / "/" /
+ "=" / "?" /
+ "^" / "_" /
+ "`" / "{" /
+ "|" / "}" /
+ "~"
+
+ atom = [CFWS] 1*atext [CFWS]
+
+ dot-atom-text = 1*atext *("." 1*atext)
+
+ dot-atom = [CFWS] dot-atom-text [CFWS]
+
+ specials = "(" / ")" / ; Special characters that do
+ "<" / ">" / ; not appear in atext
+ "[" / "]" /
+ ":" / ";" /
+ "@" / "\" /
+ "," / "." /
+ DQUOTE
+
+ Both atom and dot-atom are interpreted as a single unit, comprising
+ the string of characters that make it up. Semantically, the optional
+ comments and FWS surrounding the rest of the characters are not part
+ of the atom; the atom is only the run of atext characters in an atom,
+ or the atext and "." characters in a dot-atom.
+
+3.2.4. Quoted Strings
+
+ Strings of characters that include characters other than those
+ allowed in atoms can be represented in a quoted string format, where
+ the characters are surrounded by quote (DQUOTE, ASCII value 34)
+ characters.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 13]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ qtext = %d33 / ; Printable US-ASCII
+ %d35-91 / ; characters not including
+ %d93-126 / ; "\" or the quote character
+ obs-qtext
+
+ qcontent = qtext / quoted-pair
+
+ quoted-string = [CFWS]
+ DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+ [CFWS]
+
+ A quoted-string is treated as a unit. That is, quoted-string is
+ identical to atom, semantically. Since a quoted-string is allowed to
+ contain FWS, folding is permitted. Also note that since quoted-pair
+ is allowed in a quoted-string, the quote and backslash characters may
+ appear in a quoted-string so long as they appear as a quoted-pair.
+
+ Semantically, neither the optional CFWS outside of the quote
+ characters nor the quote characters themselves are part of the
+ quoted-string; the quoted-string is what is contained between the two
+ quote characters. As stated earlier, the "\" in any quoted-pair and
+ the CRLF in any FWS/CFWS that appears within the quoted-string are
+ semantically "invisible" and therefore not part of the quoted-string
+ either.
+
+3.2.5. Miscellaneous Tokens
+
+ Three additional tokens are defined: word and phrase for combinations
+ of atoms and/or quoted-strings, and unstructured for use in
+ unstructured header fields and in some places within structured
+ header fields.
+
+ word = atom / quoted-string
+
+ phrase = 1*word / obs-phrase
+
+ unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct
+
+3.3. Date and Time Specification
+
+ Date and time values occur in several header fields. This section
+ specifies the syntax for a full date and time specification. Though
+ folding white space is permitted throughout the date-time
+ specification, it is RECOMMENDED that a single space be used in each
+ place that FWS appears (whether it is required or optional); some
+ older implementations will not interpret longer sequences of folding
+ white space correctly.
+
+
+
+
+Resnick Standards Track [Page 14]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ date-time = [ day-of-week "," ] date time [CFWS]
+
+ day-of-week = ([FWS] day-name) / obs-day-of-week
+
+ day-name = "Mon" / "Tue" / "Wed" / "Thu" /
+ "Fri" / "Sat" / "Sun"
+
+ date = day month year
+
+ day = ([FWS] 1*2DIGIT FWS) / obs-day
+
+ month = "Jan" / "Feb" / "Mar" / "Apr" /
+ "May" / "Jun" / "Jul" / "Aug" /
+ "Sep" / "Oct" / "Nov" / "Dec"
+
+ year = (FWS 4*DIGIT FWS) / obs-year
+
+ time = time-of-day zone
+
+ time-of-day = hour ":" minute [ ":" second ]
+
+ hour = 2DIGIT / obs-hour
+
+ minute = 2DIGIT / obs-minute
+
+ second = 2DIGIT / obs-second
+
+ zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
+
+ The day is the numeric day of the month. The year is any numeric
+ year 1900 or later.
+
+ The time-of-day specifies the number of hours, minutes, and
+ optionally seconds since midnight of the date indicated.
+
+ The date and time-of-day SHOULD express local time.
+
+ The zone specifies the offset from Coordinated Universal Time (UTC,
+ formerly referred to as "Greenwich Mean Time") that the date and
+ time-of-day represent. The "+" or "-" indicates whether the time-of-
+ day is ahead of (i.e., east of) or behind (i.e., west of) Universal
+ Time. The first two digits indicate the number of hours difference
+ from Universal Time, and the last two digits indicate the number of
+ additional minutes difference from Universal Time. (Hence, +hhmm
+ means +(hh * 60 + mm) minutes, and -hhmm means -(hh * 60 + mm)
+ minutes). The form "+0000" SHOULD be used to indicate a time zone at
+ Universal Time. Though "-0000" also indicates Universal Time, it is
+
+
+
+
+Resnick Standards Track [Page 15]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ used to indicate that the time was generated on a system that may be
+ in a local time zone other than Universal Time and that the date-time
+ contains no information about the local time zone.
+
+ A date-time specification MUST be semantically valid. That is, the
+ day-of-week (if included) MUST be the day implied by the date, the
+ numeric day-of-month MUST be between 1 and the number of days allowed
+ for the specified month (in the specified year), the time-of-day MUST
+ be in the range 00:00:00 through 23:59:60 (the number of seconds
+ allowing for a leap second; see [RFC1305]), and the last two digits
+ of the zone MUST be within the range 00 through 59.
+
+3.4. Address Specification
+
+ Addresses occur in several message header fields to indicate senders
+ and recipients of messages. An address may either be an individual
+ mailbox, or a group of mailboxes.
+
+ address = mailbox / group
+
+ mailbox = name-addr / addr-spec
+
+ name-addr = [display-name] angle-addr
+
+ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
+ obs-angle-addr
+
+ group = display-name ":" [group-list] ";" [CFWS]
+
+ display-name = phrase
+
+ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
+
+ address-list = (address *("," address)) / obs-addr-list
+
+ group-list = mailbox-list / CFWS / obs-group-list
+
+ A mailbox receives mail. It is a conceptual entity that does not
+ necessarily pertain to file storage. For example, some sites may
+ choose to print mail on a printer and deliver the output to the
+ addressee's desk.
+
+ Normally, a mailbox is composed of two parts: (1) an optional display
+ name that indicates the name of the recipient (which can be a person
+ or a system) that could be displayed to the user of a mail
+ application, and (2) an addr-spec address enclosed in angle brackets
+
+
+
+
+
+Resnick Standards Track [Page 16]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ ("<" and ">"). There is an alternate simple form of a mailbox where
+ the addr-spec address appears alone, without the recipient's name or
+ the angle brackets. The Internet addr-spec address is described in
+ section 3.4.1.
+
+ Note: Some legacy implementations used the simple form where the
+ addr-spec appears without the angle brackets, but included the
+ name of the recipient in parentheses as a comment following the
+ addr-spec. Since the meaning of the information in a comment is
+ unspecified, implementations SHOULD use the full name-addr form of
+ the mailbox, instead of the legacy form, to specify the display
+ name associated with a mailbox. Also, because some legacy
+ implementations interpret the comment, comments generally SHOULD
+ NOT be used in address fields to avoid confusing such
+ implementations.
+
+ When it is desirable to treat several mailboxes as a single unit
+ (i.e., in a distribution list), the group construct can be used. The
+ group construct allows the sender to indicate a named group of
+ recipients. This is done by giving a display name for the group,
+ followed by a colon, followed by a comma-separated list of any number
+ of mailboxes (including zero and one), and ending with a semicolon.
+ Because the list of mailboxes can be empty, using the group construct
+ is also a simple way to communicate to recipients that the message
+ was sent to one or more named sets of recipients, without actually
+ providing the individual mailbox address for any of those recipients.
+
+3.4.1. Addr-Spec Specification
+
+ An addr-spec is a specific Internet identifier that contains a
+ locally interpreted string followed by the at-sign character ("@",
+ ASCII value 64) followed by an Internet domain. The locally
+ interpreted string is either a quoted-string or a dot-atom. If the
+ string can be represented as a dot-atom (that is, it contains no
+ characters other than atext characters or "." surrounded by atext
+ characters), then the dot-atom form SHOULD be used and the quoted-
+ string form SHOULD NOT be used. Comments and folding white space
+ SHOULD NOT be used around the "@" in the addr-spec.
+
+ Note: A liberal syntax for the domain portion of addr-spec is
+ given here. However, the domain portion contains addressing
+ information specified by and used in other protocols (e.g.,
+ [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore
+ incumbent upon implementations to conform to the syntax of
+ addresses for the context in which they are used.
+
+
+
+
+
+
+Resnick Standards Track [Page 17]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ addr-spec = local-part "@" domain
+
+ local-part = dot-atom / quoted-string / obs-local-part
+
+ domain = dot-atom / domain-literal / obs-domain
+
+ domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
+
+ dtext = %d33-90 / ; Printable US-ASCII
+ %d94-126 / ; characters not including
+ obs-dtext ; "[", "]", or "\"
+
+ The domain portion identifies the point to which the mail is
+ delivered. In the dot-atom form, this is interpreted as an Internet
+ domain name (either a host name or a mail exchanger name) as
+ described in [RFC1034], [RFC1035], and [RFC1123]. In the domain-
+ literal form, the domain is interpreted as the literal Internet
+ address of the particular host. In both cases, how addressing is
+ used and how messages are transported to a particular host is covered
+ in separate documents, such as [RFC5321]. These mechanisms are
+ outside of the scope of this document.
+
+ The local-part portion is a domain-dependent string. In addresses,
+ it is simply interpreted on the particular host as a name of a
+ particular mailbox.
+
+3.5. Overall Message Syntax
+
+ A message consists of header fields, optionally followed by a message
+ body. Lines in a message MUST be a maximum of 998 characters
+ excluding the CRLF, but it is RECOMMENDED that lines be limited to 78
+ characters excluding the CRLF. (See section 2.1.1 for explanation.)
+ In a message body, though all of the characters listed in the text
+ rule MAY be used, the use of US-ASCII control characters (values 1
+ through 8, 11, 12, and 14 through 31) is discouraged since their
+ interpretation by receivers for display is not guaranteed.
+
+ message = (fields / obs-fields)
+ [CRLF body]
+
+ body = (*(*998text CRLF) *998text) / obs-body
+
+ text = %d1-9 / ; Characters excluding CR
+ %d11 / ; and LF
+ %d12 /
+ %d14-127
+
+
+
+
+
+Resnick Standards Track [Page 18]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ The header fields carry most of the semantic information and are
+ defined in section 3.6. The body is simply a series of lines of text
+ that are uninterpreted for the purposes of this specification.
+
+3.6. Field Definitions
+
+ The header fields of a message are defined here. All header fields
+ have the same general syntactic structure: a field name, followed by
+ a colon, followed by the field body. The specific syntax for each
+ header field is defined in the subsequent sections.
+
+ Note: In the ABNF syntax for each field in subsequent sections,
+ each field name is followed by the required colon. However, for
+ brevity, sometimes the colon is not referred to in the textual
+ description of the syntax. It is, nonetheless, required.
+
+ It is important to note that the header fields are not guaranteed to
+ be in a particular order. They may appear in any order, and they
+ have been known to be reordered occasionally when transported over
+ the Internet. However, for the purposes of this specification,
+ header fields SHOULD NOT be reordered when a message is transported
+ or transformed. More importantly, the trace header fields and resent
+ header fields MUST NOT be reordered, and SHOULD be kept in blocks
+ prepended to the message. See sections 3.6.6 and 3.6.7 for more
+ information.
+
+ The only required header fields are the origination date field and
+ the originator address field(s). All other header fields are
+ syntactically optional. More information is contained in the table
+ following this definition.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 19]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ fields = *(trace
+ *optional-field /
+ *(resent-date /
+ resent-from /
+ resent-sender /
+ resent-to /
+ resent-cc /
+ resent-bcc /
+ resent-msg-id))
+ *(orig-date /
+ from /
+ sender /
+ reply-to /
+ to /
+ cc /
+ bcc /
+ message-id /
+ in-reply-to /
+ references /
+ subject /
+ comments /
+ keywords /
+ optional-field)
+
+ The following table indicates limits on the number of times each
+ field may occur in the header section of a message as well as any
+ special limitations on the use of those fields. An asterisk ("*")
+ next to a value in the minimum or maximum column indicates that a
+ special restriction appears in the Notes column.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 20]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ +----------------+--------+------------+----------------------------+
+ | Field | Min | Max number | Notes |
+ | | number | | |
+ +----------------+--------+------------+----------------------------+
+ | trace | 0 | unlimited | Block prepended - see |
+ | | | | 3.6.7 |
+ | resent-date | 0* | unlimited* | One per block, required if |
+ | | | | other resent fields are |
+ | | | | present - see 3.6.6 |
+ | resent-from | 0 | unlimited* | One per block - see 3.6.6 |
+ | resent-sender | 0* | unlimited* | One per block, MUST occur |
+ | | | | with multi-address |
+ | | | | resent-from - see 3.6.6 |
+ | resent-to | 0 | unlimited* | One per block - see 3.6.6 |
+ | resent-cc | 0 | unlimited* | One per block - see 3.6.6 |
+ | resent-bcc | 0 | unlimited* | One per block - see 3.6.6 |
+ | resent-msg-id | 0 | unlimited* | One per block - see 3.6.6 |
+ | orig-date | 1 | 1 | |
+ | from | 1 | 1 | See sender and 3.6.2 |
+ | sender | 0* | 1 | MUST occur with |
+ | | | | multi-address from - see |
+ | | | | 3.6.2 |
+ | reply-to | 0 | 1 | |
+ | to | 0 | 1 | |
+ | cc | 0 | 1 | |
+ | bcc | 0 | 1 | |
+ | message-id | 0* | 1 | SHOULD be present - see |
+ | | | | 3.6.4 |
+ | in-reply-to | 0* | 1 | SHOULD occur in some |
+ | | | | replies - see 3.6.4 |
+ | references | 0* | 1 | SHOULD occur in some |
+ | | | | replies - see 3.6.4 |
+ | subject | 0 | 1 | |
+ | comments | 0 | unlimited | |
+ | keywords | 0 | unlimited | |
+ | optional-field | 0 | unlimited | |
+ +----------------+--------+------------+----------------------------+
+
+ The exact interpretation of each field is described in subsequent
+ sections.
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 21]
+
+RFC 5322 Internet Message Format October 2008
+
+
+3.6.1. The Origination Date Field
+
+ The origination date field consists of the field name "Date" followed
+ by a date-time specification.
+
+ orig-date = "Date:" date-time CRLF
+
+ The origination date specifies the date and time at which the creator
+ of the message indicated that the message was complete and ready to
+ enter the mail delivery system. For instance, this might be the time
+ that a user pushes the "send" or "submit" button in an application
+ program. In any case, it is specifically not intended to convey the
+ time that the message is actually transported, but rather the time at
+ which the human or other creator of the message has put the message
+ into its final form, ready for transport. (For example, a portable
+ computer user who is not connected to a network might queue a message
+ for delivery. The origination date is intended to contain the date
+ and time that the user queued the message, not the time when the user
+ connected to the network to send the message.)
+
+3.6.2. Originator Fields
+
+ The originator fields of a message consist of the from field, the
+ sender field (when applicable), and optionally the reply-to field.
+ The from field consists of the field name "From" and a comma-
+ separated list of one or more mailbox specifications. If the from
+ field contains more than one mailbox specification in the mailbox-
+ list, then the sender field, containing the field name "Sender" and a
+ single mailbox specification, MUST appear in the message. In either
+ case, an optional reply-to field MAY also be included, which contains
+ the field name "Reply-To" and a comma-separated list of one or more
+ addresses.
+
+ from = "From:" mailbox-list CRLF
+
+ sender = "Sender:" mailbox CRLF
+
+ reply-to = "Reply-To:" address-list CRLF
+
+ The originator fields indicate the mailbox(es) of the source of the
+ message. The "From:" field specifies the author(s) of the message,
+ that is, the mailbox(es) of the person(s) or system(s) responsible
+ for the writing of the message. The "Sender:" field specifies the
+ mailbox of the agent responsible for the actual transmission of the
+ message. For example, if a secretary were to send a message for
+ another person, the mailbox of the secretary would appear in the
+ "Sender:" field and the mailbox of the actual author would appear in
+ the "From:" field. If the originator of the message can be indicated
+
+
+
+Resnick Standards Track [Page 22]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ by a single mailbox and the author and transmitter are identical, the
+ "Sender:" field SHOULD NOT be used. Otherwise, both fields SHOULD
+ appear.
+
+ Note: The transmitter information is always present. The absence
+ of the "Sender:" field is sometimes mistakenly taken to mean that
+ the agent responsible for transmission of the message has not been
+ specified. This absence merely means that the transmitter is
+ identical to the author and is therefore not redundantly placed
+ into the "Sender:" field.
+
+ The originator fields also provide the information required when
+ replying to a message. When the "Reply-To:" field is present, it
+ indicates the address(es) to which the author of the message suggests
+ that replies be sent. In the absence of the "Reply-To:" field,
+ replies SHOULD by default be sent to the mailbox(es) specified in the
+ "From:" field unless otherwise specified by the person composing the
+ reply.
+
+ In all cases, the "From:" field SHOULD NOT contain any mailbox that
+ does not belong to the author(s) of the message. See also section
+ 3.6.3 for more information on forming the destination addresses for a
+ reply.
+
+3.6.3. Destination Address Fields
+
+ The destination fields of a message consist of three possible fields,
+ each of the same form: the field name, which is either "To", "Cc", or
+ "Bcc", followed by a comma-separated list of one or more addresses
+ (either mailbox or group syntax).
+
+ to = "To:" address-list CRLF
+
+ cc = "Cc:" address-list CRLF
+
+ bcc = "Bcc:" [address-list / CFWS] CRLF
+
+ The destination fields specify the recipients of the message. Each
+ destination field may have one or more addresses, and the addresses
+ indicate the intended recipients of the message. The only difference
+ between the three fields is how each is used.
+
+ The "To:" field contains the address(es) of the primary recipient(s)
+ of the message.
+
+
+
+
+
+
+
+Resnick Standards Track [Page 23]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ The "Cc:" field (where the "Cc" means "Carbon Copy" in the sense of
+ making a copy on a typewriter using carbon paper) contains the
+ addresses of others who are to receive the message, though the
+ content of the message may not be directed at them.
+
+ The "Bcc:" field (where the "Bcc" means "Blind Carbon Copy") contains
+ addresses of recipients of the message whose addresses are not to be
+ revealed to other recipients of the message. There are three ways in
+ which the "Bcc:" field is used. In the first case, when a message
+ containing a "Bcc:" field is prepared to be sent, the "Bcc:" line is
+ removed even though all of the recipients (including those specified
+ in the "Bcc:" field) are sent a copy of the message. In the second
+ case, recipients specified in the "To:" and "Cc:" lines each are sent
+ a copy of the message with the "Bcc:" line removed as above, but the
+ recipients on the "Bcc:" line get a separate copy of the message
+ containing a "Bcc:" line. (When there are multiple recipient
+ addresses in the "Bcc:" field, some implementations actually send a
+ separate copy of the message to each recipient with a "Bcc:"
+ containing only the address of that particular recipient.) Finally,
+ since a "Bcc:" field may contain no addresses, a "Bcc:" field can be
+ sent without any addresses indicating to the recipients that blind
+ copies were sent to someone. Which method to use with "Bcc:" fields
+ is implementation dependent, but refer to the "Security
+ Considerations" section of this document for a discussion of each.
+
+ When a message is a reply to another message, the mailboxes of the
+ authors of the original message (the mailboxes in the "From:" field)
+ or mailboxes specified in the "Reply-To:" field (if it exists) MAY
+ appear in the "To:" field of the reply since these would normally be
+ the primary recipients of the reply. If a reply is sent to a message
+ that has destination fields, it is often desirable to send a copy of
+ the reply to all of the recipients of the message, in addition to the
+ author. When such a reply is formed, addresses in the "To:" and
+ "Cc:" fields of the original message MAY appear in the "Cc:" field of
+ the reply, since these are normally secondary recipients of the
+ reply. If a "Bcc:" field is present in the original message,
+ addresses in that field MAY appear in the "Bcc:" field of the reply,
+ but they SHOULD NOT appear in the "To:" or "Cc:" fields.
+
+ Note: Some mail applications have automatic reply commands that
+ include the destination addresses of the original message in the
+ destination addresses of the reply. How those reply commands
+ behave is implementation dependent and is beyond the scope of this
+ document. In particular, whether or not to include the original
+ destination addresses when the original message had a "Reply-To:"
+ field is not addressed here.
+
+
+
+
+
+Resnick Standards Track [Page 24]
+
+RFC 5322 Internet Message Format October 2008
+
+
+3.6.4. Identification Fields
+
+ Though listed as optional in the table in section 3.6, every message
+ SHOULD have a "Message-ID:" field. Furthermore, reply messages
+ SHOULD have "In-Reply-To:" and "References:" fields as appropriate
+ and as described below.
+
+ The "Message-ID:" field contains a single unique message identifier.
+ The "References:" and "In-Reply-To:" fields each contain one or more
+ unique message identifiers, optionally separated by CFWS.
+
+ The message identifier (msg-id) syntax is a limited version of the
+ addr-spec construct enclosed in the angle bracket characters, "<" and
+ ">". Unlike addr-spec, this syntax only permits the dot-atom-text
+ form on the left-hand side of the "@" and does not have internal CFWS
+ anywhere in the message identifier.
+
+ Note: As with addr-spec, a liberal syntax is given for the right-
+ hand side of the "@" in a msg-id. However, later in this section,
+ the use of a domain for the right-hand side of the "@" is
+ RECOMMENDED. Again, the syntax of domain constructs is specified
+ by and used in other protocols (e.g., [RFC1034], [RFC1035],
+ [RFC1123], [RFC5321]). It is therefore incumbent upon
+ implementations to conform to the syntax of addresses for the
+ context in which they are used.
+
+ message-id = "Message-ID:" msg-id CRLF
+
+ in-reply-to = "In-Reply-To:" 1*msg-id CRLF
+
+ references = "References:" 1*msg-id CRLF
+
+ msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
+
+ id-left = dot-atom-text / obs-id-left
+
+ id-right = dot-atom-text / no-fold-literal / obs-id-right
+
+ no-fold-literal = "[" *dtext "]"
+
+ The "Message-ID:" field provides a unique message identifier that
+ refers to a particular version of a particular message. The
+ uniqueness of the message identifier is guaranteed by the host that
+ generates it (see below). This message identifier is intended to be
+ machine readable and not necessarily meaningful to humans. A message
+ identifier pertains to exactly one version of a particular message;
+ subsequent revisions to the message each receive new message
+ identifiers.
+
+
+
+Resnick Standards Track [Page 25]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ Note: There are many instances when messages are "changed", but
+ those changes do not constitute a new instantiation of that
+ message, and therefore the message would not get a new message
+ identifier. For example, when messages are introduced into the
+ transport system, they are often prepended with additional header
+ fields such as trace fields (described in section 3.6.7) and
+ resent fields (described in section 3.6.6). The addition of such
+ header fields does not change the identity of the message and
+ therefore the original "Message-ID:" field is retained. In all
+ cases, it is the meaning that the sender of the message wishes to
+ convey (i.e., whether this is the same message or a different
+ message) that determines whether or not the "Message-ID:" field
+ changes, not any particular syntactic difference that appears (or
+ does not appear) in the message.
+
+ The "In-Reply-To:" and "References:" fields are used when creating a
+ reply to a message. They hold the message identifier of the original
+ message and the message identifiers of other messages (for example,
+ in the case of a reply to a message that was itself a reply). The
+ "In-Reply-To:" field may be used to identify the message (or
+ messages) to which the new message is a reply, while the
+ "References:" field may be used to identify a "thread" of
+ conversation.
+
+ When creating a reply to a message, the "In-Reply-To:" and
+ "References:" fields of the resultant message are constructed as
+ follows:
+
+ The "In-Reply-To:" field will contain the contents of the
+ "Message-ID:" field of the message to which this one is a reply (the
+ "parent message"). If there is more than one parent message, then
+ the "In-Reply-To:" field will contain the contents of all of the
+ parents' "Message-ID:" fields. If there is no "Message-ID:" field in
+ any of the parent messages, then the new message will have no "In-
+ Reply-To:" field.
+
+ The "References:" field will contain the contents of the parent's
+ "References:" field (if any) followed by the contents of the parent's
+ "Message-ID:" field (if any). If the parent message does not contain
+ a "References:" field but does have an "In-Reply-To:" field
+ containing a single message identifier, then the "References:" field
+ will contain the contents of the parent's "In-Reply-To:" field
+ followed by the contents of the parent's "Message-ID:" field (if
+ any). If the parent has none of the "References:", "In-Reply-To:",
+ or "Message-ID:" fields, then the new message will have no
+ "References:" field.
+
+
+
+
+
+Resnick Standards Track [Page 26]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ Note: Some implementations parse the "References:" field to
+ display the "thread of the discussion". These implementations
+ assume that each new message is a reply to a single parent and
+ hence that they can walk backwards through the "References:" field
+ to find the parent of each message listed there. Therefore,
+ trying to form a "References:" field for a reply that has multiple
+ parents is discouraged; how to do so is not defined in this
+ document.
+
+ The message identifier (msg-id) itself MUST be a globally unique
+ identifier for a message. The generator of the message identifier
+ MUST guarantee that the msg-id is unique. There are several
+ algorithms that can be used to accomplish this. Since the msg-id has
+ a similar syntax to addr-spec (identical except that quoted strings,
+ comments, and folding white space are not allowed), a good method is
+ to put the domain name (or a domain literal IP address) of the host
+ on which the message identifier was created on the right-hand side of
+ the "@" (since domain names and IP addresses are normally unique),
+ and put a combination of the current absolute date and time along
+ with some other currently unique (perhaps sequential) identifier
+ available on the system (for example, a process id number) on the
+ left-hand side. Though other algorithms will work, it is RECOMMENDED
+ that the right-hand side contain some domain identifier (either of
+ the host itself or otherwise) such that the generator of the message
+ identifier can guarantee the uniqueness of the left-hand side within
+ the scope of that domain.
+
+ Semantically, the angle bracket characters are not part of the
+ msg-id; the msg-id is what is contained between the two angle bracket
+ characters.
+
+3.6.5. Informational Fields
+
+ The informational fields are all optional. The "Subject:" and
+ "Comments:" fields are unstructured fields as defined in section
+ 2.2.1, and therefore may contain text or folding white space. The
+ "Keywords:" field contains a comma-separated list of one or more
+ words or quoted-strings.
+
+ subject = "Subject:" unstructured CRLF
+
+ comments = "Comments:" unstructured CRLF
+
+ keywords = "Keywords:" phrase *("," phrase) CRLF
+
+ These three fields are intended to have only human-readable content
+ with information about the message. The "Subject:" field is the most
+ common and contains a short string identifying the topic of the
+
+
+
+Resnick Standards Track [Page 27]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ message. When used in a reply, the field body MAY start with the
+ string "Re: " (an abbreviation of the Latin "in re", meaning "in the
+ matter of") followed by the contents of the "Subject:" field body of
+ the original message. If this is done, only one instance of the
+ literal string "Re: " ought to be used since use of other strings or
+ more than one instance can lead to undesirable consequences. The
+ "Comments:" field contains any additional comments on the text of the
+ body of the message. The "Keywords:" field contains a comma-
+ separated list of important words and phrases that might be useful
+ for the recipient.
+
+3.6.6. Resent Fields
+
+ Resent fields SHOULD be added to any message that is reintroduced by
+ a user into the transport system. A separate set of resent fields
+ SHOULD be added each time this is done. All of the resent fields
+ corresponding to a particular resending of the message SHOULD be
+ grouped together. Each new set of resent fields is prepended to the
+ message; that is, the most recent set of resent fields appears
+ earlier in the message. No other fields in the message are changed
+ when resent fields are added.
+
+ Each of the resent fields corresponds to a particular field elsewhere
+ in the syntax. For instance, the "Resent-Date:" field corresponds to
+ the "Date:" field and the "Resent-To:" field corresponds to the "To:"
+ field. In each case, the syntax for the field body is identical to
+ the syntax given previously for the corresponding field.
+
+ When resent fields are used, the "Resent-From:" and "Resent-Date:"
+ fields MUST be sent. The "Resent-Message-ID:" field SHOULD be sent.
+ "Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be
+ identical to "Resent-From:".
+
+ resent-date = "Resent-Date:" date-time CRLF
+
+ resent-from = "Resent-From:" mailbox-list CRLF
+
+ resent-sender = "Resent-Sender:" mailbox CRLF
+
+ resent-to = "Resent-To:" address-list CRLF
+
+ resent-cc = "Resent-Cc:" address-list CRLF
+
+ resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF
+
+ resent-msg-id = "Resent-Message-ID:" msg-id CRLF
+
+
+
+
+
+Resnick Standards Track [Page 28]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ Resent fields are used to identify a message as having been
+ reintroduced into the transport system by a user. The purpose of
+ using resent fields is to have the message appear to the final
+ recipient as if it were sent directly by the original sender, with
+ all of the original fields remaining the same. Each set of resent
+ fields corresponds to a particular resending event. That is, if a
+ message is resent multiple times, each set of resent fields gives
+ identifying information for each individual time. Resent fields are
+ strictly informational. They MUST NOT be used in the normal
+ processing of replies or other such automatic actions on messages.
+
+ Note: Reintroducing a message into the transport system and using
+ resent fields is a different operation from "forwarding".
+ "Forwarding" has two meanings: One sense of forwarding is that a
+ mail reading program can be told by a user to forward a copy of a
+ message to another person, making the forwarded message the body
+ of the new message. A forwarded message in this sense does not
+ appear to have come from the original sender, but is an entirely
+ new message from the forwarder of the message. Forwarding may
+ also mean that a mail transport program gets a message and
+ forwards it on to a different destination for final delivery.
+ Resent header fields are not intended for use with either type of
+ forwarding.
+
+ The resent originator fields indicate the mailbox of the person(s) or
+ system(s) that resent the message. As with the regular originator
+ fields, there are two forms: a simple "Resent-From:" form, which
+ contains the mailbox of the individual doing the resending, and the
+ more complex form, when one individual (identified in the "Resent-
+ Sender:" field) resends a message on behalf of one or more others
+ (identified in the "Resent-From:" field).
+
+ Note: When replying to a resent message, replies behave just as
+ they would with any other message, using the original "From:",
+ "Reply-To:", "Message-ID:", and other fields. The resent fields
+ are only informational and MUST NOT be used in the normal
+ processing of replies.
+
+ The "Resent-Date:" indicates the date and time at which the resent
+ message is dispatched by the resender of the message. Like the
+ "Date:" field, it is not the date and time that the message was
+ actually transported.
+
+ The "Resent-To:", "Resent-Cc:", and "Resent-Bcc:" fields function
+ identically to the "To:", "Cc:", and "Bcc:" fields, respectively,
+ except that they indicate the recipients of the resent message, not
+ the recipients of the original message.
+
+
+
+
+Resnick Standards Track [Page 29]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ The "Resent-Message-ID:" field provides a unique identifier for the
+ resent message.
+
+3.6.7. Trace Fields
+
+ The trace fields are a group of header fields consisting of an
+ optional "Return-Path:" field, and one or more "Received:" fields.
+ The "Return-Path:" header field contains a pair of angle brackets
+ that enclose an optional addr-spec. The "Received:" field contains a
+ (possibly empty) list of tokens followed by a semicolon and a date-
+ time specification. Each token must be a word, angle-addr, addr-
+ spec, or a domain. Further restrictions are applied to the syntax of
+ the trace fields by specifications that provide for their use, such
+ as [RFC5321].
+
+ trace = [return]
+ 1*received
+
+ return = "Return-Path:" path CRLF
+
+ path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS])
+
+ received = "Received:" *received-token ";" date-time CRLF
+
+ received-token = word / angle-addr / addr-spec / domain
+
+ A full discussion of the Internet mail use of trace fields is
+ contained in [RFC5321]. For the purposes of this specification, the
+ trace fields are strictly informational, and any formal
+ interpretation of them is outside of the scope of this document.
+
+3.6.8. Optional Fields
+
+ Fields may appear in messages that are otherwise unspecified in this
+ document. They MUST conform to the syntax of an optional-field.
+ This is a field name, made up of the printable US-ASCII characters
+ except SP and colon, followed by a colon, followed by any text that
+ conforms to the unstructured syntax.
+
+ The field names of any optional field MUST NOT be identical to any
+ field name specified elsewhere in this document.
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 30]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ optional-field = field-name ":" unstructured CRLF
+
+ field-name = 1*ftext
+
+ ftext = %d33-57 / ; Printable US-ASCII
+ %d59-126 ; characters not including
+ ; ":".
+
+ For the purposes of this specification, any optional field is
+ uninterpreted.
+
+4. Obsolete Syntax
+
+ Earlier versions of this specification allowed for different (usually
+ more liberal) syntax than is allowed in this version. Also, there
+ have been syntactic elements used in messages on the Internet whose
+ interpretations have never been documented. Though these syntactic
+ forms MUST NOT be generated according to the grammar in section 3,
+ they MUST be accepted and parsed by a conformant receiver. This
+ section documents many of these syntactic elements. Taking the
+ grammar in section 3 and adding the definitions presented in this
+ section will result in the grammar to use for the interpretation of
+ messages.
+
+ Note: This section identifies syntactic forms that any
+ implementation MUST reasonably interpret. However, there are
+ certainly Internet messages that do not conform to even the
+ additional syntax given in this section. The fact that a
+ particular form does not appear in any section of this document is
+ not justification for computer programs to crash or for malformed
+ data to be irretrievably lost by any implementation. It is up to
+ the implementation to deal with messages robustly.
+
+ One important difference between the obsolete (interpreting) and the
+ current (generating) syntax is that in structured header field bodies
+ (i.e., between the colon and the CRLF of any structured header
+ field), white space characters, including folding white space, and
+ comments could be freely inserted between any syntactic tokens. This
+ allowed many complex forms that have proven difficult for some
+ implementations to parse.
+
+ Another key difference between the obsolete and the current syntax is
+ that the rule in section 3.2.2 regarding lines composed entirely of
+ white space in comments and folding white space does not apply. See
+ the discussion of folding white space in section 4.2 below.
+
+ Finally, certain characters that were formerly allowed in messages
+ appear in this section. The NUL character (ASCII value 0) was once
+
+
+
+Resnick Standards Track [Page 31]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ allowed, but is no longer for compatibility reasons. Similarly, US-
+ ASCII control characters other than CR, LF, SP, and HTAB (ASCII
+ values 1 through 8, 11, 12, 14 through 31, and 127) were allowed to
+ appear in header field bodies. CR and LF were allowed to appear in
+ messages other than as CRLF; this use is also shown here.
+
+ Other differences in syntax and semantics are noted in the following
+ sections.
+
+4.1. Miscellaneous Obsolete Tokens
+
+ These syntactic elements are used elsewhere in the obsolete syntax or
+ in the main syntax. Bare CR, bare LF, and NUL are added to obs-qp,
+ obs-body, and obs-unstruct. US-ASCII control characters are added to
+ obs-qp, obs-unstruct, obs-ctext, and obs-qtext. The period character
+ is added to obs-phrase. The obs-phrase-list provides for a
+ (potentially empty) comma-separated list of phrases that may include
+ "null" elements. That is, there could be two or more commas in such
+ a list with nothing in between them, or commas at the beginning or
+ end of the list.
+
+ Note: The "period" (or "full stop") character (".") in obs-phrase
+ is not a form that was allowed in earlier versions of this or any
+ other specification. Period (like any other character from
+ specials) was not allowed in phrase because it introduced a
+ parsing difficulty distinguishing between phrases and portions of
+ an addr-spec (see section 4.4). It appears here because the
+ period character is currently used in many messages in the
+ display-name portion of addresses, especially for initials in
+ names, and therefore must be interpreted properly.
+
+ obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
+ %d11 / ; characters that do not
+ %d12 / ; include the carriage
+ %d14-31 / ; return, line feed, and
+ %d127 ; white space characters
+
+ obs-ctext = obs-NO-WS-CTL
+
+ obs-qtext = obs-NO-WS-CTL
+
+ obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
+
+ obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
+
+ obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
+
+ obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)
+
+
+
+Resnick Standards Track [Page 32]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ obs-phrase = word *(word / "." / CFWS)
+
+ obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS])
+
+ Bare CR and bare LF appear in messages with two different meanings.
+ In many cases, bare CR or bare LF are used improperly instead of CRLF
+ to indicate line separators. In other cases, bare CR and bare LF are
+ used simply as US-ASCII control characters with their traditional
+ ASCII meanings.
+
+4.2. Obsolete Folding White Space
+
+ In the obsolete syntax, any amount of folding white space MAY be
+ inserted where the obs-FWS rule is allowed. This creates the
+ possibility of having two consecutive "folds" in a line, and
+ therefore the possibility that a line which makes up a folded header
+ field could be composed entirely of white space.
+
+ obs-FWS = 1*WSP *(CRLF 1*WSP)
+
+4.3. Obsolete Date and Time
+
+ The syntax for the obsolete date format allows a 2 digit year in the
+ date field and allows for a list of alphabetic time zone specifiers
+ that were used in earlier versions of this specification. It also
+ permits comments and folding white space between many of the tokens.
+
+ obs-day-of-week = [CFWS] day-name [CFWS]
+
+ obs-day = [CFWS] 1*2DIGIT [CFWS]
+
+ obs-year = [CFWS] 2*DIGIT [CFWS]
+
+ obs-hour = [CFWS] 2DIGIT [CFWS]
+
+ obs-minute = [CFWS] 2DIGIT [CFWS]
+
+ obs-second = [CFWS] 2DIGIT [CFWS]
+
+ obs-zone = "UT" / "GMT" / ; Universal Time
+ ; North American UT
+ ; offsets
+ "EST" / "EDT" / ; Eastern: - 5/ - 4
+ "CST" / "CDT" / ; Central: - 6/ - 5
+ "MST" / "MDT" / ; Mountain: - 7/ - 6
+ "PST" / "PDT" / ; Pacific: - 8/ - 7
+ ;
+
+
+
+
+Resnick Standards Track [Page 33]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ %d65-73 / ; Military zones - "A"
+ %d75-90 / ; through "I" and "K"
+ %d97-105 / ; through "Z", both
+ %d107-122 ; upper and lower case
+
+ Where a two or three digit year occurs in a date, the year is to be
+ interpreted as follows: If a two digit year is encountered whose
+ value is between 00 and 49, the year is interpreted by adding 2000,
+ ending up with a value between 2000 and 2049. If a two digit year is
+ encountered with a value between 50 and 99, or any three digit year
+ is encountered, the year is interpreted by adding 1900.
+
+ In the obsolete time zone, "UT" and "GMT" are indications of
+ "Universal Time" and "Greenwich Mean Time", respectively, and are
+ both semantically identical to "+0000".
+
+ The remaining three character zones are the US time zones. The first
+ letter, "E", "C", "M", or "P" stands for "Eastern", "Central",
+ "Mountain", and "Pacific". The second letter is either "S" for
+ "Standard" time, or "D" for "Daylight Savings" (or summer) time.
+ Their interpretations are as follows:
+
+ EDT is semantically equivalent to -0400
+ EST is semantically equivalent to -0500
+ CDT is semantically equivalent to -0500
+ CST is semantically equivalent to -0600
+ MDT is semantically equivalent to -0600
+ MST is semantically equivalent to -0700
+ PDT is semantically equivalent to -0700
+ PST is semantically equivalent to -0800
+
+ The 1 character military time zones were defined in a non-standard
+ way in [RFC0822] and are therefore unpredictable in their meaning.
+ The original definitions of the military zones "A" through "I" are
+ equivalent to "+0100" through "+0900", respectively; "K", "L", and
+ "M" are equivalent to "+1000", "+1100", and "+1200", respectively;
+ "N" through "Y" are equivalent to "-0100" through "-1200",
+ respectively; and "Z" is equivalent to "+0000". However, because of
+ the error in [RFC0822], they SHOULD all be considered equivalent to
+ "-0000" unless there is out-of-band information confirming their
+ meaning.
+
+ Other multi-character (usually between 3 and 5) alphabetic time zones
+ have been used in Internet messages. Any such time zone whose
+ meaning is not known SHOULD be considered equivalent to "-0000"
+ unless there is out-of-band information confirming their meaning.
+
+
+
+
+
+Resnick Standards Track [Page 34]
+
+RFC 5322 Internet Message Format October 2008
+
+
+4.4. Obsolete Addressing
+
+ There are four primary differences in addressing. First, mailbox
+ addresses were allowed to have a route portion before the addr-spec
+ when enclosed in "<" and ">". The route is simply a comma-separated
+ list of domain names, each preceded by "@", and the list terminated
+ by a colon. Second, CFWS were allowed between the period-separated
+ elements of local-part and domain (i.e., dot-atom was not used). In
+ addition, local-part is allowed to contain quoted-string in addition
+ to just atom. Third, mailbox-list and address-list were allowed to
+ have "null" members. That is, there could be two or more commas in
+ such a list with nothing in between them, or commas at the beginning
+ or end of the list. Finally, US-ASCII control characters and quoted-
+ pairs were allowed in domain literals and are added here.
+
+ obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]
+
+ obs-route = obs-domain-list ":"
+
+ obs-domain-list = *(CFWS / ",") "@" domain
+ *("," [CFWS] ["@" domain])
+
+ obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])
+
+ obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])
+
+ obs-group-list = 1*([CFWS] ",") [CFWS]
+
+ obs-local-part = word *("." word)
+
+ obs-domain = atom *("." atom)
+
+ obs-dtext = obs-NO-WS-CTL / quoted-pair
+
+ When interpreting addresses, the route portion SHOULD be ignored.
+
+4.5. Obsolete Header Fields
+
+ Syntactically, the primary difference in the obsolete field syntax is
+ that it allows multiple occurrences of any of the fields and they may
+ occur in any order. Also, any amount of white space is allowed
+ before the ":" at the end of the field name.
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 35]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ obs-fields = *(obs-return /
+ obs-received /
+ obs-orig-date /
+ obs-from /
+ obs-sender /
+ obs-reply-to /
+ obs-to /
+ obs-cc /
+ obs-bcc /
+ obs-message-id /
+ obs-in-reply-to /
+ obs-references /
+ obs-subject /
+ obs-comments /
+ obs-keywords /
+ obs-resent-date /
+ obs-resent-from /
+ obs-resent-send /
+ obs-resent-rply /
+ obs-resent-to /
+ obs-resent-cc /
+ obs-resent-bcc /
+ obs-resent-mid /
+ obs-optional)
+
+ Except for destination address fields (described in section 4.5.3),
+ the interpretation of multiple occurrences of fields is unspecified.
+ Also, the interpretation of trace fields and resent fields that do
+ not occur in blocks prepended to the message is unspecified as well.
+ Unless otherwise noted in the following sections, interpretation of
+ other fields is identical to the interpretation of their non-obsolete
+ counterparts in section 3.
+
+4.5.1. Obsolete Origination Date Field
+
+ obs-orig-date = "Date" *WSP ":" date-time CRLF
+
+4.5.2. Obsolete Originator Fields
+
+ obs-from = "From" *WSP ":" mailbox-list CRLF
+
+ obs-sender = "Sender" *WSP ":" mailbox CRLF
+
+ obs-reply-to = "Reply-To" *WSP ":" address-list CRLF
+
+
+
+
+
+
+
+Resnick Standards Track [Page 36]
+
+RFC 5322 Internet Message Format October 2008
+
+
+4.5.3. Obsolete Destination Address Fields
+
+ obs-to = "To" *WSP ":" address-list CRLF
+
+ obs-cc = "Cc" *WSP ":" address-list CRLF
+
+ obs-bcc = "Bcc" *WSP ":"
+ (address-list / (*([CFWS] ",") [CFWS])) CRLF
+
+ When multiple occurrences of destination address fields occur in a
+ message, they SHOULD be treated as if the address list in the first
+ occurrence of the field is combined with the address lists of the
+ subsequent occurrences by adding a comma and concatenating.
+
+4.5.4. Obsolete Identification Fields
+
+ The obsolete "In-Reply-To:" and "References:" fields differ from the
+ current syntax in that they allow phrase (words or quoted strings) to
+ appear. The obsolete forms of the left and right sides of msg-id
+ allow interspersed CFWS, making them syntactically identical to
+ local-part and domain, respectively.
+
+ obs-message-id = "Message-ID" *WSP ":" msg-id CRLF
+
+ obs-in-reply-to = "In-Reply-To" *WSP ":" *(phrase / msg-id) CRLF
+
+ obs-references = "References" *WSP ":" *(phrase / msg-id) CRLF
+
+ obs-id-left = local-part
+
+ obs-id-right = domain
+
+ For purposes of interpretation, the phrases in the "In-Reply-To:" and
+ "References:" fields are ignored.
+
+ Semantically, none of the optional CFWS in the local-part and the
+ domain is part of the obs-id-left and obs-id-right, respectively.
+
+4.5.5. Obsolete Informational Fields
+
+ obs-subject = "Subject" *WSP ":" unstructured CRLF
+
+ obs-comments = "Comments" *WSP ":" unstructured CRLF
+
+ obs-keywords = "Keywords" *WSP ":" obs-phrase-list CRLF
+
+
+
+
+
+
+Resnick Standards Track [Page 37]
+
+RFC 5322 Internet Message Format October 2008
+
+
+4.5.6. Obsolete Resent Fields
+
+ The obsolete syntax adds a "Resent-Reply-To:" field, which consists
+ of the field name, the optional comments and folding white space, the
+ colon, and a comma separated list of addresses.
+
+ obs-resent-from = "Resent-From" *WSP ":" mailbox-list CRLF
+
+ obs-resent-send = "Resent-Sender" *WSP ":" mailbox CRLF
+
+ obs-resent-date = "Resent-Date" *WSP ":" date-time CRLF
+
+ obs-resent-to = "Resent-To" *WSP ":" address-list CRLF
+
+ obs-resent-cc = "Resent-Cc" *WSP ":" address-list CRLF
+
+ obs-resent-bcc = "Resent-Bcc" *WSP ":"
+ (address-list / (*([CFWS] ",") [CFWS])) CRLF
+
+ obs-resent-mid = "Resent-Message-ID" *WSP ":" msg-id CRLF
+
+ obs-resent-rply = "Resent-Reply-To" *WSP ":" address-list CRLF
+
+ As with other resent fields, the "Resent-Reply-To:" field is to be
+ treated as trace information only.
+
+4.5.7. Obsolete Trace Fields
+
+ The obs-return and obs-received are again given here as template
+ definitions, just as return and received are in section 3. Their
+ full syntax is given in [RFC5321].
+
+ obs-return = "Return-Path" *WSP ":" path CRLF
+
+ obs-received = "Received" *WSP ":" *received-token CRLF
+
+4.5.8. Obsolete optional fields
+
+ obs-optional = field-name *WSP ":" unstructured CRLF
+
+5. Security Considerations
+
+ Care needs to be taken when displaying messages on a terminal or
+ terminal emulator. Powerful terminals may act on escape sequences
+ and other combinations of US-ASCII control characters with a variety
+ of consequences. They can remap the keyboard or permit other
+ modifications to the terminal that could lead to denial of service or
+ even damaged data. They can trigger (sometimes programmable)
+
+
+
+Resnick Standards Track [Page 38]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ answerback messages that can allow a message to cause commands to be
+ issued on the recipient's behalf. They can also affect the operation
+ of terminal attached devices such as printers. Message viewers may
+ wish to strip potentially dangerous terminal escape sequences from
+ the message prior to display. However, other escape sequences appear
+ in messages for useful purposes (cf. [ISO.2022.1994], [RFC2045],
+ [RFC2046], [RFC2047], [RFC2049], [RFC4288], [RFC4289]) and therefore
+ should not be stripped indiscriminately.
+
+ Transmission of non-text objects in messages raises additional
+ security issues. These issues are discussed in [RFC2045], [RFC2046],
+ [RFC2047], [RFC2049], [RFC4288], and [RFC4289].
+
+ Many implementations use the "Bcc:" (blind carbon copy) field,
+ described in section 3.6.3, to facilitate sending messages to
+ recipients without revealing the addresses of one or more of the
+ addressees to the other recipients. Mishandling this use of "Bcc:"
+ may disclose confidential information that could eventually lead to
+ security problems through knowledge of even the existence of a
+ particular mail address. For example, if using the first method
+ described in section 3.6.3, where the "Bcc:" line is removed from the
+ message, blind recipients have no explicit indication that they have
+ been sent a blind copy, except insofar as their address does not
+ appear in the header section of a message. Because of this, one of
+ the blind addressees could potentially send a reply to all of the
+ shown recipients and accidentally reveal that the message went to the
+ blind recipient. When the second method from section 3.6.3 is used,
+ the blind recipient's address appears in the "Bcc:" field of a
+ separate copy of the message. If the "Bcc:" field sent contains all
+ of the blind addressees, all of the "Bcc:" recipients will be seen by
+ each "Bcc:" recipient. Even if a separate message is sent to each
+ "Bcc:" recipient with only the individual's address, implementations
+ still need to be careful to process replies to the message as per
+ section 3.6.3 so as not to accidentally reveal the blind recipient to
+ other recipients.
+
+6. IANA Considerations
+
+ This document updates the registrations that appeared in [RFC4021]
+ that referred to the definitions in [RFC2822]. IANA has updated the
+ Permanent Message Header Field Repository with the following header
+ fields, in accordance with the procedures set out in [RFC3864].
+
+ Header field name: Date
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.1)
+
+
+
+Resnick Standards Track [Page 39]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ Header field name: From
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.2)
+
+ Header field name: Sender
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.2)
+
+ Header field name: Reply-To
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.2)
+
+ Header field name: To
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.3)
+
+ Header field name: Cc
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.3)
+
+ Header field name: Bcc
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.3)
+
+ Header field name: Message-ID
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.4)
+
+ Header field name: In-Reply-To
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.4)
+
+
+
+
+Resnick Standards Track [Page 40]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ Header field name: References
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.4)
+
+ Header field name: Subject
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.5)
+
+ Header field name: Comments
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.5)
+
+ Header field name: Keywords
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.5)
+
+ Header field name: Resent-Date
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.6)
+
+ Header field name: Resent-From
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.6)
+
+ Header field name: Resent-Sender
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.6)
+
+ Header field name: Resent-To
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.6)
+
+
+
+
+Resnick Standards Track [Page 41]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ Header field name: Resent-Cc
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.6)
+
+ Header field name: Resent-Bcc
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.6)
+
+ Header field name: Resent-Reply-To
+ Applicable protocol: Mail
+ Status: obsolete
+ Author/Change controller: IETF
+ Specification document(s): This document (section 4.5.6)
+
+ Header field name: Resent-Message-ID
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.6)
+
+ Header field name: Return-Path
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.7)
+
+ Header field name: Received
+ Applicable protocol: Mail
+ Status: standard
+ Author/Change controller: IETF
+ Specification document(s): This document (section 3.6.7)
+ Related information: [RFC5321]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 42]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Appendix A. Example Messages
+
+ This section presents a selection of messages. These are intended to
+ assist in the implementation of this specification, but should not be
+ taken as normative; that is to say, although the examples in this
+ section were carefully reviewed, if there happens to be a conflict
+ between these examples and the syntax described in sections 3 and 4
+ of this document, the syntax in those sections is to be taken as
+ correct.
+
+ In the text version of this document, messages in this section are
+ delimited between lines of "----". The "----" lines are not part of
+ the message itself.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 43]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Appendix A.1. Addressing Examples
+
+ The following are examples of messages that might be sent between two
+ individuals.
+
+Appendix A.1.1. A Message from One Person to Another with Simple
+ Addressing
+
+ This could be called a canonical message. It has a single author,
+ John Doe, a single recipient, Mary Smith, a subject, the date, a
+ message identifier, and a textual message in the body.
+
+ ----
+ From: John Doe <jdoe@machine.example>
+ To: Mary Smith <mary@example.net>
+ Subject: Saying Hello
+ Date: Fri, 21 Nov 1997 09:55:06 -0600
+ Message-ID: <1234@local.machine.example>
+
+ This is a message just to say hello.
+ So, "Hello".
+ ----
+
+ If John's secretary Michael actually sent the message, even though
+ John was the author and replies to this message should go back to
+ him, the sender field would be used:
+
+ ----
+ From: John Doe <jdoe@machine.example>
+ Sender: Michael Jones <mjones@machine.example>
+ To: Mary Smith <mary@example.net>
+ Subject: Saying Hello
+ Date: Fri, 21 Nov 1997 09:55:06 -0600
+ Message-ID: <1234@local.machine.example>
+
+ This is a message just to say hello.
+ So, "Hello".
+ ----
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 44]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Appendix A.1.2. Different Types of Mailboxes
+
+ This message includes multiple addresses in the destination fields
+ and also uses several different forms of addresses.
+
+ ----
+ From: "Joe Q. Public" <john.q.public@example.com>
+ To: Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>
+ Cc: <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>
+ Date: Tue, 1 Jul 2003 10:52:37 +0200
+ Message-ID: <5678.21-Nov-1997@example.com>
+
+ Hi everyone.
+ ----
+
+ Note that the display names for Joe Q. Public and Giant; "Big" Box
+ needed to be enclosed in double-quotes because the former contains
+ the period and the latter contains both semicolon and double-quote
+ characters (the double-quote characters appearing as quoted-pair
+ constructs). Conversely, the display name for Who? could appear
+ without them because the question mark is legal in an atom. Notice
+ also that jdoe@example.org and boss@nil.test have no display names
+ associated with them at all, and jdoe@example.org uses the simpler
+ address form without the angle brackets.
+
+Appendix A.1.3. Group Addresses
+
+ ----
+ From: Pete <pete@silly.example>
+ To: A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;
+ Cc: Undisclosed recipients:;
+ Date: Thu, 13 Feb 1969 23:32:54 -0330
+ Message-ID: <testabcd.1234@silly.example>
+
+ Testing.
+ ----
+
+ In this message, the "To:" field has a single group recipient named
+ "A Group", which contains 3 addresses, and a "Cc:" field with an
+ empty group recipient named Undisclosed recipients.
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 45]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Appendix A.2. Reply Messages
+
+ The following is a series of three messages that make up a
+ conversation thread between John and Mary. John first sends a
+ message to Mary, Mary then replies to John's message, and then John
+ replies to Mary's reply message.
+
+ Note especially the "Message-ID:", "References:", and "In-Reply-To:"
+ fields in each message.
+
+ ----
+ From: John Doe <jdoe@machine.example>
+ To: Mary Smith <mary@example.net>
+ Subject: Saying Hello
+ Date: Fri, 21 Nov 1997 09:55:06 -0600
+ Message-ID: <1234@local.machine.example>
+
+ This is a message just to say hello.
+ So, "Hello".
+ ----
+
+ When sending replies, the Subject field is often retained, though
+ prepended with "Re: " as described in section 3.6.5.
+
+ ----
+ From: Mary Smith <mary@example.net>
+ To: John Doe <jdoe@machine.example>
+ Reply-To: "Mary Smith: Personal Account" <smith@home.example>
+ Subject: Re: Saying Hello
+ Date: Fri, 21 Nov 1997 10:01:10 -0600
+ Message-ID: <3456@example.net>
+ In-Reply-To: <1234@local.machine.example>
+ References: <1234@local.machine.example>
+
+ This is a reply to your hello.
+ ----
+
+ Note the "Reply-To:" field in the above message. When John replies
+ to Mary's message above, the reply should go to the address in the
+ "Reply-To:" field instead of the address in the "From:" field.
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 46]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ ----
+ To: "Mary Smith: Personal Account" <smith@home.example>
+ From: John Doe <jdoe@machine.example>
+ Subject: Re: Saying Hello
+ Date: Fri, 21 Nov 1997 11:00:00 -0600
+ Message-ID: <abcd.1234@local.machine.test>
+ In-Reply-To: <3456@example.net>
+ References: <1234@local.machine.example> <3456@example.net>
+
+ This is a reply to your reply.
+ ----
+
+Appendix A.3. Resent Messages
+
+ Start with the message that has been used as an example several
+ times:
+
+ ----
+ From: John Doe <jdoe@machine.example>
+ To: Mary Smith <mary@example.net>
+ Subject: Saying Hello
+ Date: Fri, 21 Nov 1997 09:55:06 -0600
+ Message-ID: <1234@local.machine.example>
+
+ This is a message just to say hello.
+ So, "Hello".
+ ----
+
+ Say that Mary, upon receiving this message, wishes to send a copy of
+ the message to Jane such that (a) the message would appear to have
+ come straight from John; (b) if Jane replies to the message, the
+ reply should go back to John; and (c) all of the original
+ information, like the date the message was originally sent to Mary,
+ the message identifier, and the original addressee, is preserved. In
+ this case, resent fields are prepended to the message:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 47]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ ----
+ Resent-From: Mary Smith <mary@example.net>
+ Resent-To: Jane Brown <j-brown@other.example>
+ Resent-Date: Mon, 24 Nov 1997 14:22:01 -0800
+ Resent-Message-ID: <78910@example.net>
+ From: John Doe <jdoe@machine.example>
+ To: Mary Smith <mary@example.net>
+ Subject: Saying Hello
+ Date: Fri, 21 Nov 1997 09:55:06 -0600
+ Message-ID: <1234@local.machine.example>
+
+ This is a message just to say hello.
+ So, "Hello".
+ ----
+
+ If Jane, in turn, wished to resend this message to another person,
+ she would prepend her own set of resent header fields to the above
+ and send that. (Note that for brevity, trace fields are not shown.)
+
+Appendix A.4. Messages with Trace Fields
+
+ As messages are sent through the transport system as described in
+ [RFC5321], trace fields are prepended to the message. The following
+ is an example of what those trace fields might look like. Note that
+ there is some folding white space in the first one since these lines
+ can be long.
+
+ ----
+ Received: from x.y.test
+ by example.net
+ via TCP
+ with ESMTP
+ id ABC12345
+ for <mary@example.net>; 21 Nov 1997 10:05:43 -0600
+ Received: from node.example by x.y.test; 21 Nov 1997 10:01:22 -0600
+ From: John Doe <jdoe@node.example>
+ To: Mary Smith <mary@example.net>
+ Subject: Saying Hello
+ Date: Fri, 21 Nov 1997 09:55:06 -0600
+ Message-ID: <1234@local.node.example>
+
+ This is a message just to say hello.
+ So, "Hello".
+ ----
+
+
+
+
+
+
+
+Resnick Standards Track [Page 48]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Appendix A.5. White Space, Comments, and Other Oddities
+
+ White space, including folding white space, and comments can be
+ inserted between many of the tokens of fields. Taking the example
+ from A.1.3, white space and comments can be inserted into all of the
+ fields.
+
+ ----
+ From: Pete(A nice \) chap) <pete(his account)@silly.test(his host)>
+ To:A Group(Some people)
+ :Chris Jones <c@(Chris's host.)public.example>,
+ joe@example.org,
+ John <jdoe@one.test> (my dear friend); (the end of the group)
+ Cc:(Empty list)(start)Hidden recipients :(nobody(that I know)) ;
+ Date: Thu,
+ 13
+ Feb
+ 1969
+ 23:32
+ -0330 (Newfoundland Time)
+ Message-ID: <testabcd.1234@silly.test>
+
+ Testing.
+ ----
+
+ The above example is aesthetically displeasing, but perfectly legal.
+ Note particularly (1) the comments in the "From:" field (including
+ one that has a ")" character appearing as part of a quoted-pair); (2)
+ the white space absent after the ":" in the "To:" field as well as
+ the comment and folding white space after the group name, the special
+ character (".") in the comment in Chris Jones's address, and the
+ folding white space before and after "joe@example.org,"; (3) the
+ multiple and nested comments in the "Cc:" field as well as the
+ comment immediately following the ":" after "Cc"; (4) the folding
+ white space (but no comments except at the end) and the missing
+ seconds in the time of the date field; and (5) the white space before
+ (but not within) the identifier in the "Message-ID:" field.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 49]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Appendix A.6. Obsoleted Forms
+
+ The following are examples of obsolete (that is, the "MUST NOT
+ generate") syntactic elements described in section 4 of this
+ document.
+
+Appendix A.6.1. Obsolete Addressing
+
+ Note in the example below the lack of quotes around Joe Q. Public,
+ the route that appears in the address for Mary Smith, the two commas
+ that appear in the "To:" field, and the spaces that appear around the
+ "." in the jdoe address.
+
+ ----
+ From: Joe Q. Public <john.q.public@example.com>
+ To: Mary Smith <@node.test:mary@example.net>, , jdoe@test . example
+ Date: Tue, 1 Jul 2003 10:52:37 +0200
+ Message-ID: <5678.21-Nov-1997@example.com>
+
+ Hi everyone.
+ ----
+
+Appendix A.6.2. Obsolete Dates
+
+ The following message uses an obsolete date format, including a non-
+ numeric time zone and a two digit year. Note that although the day-
+ of-week is missing, that is not specific to the obsolete syntax; it
+ is optional in the current syntax as well.
+
+ ----
+ From: John Doe <jdoe@machine.example>
+ To: Mary Smith <mary@example.net>
+ Subject: Saying Hello
+ Date: 21 Nov 97 09:55:06 GMT
+ Message-ID: <1234@local.machine.example>
+
+ This is a message just to say hello.
+ So, "Hello".
+ ----
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 50]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Appendix A.6.3. Obsolete White Space and Comments
+
+ White space and comments can appear between many more elements than
+ in the current syntax. Also, folding lines that are made up entirely
+ of white space are legal.
+
+ ----
+ From : John Doe <jdoe@machine(comment). example>
+ To : Mary Smith
+ __
+ <mary@example.net>
+ Subject : Saying Hello
+ Date : Fri, 21 Nov 1997 09(comment): 55 : 06 -0600
+ Message-ID : <1234 @ local(blah) .machine .example>
+
+ This is a message just to say hello.
+ So, "Hello".
+ ----
+
+ Note especially the second line of the "To:" field. It starts with
+ two space characters. (Note that "__" represent blank spaces.)
+ Therefore, it is considered part of the folding, as described in
+ section 4.2. Also, the comments and white space throughout
+ addresses, dates, and message identifiers are all part of the
+ obsolete syntax.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 51]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Appendix B. Differences from Earlier Specifications
+
+ This appendix contains a list of changes that have been made in the
+ Internet Message Format from earlier specifications, specifically
+ [RFC0822], [RFC1123], and [RFC2822]. Items marked with an asterisk
+ (*) below are items which appear in section 4 of this document and
+ therefore can no longer be generated.
+
+ The following are the changes made from [RFC0822] and [RFC1123] to
+ [RFC2822] that remain in this document:
+
+ 1. Period allowed in obsolete form of phrase.
+ 2. ABNF moved out of document, now in [RFC5234].
+ 3. Four or more digits allowed for year.
+ 4. Header field ordering (and lack thereof) made explicit.
+ 5. Encrypted header field removed.
+ 6. Specifically allow and give meaning to "-0000" time zone.
+ 7. Folding white space is not allowed between every token.
+ 8. Requirement for destinations removed.
+ 9. Forwarding and resending redefined.
+ 10. Extension header fields no longer specifically called out.
+ 11. ASCII 0 (null) removed.*
+ 12. Folding continuation lines cannot contain only white space.*
+ 13. Free insertion of comments not allowed in date.*
+ 14. Non-numeric time zones not allowed.*
+ 15. Two digit years not allowed.*
+ 16. Three digit years interpreted, but not allowed for generation.*
+ 17. Routes in addresses not allowed.*
+ 18. CFWS within local-parts and domains not allowed.*
+ 19. Empty members of address lists not allowed.*
+ 20. Folding white space between field name and colon not allowed.*
+ 21. Comments between field name and colon not allowed.
+ 22. Tightened syntax of in-reply-to and references.*
+ 23. CFWS within msg-id not allowed.*
+ 24. Tightened semantics of resent fields as informational only.
+ 25. Resent-Reply-To not allowed.*
+ 26. No multiple occurrences of fields (except resent and received).*
+ 27. Free CR and LF not allowed.*
+ 28. Line length limits specified.
+ 29. Bcc more clearly specified.
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 52]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ The following are changes from [RFC2822].
+ 1. Assorted typographical/grammatical errors fixed and
+ clarifications made.
+ 2. Changed "standard" to "document" or "specification" throughout.
+ 3. Made distinction between "header field" and "header section".
+ 4. Removed NO-WS-CTL from ctext, qtext, dtext, and unstructured.*
+ 5. Moved discussion of specials to the "Atom" section. Moved text
+ to "Overall message syntax" section.
+ 6. Simplified CFWS syntax.
+ 7. Fixed unstructured syntax.
+ 8. Changed date and time syntax to deal with white space in
+ obsolete date syntax.
+ 9. Removed quoted-pair from domain literals and message
+ identifiers.*
+ 10. Clarified that other specifications limit domain syntax.
+ 11. Simplified "Bcc:" and "Resent-Bcc:" syntax.
+ 12. Allowed optional-field to appear within trace information.
+ 13. Removed no-fold-quote from msg-id. Clarified syntax
+ limitations.
+ 14. Generalized "Received:" syntax to fix bugs and move definition
+ out of this document.
+ 15. Simplified obs-qp. Fixed and simplified obs-utext (which now
+ only appears in the obsolete syntax). Removed obs-text and obs-
+ char, adding obs-body.
+ 16. Fixed obsolete date syntax to allow for more (or less) comments
+ and white space.
+ 17. Fixed all obsolete list syntax (obs-domain-list, obs-mbox-list,
+ obs-addr-list, obs-phrase-list, and the newly added obs-group-
+ list).
+ 18. Fixed obs-reply-to syntax.
+ 19. Fixed obs-bcc and obs-resent-bcc to allow empty lists.
+ 20. Removed obs-path.
+
+Appendix C. Acknowledgements
+
+ Many people contributed to this document. They included folks who
+ participated in the Detailed Revision and Update of Messaging
+ Standards (DRUMS) Working Group of the Internet Engineering Task
+ Force (IETF), the chair of DRUMS, the Area Directors of the IETF, and
+ people who simply sent their comments in via email. The editor is
+ deeply indebted to them all and thanks them sincerely. The below
+ list includes everyone who sent email concerning both this document
+ and [RFC2822]. Hopefully, everyone who contributed is named here:
+
+ +--------------------+----------------------+---------------------+
+ | Matti Aarnio | Tanaka Akira | Russ Allbery |
+ | Eric Allman | Harald Alvestrand | Ran Atkinson |
+ | Jos Backus | Bruce Balden | Dave Barr |
+
+
+
+Resnick Standards Track [Page 53]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ | Alan Barrett | John Beck | J Robert von Behren |
+ | Jos den Bekker | D J Bernstein | James Berriman |
+ | Oliver Block | Norbert Bollow | Raj Bose |
+ | Antony Bowesman | Scott Bradner | Randy Bush |
+ | Tom Byrer | Bruce Campbell | Larry Campbell |
+ | W J Carpenter | Michael Chapman | Richard Clayton |
+ | Maurizio Codogno | Jim Conklin | R Kelley Cook |
+ | Nathan Coulter | Steve Coya | Mark Crispin |
+ | Dave Crocker | Matt Curtin | Michael D'Errico |
+ | Cyrus Daboo | Michael D Dean | Jutta Degener |
+ | Mark Delany | Steve Dorner | Harold A Driscoll |
+ | Michael Elkins | Frank Ellerman | Robert Elz |
+ | Johnny Eriksson | Erik E Fair | Roger Fajman |
+ | Patrik Faeltstroem | Claus Andre Faerber | Barry Finkel |
+ | Erik Forsberg | Chuck Foster | Paul Fox |
+ | Klaus M Frank | Ned Freed | Jochen Friedrich |
+ | Randall C Gellens | Sukvinder Singh Gill | Tim Goodwin |
+ | Philip Guenther | Arnt Gulbrandsen | Eric A Hall |
+ | Tony Hansen | John Hawkinson | Philip Hazel |
+ | Kai Henningsen | Robert Herriot | Paul Hethmon |
+ | Jim Hill | Alfred Hoenes | Paul E Hoffman |
+ | Steve Hole | Kari Hurtta | Marco S Hyman |
+ | Ofer Inbar | Olle Jarnefors | Kevin Johnson |
+ | Sudish Joseph | Maynard Kang | Prabhat Keni |
+ | John C Klensin | Graham Klyne | Brad Knowles |
+ | Shuhei Kobayashi | Peter Koch | Dan Kohn |
+ | Christian Kuhtz | Anand Kumria | Steen Larsen |
+ | Eliot Lear | Barry Leiba | Jay Levitt |
+ | Bruce Lilly | Lars-Johan Liman | Charles Lindsey |
+ | Pete Loshin | Simon Lyall | Bill Manning |
+ | John Martin | Mark Martinec | Larry Masinter |
+ | Denis McKeon | William P McQuillan | Alexey Melnikov |
+ | Perry E Metzger | Steven Miller | S Moonesamy |
+ | Keith Moore | John Gardiner Myers | Chris Newman |
+ | John W Noerenberg | Eric Norman | Mike O'Dell |
+ | Larry Osterman | Paul Overell | Jacob Palme |
+ | Michael A Patton | Uzi Paz | Michael A Quinlan |
+ | Robert Rapplean | Eric S Raymond | Sam Roberts |
+ | Hugh Sasse | Bart Schaefer | Tom Scola |
+ | Wolfgang Segmuller | Nick Shelness | John Stanley |
+ | Einar Stefferud | Jeff Stephenson | Bernard Stern |
+ | Peter Sylvester | Mark Symons | Eric Thomas |
+ | Lee Thompson | Karel De Vriendt | Matthew Wall |
+ | Rolf Weber | Brent B Welch | Dan Wing |
+ | Jack De Winter | Gregory J Woodhouse | Greg A Woods |
+ | Kazu Yamamoto | Alain Zahm | Jamie Zawinski |
+ | Timothy S Zurcher | | |
+ +--------------------+----------------------+---------------------+
+
+
+
+Resnick Standards Track [Page 54]
+
+RFC 5322 Internet Message Format October 2008
+
+
+7. References
+
+7.1. Normative References
+
+ [ANSI.X3-4.1986] American National Standards Institute, "Coded
+ Character Set - 7-bit American Standard Code for
+ Information Interchange", ANSI X3.4, 1986.
+
+ [RFC1034] Mockapetris, P., "Domain names - concepts and
+ facilities", STD 13, RFC 1034, November 1987.
+
+ [RFC1035] Mockapetris, P., "Domain names - implementation and
+ specification", STD 13, RFC 1035, November 1987.
+
+ [RFC1123] Braden, R., "Requirements for Internet Hosts -
+ Application and Support", STD 3, RFC 1123,
+ October 1989.
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+ [RFC5234] Crocker, D. and P. Overell, "Augmented BNF for
+ Syntax Specifications: ABNF", STD 68, RFC 5234,
+ January 2008.
+
+7.2. Informative References
+
+ [RFC0822] Crocker, D., "Standard for the format of ARPA
+ Internet text messages", STD 11, RFC 822,
+ August 1982.
+
+ [RFC1305] Mills, D., "Network Time Protocol (Version 3)
+ Specification, Implementation", RFC 1305,
+ March 1992.
+
+ [ISO.2022.1994] International Organization for Standardization,
+ "Information technology - Character code structure
+ and extension techniques", ISO Standard 2022, 1994.
+
+ [RFC2045] Freed, N. and N. Borenstein, "Multipurpose Internet
+ Mail Extensions (MIME) Part One: Format of Internet
+ Message Bodies", RFC 2045, November 1996.
+
+ [RFC2046] Freed, N. and N. Borenstein, "Multipurpose Internet
+ Mail Extensions (MIME) Part Two: Media Types",
+ RFC 2046, November 1996.
+
+
+
+
+
+Resnick Standards Track [Page 55]
+
+RFC 5322 Internet Message Format October 2008
+
+
+ [RFC2047] Moore, K., "MIME (Multipurpose Internet Mail
+ Extensions) Part Three: Message Header Extensions
+ for Non-ASCII Text", RFC 2047, November 1996.
+
+ [RFC2049] Freed, N. and N. Borenstein, "Multipurpose Internet
+ Mail Extensions (MIME) Part Five: Conformance
+ Criteria and Examples", RFC 2049, November 1996.
+
+ [RFC2822] Resnick, P., "Internet Message Format", RFC 2822,
+ April 2001.
+
+ [RFC3864] Klyne, G., Nottingham, M., and J. Mogul,
+ "Registration Procedures for Message Header
+ Fields", BCP 90, RFC 3864, September 2004.
+
+ [RFC4021] Klyne, G. and J. Palme, "Registration of Mail and
+ MIME Header Fields", RFC 4021, March 2005.
+
+ [RFC4288] Freed, N. and J. Klensin, "Media Type
+ Specifications and Registration Procedures",
+ BCP 13, RFC 4288, December 2005.
+
+ [RFC4289] Freed, N. and J. Klensin, "Multipurpose Internet
+ Mail Extensions (MIME) Part Four: Registration
+ Procedures", BCP 13, RFC 4289, December 2005.
+
+ [RFC5321] Klensin, J., "Simple Mail Transfer Protocol",
+ RFC 5321, October 2008.
+
+Author's Address
+
+ Peter W. Resnick (editor)
+ Qualcomm Incorporated
+ 5775 Morehouse Drive
+ San Diego, CA 92121-1714
+ US
+
+ Phone: +1 858 651 4478
+ EMail: presnick@qualcomm.com
+ URI: http://www.qualcomm.com/~presnick/
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 56]
+
+RFC 5322 Internet Message Format October 2008
+
+
+Full Copyright Statement
+
+ Copyright (C) The IETF Trust (2008).
+
+ This document is subject to the rights, licenses and restrictions
+ contained in BCP 78, and except as set forth therein, the authors
+ retain all their rights.
+
+ This document and the information contained herein are provided on an
+ "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+ OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY, THE IETF TRUST AND
+ THE INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF
+ THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+ WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+ The IETF takes no position regarding the validity or scope of any
+ Intellectual Property Rights or other rights that might be claimed to
+ pertain to the implementation or use of the technology described in
+ this document or the extent to which any license under such rights
+ might or might not be available; nor does it represent that it has
+ made any independent effort to identify any such rights. Information
+ on the procedures with respect to rights in RFC documents can be
+ found in BCP 78 and BCP 79.
+
+ Copies of IPR disclosures made to the IETF Secretariat and any
+ assurances of licenses to be made available, or the result of an
+ attempt made to obtain a general license or permission for the use of
+ such proprietary rights by implementers or users of this
+ specification can be obtained from the IETF on-line IPR repository at
+ http://www.ietf.org/ipr.
+
+ The IETF invites any interested party to bring to its attention any
+ copyrights, patents or patent applications, or other proprietary
+ rights that may cover technology that may be required to implement
+ this standard. Please address the information to the IETF at
+ ietf-ipr@ietf.org.
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick Standards Track [Page 57]
+
diff --git a/doc/mbox-rfc4155.txt b/doc/mbox-rfc4155.txt
@@ -0,0 +1,507 @@
+
+
+
+
+
+
+Network Working Group E. Hall
+Request for Comments: 4155 September 2005
+Category: Informational
+
+
+ The application/mbox Media Type
+
+Status of This Memo
+
+ This memo provides information for the Internet community. It does
+ not specify an Internet standard of any kind. Distribution of this
+ memo is unlimited.
+
+Copyright Notice
+
+ Copyright (C) The Internet Society (2005).
+
+Abstract
+
+ This memo requests that the application/mbox media type be authorized
+ for allocation by the IESG, according to the terms specified in RFC
+ 2048. This memo also defines a default format for the mbox database,
+ which must be supported by all conformant implementations.
+
+1. Background and Overview
+
+ UNIX-like operating systems have historically made widespread use of
+ "mbox" database files for a variety of local email purposes. In the
+ common case, mbox files store linear sequences of one or more
+ electronic mail messages, with local email clients treating the
+ database as a logical folder of email messages. mbox databases are
+ also used by a variety of other messaging tools, such as mailing list
+ management programs, archiving and filtering utilities, messaging
+ servers, and other related applications. In recent years, mbox
+ databases have also become common on a large number of non-UNIX
+ computing platforms, for similar kinds of purposes.
+
+ The increased pervasiveness of these files has led to an increased
+ demand for a standardized, network-wide interchange of these files as
+ discrete database objects. In turn, this dictates a need for a
+ general media type definition for mbox files, which is the subject
+ and purpose of this memo.
+
+
+
+
+
+
+
+
+
+Hall Informational [Page 1]
+
+RFC 4155 The application/mbox Media Type September 2005
+
+
+2. About the mbox Database
+
+ The mbox database format is not documented in an authoritative
+ specification, but instead exists as a well-known output format that
+ is anecdotally documented, or which is only authoritatively
+ documented for a specific platform or tool.
+
+ mbox databases typically contain a linear sequence of electronic mail
+ messages. Each message begins with a separator line that identifies
+ the message sender, and also identifies the date and time at which
+ the message was received by the final recipient (either the last-hop
+ system in the transfer path, or the system which serves as the
+ recipient's mailstore). Each message is typically terminated by an
+ empty line. The end of the database is usually recognized by either
+ the absence of any additional data, or by the presence of an explicit
+ end-of-file marker.
+
+ The structure of the separator lines varies across implementations, but
+ usually contains the exact character sequence of "From", followed by a
+ single Space character (0x20), an email address of some kind, another
+ Space character, a timestamp sequence of some kind, and an end-of-
+ line marker. However, due to the lack of any authoritative
+ specification, each of these attributes is known to vary widely
+ across implementations. For example, the email address can reflect
+ any addressing syntax that has ever been used on any messaging system
+ in all of history (specifically including address forms that are not
+ compatible with Internet messages, as defined by RFC 2822 [RFC2822]).
+ Similarly, the timestamp sequences can also vary according to system
+ output, while the end-of-line sequences will often reflect platform-
+ specific requirements. Different data formats can even appear within
+ a single database as a result of multiple mbox files being
+ concatenated together, or because a single file was accessed by
+ multiple messaging clients, each of which has used its own syntax for
+ the separator line.
+
+ Message data within mbox databases often reflects site-specific
+ peculiarities. For example, it is entirely possible for the message
+ body or headers in an mbox database to contain untagged eight-bit
+ character data that implicitly reflects a site-specific default
+ language or locale, or that reflects local defaults for timestamps
+ and email addresses; none of this data is widely portable beyond the
+ local scope. Similarly, message data can also contain unencoded
+ eight-bit binary data, or can use encoding formats that represent a
+ specific platform (e.g., BINHEX or UUENCODE sequences).
+
+
+
+
+
+
+
+Hall Informational [Page 2]
+
+RFC 4155 The application/mbox Media Type September 2005
+
+
+ Many implementations are also known to escape message body lines that
+ begin with the character sequence of "From ", so as to prevent
+ confusion with overly-liberal parsers that do not search for full
+ separator lines. In the common case, a leading Greater-Than symbol
+ (0x3E) is used for this purpose (with "From " becoming ">From ").
+ However, other implementations are known not to escape such lines
+ unless they are immediately preceded by a blank line or if they also
+ appear to contain an email address and a timestamp. Other
+ implementations are also known to perform secondary escapes against
+ these lines if they are already escaped or quoted, while others
+ ignore these mechanisms altogether.
+
+ A comprehensive description of mbox database files on UNIX-like
+ systems can be found at http://qmail.org./man/man5/mbox.html, which
+ should be treated as mostly authoritative for those variations that
+ are otherwise only documented in anecdotal form. However, readers
+ are advised that many other platforms and tools make use of mbox
+ databases, and that there are many more potential variations that can
+ be encountered in the wild.
+
+ In order to mitigate errors that may arise from such vagaries, this
+ specification defines a "format" parameter to the application/mbox
+ media type declaration, which can be used to identify the specific
+ kind of mbox database that is being transferred. Furthermore, this
+ specification defines a "default" database format which MUST be
+ supported by implementations that claim to be compliant with this
+ specification, and which is to be used as the implicit format for
+ undeclared application/mbox data objects. Additional format types
+ are to be defined in subsequent specifications. Messaging systems
+ that receive an mbox database with an unknown format parameter value
+ SHOULD treat the data as an opaque binary object, as if the data had
+ been declared as application/octet-stream.
+
+ Refer to Appendix A for a description of the default mbox format.
+
+ Note that RFC 2046 [RFC2046] defines the multipart/digest media type
+ for transferring platform-independent message files. Because that
+ specification defines a set of neutral and strict formatting rules,
+ the multipart/digest media type already facilitates highly-
+ predictable transfer and conversion operations; as such, implementers
+ are strongly encouraged to support and use that media type where
+ possible.
+
+
+
+
+
+
+
+
+
+Hall Informational [Page 3]
+
+RFC 4155 The application/mbox Media Type September 2005
+
+
+3. Prerequisites and Terminology
+
+ Readers of this document are expected to be familiar with the
+ specification for MIME [RFC2045] and MIME-type registrations
+ [RFC2048].
+
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+ document are to be interpreted as described in RFC 2119 [RFC2119].
+
+4. The application/mbox Media Type Registration
+
+ This section provides the media type registration application (as per
+ [RFC2048]).
+
+ MIME media type name: application
+
+ MIME subtype name: mbox
+
+ Required parameters: none
+
+ Optional parameters: The "format" parameter identifies the format of
+ the mbox database and the messages contained therein. The default
+ value for the "format" parameter is "default", and refers to the
+ formatting rules defined in Appendix A of this memo. mbox databases
+ that do not have a "format" parameter SHOULD be interpreted as having
+ the implicit "format" value of "default". mbox databases that have
+ an unknown value for the "format" parameter SHOULD be treated as
+ opaque data objects, as if the media type had been specified as
+ application/octet-stream. Additional values for the format parameter
+ are to be defined in subsequent specifications, and registered with
+ IANA.
+
+ Encoding considerations: If an email client receives an mbox database
+ as a message attachment, and then stores that attachment within a
+ local mbox database, the contents of the two database files may
+ become irreversibly intermingled, such that both databases are
+ rendered unrecognizable. In order to avoid these collisions,
+ messaging systems that support this specification MUST encode an mbox
+ database (or at a minimum, the separator lines) with non-transparent
+ transfer encoding (such as BASE64 or Quoted-Printable) whenever an
+ application/mbox object is transferred via messaging protocols.
+ Other transfer services are generally encouraged to adopt similar
+ encoding strategies in order to allow for any subsequent
+ retransmission that might occur, but this is not a requirement.
+ Implementers should also be prepared to encode mbox data locally if
+ non-compliant data is received.
+
+
+
+
+Hall Informational [Page 4]
+
+RFC 4155 The application/mbox Media Type September 2005
+
+
+ Security considerations: mbox data is passive, and does not generally
+ represent a unique or new security threat. However, there is risk in
+ sharing any kind of data, because unintentional information may be
+ exposed, and this risk certainly applies to mbox data as well.
+
+ Interoperability considerations: Due to the lack of a single
+ authoritative specification for mbox databases, there are a large
+ number of variations between database formats (refer to the
+ introduction text for common examples), and it is expected that non-
+ conformant data will be erroneously tagged or exchanged. Although
+ the "default" format specified in this memo does not allow for these
+ kinds of vagaries, prior negotiation or agreement between humans may
+ sometimes be needed.
+
+ Published specification: see Appendix A.
+
+ Applications that use this media type: hundreds of messaging products
+ make use of the mbox database format, in one form or another.
+
+ Magic number(s): mbox database files can be recognized by having a
+ leading character sequence of "From", followed by a single Space
+ character (0x20), followed by additional printable character data
+ (refer to the description in Appendix A for details). However,
+ implementers are cautioned that not all such files will be compliant
+ with all of the formatting rules; therefore, implementers should treat
+ these files with an appropriate amount of circumspection.
+
+ File extension(s): mbox database files sometimes have an ".mbox"
+ extension, but this is not required nor expected. As with magic
+ numbers, implementers should avoid reflexive assumptions about the
+ contents of such files.
+
+ Macintosh File Type Code(s): None are known to be common.
+
+ Person & email address to contact for further information: Eric A.
+ Hall (ehall@ntrg.com)
+
+ Intended usage: COMMON
+
+5. Security Considerations
+
+ See the discussion in section 4.
+
+
+
+
+
+
+
+
+
+Hall Informational [Page 5]
+
+RFC 4155 The application/mbox Media Type September 2005
+
+
+6. IANA Considerations
+
+ The IANA has registered the application/mbox media type in the MIME
+ registry, using the application provided in section 4 above.
+
+ Furthermore, IANA has established and will maintain a registry of
+ values for the "format" parameter as described in this memo. The
+ first registration is the "default" value, using the description
+ provided in Appendix A. Subsequent values for the "format" parameter
+ MUST be accompanied by some form of recognizable, complete, and
+ legitimate specification, such as an IESG-approved specification, or
+ some kind of authoritative vendor documentation.
+
+7. Normative References
+
+ [RFC2045] Freed, N. and N. Borenstein, "Multipurpose Internet Mail
+ Extensions (MIME) Part One: Format of Internet Message
+ Bodies", RFC 2045, November 1996.
+
+ [RFC2046] Freed, N. and N. Borenstein, "Multipurpose Internet Mail
+ Extensions (MIME) Part Two: Media Types", RFC 2046,
+ November 1996.
+
+ [RFC2048] Freed, N., Klensin, J., and J. Postel, "Multipurpose
+ Internet Mail Extensions (MIME) Part Four: Registration
+ Procedures", BCP 13, RFC 2048, November 1996.
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+ [RFC2822] Resnick, P., "Internet Message Format", RFC 2822, April
+ 2001.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hall Informational [Page 6]
+
+RFC 4155 The application/mbox Media Type September 2005
+
+
+Appendix A. The "default" mbox Database Format
+
+ In order to improve interoperability among messaging systems, this
+ memo defines a "default" mbox database format, which MUST be
+ supported by all implementations that claim to be compliant with this
+ specification.
+
+ The "default" mbox database format uses a linear sequence of Internet
+ messages, with each message being immediately prefaced by a separator
+ line, and being terminated by an empty line. More specifically:
+
+ o Each message within the database MUST follow the syntax and
+ formatting rules defined in RFC 2822 [RFC2822] and its related
+ specifications, with the exception that the canonical mbox
+ database MUST use a single Line-Feed character (0x0A) as the
+ end-of-line sequence, and MUST NOT use a Carriage-Return/Line-
+ Feed pair (NB: this requirement only applies to the canonical
+ mbox database as transferred, and does not override any other
+ specifications). This usage represents the most common
+ historical representation of the mbox database format, and
+ allows for the least amount of conversion.
+
+ o Messages within the default mbox database MUST consist of
+ seven-bit characters within an eight-bit stream. Eight-bit data
+ within the stream MUST be converted to a seven-bit form (using
+ appropriate, standardized encoding) and appropriately tagged
+ (with the correct header fields) before the database is
+ transferred.
+
+ o Message headers and data in the default mbox database MUST be
+ fully-qualified, as per the relevant specification(s). For
+ example, email addresses in the various header fields MUST have
+ legitimate domain names (as per RFC 2822), while extended
+ characters and encodings MUST be specified in the appropriate
+ location (as per the appropriate MIME specifications), and so
+ forth.
+
+ o Each message in the mbox database MUST be immediately preceded
+ by a single separator line, which MUST conform to the following
+ syntax:
+
+ The exact character sequence of "From";
+
+ a single Space character (0x20);
+
+ the email address of the message sender (as obtained from the
+ message envelope or other authoritative source), conformant
+ with the "addr-spec" syntax from RFC 2822;
+
+
+
+Hall Informational [Page 7]
+
+RFC 4155 The application/mbox Media Type September 2005
+
+
+ a single Space character;
+
+ a timestamp indicating the UTC date and time when the message
+ was originally received, conformant with the syntax of the
+ traditional UNIX 'ctime' output sans timezone (note that the
+ use of UTC precludes the need for a timezone indicator);
+
+ an end-of-line marker.
+
+ o Each message in the database MUST be terminated by an empty
+ line, containing a single end-of-line marker.
+
+ Note that the first message in an mbox database will only be prefaced
+ by a separator line, while every other message will begin with two
+ end-of-line sequences (one at the end of the message itself, and
+ another to mark the end of the message within the mbox database file
+ stream) and a separator line (marking the new message). The end of
+ the database is implicitly reached when no more message data or
+ separator lines are found.
+
+ Also note that this specification does not prescribe any escape
+ syntax for message body lines that begin with the character sequence
+ of "From ". Recipient systems are expected to parse full separator
+ lines as they are documented above.
+
+Author's Address
+
+ Eric A. Hall
+
+ EMail: ehall@ntrg.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hall Informational [Page 8]
+
+RFC 4155 The application/mbox Media Type September 2005
+
+
+Full Copyright Statement
+
+ Copyright (C) The Internet Society (2005).
+
+ This document is subject to the rights, licenses and restrictions
+ contained in BCP 78, and except as set forth therein, the authors
+ retain all their rights.
+
+ This document and the information contained herein are provided on an
+ "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+ OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
+ ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
+ INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
+ INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+ WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+ The IETF takes no position regarding the validity or scope of any
+ Intellectual Property Rights or other rights that might be claimed to
+ pertain to the implementation or use of the technology described in
+ this document or the extent to which any license under such rights
+ might or might not be available; nor does it represent that it has
+ made any independent effort to identify any such rights. Information
+ on the procedures with respect to rights in RFC documents can be
+ found in BCP 78 and BCP 79.
+
+ Copies of IPR disclosures made to the IETF Secretariat and any
+ assurances of licenses to be made available, or the result of an
+ attempt made to obtain a general license or permission for the use of
+ such proprietary rights by implementers or users of this
+ specification can be obtained from the IETF on-line IPR repository at
+ http://www.ietf.org/ipr.
+
+ The IETF invites any interested party to bring to its attention any
+ copyrights, patents or patent applications, or other proprietary
+ rights that may cover technology that may be required to implement
+ this standard. Please address the information to the IETF at ietf-
+ ipr@ietf.org.
+
+Acknowledgement
+
+ Funding for the RFC Editor function is currently provided by the
+ Internet Society.
+
+
+
+
+
+
+
+Hall Informational [Page 9]
+
diff --git a/doc/mime-p1-rfc2045.txt b/doc/mime-p1-rfc2045.txt
@@ -0,0 +1,1739 @@
+
+
+
+
+
+
+Network Working Group N. Freed
+Request for Comments: 2045 Innosoft
+Obsoletes: 1521, 1522, 1590 N. Borenstein
+Category: Standards Track First Virtual
+ November 1996
+
+
+ Multipurpose Internet Mail Extensions
+ (MIME) Part One:
+ Format of Internet Message Bodies
+
+Status of this Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Abstract
+
+ STD 11, RFC 822, defines a message representation protocol specifying
+ considerable detail about US-ASCII message headers, and leaves the
+ message content, or message body, as flat US-ASCII text. This set of
+ documents, collectively called the Multipurpose Internet Mail
+ Extensions, or MIME, redefines the format of messages to allow for
+
+ (1) textual message bodies in character sets other than
+ US-ASCII,
+
+ (2) an extensible set of different formats for non-textual
+ message bodies,
+
+ (3) multi-part message bodies, and
+
+ (4) textual header information in character sets other than
+ US-ASCII.
+
+ These documents are based on earlier work documented in RFC 934, STD
+ 11, and RFC 1049, but extend and revise them. Because RFC 822 said
+ so little about message bodies, these documents are largely
+ orthogonal to (rather than a revision of) RFC 822.
+
+ This initial document specifies the various headers used to describe
+ the structure of MIME messages. The second document, RFC 2046,
+ defines the general structure of the MIME media typing system and
+ defines an initial set of media types. The third document, RFC 2047,
+ describes extensions to RFC 822 to allow non-US-ASCII text data in
+
+
+
+Freed & Borenstein Standards Track [Page 1]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ Internet mail header fields. The fourth document, RFC 2048, specifies
+ various IANA registration procedures for MIME-related facilities. The
+ fifth and final document, RFC 2049, describes MIME conformance
+ criteria as well as providing some illustrative examples of MIME
+ message formats, acknowledgements, and the bibliography.
+
+ These documents are revisions of RFCs 1521, 1522, and 1590, which
+ themselves were revisions of RFCs 1341 and 1342. An appendix in RFC
+ 2049 describes differences and changes from previous versions.
+
+Table of Contents
+
+ 1. Introduction ......................................... 3
+ 2. Definitions, Conventions, and Generic BNF Grammar .... 5
+ 2.1 CRLF ................................................ 5
+ 2.2 Character Set ....................................... 6
+ 2.3 Message ............................................. 6
+ 2.4 Entity .............................................. 6
+ 2.5 Body Part ........................................... 7
+ 2.6 Body ................................................ 7
+ 2.7 7bit Data ........................................... 7
+ 2.8 8bit Data ........................................... 7
+ 2.9 Binary Data ......................................... 7
+ 2.10 Lines .............................................. 7
+ 3. MIME Header Fields ................................... 8
+ 4. MIME-Version Header Field ............................ 8
+ 5. Content-Type Header Field ............................ 10
+ 5.1 Syntax of the Content-Type Header Field ............. 12
+ 5.2 Content-Type Defaults ............................... 14
+ 6. Content-Transfer-Encoding Header Field ............... 14
+ 6.1 Content-Transfer-Encoding Syntax .................... 14
+ 6.2 Content-Transfer-Encodings Semantics ................ 15
+ 6.3 New Content-Transfer-Encodings ...................... 16
+ 6.4 Interpretation and Use .............................. 16
+ 6.5 Translating Encodings ............................... 18
+ 6.6 Canonical Encoding Model ............................ 19
+ 6.7 Quoted-Printable Content-Transfer-Encoding .......... 19
+ 6.8 Base64 Content-Transfer-Encoding .................... 24
+ 7. Content-ID Header Field .............................. 26
+ 8. Content-Description Header Field ..................... 27
+ 9. Additional MIME Header Fields ........................ 27
+ 10. Summary ............................................. 27
+ 11. Security Considerations ............................. 27
+ 12. Authors' Addresses .................................. 28
+ A. Collected Grammar .................................... 29
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 2]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+1. Introduction
+
+ Since its publication in 1982, RFC 822 has defined the standard
+ format of textual mail messages on the Internet. Its success has
+ been such that the RFC 822 format has been adopted, wholly or
+ partially, well beyond the confines of the Internet and the Internet
+ SMTP transport defined by RFC 821. As the format has seen wider use,
+ a number of limitations have proven increasingly restrictive for the
+ user community.
+
+ RFC 822 was intended to specify a format for text messages. As such,
+ non-text messages, such as multimedia messages that might include
+ audio or images, are simply not mentioned. Even in the case of text,
+ however, RFC 822 is inadequate for the needs of mail users whose
+ languages require the use of character sets richer than US-ASCII.
+ Since RFC 822 does not specify mechanisms for mail containing audio,
+ video, Asian language text, or even text in most European languages,
+ additional specifications are needed.
+
+ One of the notable limitations of RFC 821/822 based mail systems is
+ the fact that they limit the contents of electronic mail messages to
+ relatively short lines (e.g. 1000 characters or less [RFC-821]) of
+ 7bit US-ASCII. This forces users to convert any non-textual data
+ that they may wish to send into seven-bit bytes representable as
+ printable US-ASCII characters before invoking a local mail UA (User
+ Agent, a program with which human users send and receive mail).
+ Examples of such encodings currently used in the Internet include
+ pure hexadecimal, uuencode, the 3-in-4 base 64 scheme specified in
+ RFC 1421, the Andrew Toolkit Representation [ATK], and many others.
+
+ The limitations of RFC 822 mail become even more apparent as gateways
+ are designed to allow for the exchange of mail messages between RFC
+ 822 hosts and X.400 hosts. X.400 [X400] specifies mechanisms for the
+ inclusion of non-textual material within electronic mail messages.
+ The current standards for the mapping of X.400 messages to RFC 822
+ messages specify either that X.400 non-textual material must be
+ converted to (not encoded in) IA5Text format, or that they must be
+ discarded, notifying the RFC 822 user that discarding has occurred.
+ This is clearly undesirable, as information that a user may wish to
+ receive is lost. Even though a user agent may not have the
+ capability of dealing with the non-textual material, the user might
+ have some mechanism external to the UA that can extract useful
+ information from the material. Moreover, it does not allow for the
+ fact that the message may eventually be gatewayed back into an X.400
+ message handling system (i.e., the X.400 message is "tunneled"
+ through Internet mail), where the non-textual information would
+ definitely become useful again.
+
+
+
+
+Freed & Borenstein Standards Track [Page 3]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ This document describes several mechanisms that combine to solve most
+ of these problems without introducing any serious incompatibilities
+ with the existing world of RFC 822 mail. In particular, it
+ describes:
+
+ (1) A MIME-Version header field, which uses a version
+ number to declare a message to be conformant with MIME
+ and allows mail processing agents to distinguish
+ between such messages and those generated by older or
+ non-conformant software, which are presumed to lack
+ such a field.
+
+ (2) A Content-Type header field, generalized from RFC 1049,
+ which can be used to specify the media type and subtype
+ of data in the body of a message and to fully specify
+ the native representation (canonical form) of such
+ data.
+
+ (3) A Content-Transfer-Encoding header field, which can be
+ used to specify both the encoding transformation that
+ was applied to the body and the domain of the result.
+ Encoding transformations other than the identity
+ transformation are usually applied to data in order to
+ allow it to pass through mail transport mechanisms
+ which may have data or character set limitations.
+
+ (4) Two additional header fields that can be used to
+ further describe the data in a body, the Content-ID and
+ Content-Description header fields.
+
+ All of the header fields defined in this document are subject to the
+ general syntactic rules for header fields specified in RFC 822. In
+ particular, all of these header fields except for Content-Disposition
+ can include RFC 822 comments, which have no semantic content and
+ should be ignored during MIME processing.
+
+ Finally, to specify and promote interoperability, RFC 2049 provides a
+ basic applicability statement for a subset of the above mechanisms
+ that defines a minimal level of "conformance" with this document.
+
+ HISTORICAL NOTE: Several of the mechanisms described in this set of
+ documents may seem somewhat strange or even baroque at first reading.
+ It is important to note that compatibility with existing standards
+ AND robustness across existing practice were two of the highest
+ priorities of the working group that developed this set of documents.
+ In particular, compatibility was always favored over elegance.
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 4]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ Please refer to the current edition of the "Internet Official
+ Protocol Standards" for the standardization state and status of this
+ protocol. RFC 822 and STD 3, RFC 1123 also provide essential
+ background for MIME since no conforming implementation of MIME can
+ violate them. In addition, several other informational RFC documents
+ will be of interest to the MIME implementor, in particular RFC 1344,
+ RFC 1345, and RFC 1524.
+
+2. Definitions, Conventions, and Generic BNF Grammar
+
+ Although the mechanisms specified in this set of documents are all
+ described in prose, most are also described formally in the augmented
+ BNF notation of RFC 822. Implementors will need to be familiar with
+ this notation in order to understand this set of documents, and are
+ referred to RFC 822 for a complete explanation of the augmented BNF
+ notation.
+
+ Some of the augmented BNF in this set of documents makes named
+ references to syntax rules defined in RFC 822. A complete formal
+ grammar, then, is obtained by combining the collected grammar
+ appendices in each document in this set with the BNF of RFC 822 plus
+ the modifications to RFC 822 defined in RFC 1123 (which specifically
+ changes the syntax for `return', `date' and `mailbox').
+
+ All numeric and octet values are given in decimal notation in this
+ set of documents. All media type values, subtype values, and
+ parameter names as defined are case-insensitive. However, parameter
+ values are case-sensitive unless otherwise specified for the specific
+ parameter.
+
+ FORMATTING NOTE: Notes, such as this one, provide additional
+ nonessential information which may be skipped by the reader without
+ missing anything essential. The primary purpose of these non-
+ essential notes is to convey information about the rationale of this
+ set of documents, or to place these documents in the proper
+ historical or evolutionary context. Such information may in
+ particular be skipped by those who are focused entirely on building a
+ conformant implementation, but may be of use to those who wish to
+ understand why certain design choices were made.
+
+2.1. CRLF
+
+ The term CRLF, in this set of documents, refers to the sequence of
+ octets corresponding to the two US-ASCII characters CR (decimal value
+ 13) and LF (decimal value 10) which, taken together, in this order,
+ denote a line break in RFC 822 mail.
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 5]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+2.2. Character Set
+
+ The term "character set" is used in MIME to refer to a method of
+ converting a sequence of octets into a sequence of characters. Note
+ that unconditional and unambiguous conversion in the other direction
+ is not required, in that not all characters may be representable by a
+ given character set and a character set may provide more than one
+ sequence of octets to represent a particular sequence of characters.
+
+ This definition is intended to allow various kinds of character
+ encodings, from simple single-table mappings such as US-ASCII to
+ complex table switching methods such as those that use ISO 2022's
+ techniques, to be used as character sets. However, the definition
+ associated with a MIME character set name must fully specify the
+ mapping to be performed. In particular, use of external profiling
+ information to determine the exact mapping is not permitted.
+
+ NOTE: The term "character set" was originally used to describe such
+ straightforward schemes as US-ASCII and ISO-8859-1 which have a
+ simple one-to-one mapping from single octets to single characters.
+ Multi-octet coded character sets and switching techniques make the
+ situation more complex. For example, some communities use the term
+ "character encoding" for what MIME calls a "character set", while
+ using the phrase "coded character set" to denote an abstract mapping
+ from integers (not octets) to characters.
+
+2.3. Message
+
+ The term "message", when not further qualified, means either a
+ (complete or "top-level") RFC 822 message being transferred on a
+ network, or a message encapsulated in a body of type "message/rfc822"
+ or "message/partial".
+
+2.4. Entity
+
+ The term "entity" refers specifically to the MIME-defined header
+ fields and contents of either a message or one of the parts in the
+ body of a multipart entity. The specification of such entities is
+ the essence of MIME. Since the contents of an entity are often
+ called the "body", it makes sense to speak about the body of an
+ entity. Any sort of field may be present in the header of an entity,
+ but only those fields whose names begin with "content-" actually have
+ any MIME-related meaning. Note that this does NOT imply that they
+ have no meaning at all -- an entity that is also a message has non-
+ MIME header fields whose meanings are defined by RFC 822.
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 6]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+2.5. Body Part
+
+ The term "body part" refers to an entity inside of a multipart
+ entity.
+
+2.6. Body
+
+ The term "body", when not further qualified, means the body of an
+ entity, that is, the body of either a message or of a body part.
+
+ NOTE: The previous four definitions are clearly circular. This is
+ unavoidable, since the overall structure of a MIME message is indeed
+ recursive.
+
+2.7. 7bit Data
+
+ "7bit data" refers to data that is all represented as relatively
+ short lines with 998 octets or less between CRLF line separation
+ sequences [RFC-821]. No octets with decimal values greater than 127
+ are allowed and neither are NULs (octets with decimal value 0). CR
+ (decimal value 13) and LF (decimal value 10) octets only occur as
+ part of CRLF line separation sequences.
+
+2.8. 8bit Data
+
+ "8bit data" refers to data that is all represented as relatively
+ short lines with 998 octets or less between CRLF line separation
+ sequences [RFC-821], but octets with decimal values greater than 127
+ may be used. As with "7bit data" CR and LF octets only occur as part
+ of CRLF line separation sequences and no NULs are allowed.
+
+2.9. Binary Data
+
+ "Binary data" refers to data where any sequence of octets whatsoever
+ is allowed.
+
+2.10. Lines
+
+ "Lines" are defined as sequences of octets separated by CRLF
+ sequences. This is consistent with both RFC 821 and RFC 822.
+ "Lines" only refers to a unit of data in a message, which may or may
+ not correspond to something that is actually displayed by a user
+ agent.
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 7]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+3. MIME Header Fields
+
+ MIME defines a number of new RFC 822 header fields that are used to
+ describe the content of a MIME entity. These header fields occur in
+ at least two contexts:
+
+ (1) As part of a regular RFC 822 message header.
+
+ (2) In a MIME body part header within a multipart
+ construct.
+
+ The formal definition of these header fields is as follows:
+
+ entity-headers := [ content CRLF ]
+ [ encoding CRLF ]
+ [ id CRLF ]
+ [ description CRLF ]
+ *( MIME-extension-field CRLF )
+
+ MIME-message-headers := entity-headers
+ fields
+ version CRLF
+ ; The ordering of the header
+ ; fields implied by this BNF
+ ; definition should be ignored.
+
+ MIME-part-headers := entity-headers
+ [ fields ]
+ ; Any field not beginning with
+ ; "content-" can have no defined
+ ; meaning and may be ignored.
+ ; The ordering of the header
+ ; fields implied by this BNF
+ ; definition should be ignored.
+
+ The syntax of the various specific MIME header fields will be
+ described in the following sections.
+
+4. MIME-Version Header Field
+
+ Since RFC 822 was published in 1982, there has really been only one
+ format standard for Internet messages, and there has been little
+ perceived need to declare the format standard in use. This document
+ is an independent specification that complements RFC 822. Although
+ the extensions in this document have been defined in such a way as to
+ be compatible with RFC 822, there are still circumstances in which it
+ might be desirable for a mail-processing agent to know whether a
+ message was composed with the new standard in mind.
+
+
+
+Freed & Borenstein Standards Track [Page 8]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ Therefore, this document defines a new header field, "MIME-Version",
+ which is to be used to declare the version of the Internet message
+ body format standard in use.
+
+ Messages composed in accordance with this document MUST include such
+ a header field, with the following verbatim text:
+
+ MIME-Version: 1.0
+
+ The presence of this header field is an assertion that the message
+ has been composed in compliance with this document.
+
+ Since it is possible that a future document might extend the message
+ format standard again, a formal BNF is given for the content of the
+ MIME-Version field:
+
+ version := "MIME-Version" ":" 1*DIGIT "." 1*DIGIT
+
+ Thus, future format specifiers, which might replace or extend "1.0",
+ are constrained to be two integer fields, separated by a period. If
+ a message is received with a MIME-version value other than "1.0", it
+ cannot be assumed to conform with this document.
+
+ Note that the MIME-Version header field is required at the top level
+ of a message. It is not required for each body part of a multipart
+ entity. It is required for the embedded headers of a body of type
+ "message/rfc822" or "message/partial" if and only if the embedded
+ message is itself claimed to be MIME-conformant.
+
+ It is not possible to fully specify how a mail reader that conforms
+ with MIME as defined in this document should treat a message that
+ might arrive in the future with some value of MIME-Version other than
+ "1.0".
+
+ It is also worth noting that version control for specific media types
+ is not accomplished using the MIME-Version mechanism. In particular,
+ some formats (such as application/postscript) have version numbering
+ conventions that are internal to the media format. Where such
+ conventions exist, MIME does nothing to supersede them. Where no
+ such conventions exist, a MIME media type might use a "version"
+ parameter in the content-type field if necessary.
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 9]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ NOTE TO IMPLEMENTORS: When checking MIME-Version values any RFC 822
+ comment strings that are present must be ignored. In particular, the
+ following four MIME-Version fields are equivalent:
+
+ MIME-Version: 1.0
+
+ MIME-Version: 1.0 (produced by MetaSend Vx.x)
+
+ MIME-Version: (produced by MetaSend Vx.x) 1.0
+
+ MIME-Version: 1.(produced by MetaSend Vx.x)0
+
+ In the absence of a MIME-Version field, a receiving mail user agent
+ (whether conforming to MIME requirements or not) may optionally
+ choose to interpret the body of the message according to local
+ conventions. Many such conventions are currently in use and it
+ should be noted that in practice non-MIME messages can contain just
+ about anything.
+
+ It is impossible to be certain that a non-MIME mail message is
+ actually plain text in the US-ASCII character set since it might well
+ be a message that, using some set of nonstandard local conventions
+ that predate MIME, includes text in another character set or non-
+ textual data presented in a manner that cannot be automatically
+ recognized (e.g., a uuencoded compressed UNIX tar file).
+
+5. Content-Type Header Field
+
+ The purpose of the Content-Type field is to describe the data
+ contained in the body fully enough that the receiving user agent can
+ pick an appropriate agent or mechanism to present the data to the
+ user, or otherwise deal with the data in an appropriate manner. The
+ value in this field is called a media type.
+
+ HISTORICAL NOTE: The Content-Type header field was first defined in
+ RFC 1049. RFC 1049 used a simpler and less powerful syntax, but one
+ that is largely compatible with the mechanism given here.
+
+ The Content-Type header field specifies the nature of the data in the
+ body of an entity by giving media type and subtype identifiers, and
+ by providing auxiliary information that may be required for certain
+ media types. After the media type and subtype names, the remainder
+ of the header field is simply a set of parameters, specified in an
+ attribute=value notation. The ordering of parameters is not
+ significant.
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 10]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ In general, the top-level media type is used to declare the general
+ type of data, while the subtype specifies a specific format for that
+ type of data. Thus, a media type of "image/xyz" is enough to tell a
+ user agent that the data is an image, even if the user agent has no
+ knowledge of the specific image format "xyz". Such information can
+ be used, for example, to decide whether or not to show a user the raw
+ data from an unrecognized subtype -- such an action might be
+ reasonable for unrecognized subtypes of text, but not for
+ unrecognized subtypes of image or audio. For this reason, registered
+ subtypes of text, image, audio, and video should not contain embedded
+ information that is really of a different type. Such compound
+ formats should be represented using the "multipart" or "application"
+ types.
+
+ Parameters are modifiers of the media subtype, and as such do not
+ fundamentally affect the nature of the content. The set of
+ meaningful parameters depends on the media type and subtype. Most
+ parameters are associated with a single specific subtype. However, a
+ given top-level media type may define parameters which are applicable
+ to any subtype of that type. Parameters may be required by their
+ defining content type or subtype or they may be optional. MIME
+ implementations must ignore any parameters whose names they do not
+ recognize.
+
+ For example, the "charset" parameter is applicable to any subtype of
+ "text", while the "boundary" parameter is required for any subtype of
+ the "multipart" media type.
+
+ There are NO globally-meaningful parameters that apply to all media
+ types. Truly global mechanisms are best addressed, in the MIME
+ model, by the definition of additional Content-* header fields.
+
+ An initial set of seven top-level media types is defined in RFC 2046.
+ Five of these are discrete types whose content is essentially opaque
+ as far as MIME processing is concerned. The remaining two are
+ composite types whose contents require additional handling by MIME
+ processors.
+
+ This set of top-level media types is intended to be substantially
+ complete. It is expected that additions to the larger set of
+ supported types can generally be accomplished by the creation of new
+ subtypes of these initial types. In the future, more top-level types
+ may be defined only by a standards-track extension to this standard.
+ If another top-level type is to be used for any reason, it must be
+ given a name starting with "X-" to indicate its non-standard status
+ and to avoid a potential conflict with a future official name.
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 11]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+5.1. Syntax of the Content-Type Header Field
+
+ In the Augmented BNF notation of RFC 822, a Content-Type header field
+ value is defined as follows:
+
+ content := "Content-Type" ":" type "/" subtype
+ *(";" parameter)
+ ; Matching of media type and subtype
+ ; is ALWAYS case-insensitive.
+
+ type := discrete-type / composite-type
+
+ discrete-type := "text" / "image" / "audio" / "video" /
+ "application" / extension-token
+
+ composite-type := "message" / "multipart" / extension-token
+
+ extension-token := ietf-token / x-token
+
+ ietf-token := <An extension token defined by a
+ standards-track RFC and registered
+ with IANA.>
+
+ x-token := <The two characters "X-" or "x-" followed, with
+ no intervening white space, by any token>
+
+ subtype := extension-token / iana-token
+
+ iana-token := <A publicly-defined extension token. Tokens
+ of this form must be registered with IANA
+ as specified in RFC 2048.>
+
+ parameter := attribute "=" value
+
+ attribute := token
+ ; Matching of attributes
+ ; is ALWAYS case-insensitive.
+
+ value := token / quoted-string
+
+ token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
+ or tspecials>
+
+ tspecials := "(" / ")" / "<" / ">" / "@" /
+ "," / ";" / ":" / "\" / <">
+ "/" / "[" / "]" / "?" / "="
+ ; Must be in quoted-string,
+ ; to use within parameter values
+
+
+
+Freed & Borenstein Standards Track [Page 12]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ Note that the definition of "tspecials" is the same as the RFC 822
+ definition of "specials" with the addition of the three characters
+ "/", "?", and "=", and the removal of ".".
+
+ Note also that a subtype specification is MANDATORY -- it may not be
+ omitted from a Content-Type header field. As such, there are no
+ default subtypes.
+
+ The type, subtype, and parameter names are not case sensitive. For
+ example, TEXT, Text, and TeXt are all equivalent top-level media
+ types. Parameter values are normally case sensitive, but sometimes
+ are interpreted in a case-insensitive fashion, depending on the
+ intended use. (For example, multipart boundaries are case-sensitive,
+ but the "access-type" parameter for message/External-body is not
+ case-sensitive.)
+
+ Note that the value of a quoted string parameter does not include the
+ quotes. That is, the quotation marks in a quoted-string are not a
+ part of the value of the parameter, but are merely used to delimit
+ that parameter value. In addition, comments are allowed in
+ accordance with RFC 822 rules for structured header fields. Thus the
+ following two forms
+
+ Content-type: text/plain; charset=us-ascii (Plain text)
+
+ Content-type: text/plain; charset="us-ascii"
+
+ are completely equivalent.
+
+ Beyond this syntax, the only syntactic constraint on the definition
+ of subtype names is the desire that their uses must not conflict.
+ That is, it would be undesirable to have two different communities
+ using "Content-Type: application/foobar" to mean two different
+ things. The process of defining new media subtypes, then, is not
+ intended to be a mechanism for imposing restrictions, but simply a
+ mechanism for publicizing their definition and usage. There are,
+ therefore, two acceptable mechanisms for defining new media subtypes:
+
+ (1) Private values (starting with "X-") may be defined
+ bilaterally between two cooperating agents without
+ outside registration or standardization. Such values
+ cannot be registered or standardized.
+
+ (2) New standard values should be registered with IANA as
+ described in RFC 2048.
+
+ The second document in this set, RFC 2046, defines the initial set of
+ media types for MIME.
+
+
+
+Freed & Borenstein Standards Track [Page 13]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+5.2. Content-Type Defaults
+
+ Default RFC 822 messages without a MIME Content-Type header are taken
+ by this protocol to be plain text in the US-ASCII character set,
+ which can be explicitly specified as:
+
+ Content-type: text/plain; charset=us-ascii
+
+ This default is assumed if no Content-Type header field is specified.
+ It is also recommended that this default be assumed when a
+ syntactically invalid Content-Type header field is encountered. In
+ the presence of a MIME-Version header field and the absence of any
+ Content-Type header field, a receiving User Agent can also assume
+ that plain US-ASCII text was the sender's intent. Plain US-ASCII
+ text may still be assumed in the absence of a MIME-Version or the
+ presence of a syntactically invalid Content-Type header field, but
+ the sender's intent might have been otherwise.
+
+6. Content-Transfer-Encoding Header Field
+
+ Many media types which could be usefully transported via email are
+ represented, in their "natural" format, as 8bit character or binary
+ data. Such data cannot be transmitted over some transfer protocols.
+ For example, RFC 821 (SMTP) restricts mail messages to 7bit US-ASCII
+ data with lines no longer than 1000 characters including any trailing
+ CRLF line separator.
+
+ It is necessary, therefore, to define a standard mechanism for
+ encoding such data into a 7bit short line format. Proper labelling
+ of unencoded material in less restrictive formats for direct use over
+ less restrictive transports is also desirable. This document
+ specifies that such encodings will be indicated by a new "Content-
+ Transfer-Encoding" header field. This field has not been defined by
+ any previous standard.
+
+6.1. Content-Transfer-Encoding Syntax
+
+ The Content-Transfer-Encoding field's value is a single token
+ specifying the type of encoding, as enumerated below. Formally:
+
+ encoding := "Content-Transfer-Encoding" ":" mechanism
+
+ mechanism := "7bit" / "8bit" / "binary" /
+ "quoted-printable" / "base64" /
+ ietf-token / x-token
+
+ These values are not case sensitive -- Base64 and BASE64 and bAsE64
+ are all equivalent. An encoding type of 7BIT requires that the body
+
+
+
+Freed & Borenstein Standards Track [Page 14]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ is already in a 7bit mail-ready representation. This is the default
+ value -- that is, "Content-Transfer-Encoding: 7BIT" is assumed if the
+ Content-Transfer-Encoding header field is not present.
+
+6.2. Content-Transfer-Encodings Semantics
+
+ This single Content-Transfer-Encoding token actually provides two
+ pieces of information. It specifies what sort of encoding
+ transformation the body was subjected to and hence what decoding
+ operation must be used to restore it to its original form, and it
+ specifies what the domain of the result is.
+
+ The transformation part of any Content-Transfer-Encodings specifies,
+ either explicitly or implicitly, a single, well-defined decoding
+ algorithm, which for any sequence of encoded octets either transforms
+ it to the original sequence of octets which was encoded, or shows
+ that it is illegal as an encoded sequence. Content-Transfer-
+ Encodings transformations never depend on any additional external
+ profile information for proper operation. Note that while decoders
+ must produce a single, well-defined output for a valid encoding no
+ such restrictions exist for encoders: Encoding a given sequence of
+ octets to different, equivalent encoded sequences is perfectly legal.
+
+ Three transformations are currently defined: identity, the "quoted-
+ printable" encoding, and the "base64" encoding. The domains are
+ "binary", "8bit" and "7bit".
+
+ The Content-Transfer-Encoding values "7bit", "8bit", and "binary" all
+ mean that the identity (i.e. NO) encoding transformation has been
+ performed. As such, they serve simply as indicators of the domain of
+ the body data, and provide useful information about the sort of
+ encoding that might be needed for transmission in a given transport
+ system. The terms "7bit data", "8bit data", and "binary data" are
+ all defined in Section 2.
+
+ The quoted-printable and base64 encodings transform their input from
+ an arbitrary domain into material in the "7bit" range, thus making it
+ safe to carry over restricted transports. The specific definitions of
+ the transformations are given below.
+
+ The proper Content-Transfer-Encoding label must always be used.
+ Labelling unencoded data containing 8bit characters as "7bit" is not
+ allowed, nor is labelling unencoded non-line-oriented data as
+ anything other than "binary" allowed.
+
+ Unlike media subtypes, a proliferation of Content-Transfer-Encoding
+ values is both undesirable and unnecessary. However, establishing
+ only a single transformation into the "7bit" domain does not seem
+
+
+
+Freed & Borenstein Standards Track [Page 15]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ possible. There is a tradeoff between the desire for a compact and
+ efficient encoding of largely-binary data and the desire for a
+ somewhat readable encoding of data that is mostly, but not entirely,
+ 7bit. For this reason, at least two encoding mechanisms are
+ necessary: a more or less readable encoding (quoted-printable) and a
+ "dense" or "uniform" encoding (base64).
+
+ Mail transport for unencoded 8bit data is defined in RFC 1652. As of
+ the initial publication of this document, there are no standardized
+ Internet mail transports for which it is legitimate to include
+ unencoded binary data in mail bodies. Thus there are no
+ circumstances in which the "binary" Content-Transfer-Encoding is
+ actually valid in Internet mail. However, in the event that binary
+ mail transport becomes a reality in Internet mail, or when MIME is
+ used in conjunction with any other binary-capable mail transport
+ mechanism, binary bodies must be labelled as such using this
+ mechanism.
+
+ NOTE: The five values defined for the Content-Transfer-Encoding field
+ imply nothing about the media type other than the algorithm by which
+ it was encoded or the transport system requirements if unencoded.
+
+6.3. New Content-Transfer-Encodings
+
+ Implementors may, if necessary, define private Content-Transfer-
+ Encoding values, but must use an x-token, which is a name prefixed by
+ "X-", to indicate its non-standard status, e.g., "Content-Transfer-
+ Encoding: x-my-new-encoding". Additional standardized Content-
+ Transfer-Encoding values must be specified by a standards-track RFC.
+ The requirements such specifications must meet are given in RFC 2048.
+ As such, all content-transfer-encoding namespace except that
+ beginning with "X-" is explicitly reserved to the IETF for future
+ use.
+
+ Unlike media types and subtypes, the creation of new Content-
+ Transfer-Encoding values is STRONGLY discouraged, as it seems likely
+ to hinder interoperability with little potential benefit.
+
+6.4. Interpretation and Use
+
+ If a Content-Transfer-Encoding header field appears as part of a
+ message header, it applies to the entire body of that message. If a
+ Content-Transfer-Encoding header field appears as part of an entity's
+ headers, it applies only to the body of that entity. If an entity is
+ of type "multipart" the Content-Transfer-Encoding is not permitted to
+ have any value other than "7bit", "8bit" or "binary". Even more
+ severe restrictions apply to some subtypes of the "message" type.
+
+
+
+
+Freed & Borenstein Standards Track [Page 16]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ It should be noted that most media types are defined in terms of
+ octets rather than bits, so that the mechanisms described here are
+ mechanisms for encoding arbitrary octet streams, not bit streams. If
+ a bit stream is to be encoded via one of these mechanisms, it must
+ first be converted to an 8bit byte stream using the network standard
+ bit order ("big-endian"), in which the earlier bits in a stream
+ become the higher-order bits in an 8bit byte. A bit stream not ending
+ at an 8bit boundary must be padded with zeroes. RFC 2046 provides a
+ mechanism for noting the addition of such padding in the case of the
+ application/octet-stream media type, which has a "padding" parameter.
+
+ The encoding mechanisms defined here explicitly encode all data in
+ US-ASCII. Thus, for example, suppose an entity has header fields
+ such as:
+
+ Content-Type: text/plain; charset=ISO-8859-1
+ Content-transfer-encoding: base64
+
+ This must be interpreted to mean that the body is a base64 US-ASCII
+ encoding of data that was originally in ISO-8859-1, and will be in
+ that character set again after decoding.
+
+ Certain Content-Transfer-Encoding values may only be used on certain
+ media types. In particular, it is EXPRESSLY FORBIDDEN to use any
+ encodings other than "7bit", "8bit", or "binary" with any composite
+ media type, i.e. one that recursively includes other Content-Type
+ fields. Currently the only composite media types are "multipart" and
+ "message". All encodings that are desired for bodies of type
+ multipart or message must be done at the innermost level, by encoding
+ the actual body that needs to be encoded.
+
+ It should also be noted that, by definition, if a composite entity
+ has a transfer-encoding value such as "7bit", but one of the enclosed
+ entities has a less restrictive value such as "8bit", then either the
+ outer "7bit" labelling is in error, because 8bit data are included,
+ or the inner "8bit" labelling placed an unnecessarily high demand on
+ the transport system because the actual included data were actually
+ 7bit-safe.
+
+ NOTE ON ENCODING RESTRICTIONS: Though the prohibition against using
+ content-transfer-encodings on composite body data may seem overly
+ restrictive, it is necessary to prevent nested encodings, in which
+ data are passed through an encoding algorithm multiple times, and
+ must be decoded multiple times in order to be properly viewed.
+ Nested encodings add considerable complexity to user agents: Aside
+ from the obvious efficiency problems with such multiple encodings,
+ they can obscure the basic structure of a message. In particular,
+ they can imply that several decoding operations are necessary simply
+
+
+
+Freed & Borenstein Standards Track [Page 17]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ to find out what types of bodies a message contains. Banning nested
+ encodings may complicate the job of certain mail gateways, but this
+ seems less of a problem than the effect of nested encodings on user
+ agents.
+
+ Any entity with an unrecognized Content-Transfer-Encoding must be
+ treated as if it has a Content-Type of "application/octet-stream",
+ regardless of what the Content-Type header field actually says.
+
+ NOTE ON THE RELATIONSHIP BETWEEN CONTENT-TYPE AND CONTENT-TRANSFER-
+ ENCODING: It may seem that the Content-Transfer-Encoding could be
+ inferred from the characteristics of the media that is to be encoded,
+ or, at the very least, that certain Content-Transfer-Encodings could
+ be mandated for use with specific media types. There are several
+ reasons why this is not the case. First, given the varying types of
+ transports used for mail, some encodings may be appropriate for some
+ combinations of media types and transports but not for others. (For
+ example, in an 8bit transport, no encoding would be required for text
+ in certain character sets, while such encodings are clearly required
+ for 7bit SMTP.)
+
+ Second, certain media types may require different types of transfer
+ encoding under different circumstances. For example, many PostScript
+ bodies might consist entirely of short lines of 7bit data and hence
+ require no encoding at all. Other PostScript bodies (especially
+ those using Level 2 PostScript's binary encoding mechanism) may only
+ be reasonably represented using a binary transport encoding.
+ Finally, since the Content-Type field is intended to be an open-ended
+ specification mechanism, strict specification of an association
+ between media types and encodings effectively couples the
+ specification of an application protocol with a specific lower-level
+ transport. This is not desirable since the developers of a media
+ type should not have to be aware of all the transports in use and
+ what their limitations are.
+
+6.5. Translating Encodings
+
+ The quoted-printable and base64 encodings are designed so that
+ conversion between them is possible. The only issue that arises in
+ such a conversion is the handling of hard line breaks in quoted-
+ printable encoding output. When converting from quoted-printable to
+ base64 a hard line break in the quoted-printable form represents a
+ CRLF sequence in the canonical form of the data. It must therefore be
+ converted to a corresponding encoded CRLF in the base64 form of the
+ data. Similarly, a CRLF sequence in the canonical form of the data
+ obtained after base64 decoding must be converted to a quoted-
+ printable hard line break, but ONLY when converting text data.
+
+
+
+
+Freed & Borenstein Standards Track [Page 18]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+6.6. Canonical Encoding Model
+
+ There was some confusion, in the previous versions of this RFC,
+ regarding the model for when email data was to be converted to
+ canonical form and encoded, and in particular how this process would
+ affect the treatment of CRLFs, given that the representation of
+ newlines varies greatly from system to system, and the relationship
+ between content-transfer-encodings and character sets. A canonical
+ model for encoding is presented in RFC 2049 for this reason.
+
+6.7. Quoted-Printable Content-Transfer-Encoding
+
+ The Quoted-Printable encoding is intended to represent data that
+ largely consists of octets that correspond to printable characters in
+ the US-ASCII character set. It encodes the data in such a way that
+ the resulting octets are unlikely to be modified by mail transport.
+ If the data being encoded are mostly US-ASCII text, the encoded form
+ of the data remains largely recognizable by humans. A body which is
+ entirely US-ASCII may also be encoded in Quoted-Printable to ensure
+ the integrity of the data should the message pass through a
+ character-translating, and/or line-wrapping gateway.
+
+ In this encoding, octets are to be represented as determined by the
+ following rules:
+
+ (1) (General 8bit representation) Any octet, except a CR or
+ LF that is part of a CRLF line break of the canonical
+ (standard) form of the data being encoded, may be
+ represented by an "=" followed by a two digit
+ hexadecimal representation of the octet's value. The
+ digits of the hexadecimal alphabet, for this purpose,
+ are "0123456789ABCDEF". Uppercase letters must be
+ used; lowercase letters are not allowed. Thus, for
+ example, the decimal value 12 (US-ASCII form feed) can
+ be represented by "=0C", and the decimal value 61 (US-
+ ASCII EQUAL SIGN) can be represented by "=3D". This
+ rule must be followed except when the following rules
+ allow an alternative encoding.
+
+ (2) (Literal representation) Octets with decimal values of
+ 33 through 60 inclusive, and 62 through 126, inclusive,
+ MAY be represented as the US-ASCII characters which
+ correspond to those octets (EXCLAMATION POINT through
+ LESS THAN, and GREATER THAN through TILDE,
+ respectively).
+
+ (3) (White Space) Octets with values of 9 and 32 MAY be
+ represented as US-ASCII TAB (HT) and SPACE characters,
+
+
+
+Freed & Borenstein Standards Track [Page 19]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ respectively, but MUST NOT be so represented at the end
+ of an encoded line. Any TAB (HT) or SPACE characters
+ on an encoded line MUST thus be followed on that line
+ by a printable character. In particular, an "=" at the
+ end of an encoded line, indicating a soft line break
+ (see rule #5) may follow one or more TAB (HT) or SPACE
+ characters. It follows that an octet with decimal
+ value 9 or 32 appearing at the end of an encoded line
+ must be represented according to Rule #1. This rule is
+ necessary because some MTAs (Message Transport Agents,
+ programs which transport messages from one user to
+ another, or perform a portion of such transfers) are
+ known to pad lines of text with SPACEs, and others are
+ known to remove "white space" characters from the end
+ of a line. Therefore, when decoding a Quoted-Printable
+ body, any trailing white space on a line must be
+ deleted, as it will necessarily have been added by
+ intermediate transport agents.
+
+ (4) (Line Breaks) A line break in a text body, represented
+ as a CRLF sequence in the text canonical form, must be
+ represented by a (RFC 822) line break, which is also a
+ CRLF sequence, in the Quoted-Printable encoding. Since
+ the canonical representation of media types other than
+ text does not generally include the representation of
+ line breaks as CRLF sequences, no hard line breaks
+ (i.e. line breaks that are intended to be meaningful
+ and to be displayed to the user) can occur in the
+ quoted-printable encoding of such types. Sequences
+ like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely
+ appear in non-text data represented in quoted-
+ printable, of course.
+
+ Note that many implementations may elect to encode the
+ local representation of various content types directly
+ rather than converting to canonical form first,
+ encoding, and then converting back to local
+ representation. In particular, this may apply to plain
+ text material on systems that use newline conventions
+ other than a CRLF terminator sequence. Such an
+ implementation optimization is permissible, but only
+ when the combined canonicalization-encoding step is
+ equivalent to performing the three steps separately.
+
+ (5) (Soft Line Breaks) The Quoted-Printable encoding
+ REQUIRES that encoded lines be no more than 76
+ characters long. If longer lines are to be encoded
+ with the Quoted-Printable encoding, "soft" line breaks
+
+
+
+Freed & Borenstein Standards Track [Page 20]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ must be used. An equal sign as the last character on an
+ encoded line indicates such a non-significant ("soft")
+ line break in the encoded text.
+
+ Thus if the "raw" form of the line is a single unencoded line that
+ says:
+
+ Now's the time for all folk to come to the aid of their country.
+
+ This can be represented, in the Quoted-Printable encoding, as:
+
+ Now's the time =
+ for all folk to come=
+ to the aid of their country.
+
+ This provides a mechanism with which long lines are encoded in such a
+ way as to be restored by the user agent. The 76 character limit does
+ not count the trailing CRLF, but counts all other characters,
+ including any equal signs.
+
+ Since the hyphen character ("-") may be represented as itself in the
+ Quoted-Printable encoding, care must be taken, when encapsulating a
+ quoted-printable encoded body inside one or more multipart entities,
+ to ensure that the boundary delimiter does not appear anywhere in the
+ encoded body. (A good strategy is to choose a boundary that includes
+ a character sequence such as "=_" which can never appear in a
+ quoted-printable body. See the definition of multipart messages in
+ RFC 2046.)
+
+ NOTE: The quoted-printable encoding represents something of a
+ compromise between readability and reliability in transport. Bodies
+ encoded with the quoted-printable encoding will work reliably over
+ most mail gateways, but may not work perfectly over a few gateways,
+ notably those involving translation into EBCDIC. A higher level of
+ confidence is offered by the base64 Content-Transfer-Encoding. A way
+ to get reasonably reliable transport through EBCDIC gateways is to
+ also quote the US-ASCII characters
+
+ !"#$@[\]^`{|}~
+
+ according to rule #1.
+
+ Because quoted-printable data is generally assumed to be line-
+ oriented, it is to be expected that the representation of the breaks
+ between the lines of quoted-printable data may be altered in
+ transport, in the same manner that plain text mail has always been
+ altered in Internet mail when passing between systems with differing
+ newline conventions. If such alterations are likely to constitute a
+
+
+
+Freed & Borenstein Standards Track [Page 21]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ corruption of the data, it is probably more sensible to use the
+ base64 encoding rather than the quoted-printable encoding.
+
+ NOTE: Several kinds of substrings cannot be generated according to
+ the encoding rules for the quoted-printable content-transfer-
+ encoding, and hence are formally illegal if they appear in the output
+ of a quoted-printable encoder. This note enumerates these cases and
+ suggests ways to handle such illegal substrings if any are
+ encountered in quoted-printable data that is to be decoded.
+
+ (1) An "=" followed by two hexadecimal digits, one or both
+ of which are lowercase letters in "abcdef", is formally
+ illegal. A robust implementation might choose to
+ recognize them as the corresponding uppercase letters.
+
+ (2) An "=" followed by a character that is neither a
+ hexadecimal digit (including "abcdef") nor the CR
+ character of a CRLF pair is illegal. This case can be
+ the result of US-ASCII text having been included in a
+ quoted-printable part of a message without itself
+ having been subjected to quoted-printable encoding. A
+ reasonable approach by a robust implementation might be
+ to include the "=" character and the following
+ character in the decoded data without any
+ transformation and, if possible, indicate to the user
+ that proper decoding was not possible at this point in
+ the data.
+
+ (3) An "=" cannot be the ultimate or penultimate character
+ in an encoded object. This could be handled as in case
+ (2) above.
+
+ (4) Control characters other than TAB, or CR and LF as
+ parts of CRLF pairs, must not appear. The same is true
+ for octets with decimal values greater than 126. If
+ found in incoming quoted-printable data by a decoder, a
+ robust implementation might exclude them from the
+ decoded data and warn the user that illegal characters
+ were discovered.
+
+ (5) Encoded lines must not be longer than 76 characters,
+ not counting the trailing CRLF. If longer lines are
+ found in incoming, encoded data, a robust
+ implementation might nevertheless decode the lines, and
+ might report the erroneous encoding to the user.
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 22]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ WARNING TO IMPLEMENTORS: If binary data is encoded in quoted-
+ printable, care must be taken to encode CR and LF characters as "=0D"
+ and "=0A", respectively. In particular, a CRLF sequence in binary
+ data should be encoded as "=0D=0A". Otherwise, if CRLF were
+ represented as a hard line break, it might be incorrectly decoded on
+ platforms with different line break conventions.
+
+ For formalists, the syntax of quoted-printable data is described by
+ the following grammar:
+
+ quoted-printable := qp-line *(CRLF qp-line)
+
+ qp-line := *(qp-segment transport-padding CRLF)
+ qp-part transport-padding
+
+ qp-part := qp-section
+ ; Maximum length of 76 characters
+
+ qp-segment := qp-section *(SPACE / TAB) "="
+ ; Maximum length of 76 characters
+
+ qp-section := [*(ptext / SPACE / TAB) ptext]
+
+ ptext := hex-octet / safe-char
+
+ safe-char := <any octet with decimal value of 33 through
+ 60 inclusive, and 62 through 126>
+ ; Characters not listed as "mail-safe" in
+ ; RFC 2049 are also not recommended.
+
+ hex-octet := "=" 2(DIGIT / "A" / "B" / "C" / "D" / "E" / "F")
+ ; Octet must be used for characters > 126, =,
+ ; SPACEs or TABs at the ends of lines, and is
+ ; recommended for any character not listed in
+ ; RFC 2049 as "mail-safe".
+
+ transport-padding := *LWSP-char
+ ; Composers MUST NOT generate
+ ; non-zero length transport
+ ; padding, but receivers MUST
+ ; be able to handle padding
+ ; added by message transports.
+
+ IMPORTANT: The addition of LWSP between the elements shown in this
+ BNF is NOT allowed since this BNF does not specify a structured
+ header field.
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 23]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+6.8. Base64 Content-Transfer-Encoding
+
+ The Base64 Content-Transfer-Encoding is designed to represent
+ arbitrary sequences of octets in a form that need not be humanly
+ readable. The encoding and decoding algorithms are simple, but the
+ encoded data are consistently only about 33 percent larger than the
+ unencoded data. This encoding is virtually identical to the one used
+ in Privacy Enhanced Mail (PEM) applications, as defined in RFC 1421.
+
+ A 65-character subset of US-ASCII is used, enabling 6 bits to be
+ represented per printable character. (The extra 65th character, "=",
+ is used to signify a special processing function.)
+
+ NOTE: This subset has the important property that it is represented
+ identically in all versions of ISO 646, including US-ASCII, and all
+ characters in the subset are also represented identically in all
+ versions of EBCDIC. Other popular encodings, such as the encoding
+ used by the uuencode utility, Macintosh binhex 4.0 [RFC-1741], and
+ the base85 encoding specified as part of Level 2 PostScript, do not
+ share these properties, and thus do not fulfill the portability
+ requirements a binary transport encoding for mail must meet.
+
+ The encoding process represents 24-bit groups of input bits as output
+ strings of 4 encoded characters. Proceeding from left to right, a
+ 24-bit input group is formed by concatenating 3 8bit input groups.
+ These 24 bits are then treated as 4 concatenated 6-bit groups, each
+ of which is translated into a single digit in the base64 alphabet.
+ When encoding a bit stream via the base64 encoding, the bit stream
+ must be presumed to be ordered with the most-significant-bit first.
+ That is, the first bit in the stream will be the high-order bit in
+ the first 8bit byte, and the eighth bit will be the low-order bit in
+ the first 8bit byte, and so on.
+
+ Each 6-bit group is used as an index into an array of 64 printable
+ characters. The character referenced by the index is placed in the
+ output string. These characters, identified in Table 1, below, are
+ selected so as to be universally representable, and the set excludes
+ characters with particular significance to SMTP (e.g., ".", CR, LF)
+ and to the multipart boundary delimiters defined in RFC 2046 (e.g.,
+ "-").
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 24]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ Table 1: The Base64 Alphabet
+
+ Value Encoding Value Encoding Value Encoding Value Encoding
+ 0 A 17 R 34 i 51 z
+ 1 B 18 S 35 j 52 0
+ 2 C 19 T 36 k 53 1
+ 3 D 20 U 37 l 54 2
+ 4 E 21 V 38 m 55 3
+ 5 F 22 W 39 n 56 4
+ 6 G 23 X 40 o 57 5
+ 7 H 24 Y 41 p 58 6
+ 8 I 25 Z 42 q 59 7
+ 9 J 26 a 43 r 60 8
+ 10 K 27 b 44 s 61 9
+ 11 L 28 c 45 t 62 +
+ 12 M 29 d 46 u 63 /
+ 13 N 30 e 47 v
+ 14 O 31 f 48 w (pad) =
+ 15 P 32 g 49 x
+ 16 Q 33 h 50 y
+
+ The encoded output stream must be represented in lines of no more
+ than 76 characters each. All line breaks or other characters not
+ found in Table 1 must be ignored by decoding software. In base64
+ data, characters other than those in Table 1, line breaks, and other
+ white space probably indicate a transmission error, about which a
+ warning message or even a message rejection might be appropriate
+ under some circumstances.
+
+ Special processing is performed if fewer than 24 bits are available
+ at the end of the data being encoded. A full encoding quantum is
+ always completed at the end of a body. When fewer than 24 input bits
+ are available in an input group, zero bits are added (on the right)
+ to form an integral number of 6-bit groups. Padding at the end of
+ the data is performed using the "=" character. Since all base64
+ input is an integral number of octets, only the following cases can
+ arise: (1) the final quantum of encoding input is an integral
+ multiple of 24 bits; here, the final unit of encoded output will be
+ an integral multiple of 4 characters with no "=" padding, (2) the
+ final quantum of encoding input is exactly 8 bits; here, the final
+ unit of encoded output will be two characters followed by two "="
+ padding characters, or (3) the final quantum of encoding input is
+ exactly 16 bits; here, the final unit of encoded output will be three
+ characters followed by one "=" padding character.
+
+ Because it is used only for padding at the end of the data, the
+ occurrence of any "=" characters may be taken as evidence that the
+ end of the data has been reached (without truncation in transit). No
+
+
+
+Freed & Borenstein Standards Track [Page 25]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ such assurance is possible, however, when the number of octets
+ transmitted was a multiple of three and no "=" characters are
+ present.
+
+ Any characters outside of the base64 alphabet are to be ignored in
+ base64-encoded data.
+
+ Care must be taken to use the proper octets for line breaks if base64
+ encoding is applied directly to text material that has not been
+ converted to canonical form. In particular, text line breaks must be
+ converted into CRLF sequences prior to base64 encoding. The
+ important thing to note is that this may be done directly by the
+ encoder rather than in a prior canonicalization step in some
+ implementations.
+
+ NOTE: There is no need to worry about quoting potential boundary
+ delimiters within base64-encoded bodies within multipart entities
+ because no hyphen characters are used in the base64 encoding.
+
+7. Content-ID Header Field
+
+ In constructing a high-level user agent, it may be desirable to allow
+ one body to make reference to another. Accordingly, bodies may be
+ labelled using the "Content-ID" header field, which is syntactically
+ identical to the "Message-ID" header field:
+
+ id := "Content-ID" ":" msg-id
+
+ Like the Message-ID values, Content-ID values must be generated to be
+ world-unique.
+
+ The Content-ID value may be used for uniquely identifying MIME
+ entities in several contexts, particularly for caching data
+ referenced by the message/external-body mechanism. Although the
+ Content-ID header is generally optional, its use is MANDATORY in
+ implementations which generate data of the optional MIME media type
+ "message/external-body". That is, each message/external-body entity
+ must have a Content-ID field to permit caching of such data.
+
+ It is also worth noting that the Content-ID value has special
+ semantics in the case of the multipart/alternative media type. This
+ is explained in the section of RFC 2046 dealing with
+ multipart/alternative.
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 26]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+8. Content-Description Header Field
+
+ The ability to associate some descriptive information with a given
+ body is often desirable. For example, it may be useful to mark an
+ "image" body as "a picture of the Space Shuttle Endeavour." Such text
+ may be placed in the Content-Description header field. This header
+ field is always optional.
+
+ description := "Content-Description" ":" *text
+
+ The description is presumed to be given in the US-ASCII character
+ set, although the mechanism specified in RFC 2047 may be used for
+ non-US-ASCII Content-Description values.
+
+9. Additional MIME Header Fields
+
+ Future documents may elect to define additional MIME header fields
+ for various purposes. Any new header field that further describes
+ the content of a message should begin with the string "Content-" to
+ allow such fields which appear in a message header to be
+ distinguished from ordinary RFC 822 message header fields.
+
+ MIME-extension-field := <Any RFC 822 header field which
+ begins with the string
+ "Content-">
+
+10. Summary
+
+ Using the MIME-Version, Content-Type, and Content-Transfer-Encoding
+ header fields, it is possible to include, in a standardized way,
+ arbitrary types of data with RFC 822 conformant mail messages. No
+ restrictions imposed by either RFC 821 or RFC 822 are violated, and
+ care has been taken to avoid problems caused by additional
+ restrictions imposed by the characteristics of some Internet mail
+ transport mechanisms (see RFC 2049).
+
+ The next document in this set, RFC 2046, specifies the initial set of
+ media types that can be labelled and transported using these headers.
+
+11. Security Considerations
+
+ Security issues are discussed in the second document in this set, RFC
+ 2046.
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 27]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+12. Authors' Addresses
+
+ For more information, the authors of this document are best contacted
+ via Internet mail:
+
+ Ned Freed
+ Innosoft International, Inc.
+ 1050 East Garvey Avenue South
+ West Covina, CA 91790
+ USA
+
+ Phone: +1 818 919 3600
+ Fax: +1 818 919 3614
+ EMail: ned@innosoft.com
+
+
+ Nathaniel S. Borenstein
+ First Virtual Holdings
+ 25 Washington Avenue
+ Morristown, NJ 07960
+ USA
+
+ Phone: +1 201 540 8967
+ Fax: +1 201 993 3032
+ EMail: nsb@nsb.fv.com
+
+
+ MIME is a result of the work of the Internet Engineering Task Force
+ Working Group on RFC 822 Extensions. The chairman of that group,
+ Greg Vaudreuil, may be reached at:
+
+ Gregory M. Vaudreuil
+ Octel Network Services
+ 17080 Dallas Parkway
+ Dallas, TX 75248-1905
+ USA
+
+ EMail: Greg.Vaudreuil@Octel.Com
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 28]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+Appendix A -- Collected Grammar
+
+ This appendix contains the complete BNF grammar for all the syntax
+ specified by this document.
+
+ By itself, however, this grammar is incomplete. It refers by name to
+ several syntax rules that are defined by RFC 822. Rather than
+ reproduce those definitions here, and risk unintentional differences
+ between the two, this document simply refers the reader to RFC 822
+ for the remaining definitions. Wherever a term is undefined, it
+ refers to the RFC 822 definition.
+
+ attribute := token
+ ; Matching of attributes
+ ; is ALWAYS case-insensitive.
+
+ composite-type := "message" / "multipart" / extension-token
+
+ content := "Content-Type" ":" type "/" subtype
+ *(";" parameter)
+ ; Matching of media type and subtype
+ ; is ALWAYS case-insensitive.
+
+ description := "Content-Description" ":" *text
+
+ discrete-type := "text" / "image" / "audio" / "video" /
+ "application" / extension-token
+
+ encoding := "Content-Transfer-Encoding" ":" mechanism
+
+ entity-headers := [ content CRLF ]
+ [ encoding CRLF ]
+ [ id CRLF ]
+ [ description CRLF ]
+ *( MIME-extension-field CRLF )
+
+ extension-token := ietf-token / x-token
+
+ hex-octet := "=" 2(DIGIT / "A" / "B" / "C" / "D" / "E" / "F")
+ ; Octet must be used for characters > 126, =,
+ ; SPACEs or TABs at the ends of lines, and is
+ ; recommended for any character not listed in
+ ; RFC 2049 as "mail-safe".
+
+ iana-token := <A publicly-defined extension token. Tokens
+ of this form must be registered with IANA
+ as specified in RFC 2048.>
+
+
+
+
+Freed & Borenstein Standards Track [Page 29]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ ietf-token := <An extension token defined by a
+ standards-track RFC and registered
+ with IANA.>
+
+ id := "Content-ID" ":" msg-id
+
+ mechanism := "7bit" / "8bit" / "binary" /
+ "quoted-printable" / "base64" /
+ ietf-token / x-token
+
+ MIME-extension-field := <Any RFC 822 header field which
+ begins with the string
+ "Content-">
+
+ MIME-message-headers := entity-headers
+ fields
+ version CRLF
+ ; The ordering of the header
+ ; fields implied by this BNF
+ ; definition should be ignored.
+
+ MIME-part-headers := entity-headers
+ [fields]
+ ; Any field not beginning with
+ ; "content-" can have no defined
+ ; meaning and may be ignored.
+ ; The ordering of the header
+ ; fields implied by this BNF
+ ; definition should be ignored.
+
+ parameter := attribute "=" value
+
+ ptext := hex-octet / safe-char
+
+ qp-line := *(qp-segment transport-padding CRLF)
+ qp-part transport-padding
+
+ qp-part := qp-section
+ ; Maximum length of 76 characters
+
+ qp-section := [*(ptext / SPACE / TAB) ptext]
+
+ qp-segment := qp-section *(SPACE / TAB) "="
+ ; Maximum length of 76 characters
+
+ quoted-printable := qp-line *(CRLF qp-line)
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 30]
+
+RFC 2045 Internet Message Bodies November 1996
+
+
+ safe-char := <any octet with decimal value of 33 through
+ 60 inclusive, and 62 through 126>
+ ; Characters not listed as "mail-safe" in
+ ; RFC 2049 are also not recommended.
+
+ subtype := extension-token / iana-token
+
+ token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
+ or tspecials>
+
+ transport-padding := *LWSP-char
+ ; Composers MUST NOT generate
+ ; non-zero length transport
+ ; padding, but receivers MUST
+ ; be able to handle padding
+ ; added by message transports.
+
+ tspecials := "(" / ")" / "<" / ">" / "@" /
+ "," / ";" / ":" / "\" / <"> /
+ "/" / "[" / "]" / "?" / "="
+ ; Must be in quoted-string,
+ ; to use within parameter values
+
+ type := discrete-type / composite-type
+
+ value := token / quoted-string
+
+ version := "MIME-Version" ":" 1*DIGIT "." 1*DIGIT
+
+ x-token := <The two characters "X-" or "x-" followed, with
+ no intervening white space, by any token>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 31]
+
diff --git a/doc/mime-p2-rfc2046.txt b/doc/mime-p2-rfc2046.txt
@@ -0,0 +1,2467 @@
+
+
+
+
+
+
+Network Working Group N. Freed
+Request for Comments: 2046 Innosoft
+Obsoletes: 1521, 1522, 1590 N. Borenstein
+Category: Standards Track First Virtual
+ November 1996
+
+
+ Multipurpose Internet Mail Extensions
+ (MIME) Part Two:
+ Media Types
+
+Status of this Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Abstract
+
+ STD 11, RFC 822 defines a message representation protocol specifying
+ considerable detail about US-ASCII message headers, but which leaves
+ the message content, or message body, as flat US-ASCII text. This
+ set of documents, collectively called the Multipurpose Internet Mail
+ Extensions, or MIME, redefines the format of messages to allow for
+
+ (1) textual message bodies in character sets other than
+ US-ASCII,
+
+ (2) an extensible set of different formats for non-textual
+ message bodies,
+
+ (3) multi-part message bodies, and
+
+ (4) textual header information in character sets other than
+ US-ASCII.
+
+ These documents are based on earlier work documented in RFC 934, STD
+ 11, and RFC 1049, but extend and revise them. Because RFC 822 said
+ so little about message bodies, these documents are largely
+ orthogonal to (rather than a revision of) RFC 822.
+
+ The initial document in this set, RFC 2045, specifies the various
+ headers used to describe the structure of MIME messages. This second
+ document defines the general structure of the MIME media typing
+ system and defines an initial set of media types. The third document,
+ RFC 2047, describes extensions to RFC 822 to allow non-US-ASCII text
+
+
+
+Freed & Borenstein Standards Track [Page 1]
+
+RFC 2046 Media Types November 1996
+
+
+ data in Internet mail header fields. The fourth document, RFC 2048,
+ specifies various IANA registration procedures for MIME-related
+ facilities. The fifth and final document, RFC 2049, describes MIME
+ conformance criteria as well as providing some illustrative examples
+ of MIME message formats, acknowledgements, and the bibliography.
+
+ These documents are revisions of RFCs 1521 and 1522, which themselves
+ were revisions of RFCs 1341 and 1342. An appendix in RFC 2049
+ describes differences and changes from previous versions.
+
+Table of Contents
+
+ 1. Introduction ......................................... 3
+ 2. Definition of a Top-Level Media Type ................. 4
+ 3. Overview Of The Initial Top-Level Media Types ........ 4
+ 4. Discrete Media Type Values ........................... 6
+ 4.1 Text Media Type ..................................... 6
+ 4.1.1 Representation of Line Breaks ..................... 7
+ 4.1.2 Charset Parameter ................................. 7
+ 4.1.3 Plain Subtype ..................................... 11
+ 4.1.4 Unrecognized Subtypes ............................. 11
+ 4.2 Image Media Type .................................... 11
+ 4.3 Audio Media Type .................................... 11
+ 4.4 Video Media Type .................................... 12
+ 4.5 Application Media Type .............................. 12
+ 4.5.1 Octet-Stream Subtype .............................. 13
+ 4.5.2 PostScript Subtype ................................ 14
+ 4.5.3 Other Application Subtypes ........................ 17
+ 5. Composite Media Type Values .......................... 17
+ 5.1 Multipart Media Type ................................ 17
+ 5.1.1 Common Syntax ..................................... 19
+ 5.1.2 Handling Nested Messages and Multiparts ........... 24
+ 5.1.3 Mixed Subtype ..................................... 24
+ 5.1.4 Alternative Subtype ............................... 24
+ 5.1.5 Digest Subtype .................................... 26
+ 5.1.6 Parallel Subtype .................................. 27
+ 5.1.7 Other Multipart Subtypes .......................... 28
+ 5.2 Message Media Type .................................. 28
+ 5.2.1 RFC822 Subtype .................................... 28
+ 5.2.2 Partial Subtype ................................... 29
+ 5.2.2.1 Message Fragmentation and Reassembly ............ 30
+ 5.2.2.2 Fragmentation and Reassembly Example ............ 31
+ 5.2.3 External-Body Subtype ............................. 33
+ 5.2.4 Other Message Subtypes ............................ 40
+ 6. Experimental Media Type Values ....................... 40
+ 7. Summary .............................................. 41
+ 8. Security Considerations .............................. 41
+ 9. Authors' Addresses ................................... 42
+
+
+
+Freed & Borenstein Standards Track [Page 2]
+
+RFC 2046 Media Types November 1996
+
+
+ A. Collected Grammar .................................... 43
+
+1. Introduction
+
+ The first document in this set, RFC 2045, defines a number of header
+ fields, including Content-Type. The Content-Type field is used to
+ specify the nature of the data in the body of a MIME entity, by
+ giving media type and subtype identifiers, and by providing auxiliary
+ information that may be required for certain media types. After the
+ type and subtype names, the remainder of the header field is simply a
+ set of parameters, specified in an attribute/value notation. The
+ ordering of parameters is not significant.
+
+ In general, the top-level media type is used to declare the general
+ type of data, while the subtype specifies a specific format for that
+ type of data. Thus, a media type of "image/xyz" is enough to tell a
+ user agent that the data is an image, even if the user agent has no
+ knowledge of the specific image format "xyz". Such information can
+ be used, for example, to decide whether or not to show a user the raw
+ data from an unrecognized subtype -- such an action might be
+ reasonable for unrecognized subtypes of "text", but not for
+ unrecognized subtypes of "image" or "audio". For this reason,
+ registered subtypes of "text", "image", "audio", and "video" should
+ not contain embedded information that is really of a different type.
+ Such compound formats should be represented using the "multipart" or
+ "application" types.
+
+ Parameters are modifiers of the media subtype, and as such do not
+ fundamentally affect the nature of the content. The set of
+ meaningful parameters depends on the media type and subtype. Most
+ parameters are associated with a single specific subtype. However, a
+ given top-level media type may define parameters which are applicable
+ to any subtype of that type. Parameters may be required by their
+ defining media type or subtype or they may be optional. MIME
+ implementations must also ignore any parameters whose names they do
+ not recognize.
+
+ MIME's Content-Type header field and media type mechanism has been
+ carefully designed to be extensible, and it is expected that the set
+ of media type/subtype pairs and their associated parameters will grow
+ significantly over time. Several other MIME facilities, such as
+ transfer encodings and "message/external-body" access types, are
+ likely to have new values defined over time. In order to ensure that
+ the set of such values is developed in an orderly, well-specified,
+ and public manner, MIME sets up a registration process which uses the
+ Internet Assigned Numbers Authority (IANA) as a central registry for
+ MIME's various areas of extensibility. The registration process for
+ these areas is described in a companion document, RFC 2048.
+
+
+
+Freed & Borenstein Standards Track [Page 3]
+
+RFC 2046 Media Types November 1996
+
+
+ The initial seven standard top-level media types are defined and
+ described in the remainder of this document.
+
+2. Definition of a Top-Level Media Type
+
+ The definition of a top-level media type consists of:
+
+ (1) a name and a description of the type, including
+ criteria for whether a particular type would qualify
+ under that type,
+
+ (2) the names and definitions of parameters, if any, which
+ are defined for all subtypes of that type (including
+ whether such parameters are required or optional),
+
+ (3) how a user agent and/or gateway should handle unknown
+ subtypes of this type,
+
+ (4) general considerations on gatewaying entities of this
+ top-level type, if any, and
+
+ (5) any restrictions on content-transfer-encodings for
+ entities of this top-level type.
+
+3. Overview Of The Initial Top-Level Media Types
+
+ The five discrete top-level media types are:
+
+ (1) text -- textual information. The subtype "plain" in
+ particular indicates plain text containing no
+ formatting commands or directives of any sort. Plain
+ text is intended to be displayed "as-is". No special
+ software is required to get the full meaning of the
+ text, aside from support for the indicated character
+ set. Other subtypes are to be used for enriched text in
+ forms where application software may enhance the
+ appearance of the text, but such software must not be
+ required in order to get the general idea of the
+ content. Possible subtypes of "text" thus include any
+ word processor format that can be read without
+ resorting to software that understands the format. In
+ particular, formats that employ embedded binary
+ formatting information are not considered directly
+ readable. A very simple and portable subtype,
+ "richtext", was defined in RFC 1341, with a further
+ revision in RFC 1896 under the name "enriched".
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 4]
+
+RFC 2046 Media Types November 1996
+
+
+ (2) image -- image data. "Image" requires a display device
+ (such as a graphical display, a graphics printer, or a
+ FAX machine) to view the information. An initial
+ subtype is defined for the widely-used image format
+ JPEG.
+
+ (3) audio -- audio data. "Audio" requires an audio output
+ device (such as a speaker or a telephone) to "display"
+ the contents. An initial subtype "basic" is defined in
+ this document.
+
+ (4) video -- video data. "Video" requires the capability
+ to display moving images, typically including
+ specialized hardware and software. An initial subtype
+ "mpeg" is defined in this document.
+
+ (5) application -- some other kind of data, typically
+ either uninterpreted binary data or information to be
+ processed by an application. The subtype "octet-
+ stream" is to be used in the case of uninterpreted
+ binary data, in which case the simplest recommended
+ action is to offer to write the information into a file
+ for the user. The "PostScript" subtype is also defined
+ for the transport of PostScript material. Other
+ expected uses for "application" include spreadsheets,
+ data for mail-based scheduling systems, and languages
+ for "active" (computational) messaging, and word
+ processing formats that are not directly readable.
+ Note that security considerations may exist for some
+ types of application data, most notably
+ "application/PostScript" and any form of active
+ messaging. These issues are discussed later in this
+ document.
+
+ The two composite top-level media types are:
+
+ (1) multipart -- data consisting of multiple entities of
+ independent data types. Four subtypes are initially
+ defined, including the basic "mixed" subtype specifying
+ a generic mixed set of parts, "alternative" for
+ representing the same data in multiple formats,
+ "parallel" for parts intended to be viewed
+ simultaneously, and "digest" for multipart entities in
+ which each part has a default type of "message/rfc822".
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 5]
+
+RFC 2046 Media Types November 1996
+
+
+ (2) message -- an encapsulated message. A body of media
+ type "message" is itself all or a portion of some kind
+ of message object. Such objects may or may not in turn
+ contain other entities. The "rfc822" subtype is used
+ when the encapsulated content is itself an RFC 822
+ message. The "partial" subtype is defined for partial
+ RFC 822 messages, to permit the fragmented transmission
+ of bodies that are thought to be too large to be passed
+ through transport facilities in one piece. Another
+ subtype, "external-body", is defined for specifying
+ large bodies by reference to an external data source.
+
+ It should be noted that the list of media type values given here may
+ be augmented in time, via the mechanisms described above, and that
+ the set of subtypes is expected to grow substantially.
+
+4. Discrete Media Type Values
+
+ Five of the seven initial media type values refer to discrete bodies.
+ The content of these types must be handled by non-MIME mechanisms;
+ they are opaque to MIME processors.
+
+4.1. Text Media Type
+
+ The "text" media type is intended for sending material which is
+ principally textual in form. A "charset" parameter may be used to
+ indicate the character set of the body text for "text" subtypes,
+ notably including the subtype "text/plain", which is a generic
+ subtype for plain text. Plain text does not provide for or allow
+ formatting commands, font attribute specifications, processing
+ instructions, interpretation directives, or content markup. Plain
+ text is seen simply as a linear sequence of characters, possibly
+ interrupted by line breaks or page breaks. Plain text may allow the
+ stacking of several characters in the same position in the text.
+ Plain text in scripts like Arabic and Hebrew may also include
+ facilities that allow the arbitrary mixing of text segments with
+ opposite writing directions.
+
+ Beyond plain text, there are many formats for representing what might
+ be known as "rich text". An interesting characteristic of many such
+ representations is that they are to some extent readable even without
+ the software that interprets them. It is useful, then, to
+ distinguish them, at the highest level, from such unreadable data as
+ images, audio, or text represented in an unreadable form. In the
+ absence of appropriate interpretation software, it is reasonable to
+ show subtypes of "text" to the user, while it is not reasonable to do
+ so with most nontextual data. Such formatted textual data should be
+ represented using subtypes of "text".
+
+
+
+Freed & Borenstein Standards Track [Page 6]
+
+RFC 2046 Media Types November 1996
+
+
+4.1.1. Representation of Line Breaks
+
+ The canonical form of any MIME "text" subtype MUST always represent a
+ line break as a CRLF sequence. Similarly, any occurrence of CRLF in
+ MIME "text" MUST represent a line break. Use of CR and LF outside of
+ line break sequences is also forbidden.
+
+ This rule applies regardless of format or character set or sets
+ involved.
+
+ NOTE: The proper interpretation of line breaks when a body is
+ displayed depends on the media type. In particular, while it is
+ appropriate to treat a line break as a transition to a new line when
+ displaying a "text/plain" body, this treatment is actually incorrect
+ for other subtypes of "text" like "text/enriched" [RFC-1896].
+ Similarly, whether or not line breaks should be added during display
+ operations is also a function of the media type. It should not be
+ necessary to add any line breaks to display "text/plain" correctly,
+ whereas proper display of "text/enriched" requires the appropriate
+ addition of line breaks.
+
+ NOTE: Some protocols define a maximum line length. E.g. SMTP [RFC-
+ 821] allows a maximum of 998 octets before the next CRLF sequence.
+ To be transported by such protocols, data which includes too long
+ segments without CRLF sequences must be encoded with a suitable
+ content-transfer-encoding.
+
+4.1.2. Charset Parameter
+
+ A critical parameter that may be specified in the Content-Type field
+ for "text/plain" data is the character set. This is specified with a
+ "charset" parameter, as in:
+
+ Content-type: text/plain; charset=iso-8859-1
+
+ Unlike some other parameter values, the values of the charset
+ parameter are NOT case sensitive. The default character set, which
+ must be assumed in the absence of a charset parameter, is US-ASCII.
+
+ The specification for any future subtypes of "text" must specify
+ whether or not they will also utilize a "charset" parameter, and may
+ possibly restrict its values as well. For other subtypes of "text"
+ than "text/plain", the semantics of the "charset" parameter should be
+ defined to be identical to those specified here for "text/plain",
+ i.e., the body consists entirely of characters in the given charset.
+ In particular, definers of future "text" subtypes should pay close
+ attention to the implications of multioctet character sets for their
+ subtype definitions.
+
+
+
+Freed & Borenstein Standards Track [Page 7]
+
+RFC 2046 Media Types November 1996
+
+
+ The charset parameter for subtypes of "text" gives a name of a
+ character set, as "character set" is defined in RFC 2045. The rules
+ regarding line breaks detailed in the previous section must also be
+ observed -- a character set whose definition does not conform to
+ these rules cannot be used in a MIME "text" subtype.
+
+ An initial list of predefined character set names can be found at the
+ end of this section. Additional character sets may be registered
+ with IANA.
+
+ Other media types than subtypes of "text" might choose to employ the
+ charset parameter as defined here, but with the CRLF/line break
+ restriction removed. Therefore, all character sets that conform to
+ the general definition of "character set" in RFC 2045 can be
+ registered for MIME use.
+
+ Note that if the specified character set includes 8-bit characters
+ and such characters are used in the body, a Content-Transfer-Encoding
+ header field and a corresponding encoding on the data are required in
+ order to transmit the body via some mail transfer protocols, such as
+ SMTP [RFC-821].
+
+ The default character set, US-ASCII, has been the subject of some
+ confusion and ambiguity in the past. Not only were there some
+ ambiguities in the definition, there have been wide variations in
+ practice. In order to eliminate such ambiguity and variations in the
+ future, it is strongly recommended that new user agents explicitly
+ specify a character set as a media type parameter in the Content-Type
+ header field. "US-ASCII" does not indicate an arbitrary 7-bit
+ character set, but specifies that all octets in the body must be
+ interpreted as characters according to the US-ASCII character set.
+ National and application-oriented versions of ISO 646 [ISO-646] are
+ usually NOT identical to US-ASCII, and in that case their use in
+ Internet mail is explicitly discouraged. The omission of the ISO 646
+ character set from this document is deliberate in this regard. The
+ character set name of "US-ASCII" explicitly refers to the character
+ set defined in ANSI X3.4-1986 [US-ASCII]. The new international
+ reference version (IRV) of the 1991 edition of ISO 646 is identical
+ to US-ASCII. The character set name "ASCII" is reserved and must not
+ be used for any purpose.
+
+ NOTE: RFC 821 explicitly specifies "ASCII", and references an earlier
+ version of the American Standard. Insofar as one of the purposes of
+ specifying a media type and character set is to permit the receiver
+ to unambiguously determine how the sender intended the coded message
+ to be interpreted, assuming anything other than "strict ASCII" as the
+ default would risk unintentional and incompatible changes to the
+ semantics of messages now being transmitted. This also implies that
+
+
+
+Freed & Borenstein Standards Track [Page 8]
+
+RFC 2046 Media Types November 1996
+
+
+ messages containing characters coded according to other versions of
+ ISO 646 than US-ASCII and the 1991 IRV, or using code-switching
+ procedures (e.g., those of ISO 2022), as well as 8bit or multiple
+ octet character encodings MUST use an appropriate character set
+ specification to be consistent with MIME.
+
+ The complete US-ASCII character set is listed in ANSI X3.4-1986.
+ Note that the control characters including DEL (0-31, 127) have no
+ defined meaning apart from the combination CRLF (US-ASCII values
+ 13 and 10) indicating a new line. Two of the characters have de
+ facto meanings in wide use: FF (12) often means "start subsequent
+ text on the beginning of a new page"; and TAB or HT (9) often (though
+ not always) means "move the cursor to the next available column after
+ the current position where the column number is a multiple of 8
+ (counting the first column as column 0)." Aside from these
+ conventions, any use of the control characters or DEL in a body must
+ either occur
+
+ (1) because a subtype of text other than "plain"
+ specifically assigns some additional meaning, or
+
+ (2) within the context of a private agreement between the
+ sender and recipient. Such private agreements are
+ discouraged and should be replaced by the other
+ capabilities of this document.
+
+ NOTE: An enormous proliferation of character sets exists beyond US-
+ ASCII. A large number of partially or totally overlapping character
+ sets is NOT a good thing. A SINGLE character set that can be used
+ universally for representing all of the world's languages in Internet
+ mail would be preferable. Unfortunately, existing practice in
+ several communities seems to point to the continued use of multiple
+ character sets in the near future. A small number of standard
+ character sets are, therefore, defined for Internet use in this
+ document.
+
+ The defined charset values are:
+
+ (1) US-ASCII -- as defined in ANSI X3.4-1986 [US-ASCII].
+
+ (2) ISO-8859-X -- where "X" is to be replaced, as
+ necessary, for the parts of ISO-8859 [ISO-8859]. Note
+ that the ISO 646 character sets have deliberately been
+ omitted in favor of their 8859 replacements, which are
+ the designated character sets for Internet mail. As of
+ the publication of this document, the legitimate values
+ for "X" are the digits 1 through 10.
+
+
+
+
+Freed & Borenstein Standards Track [Page 9]
+
+RFC 2046 Media Types November 1996
+
+
+ Characters in the range 128-159 have no assigned meaning in ISO-8859-
+ X. Characters with values below 128 in ISO-8859-X have the same
+ assigned meaning as they do in US-ASCII.
+
+ Part 6 of ISO 8859 (Latin/Arabic alphabet) and part 8 (Latin/Hebrew
+ alphabet) include both characters for which the normal writing
+ direction is right to left and characters for which it is left to
+ right, but do not define a canonical ordering method for representing
+ bi-directional text. The charset values "ISO-8859-6" and "ISO-8859-
+ 8", however, specify that the visual method is used [RFC-1556].
+
+ All of these character sets are used as pure 7bit or 8bit sets
+ without any shift or escape functions. The meaning of shift and
+ escape sequences in these character sets is not defined.
+
+ The character sets specified above are the ones that were relatively
+ uncontroversial during the drafting of MIME. This document does not
+ endorse the use of any particular character set other than US-ASCII,
+ and recognizes that the future evolution of world character sets
+ remains unclear.
+
+ Note that the character set used, if anything other than US-ASCII,
+ must always be explicitly specified in the Content-Type field.
+
+ No character set name other than those defined above may be used in
+ Internet mail without the publication of a formal specification and
+ its registration with IANA, or by private agreement, in which case
+ the character set name must begin with "X-".
+
+ Implementors are discouraged from defining new character sets unless
+ absolutely necessary.
+
+ The "charset" parameter has been defined primarily for the purpose of
+ textual data, and is described in this section for that reason.
+ However, it is conceivable that non-textual data might also wish to
+ specify a charset value for some purpose, in which case the same
+ syntax and values should be used.
+
+ In general, composition software should always use the "lowest common
+ denominator" character set possible. For example, if a body contains
+ only US-ASCII characters, it SHOULD be marked as being in the US-
+ ASCII character set, not ISO-8859-1, which, like all the ISO-8859
+ family of character sets, is a superset of US-ASCII. More generally,
+ if a widely-used character set is a subset of another character set,
+ and a body contains only characters in the widely-used subset, it
+ should be labelled as being in that subset. This will increase the
+ chances that the recipient will be able to view the resulting entity
+ correctly.
+
+
+
+Freed & Borenstein Standards Track [Page 10]
+
+RFC 2046 Media Types November 1996
+
+
+4.1.3. Plain Subtype
+
+ The simplest and most important subtype of "text" is "plain". This
+ indicates plain text that does not contain any formatting commands or
+ directives. Plain text is intended to be displayed "as-is", that is,
+ no interpretation of embedded formatting commands, font attribute
+ specifications, processing instructions, interpretation directives,
+ or content markup should be necessary for proper display. The
+ default media type of "text/plain; charset=us-ascii" for Internet
+ mail describes existing Internet practice. That is, it is the type
+ of body defined by RFC 822.
+
+ No other "text" subtype is defined by this document.
+
+4.1.4. Unrecognized Subtypes
+
+ Unrecognized subtypes of "text" should be treated as subtype "plain"
+ as long as the MIME implementation knows how to handle the charset.
+ Unrecognized subtypes which also specify an unrecognized charset
+ should be treated as "application/octet-stream".
+
+4.2. Image Media Type
+
+ A media type of "image" indicates that the body contains an image.
+ The subtype names the specific image format. These names are not
+ case sensitive. An initial subtype is "jpeg" for the JPEG format
+ using JFIF encoding [JPEG].
+
+ The list of "image" subtypes given here is neither exclusive nor
+ exhaustive, and is expected to grow as more types are registered with
+ IANA, as described in RFC 2048.
+
+ Unrecognized subtypes of "image" should at a minimum be treated as
+ "application/octet-stream". Implementations may optionally elect to
+ pass subtypes of "image" that they do not specifically recognize to a
+ secure and robust general-purpose image viewing application, if such
+ an application is available.
+
+ NOTE: Use of a general-purpose image viewing application this way
+ inherits the security problems of the most dangerous type supported
+ by the application.
+
+4.3. Audio Media Type
+
+ A media type of "audio" indicates that the body contains audio data.
+ Although there is not yet a consensus on an "ideal" audio format for
+ use with computers, there is a pressing need for a format capable of
+ providing interoperable behavior.
+
+
+
+Freed & Borenstein Standards Track [Page 11]
+
+RFC 2046 Media Types November 1996
+
+
+ The initial subtype of "basic" is specified to meet this requirement
+ by providing an absolutely minimal lowest common denominator audio
+ format. It is expected that richer formats for higher quality and/or
+ lower bandwidth audio will be defined by a later document.
+
+ The content of the "audio/basic" subtype is single channel audio
+ encoded using 8bit ISDN mu-law [PCM] at a sample rate of 8000 Hz.
+
+ Unrecognized subtypes of "audio" should at a minimum be treated as
+ "application/octet-stream". Implementations may optionally elect to
+ pass subtypes of "audio" that they do not specifically recognize to a
+ robust general-purpose audio playing application, if such an
+ application is available.
+
+4.4. Video Media Type
+
+ A media type of "video" indicates that the body contains a time-
+ varying-picture image, possibly with color and coordinated sound.
+ The term 'video' is used in its most generic sense, rather than with
+ reference to any particular technology or format, and is not meant to
+ preclude subtypes such as animated drawings encoded compactly. The
+ subtype "mpeg" refers to video coded according to the MPEG standard
+ [MPEG].
+
+ Note that although in general this document strongly discourages the
+ mixing of multiple media in a single body, it is recognized that many
+ so-called video formats include a representation for synchronized
+ audio, and this is explicitly permitted for subtypes of "video".
+
+ Unrecognized subtypes of "video" should at a minimum be treated as
+ "application/octet-stream". Implementations may optionally elect to
+ pass subtypes of "video" that they do not specifically recognize to a
+ robust general-purpose video display application, if such an
+ application is available.
+
+4.5. Application Media Type
+
+ The "application" media type is to be used for discrete data which do
+ not fit in any of the other categories, and particularly for data to
+ be processed by some type of application program. This is
+ information which must be processed by an application before it is
+ viewable or usable by a user. Expected uses for the "application"
+ media type include file transfer, spreadsheets, data for mail-based
+ scheduling systems, and languages for "active" (computational)
+ material. (The latter, in particular, can pose security problems
+ which must be understood by implementors, and are considered in
+ detail in the discussion of the "application/PostScript" media type.)
+
+
+
+
+Freed & Borenstein Standards Track [Page 12]
+
+RFC 2046 Media Types November 1996
+
+
+ For example, a meeting scheduler might define a standard
+ representation for information about proposed meeting dates. An
+ intelligent user agent would use this information to conduct a dialog
+ with the user, and might then send additional material based on that
+ dialog. More generally, there have been several "active" messaging
+ languages developed in which programs in a suitably specialized
+ language are transported to a remote location and automatically run
+ in the recipient's environment.
+
+ Such applications may be defined as subtypes of the "application"
+ media type. This document defines two subtypes:
+
+ octet-stream, and PostScript.
+
+ The subtype of "application" will often be either the name or include
+ part of the name of the application for which the data are intended.
+ This does not mean, however, that any application program name may be
+ used freely as a subtype of "application".
+
+4.5.1. Octet-Stream Subtype
+
+ The "octet-stream" subtype is used to indicate that a body contains
+ arbitrary binary data. The set of currently defined parameters is:
+
+ (1) TYPE -- the general type or category of binary data.
+ This is intended as information for the human recipient
+ rather than for any automatic processing.
+
+ (2) PADDING -- the number of bits of padding that were
+ appended to the bit-stream comprising the actual
+ contents to produce the enclosed 8bit byte-oriented
+ data. This is useful for enclosing a bit-stream in a
+ body when the total number of bits is not a multiple of
+ 8.
+
+ Both of these parameters are optional.
+
+ An additional parameter, "CONVERSIONS", was defined in RFC 1341 but
+ has since been removed. RFC 1341 also defined the use of a "NAME"
+ parameter which gave a suggested file name to be used if the data
+ were to be written to a file. This has been deprecated in
+ anticipation of a separate Content-Disposition header field, to be
+ defined in a subsequent RFC.
+
+ The recommended action for an implementation that receives an
+ "application/octet-stream" entity is to simply offer to put the data
+ in a file, with any Content-Transfer-Encoding undone, or perhaps to
+ use it as input to a user-specified process.
+
+
+
+Freed & Borenstein Standards Track [Page 13]
+
+RFC 2046 Media Types November 1996
+
+
+ To reduce the danger of transmitting rogue programs, it is strongly
+ recommended that implementations NOT implement a path-search
+ mechanism whereby an arbitrary program named in the Content-Type
+ parameter (e.g., an "interpreter=" parameter) is found and executed
+ using the message body as input.
+
+4.5.2. PostScript Subtype
+
+ A media type of "application/postscript" indicates a PostScript
+ program. Currently two variants of the PostScript language are
+ allowed; the original level 1 variant is described in [POSTSCRIPT]
+ and the more recent level 2 variant is described in [POSTSCRIPT2].
+
+ PostScript is a registered trademark of Adobe Systems, Inc. Use of
+ the MIME media type "application/postscript" implies recognition of
+ that trademark and all the rights it entails.
+
+ The PostScript language definition provides facilities for internal
+ labelling of the specific language features a given program uses.
+ This labelling, called the PostScript document structuring
+ conventions, or DSC, is very general and provides substantially more
+ information than just the language level. The use of document
+ structuring conventions, while not required, is strongly recommended
+ as an aid to interoperability. Documents which lack proper
+ structuring conventions cannot be tested to see whether or not they
+ will work in a given environment. As such, some systems may assume
+ the worst and refuse to process unstructured documents.
+
+ The execution of general-purpose PostScript interpreters entails
+ serious security risks, and implementors are discouraged from simply
+ sending PostScript bodies to "off-the-shelf" interpreters. While it
+ is usually safe to send PostScript to a printer, where the potential
+ for harm is greatly constrained by typical printer environments,
+ implementors should consider all of the following before they add
+ interactive display of PostScript bodies to their MIME readers.
+
+ The remainder of this section outlines some, though probably not all,
+ of the possible problems with the transport of PostScript entities.
+
+ (1) Dangerous operations in the PostScript language
+ include, but may not be limited to, the PostScript
+ operators "deletefile", "renamefile", "filenameforall",
+ and "file". "File" is only dangerous when applied to
+ something other than standard input or output.
+ Implementations may also define additional nonstandard
+ file operators; these may also pose a threat to
+ security. "Filenameforall", the wildcard file search
+ operator, may appear at first glance to be harmless.
+
+
+
+Freed & Borenstein Standards Track [Page 14]
+
+RFC 2046 Media Types November 1996
+
+
+ Note, however, that this operator has the potential to
+ reveal information about what files the recipient has
+ access to, and this information may itself be
+ sensitive. Message senders should avoid the use of
+ potentially dangerous file operators, since these
+ operators are quite likely to be unavailable in secure
+ PostScript implementations. Message receiving and
+ displaying software should either completely disable
+ all potentially dangerous file operators or take
+ special care not to delegate any special authority to
+ their operation. These operators should be viewed as
+ being done by an outside agency when interpreting
+ PostScript documents. Such disabling and/or checking
+ should be done completely outside of the reach of the
+ PostScript language itself; care should be taken to
+ ensure that no method exists for re-enabling full-
+ function versions of these operators.
+
+ (2) The PostScript language provides facilities for exiting
+ the normal interpreter, or server, loop. Changes made
+ in this "outer" environment are customarily retained
+ across documents, and may in some cases be retained
+ semipermanently in nonvolatile memory. The operators
+ associated with exiting the interpreter loop have the
+ potential to interfere with subsequent document
+ processing. As such, their unrestrained use
+ constitutes a threat of service denial. PostScript
+ operators that exit the interpreter loop include, but
+ may not be limited to, the exitserver and startjob
+ operators. Message sending software should not
+ generate PostScript that depends on exiting the
+ interpreter loop to operate, since the ability to exit
+ will probably be unavailable in secure PostScript
+ implementations. Message receiving and displaying
+ software should completely disable the ability to make
+ retained changes to the PostScript environment by
+ eliminating or disabling the "startjob" and
+ "exitserver" operations. If these operations cannot be
+ eliminated or completely disabled the password
+ associated with them should at least be set to a hard-
+ to-guess value.
+
+ (3) PostScript provides operators for setting system-wide
+ and device-specific parameters. These parameter
+ settings may be retained across jobs and may
+ potentially pose a threat to the correct operation of
+ the interpreter. The PostScript operators that set
+ system and device parameters include, but may not be
+
+
+
+Freed & Borenstein Standards Track [Page 15]
+
+RFC 2046 Media Types November 1996
+
+
+ limited to, the "setsystemparams" and "setdevparams"
+ operators. Message sending software should not
+ generate PostScript that depends on the setting of
+ system or device parameters to operate correctly. The
+ ability to set these parameters will probably be
+ unavailable in secure PostScript implementations.
+ Message receiving and displaying software should
+ disable the ability to change system and device
+ parameters. If these operators cannot be completely
+ disabled the password associated with them should at
+ least be set to a hard-to-guess value.
+
+ (4) Some PostScript implementations provide nonstandard
+ facilities for the direct loading and execution of
+ machine code. Such facilities are quite obviously open
+ to substantial abuse. Message sending software should
+ not make use of such features. Besides being totally
+ hardware-specific, they are also likely to be
+ unavailable in secure implementations of PostScript.
+ Message receiving and displaying software should not
+ allow such operators to be used if they exist.
+
+ (5) PostScript is an extensible language, and many, if not
+ most, implementations of it provide a number of their
+ own extensions. This document does not deal with such
+ extensions explicitly since they constitute an unknown
+ factor. Message sending software should not make use
+ of nonstandard extensions; they are likely to be
+ missing from some implementations. Message receiving
+ and displaying software should make sure that any
+ nonstandard PostScript operators are secure and don't
+ present any kind of threat.
+
+ (6) It is possible to write PostScript that consumes huge
+ amounts of various system resources. It is also
+ possible to write PostScript programs that loop
+ indefinitely. Both types of programs have the
+ potential to cause damage if sent to unsuspecting
+ recipients. Message-sending software should avoid the
+ construction and dissemination of such programs, which
+ is antisocial. Message receiving and displaying
+ software should provide appropriate mechanisms to abort
+ processing after a reasonable amount of time has
+ elapsed. In addition, PostScript interpreters should be
+ limited to the consumption of only a reasonable amount
+ of any given system resource.
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 16]
+
+RFC 2046 Media Types November 1996
+
+
+ (7) It is possible to include raw binary information inside
+ PostScript in various forms. This is not recommended
+ for use in Internet mail, both because it is not
+ supported by all PostScript interpreters and because it
+ significantly complicates the use of a MIME Content-
+ Transfer-Encoding. (Without such binary, PostScript
+ may typically be viewed as line-oriented data. The
+ treatment of CRLF sequences becomes extremely
+ problematic if binary and line-oriented data are mixed
+ in a single PostScript data stream.)
+
+ (8) Finally, bugs may exist in some PostScript interpreters
+ which could possibly be exploited to gain unauthorized
+ access to a recipient's system. Apart from noting this
+ possibility, there is no specific action to take to
+ prevent this, apart from the timely correction of such
+ bugs if any are found.
+
+4.5.3. Other Application Subtypes
+
+ It is expected that many other subtypes of "application" will be
+ defined in the future. MIME implementations must at a minimum treat
+ any unrecognized subtypes as being equivalent to "application/octet-
+ stream".
+
+5. Composite Media Type Values
+
+ The remaining two of the seven initial Content-Type values refer to
+ composite entities. Composite entities are handled using MIME
+ mechanisms -- a MIME processor typically handles the body directly.
+
+5.1. Multipart Media Type
+
+ In the case of multipart entities, in which one or more different
+ sets of data are combined in a single body, a "multipart" media type
+ field must appear in the entity's header. The body must then contain
+ one or more body parts, each preceded by a boundary delimiter line,
+ and the last one followed by a closing boundary delimiter line.
+ After its boundary delimiter line, each body part then consists of a
+ header area, a blank line, and a body area. Thus a body part is
+ similar to an RFC 822 message in syntax, but different in meaning.
+
+ A body part is an entity and hence is NOT to be interpreted as
+ actually being an RFC 822 message. To begin with, NO header fields
+ are actually required in body parts. A body part that starts with a
+ blank line, therefore, is allowed and is a body part for which all
+ default values are to be assumed. In such a case, the absence of a
+ Content-Type header usually indicates that the corresponding body has
+
+
+
+Freed & Borenstein Standards Track [Page 17]
+
+RFC 2046 Media Types November 1996
+
+
+ a content-type of "text/plain; charset=US-ASCII".
+
+ The only header fields that have defined meaning for body parts are
+ those the names of which begin with "Content-". All other header
+ fields may be ignored in body parts. Although they should generally
+ be retained if at all possible, they may be discarded by gateways if
+ necessary. Such other fields are permitted to appear in body parts
+ but must not be depended on. "X-" fields may be created for
+ experimental or private purposes, with the recognition that the
+ information they contain may be lost at some gateways.
+
+ NOTE: The distinction between an RFC 822 message and a body part is
+ subtle, but important. A gateway between Internet and X.400 mail,
+ for example, must be able to tell the difference between a body part
+ that contains an image and a body part that contains an encapsulated
+ message, the body of which is a JPEG image. In order to represent
+ the latter, the body part must have "Content-Type: message/rfc822",
+ and its body (after the blank line) must be the encapsulated message,
+ with its own "Content-Type: image/jpeg" header field. The use of
+ similar syntax facilitates the conversion of messages to body parts,
+ and vice versa, but the distinction between the two must be
+ understood by implementors. (For the special case in which parts
+ actually are messages, a "digest" subtype is also defined.)
+
+ As stated previously, each body part is preceded by a boundary
+ delimiter line that contains the boundary delimiter. The boundary
+ delimiter MUST NOT appear inside any of the encapsulated parts, on a
+ line by itself or as the prefix of any line. This implies that it is
+ crucial that the composing agent be able to choose and specify a
+ unique boundary parameter value that does not contain the boundary
+ parameter value of an enclosing multipart as a prefix.
+
+ All present and future subtypes of the "multipart" type must use an
+ identical syntax. Subtypes may differ in their semantics, and may
+ impose additional restrictions on syntax, but must conform to the
+ required syntax for the "multipart" type. This requirement ensures
+ that all conformant user agents will at least be able to recognize
+ and separate the parts of any multipart entity, even those of an
+ unrecognized subtype.
+
+ As stated in the definition of the Content-Transfer-Encoding field
+ [RFC 2045], no encoding other than "7bit", "8bit", or "binary" is
+ permitted for entities of type "multipart". The "multipart" boundary
+ delimiters and header fields are always represented as 7bit US-ASCII
+ in any case (though the header fields may encode non-US-ASCII header
+ text as per RFC 2047) and data within the body parts can be encoded
+ on a part-by-part basis, with Content-Transfer-Encoding fields for
+ each appropriate body part.
+
+
+
+Freed & Borenstein Standards Track [Page 18]
+
+RFC 2046 Media Types November 1996
+
+
+5.1.1. Common Syntax
+
+ This section defines a common syntax for subtypes of "multipart".
+ All subtypes of "multipart" must use this syntax. A simple example
+ of a multipart message also appears in this section. An example of a
+ more complex multipart message is given in RFC 2049.
+
+ The Content-Type field for multipart entities requires one parameter,
+ "boundary". The boundary delimiter line is then defined as a line
+ consisting entirely of two hyphen characters ("-", decimal value 45)
+ followed by the boundary parameter value from the Content-Type header
+ field, optional linear whitespace, and a terminating CRLF.
+
+ NOTE: The hyphens are for rough compatibility with the earlier RFC
+ 934 method of message encapsulation, and for ease of searching for
+ the boundaries in some implementations. However, it should be noted
+ that multipart messages are NOT completely compatible with RFC 934
+ encapsulations; in particular, they do not obey RFC 934 quoting
+ conventions for embedded lines that begin with hyphens. This
+ mechanism was chosen over the RFC 934 mechanism because the latter
+ causes lines to grow with each level of quoting. The combination of
+ this growth with the fact that SMTP implementations sometimes wrap
+ long lines made the RFC 934 mechanism unsuitable for use in the event
+ that deeply-nested multipart structuring is ever desired.
+
+ WARNING TO IMPLEMENTORS: The grammar for parameters on the Content-
+ type field is such that it is often necessary to enclose the boundary
+ parameter values in quotes on the Content-type line. This is not
+ always necessary, but never hurts. Implementors should be sure to
+ study the grammar carefully in order to avoid producing invalid
+ Content-type fields. Thus, a typical "multipart" Content-Type header
+ field might look like this:
+
+ Content-Type: multipart/mixed; boundary=gc0p4Jq0M2Yt08j34c0p
+
+ But the following is not valid:
+
+ Content-Type: multipart/mixed; boundary=gc0pJq0M:08jU534c0p
+
+ (because of the colon) and must instead be represented as
+
+ Content-Type: multipart/mixed; boundary="gc0pJq0M:08jU534c0p"
+
+ This Content-Type value indicates that the content consists of one or
+ more parts, each with a structure that is syntactically identical to
+ an RFC 822 message, except that the header area is allowed to be
+ completely empty, and that the parts are each preceded by the line
+
+
+
+
+Freed & Borenstein Standards Track [Page 19]
+
+RFC 2046 Media Types November 1996
+
+
+ --gc0pJq0M:08jU534c0p
+
+ The boundary delimiter MUST occur at the beginning of a line, i.e.,
+ following a CRLF, and the initial CRLF is considered to be attached
+ to the boundary delimiter line rather than part of the preceding
+ part. The boundary may be followed by zero or more characters of
+ linear whitespace. It is then terminated by either another CRLF and
+ the header fields for the next part, or by two CRLFs, in which case
+ there are no header fields for the next part. If no Content-Type
+ field is present it is assumed to be "message/rfc822" in a
+ "multipart/digest" and "text/plain" otherwise.
+
+ NOTE: The CRLF preceding the boundary delimiter line is conceptually
+ attached to the boundary so that it is possible to have a part that
+ does not end with a CRLF (line break). Body parts that must be
+ considered to end with line breaks, therefore, must have two CRLFs
+ preceding the boundary delimiter line, the first of which is part of
+ the preceding body part, and the second of which is part of the
+ encapsulation boundary.
+
+ Boundary delimiters must not appear within the encapsulated material,
+ and must be no longer than 70 characters, not counting the two
+ leading hyphens.
+
+ The boundary delimiter line following the last body part is a
+ distinguished delimiter that indicates that no further body parts
+ will follow. Such a delimiter line is identical to the previous
+ delimiter lines, with the addition of two more hyphens after the
+ boundary parameter value.
+
+ --gc0pJq0M:08jU534c0p--
+
+ NOTE TO IMPLEMENTORS: Boundary string comparisons must compare the
+ boundary value with the beginning of each candidate line. An exact
+ match of the entire candidate line is not required; it is sufficient
+ that the boundary appear in its entirety following the CRLF.
+
+ There appears to be room for additional information prior to the
+ first boundary delimiter line and following the final boundary
+ delimiter line. These areas should generally be left blank, and
+ implementations must ignore anything that appears before the first
+ boundary delimiter line or after the last one.
+
+ NOTE: These "preamble" and "epilogue" areas are generally not used
+ because of the lack of proper typing of these parts and the lack of
+ clear semantics for handling these areas at gateways, particularly
+ X.400 gateways. However, rather than leaving the preamble area
+ blank, many MIME implementations have found this to be a convenient
+
+
+
+Freed & Borenstein Standards Track [Page 20]
+
+RFC 2046 Media Types November 1996
+
+
+ place to insert an explanatory note for recipients who read the
+ message with pre-MIME software, since such notes will be ignored by
+ MIME-compliant software.
+
+ NOTE: Because boundary delimiters must not appear in the body parts
+ being encapsulated, a user agent must exercise care to choose a
+ unique boundary parameter value. The boundary parameter value in the
+ example above could have been the result of an algorithm designed to
+ produce boundary delimiters with a very low probability of already
+ existing in the data to be encapsulated without having to prescan the
+ data. Alternate algorithms might result in more "readable" boundary
+ delimiters for a recipient with an old user agent, but would require
+ more attention to the possibility that the boundary delimiter might
+ appear at the beginning of some line in the encapsulated part. The
+ simplest boundary delimiter line possible is something like "---",
+ with a closing boundary delimiter line of "-----".
+
+ As a very simple example, the following multipart message has two
+ parts, both of them plain text, one of them explicitly typed and one
+ of them implicitly typed:
+
+ From: Nathaniel Borenstein <nsb@bellcore.com>
+ To: Ned Freed <ned@innosoft.com>
+ Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)
+ Subject: Sample message
+ MIME-Version: 1.0
+ Content-type: multipart/mixed; boundary="simple boundary"
+
+ This is the preamble. It is to be ignored, though it
+ is a handy place for composition agents to include an
+ explanatory note to non-MIME conformant readers.
+
+ --simple boundary
+
+ This is implicitly typed plain US-ASCII text.
+ It does NOT end with a linebreak.
+ --simple boundary
+ Content-type: text/plain; charset=us-ascii
+
+ This is explicitly typed plain US-ASCII text.
+ It DOES end with a linebreak.
+
+ --simple boundary--
+
+ This is the epilogue. It is also to be ignored.
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 21]
+
+RFC 2046 Media Types November 1996
+
+
+ The use of a media type of "multipart" in a body part within another
+ "multipart" entity is explicitly allowed. In such cases, for obvious
+ reasons, care must be taken to ensure that each nested "multipart"
+ entity uses a different boundary delimiter. See RFC 2049 for an
+ example of nested "multipart" entities.
+
+ The use of the "multipart" media type with only a single body part
+ may be useful in certain contexts, and is explicitly permitted.
+
+ NOTE: Experience has shown that a "multipart" media type with a
+ single body part is useful for sending non-text media types. It has
+ the advantage of providing the preamble as a place to include
+ decoding instructions. In addition, a number of SMTP gateways move
+ or remove the MIME headers, and a clever MIME decoder can take a good
+ guess at multipart boundaries even in the absence of the Content-Type
+ header and thereby successfully decode the message.
+
+ The only mandatory global parameter for the "multipart" media type is
+ the boundary parameter, which consists of 1 to 70 characters from a
+ set of characters known to be very robust through mail gateways, and
+ NOT ending with white space. (If a boundary delimiter line appears to
+ end with white space, the white space must be presumed to have been
+ added by a gateway, and must be deleted.) It is formally specified
+ by the following BNF:
+
+ boundary := 0*69<bchars> bcharsnospace
+
+ bchars := bcharsnospace / " "
+
+ bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
+ "+" / "_" / "," / "-" / "." /
+ "/" / ":" / "=" / "?"
+
+ Overall, the body of a "multipart" entity may be specified as
+ follows:
+
+ dash-boundary := "--" boundary
+ ; boundary taken from the value of
+ ; boundary parameter of the
+ ; Content-Type field.
+
+ multipart-body := [preamble CRLF]
+ dash-boundary transport-padding CRLF
+ body-part *encapsulation
+ close-delimiter transport-padding
+ [CRLF epilogue]
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 22]
+
+RFC 2046 Media Types November 1996
+
+
+ transport-padding := *LWSP-char
+ ; Composers MUST NOT generate
+ ; non-zero length transport
+ ; padding, but receivers MUST
+ ; be able to handle padding
+ ; added by message transports.
+
+ encapsulation := delimiter transport-padding
+ CRLF body-part
+
+ delimiter := CRLF dash-boundary
+
+ close-delimiter := delimiter "--"
+
+ preamble := discard-text
+
+ epilogue := discard-text
+
+ discard-text := *(*text CRLF) *text
+ ; May be ignored or discarded.
+
+ body-part := MIME-part-headers [CRLF *OCTET]
+ ; Lines in a body-part must not start
+ ; with the specified dash-boundary and
+ ; the delimiter must not appear anywhere
+ ; in the body part. Note that the
+ ; semantics of a body-part differ from
+ ; the semantics of a message, as
+ ; described in the text.
+
+ OCTET := <any 0-255 octet value>
+
+ IMPORTANT: The free insertion of linear-white-space and RFC 822
+ comments between the elements shown in this BNF is NOT allowed since
+ this BNF does not specify a structured header field.
+
+ NOTE: In certain transport enclaves, RFC 822 restrictions such as
+ the one that limits bodies to printable US-ASCII characters may not
+ be in force. (That is, transport domains may exist that resemble
+ standard Internet mail transport as specified in RFC 821 and assumed
+ by RFC 822, but without certain restrictions.) The relaxation of
+ these restrictions should be construed as locally extending the
+ definition of bodies, for example to include octets outside of the
+ US-ASCII range, as long as these extensions are supported by the
+ transport and adequately documented in the Content-Transfer-Encoding
+ header field. However, in no event are headers (either message
+ headers or body part headers) allowed to contain anything other than
+ US-ASCII characters.
+
+
+
+Freed & Borenstein Standards Track [Page 23]
+
+RFC 2046 Media Types November 1996
+
+
+ NOTE: Conspicuously missing from the "multipart" type is a notion of
+ structured, related body parts. It is recommended that those wishing
+ to provide more structured or integrated multipart messaging
+ facilities should define subtypes of multipart that are syntactically
+ identical but define relationships between the various parts. For
+ example, subtypes of multipart could be defined that include a
+ distinguished part which in turn is used to specify the relationships
+ between the other parts, probably referring to them by their
+ Content-ID field. Old implementations will not recognize the new
+ subtype if this approach is used, but will treat it as
+ multipart/mixed and will thus be able to show the user the parts that
+ are recognized.
+
+5.1.2. Handling Nested Messages and Multiparts
+
+ The "message/rfc822" subtype defined in a subsequent section of this
+ document has no terminating condition other than running out of data.
+ Similarly, an improperly truncated "multipart" entity may not have
+ any terminating boundary marker, and can turn up operationally due to
+ mail system malfunctions.
+
+ It is essential that such entities be handled correctly when they are
+ themselves imbedded inside of another "multipart" structure. MIME
+ implementations are therefore required to recognize outer level
+ boundary markers at ANY level of inner nesting. It is not sufficient
+ to only check for the next expected marker or other terminating
+ condition.
+
+5.1.3. Mixed Subtype
+
+ The "mixed" subtype of "multipart" is intended for use when the body
+ parts are independent and need to be bundled in a particular order.
+ Any "multipart" subtypes that an implementation does not recognize
+ must be treated as being of subtype "mixed".
+
+5.1.4. Alternative Subtype
+
+ The "multipart/alternative" type is syntactically identical to
+ "multipart/mixed", but the semantics are different. In particular,
+ each of the body parts is an "alternative" version of the same
+ information.
+
+ Systems should recognize that the contents of the various parts are
+ interchangeable. Systems should choose the "best" type based on the
+ local environment and preferences, in some cases even through user
+ interaction. As with "multipart/mixed", the order of body parts is
+ significant. In this case, the alternatives appear in an order of
+ increasing faithfulness to the original content. In general, the
+
+
+
+Freed & Borenstein Standards Track [Page 24]
+
+RFC 2046 Media Types November 1996
+
+
+ best choice is the LAST part of a type supported by the recipient
+ system's local environment.
+
+ "Multipart/alternative" may be used, for example, to send a message
+ in a fancy text format in such a way that it can easily be displayed
+ anywhere:
+
+ From: Nathaniel Borenstein <nsb@bellcore.com>
+ To: Ned Freed <ned@innosoft.com>
+ Date: Mon, 22 Mar 1993 09:41:09 -0800 (PST)
+ Subject: Formatted text mail
+ MIME-Version: 1.0
+ Content-Type: multipart/alternative; boundary=boundary42
+
+ --boundary42
+ Content-Type: text/plain; charset=us-ascii
+
+ ... plain text version of message goes here ...
+
+ --boundary42
+ Content-Type: text/enriched
+
+ ... RFC 1896 text/enriched version of same message
+ goes here ...
+
+ --boundary42
+ Content-Type: application/x-whatever
+
+ ... fanciest version of same message goes here ...
+
+ --boundary42--
+
+ In this example, users whose mail systems understood the
+ "application/x-whatever" format would see only the fancy version,
+ while other users would see only the enriched or plain text version,
+ depending on the capabilities of their system.
+
+ In general, user agents that compose "multipart/alternative" entities
+ must place the body parts in increasing order of preference, that is,
+ with the preferred format last. For fancy text, the sending user
+ agent should put the plainest format first and the richest format
+ last. Receiving user agents should pick and display the last format
+ they are capable of displaying. In the case where one of the
+ alternatives is itself of type "multipart" and contains unrecognized
+ sub-parts, the user agent may choose either to show that alternative,
+ an earlier alternative, or both.
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 25]
+
+RFC 2046 Media Types November 1996
+
+
+ NOTE: From an implementor's perspective, it might seem more sensible
+ to reverse this ordering, and have the plainest alternative last.
+ However, placing the plainest alternative first is the friendliest
+ possible option when "multipart/alternative" entities are viewed
+ using a non-MIME-conformant viewer. While this approach does impose
+ some burden on conformant MIME viewers, interoperability with older
+ mail readers was deemed to be more important in this case.
+
+ It may be the case that some user agents, if they can recognize more
+ than one of the formats, will prefer to offer the user the choice of
+ which format to view. This makes sense, for example, if a message
+ includes both a nicely-formatted image version and an easily-edited
+ text version. What is most critical, however, is that the user not
+ automatically be shown multiple versions of the same data. Either
+ the user should be shown the last recognized version or should be
+ given the choice.
+
+ THE SEMANTICS OF CONTENT-ID IN MULTIPART/ALTERNATIVE: Each part of a
+ "multipart/alternative" entity represents the same data, but the
+ mappings between the two are not necessarily without information
+ loss. For example, information is lost when translating ODA to
+ PostScript or plain text. It is recommended that each part should
+ have a different Content-ID value in the case where the information
+ content of the two parts is not identical. And when the information
+ content is identical -- for example, where several parts of type
+ "message/external-body" specify alternate ways to access the
+ identical data -- the same Content-ID field value should be used, to
+ optimize any caching mechanisms that might be present on the
+ recipient's end. However, the Content-ID values used by the parts
+ should NOT be the same Content-ID value that describes the
+ "multipart/alternative" as a whole, if there is any such Content-ID
+ field. That is, one Content-ID value will refer to the
+ "multipart/alternative" entity, while one or more other Content-ID
+ values will refer to the parts inside it.
+
+5.1.5. Digest Subtype
+
+ This document defines a "digest" subtype of the "multipart" Content-
+ Type. This type is syntactically identical to "multipart/mixed", but
+ the semantics are different. In particular, in a digest, the default
+ Content-Type value for a body part is changed from "text/plain" to
+ "message/rfc822". This is done to allow a more readable digest
+ format that is largely compatible (except for the quoting convention)
+ with RFC 934.
+
+ Note: Though it is possible to specify a Content-Type value for a
+ body part in a digest which is other than "message/rfc822", such as a
+ "text/plain" part containing a description of the material in the
+
+
+
+Freed & Borenstein Standards Track [Page 26]
+
+RFC 2046 Media Types November 1996
+
+
+ digest, actually doing so is undesirable. The "multipart/digest"
+ Content-Type is intended to be used to send collections of messages.
+ If a "text/plain" part is needed, it should be included as a separate
+ part of a "multipart/mixed" message.
+
+ A digest in this format might, then, look something like this:
+
+ From: Moderator-Address
+ To: Recipient-List
+ Date: Mon, 22 Mar 1994 13:34:51 +0000
+ Subject: Internet Digest, volume 42
+ MIME-Version: 1.0
+ Content-Type: multipart/mixed;
+ boundary="---- main boundary ----"
+
+ ------ main boundary ----
+
+ ...Introductory text or table of contents...
+
+ ------ main boundary ----
+ Content-Type: multipart/digest;
+ boundary="---- next message ----"
+
+ ------ next message ----
+
+ From: someone-else
+ Date: Fri, 26 Mar 1993 11:13:32 +0200
+ Subject: my opinion
+
+ ...body goes here ...
+
+ ------ next message ----
+
+ From: someone-else-again
+ Date: Fri, 26 Mar 1993 10:07:13 -0500
+ Subject: my different opinion
+
+ ... another body goes here ...
+
+ ------ next message ------
+
+ ------ main boundary ------
+
+5.1.6. Parallel Subtype
+
+ This document defines a "parallel" subtype of the "multipart"
+ Content-Type. This type is syntactically identical to
+ "multipart/mixed", but the semantics are different. In particular,
+
+
+
+Freed & Borenstein Standards Track [Page 27]
+
+RFC 2046 Media Types November 1996
+
+
+ in a parallel entity, the order of body parts is not significant.
+
+ A common presentation of this type is to display all of the parts
+ simultaneously on hardware and software that are capable of doing so.
+ However, composing agents should be aware that many mail readers will
+ lack this capability and will show the parts serially in any event.
+
+5.1.7. Other Multipart Subtypes
+
+ Other "multipart" subtypes are expected in the future. MIME
+ implementations must in general treat unrecognized subtypes of
+ "multipart" as being equivalent to "multipart/mixed".
+
+5.2. Message Media Type
+
+ It is frequently desirable, in sending mail, to encapsulate another
+ mail message. A special media type, "message", is defined to
+ facilitate this. In particular, the "rfc822" subtype of "message" is
+ used to encapsulate RFC 822 messages.
+
+ NOTE: It has been suggested that subtypes of "message" might be
+ defined for forwarded or rejected messages. However, forwarded and
+ rejected messages can be handled as multipart messages in which the
+ first part contains any control or descriptive information, and a
+ second part, of type "message/rfc822", is the forwarded or rejected
+ message. Composing rejection and forwarding messages in this manner
+ will preserve the type information on the original message and allow
+ it to be correctly presented to the recipient, and hence is strongly
+ encouraged.
+
+ Subtypes of "message" often impose restrictions on what encodings are
+ allowed. These restrictions are described in conjunction with each
+ specific subtype.
+
+ Mail gateways, relays, and other mail handling agents are commonly
+ known to alter the top-level header of an RFC 822 message. In
+ particular, they frequently add, remove, or reorder header fields.
+ These operations are explicitly forbidden for the encapsulated
+ headers embedded in the bodies of messages of type "message."
+
+5.2.1. RFC822 Subtype
+
+ A media type of "message/rfc822" indicates that the body contains an
+ encapsulated message, with the syntax of an RFC 822 message.
+ However, unlike top-level RFC 822 messages, the restriction that each
+ "message/rfc822" body must include a "From", "Date", and at least one
+ destination header is removed and replaced with the requirement that
+ at least one of "From", "Subject", or "Date" must be present.
+
+
+
+Freed & Borenstein Standards Track [Page 28]
+
+RFC 2046 Media Types November 1996
+
+
+ It should be noted that, despite the use of the numbers "822", a
+ "message/rfc822" entity isn't restricted to material in strict
+ conformance to RFC822, nor are the semantics of "message/rfc822"
+ objects restricted to the semantics defined in RFC822. More
+ specifically, a "message/rfc822" message could well be a News article
+ or a MIME message.
+
+ No encoding other than "7bit", "8bit", or "binary" is permitted for
+ the body of a "message/rfc822" entity. The message header fields are
+ always US-ASCII in any case, and data within the body can still be
+ encoded, in which case the Content-Transfer-Encoding header field in
+ the encapsulated message will reflect this. Non-US-ASCII text in the
+ headers of an encapsulated message can be specified using the
+ mechanisms described in RFC 2047.
+
+5.2.2. Partial Subtype
+
+ The "partial" subtype is defined to allow large entities to be
+ delivered as several separate pieces of mail and automatically
+ reassembled by a receiving user agent. (The concept is similar to IP
+ fragmentation and reassembly in the basic Internet Protocols.) This
+ mechanism can be used when intermediate transport agents limit the
+ size of individual messages that can be sent. The media type
+ "message/partial" thus indicates that the body contains a fragment of
+ a larger entity.
+
+ Because data of type "message" may never be encoded in base64 or
+ quoted-printable, a problem might arise if "message/partial" entities
+ are constructed in an environment that supports binary or 8bit
+ transport. The problem is that the binary data would be split into
+ multiple "message/partial" messages, each of them requiring binary
+ transport. If such messages were encountered at a gateway into a
+ 7bit transport environment, there would be no way to properly encode
+ them for the 7bit world, aside from waiting for all of the fragments,
+ reassembling the inner message, and then encoding the reassembled
+ data in base64 or quoted-printable. Since it is possible that
+ different fragments might go through different gateways, even this is
+ not an acceptable solution. For this reason, it is specified that
+ entities of type "message/partial" must always have a content-
+ transfer-encoding of 7bit (the default). In particular, even in
+ environments that support binary or 8bit transport, the use of a
+ content-transfer-encoding of "8bit" or "binary" is explicitly
+ prohibited for MIME entities of type "message/partial". This in turn
+ implies that the inner message must not use "8bit" or "binary"
+ encoding.
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 29]
+
+RFC 2046 Media Types November 1996
+
+
+ Because some message transfer agents may choose to automatically
+ fragment large messages, and because such agents may use very
+ different fragmentation thresholds, it is possible that the pieces of
+ a partial message, upon reassembly, may prove themselves to comprise
+ a partial message. This is explicitly permitted.
+
+ Three parameters must be specified in the Content-Type field of type
+ "message/partial": The first, "id", is a unique identifier, as close
+ to a world-unique identifier as possible, to be used to match the
+ fragments together. (In general, the identifier is essentially a
+ message-id; if placed in double quotes, it can be ANY message-id, in
+ accordance with the BNF for "parameter" given in RFC 2045.) The
+ second, "number", an integer, is the fragment number, which indicates
+ where this fragment fits into the sequence of fragments. The third,
+ "total", another integer, is the total number of fragments. This
+ third subfield is required on the final fragment, and is optional
+ (though encouraged) on the earlier fragments. Note also that these
+ parameters may be given in any order.
+
+ Thus, the second piece of a 3-piece message may have either of the
+ following header fields:
+
+ Content-Type: Message/Partial; number=2; total=3;
+ id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
+
+ Content-Type: Message/Partial;
+ id="oc=jpbe0M2Yt4s@thumper.bellcore.com";
+ number=2
+
+ But the third piece MUST specify the total number of fragments:
+
+ Content-Type: Message/Partial; number=3; total=3;
+ id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
+
+ Note that fragment numbering begins with 1, not 0.
+
+ When the fragments of an entity broken up in this manner are put
+ together, the result is always a complete MIME entity, which may have
+ its own Content-Type header field, and thus may contain any other
+ data type.
+
+5.2.2.1. Message Fragmentation and Reassembly
+
+ The semantics of a reassembled partial message must be those of the
+ "inner" message, rather than of a message containing the inner
+ message. This makes it possible, for example, to send a large audio
+ message as several partial messages, and still have it appear to the
+ recipient as a simple audio message rather than as an encapsulated
+
+
+
+Freed & Borenstein Standards Track [Page 30]
+
+RFC 2046 Media Types November 1996
+
+
+ message containing an audio message. That is, the encapsulation of
+ the message is considered to be "transparent".
+
+ When generating and reassembling the pieces of a "message/partial"
+ message, the headers of the encapsulated message must be merged with
+ the headers of the enclosing entities. In this process the following
+ rules must be observed:
+
+ (1) Fragmentation agents must split messages at line
+ boundaries only. This restriction is imposed because
+ splits at points other than the ends of lines in turn
+ depend on message transports being able to preserve
+ the semantics of messages that don't end with a CRLF
+ sequence. Many transports are incapable of preserving
+ such semantics.
+
+ (2) All of the header fields from the initial enclosing
+ message, except those that start with "Content-" and
+ the specific header fields "Subject", "Message-ID",
+ "Encrypted", and "MIME-Version", must be copied, in
+ order, to the new message.
+
+ (3) The header fields in the enclosed message which start
+ with "Content-", plus the "Subject", "Message-ID",
+ "Encrypted", and "MIME-Version" fields, must be
+ appended, in order, to the header fields of the new
+ message. Any header fields in the enclosed message
+ which do not start with "Content-" (except for the
+ "Subject", "Message-ID", "Encrypted", and "MIME-
+ Version" fields) will be ignored and dropped.
+
+ (4) All of the header fields from the second and any
+ subsequent enclosing messages are discarded by the
+ reassembly process.
+
+5.2.2.2. Fragmentation and Reassembly Example
+
+ If an audio message is broken into two pieces, the first piece might
+ look something like this:
+
+ X-Weird-Header-1: Foo
+ From: Bill@host.com
+ To: joe@otherhost.com
+ Date: Fri, 26 Mar 1993 12:59:38 -0500 (EST)
+ Subject: Audio mail (part 1 of 2)
+ Message-ID: <id1@host.com>
+ MIME-Version: 1.0
+ Content-type: message/partial; id="ABC@host.com";
+
+
+
+Freed & Borenstein Standards Track [Page 31]
+
+RFC 2046 Media Types November 1996
+
+
+ number=1; total=2
+
+ X-Weird-Header-1: Bar
+ X-Weird-Header-2: Hello
+ Message-ID: <anotherid@foo.com>
+ Subject: Audio mail
+ MIME-Version: 1.0
+ Content-type: audio/basic
+ Content-transfer-encoding: base64
+
+ ... first half of encoded audio data goes here ...
+
+ and the second half might look something like this:
+
+ From: Bill@host.com
+ To: joe@otherhost.com
+ Date: Fri, 26 Mar 1993 12:59:38 -0500 (EST)
+ Subject: Audio mail (part 2 of 2)
+ MIME-Version: 1.0
+ Message-ID: <id2@host.com>
+ Content-type: message/partial;
+ id="ABC@host.com"; number=2; total=2
+
+ ... second half of encoded audio data goes here ...
+
+ Then, when the fragmented message is reassembled, the resulting
+ message to be displayed to the user should look something like this:
+
+ X-Weird-Header-1: Foo
+ From: Bill@host.com
+ To: joe@otherhost.com
+ Date: Fri, 26 Mar 1993 12:59:38 -0500 (EST)
+ Subject: Audio mail
+ Message-ID: <anotherid@foo.com>
+ MIME-Version: 1.0
+ Content-type: audio/basic
+ Content-transfer-encoding: base64
+
+ ... first half of encoded audio data goes here ...
+ ... second half of encoded audio data goes here ...
+
+ The inclusion of a "References" field in the headers of the second
+ and subsequent pieces of a fragmented message that references the
+ Message-Id on the previous piece may be of benefit to mail readers
+ that understand and track references. However, the generation of
+ such "References" fields is entirely optional.
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 32]
+
+RFC 2046 Media Types November 1996
+
+
+ Finally, it should be noted that the "Encrypted" header field has
+ been made obsolete by Privacy Enhanced Messaging (PEM) [RFC-1421,
+ RFC-1422, RFC-1423, RFC-1424], but the rules above are nevertheless
+ believed to describe the correct way to treat it if it is encountered
+ in the context of conversion to and from "message/partial" fragments.
+
+5.2.3. External-Body Subtype
+
+ The external-body subtype indicates that the actual body data are not
+ included, but merely referenced. In this case, the parameters
+ describe a mechanism for accessing the external data.
+
+ When a MIME entity is of type "message/external-body", it consists of
+ a header, two consecutive CRLFs, and the message header for the
+ encapsulated message. If another pair of consecutive CRLFs appears,
+ this of course ends the message header for the encapsulated message.
+ However, since the encapsulated message's body is itself external, it
+ does NOT appear in the area that follows. For example, consider the
+ following message:
+
+ Content-type: message/external-body;
+ access-type=local-file;
+ name="/u/nsb/Me.jpeg"
+
+ Content-type: image/jpeg
+ Content-ID: <id42@guppylake.bellcore.com>
+ Content-Transfer-Encoding: binary
+
+ THIS IS NOT REALLY THE BODY!
+
+ The area at the end, which might be called the "phantom body", is
+ ignored for most external-body messages. However, it may be used to
+ contain auxiliary information for some such messages, as indeed it is
+ when the access-type is "mail-server". The only access-type defined
+ in this document that uses the phantom body is "mail-server", but
+ other access-types may be defined in the future in other
+ specifications that use this area.
+
+ The encapsulated headers in ALL "message/external-body" entities MUST
+ include a Content-ID header field to give a unique identifier by
+ which to reference the data. This identifier may be used for caching
+ mechanisms, and for recognizing the receipt of the data when the
+ access-type is "mail-server".
+
+ Note that, as specified here, the tokens that describe external-body
+ data, such as file names and mail server commands, are required to be
+ in the US-ASCII character set.
+
+
+
+
+Freed & Borenstein Standards Track [Page 33]
+
+RFC 2046 Media Types November 1996
+
+
+ If this proves problematic in practice, a new mechanism may be
+ required as a future extension to MIME, either as newly defined
+ access-types for "message/external-body" or by some other mechanism.
+
+ As with "message/partial", MIME entities of type "message/external-
+ body" MUST have a content-transfer-encoding of 7bit (the default).
+ In particular, even in environments that support binary or 8bit
+ transport, the use of a content-transfer-encoding of "8bit" or
+ "binary" is explicitly prohibited for entities of type
+ "message/external-body".
+
+5.2.3.1. General External-Body Parameters
+
+ The parameters that may be used with any "message/external-body"
+ are:
+
+ (1) ACCESS-TYPE -- A word indicating the supported access
+ mechanism by which the file or data may be obtained.
+ This word is not case sensitive. Values include, but
+ are not limited to, "FTP", "ANON-FTP", "TFTP", "LOCAL-
+ FILE", and "MAIL-SERVER". Future values, except for
+ experimental values beginning with "X-", must be
+ registered with IANA, as described in RFC 2048.
+ This parameter is unconditionally mandatory and MUST be
+ present on EVERY "message/external-body".
+
+ (2) EXPIRATION -- The date (in the RFC 822 "date-time"
+ syntax, as extended by RFC 1123 to permit 4 digits in
+ the year field) after which the existence of the
+ external data is not guaranteed. This parameter may be
+ used with ANY access-type and is ALWAYS optional.
+
+ (3) SIZE -- The size (in octets) of the data. The intent
+ of this parameter is to help the recipient decide
+ whether or not to expend the necessary resources to
+ retrieve the external data. Note that this describes
+ the size of the data in its canonical form, that is,
+ before any Content-Transfer-Encoding has been applied
+ or after the data have been decoded. This parameter
+ may be used with ANY access-type and is ALWAYS
+ optional.
+
+ (4) PERMISSION -- A case-insensitive field that indicates
+ whether or not it is expected that clients might also
+ attempt to overwrite the data. By default, or if
+ permission is "read", the assumption is that they are
+ not, and that if the data is retrieved once, it is
+ never needed again. If PERMISSION is "read-write",
+
+
+
+Freed & Borenstein Standards Track [Page 34]
+
+RFC 2046 Media Types November 1996
+
+
+ this assumption is invalid, and any local copy must be
+ considered no more than a cache. "Read" and "Read-
+ write" are the only defined values of permission. This
+ parameter may be used with ANY access-type and is
+ ALWAYS optional.
+
+ The precise semantics of the access-types defined here are described
+ in the sections that follow.
+
+5.2.3.2. The 'ftp' and 'tftp' Access-Types
+
+ An access-type of FTP or TFTP indicates that the message body is
+ accessible as a file using the FTP [RFC-959] or TFTP [RFC-783]
+ protocols, respectively. For these access-types, the following
+ additional parameters are mandatory:
+
+ (1) NAME -- The name of the file that contains the actual
+ body data.
+
+ (2) SITE -- A machine from which the file may be obtained,
+ using the given protocol. This must be a fully
+ qualified domain name, not a nickname.
+
+ (3) Before any data are retrieved, using FTP, the user will
+ generally need to be asked to provide a login id and a
+ password for the machine named by the site parameter.
+ For security reasons, such an id and password are not
+ specified as content-type parameters, but must be
+ obtained from the user.
+
+ In addition, the following parameters are optional:
+
+ (1) DIRECTORY -- A directory from which the data named by
+ NAME should be retrieved.
+
+ (2) MODE -- A case-insensitive string indicating the mode
+ to be used when retrieving the information. The valid
+ values for access-type "TFTP" are "NETASCII", "OCTET",
+ and "MAIL", as specified by the TFTP protocol [RFC-
+ 783]. The valid values for access-type "FTP" are
+ "ASCII", "EBCDIC", "IMAGE", and "LOCALn" where "n" is a
+ decimal integer, typically 8. These correspond to the
+ representation types "A" "E" "I" and "L n" as specified
+ by the FTP protocol [RFC-959]. Note that "BINARY" and
+ "TENEX" are not valid values for MODE and that "OCTET"
+ or "IMAGE" or "LOCAL8" should be used instead. If MODE
+ is not specified, the default value is "NETASCII" for
+ TFTP and "ASCII" otherwise.
+
+
+
+Freed & Borenstein Standards Track [Page 35]
+
+RFC 2046 Media Types November 1996
+
+
+5.2.3.3. The 'anon-ftp' Access-Type
+
+ The "anon-ftp" access-type is identical to the "ftp" access type,
+ except that the user need not be asked to provide a name and password
+ for the specified site. Instead, the ftp protocol will be used with
+ login "anonymous" and a password that corresponds to the user's mail
+ address.
+
+5.2.3.4. The 'local-file' Access-Type
+
+ An access-type of "local-file" indicates that the actual body is
+ accessible as a file on the local machine. Two additional parameters
+ are defined for this access type:
+
+ (1) NAME -- The name of the file that contains the actual
+ body data. This parameter is mandatory for the
+ "local-file" access-type.
+
+ (2) SITE -- A domain specifier for a machine or set of
+ machines that are known to have access to the data
+ file. This optional parameter is used to describe the
+ locality of reference for the data, that is, the site
+ or sites at which the file is expected to be visible.
+ Asterisks may be used for wildcard matching to a part
+ of a domain name, such as "*.bellcore.com", to indicate
+ a set of machines on which the data should be directly
+ visible, while a single asterisk may be used to
+ indicate a file that is expected to be universally
+ available, e.g., via a global file system.
+
+5.2.3.5. The 'mail-server' Access-Type
+
+ The "mail-server" access-type indicates that the actual body is
+ available from a mail server. Two additional parameters are defined
+ for this access-type:
+
+ (1) SERVER -- The addr-spec of the mail server from which
+ the actual body data can be obtained. This parameter
+ is mandatory for the "mail-server" access-type.
+
+ (2) SUBJECT -- The subject that is to be used in the mail
+ that is sent to obtain the data. Note that keying mail
+ servers on Subject lines is NOT recommended, but such
+ mail servers are known to exist. This is an optional
+ parameter.
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 36]
+
+RFC 2046 Media Types November 1996
+
+
+ Because mail servers accept a variety of syntaxes, some of which are
+ multiline, the full command to be sent to a mail server is not
+ included as a parameter in the content-type header field. Instead,
+ it is provided as the "phantom body" when the media type is
+ "message/external-body" and the access-type is mail-server.
+
+ Note that MIME does not define a mail server syntax. Rather, it
+ allows the inclusion of arbitrary mail server commands in the phantom
+ body. Implementations must include the phantom body in the body of
+ the message it sends to the mail server address to retrieve the
+ relevant data.
+
+ Unlike other access-types, mail-server access is asynchronous and
+ will happen at an unpredictable time in the future. For this reason,
+ it is important that there be a mechanism by which the returned data
+ can be matched up with the original "message/external-body" entity.
+ MIME mail servers must use the same Content-ID field on the returned
+ message that was used in the original "message/external-body"
+ entities, to facilitate such matching.
+
+5.2.3.6. External-Body Security Issues
+
+ "Message/external-body" entities give rise to two important security
+ issues:
+
+ (1) Accessing data via a "message/external-body" reference
+ effectively results in the message recipient performing
+ an operation that was specified by the message
+ originator. It is therefore possible for the message
+ originator to trick a recipient into doing something
+ they would not have done otherwise. For example, an
+ originator could specify an action that attempts
+ retrieval of material that the recipient is not
+ authorized to obtain, causing the recipient to
+ unwittingly violate some security policy. For this
+ reason, user agents capable of resolving external
+ references must always take steps to describe the
+ action they are to take to the recipient and ask for
+ explicit permission prior to performing it.
+
+ The 'mail-server' access-type is particularly
+ vulnerable, in that it causes the recipient to send a
+ new message whose contents are specified by the
+ original message's originator. Given the potential for
+ abuse, any such request messages that are constructed
+ should contain a clear indication that they were
+ generated automatically (e.g. in a Comments: header
+ field) in an attempt to resolve a MIME
+
+
+
+Freed & Borenstein Standards Track [Page 37]
+
+RFC 2046 Media Types November 1996
+
+
+ "message/external-body" reference.
+
+ (2) MIME will sometimes be used in environments that
+ provide some guarantee of message integrity and
+ authenticity. If present, such guarantees may apply
+ only to the actual direct content of messages -- they
+ may or may not apply to data accessed through MIME's
+ "message/external-body" mechanism. In particular, it
+ may be possible to subvert certain access mechanisms
+ even when the messaging system itself is secure.
+
+ It should be noted that this problem exists either with
+ or without the availability of MIME mechanisms. A
+ casual reference to an FTP site containing a document
+ in the text of a secure message brings up similar
+ issues -- the only difference is that MIME provides for
+ automatic retrieval of such material, and users may
+ place unwarranted trust in such automatic retrieval
+ mechanisms.
+
+5.2.3.7. Examples and Further Explanations
+
+ When the external-body mechanism is used in conjunction with the
+ "multipart/alternative" media type it extends the functionality of
+ "multipart/alternative" to include the case where the same entity is
+ provided in the same format but via different access mechanisms.
+ When this is done the originator of the message must order the parts
+ first in terms of preferred formats and then by preferred access
+ mechanisms. The recipient's viewer should then evaluate the list
+ both in terms of format and access mechanisms.
+
+ With the emerging possibility of very wide-area file systems, it
+ becomes very hard to know in advance the set of machines where a file
+ will and will not be accessible directly from the file system.
+ Therefore it may make sense to provide both a file name, to be tried
+ directly, and the name of one or more sites from which the file is
+ known to be accessible. An implementation can try to retrieve remote
+ files using FTP or any other protocol, using anonymous file retrieval
+ or prompting the user for the necessary name and password. If an
+ external body is accessible via multiple mechanisms, the sender may
+ include multiple entities of type "message/external-body" within the
+ body parts of an enclosing "multipart/alternative" entity.
+
+ However, the external-body mechanism is not intended to be limited to
+ file retrieval, as shown by the mail-server access-type. Beyond
+ this, one can imagine, for example, using a video server for external
+ references to video clips.
+
+
+
+
+Freed & Borenstein Standards Track [Page 38]
+
+RFC 2046 Media Types November 1996
+
+
+ The embedded message header fields which appear in the body of the
+ "message/external-body" data must be used to declare the media type
+ of the external body if it is anything other than plain US-ASCII
+ text, since the external body does not have a header section to
+ declare its type. Similarly, any Content-transfer-encoding other
+ than "7bit" must also be declared here. Thus a complete
+ "message/external-body" message, referring to an object in PostScript
+ format, might look like this:
+
+ From: Whomever
+ To: Someone
+ Date: Whenever
+ Subject: whatever
+ MIME-Version: 1.0
+ Message-ID: <id1@host.com>
+ Content-Type: multipart/alternative; boundary=42
+ Content-ID: <id001@guppylake.bellcore.com>
+
+ --42
+ Content-Type: message/external-body; name="BodyFormats.ps";
+ site="thumper.bellcore.com"; mode="image";
+ access-type=ANON-FTP; directory="pub";
+ expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)"
+
+ Content-type: application/postscript
+ Content-ID: <id42@guppylake.bellcore.com>
+
+ --42
+ Content-Type: message/external-body; access-type=local-file;
+ name="/u/nsb/writing/rfcs/RFC-MIME.ps";
+ site="thumper.bellcore.com";
+ expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)"
+
+ Content-type: application/postscript
+ Content-ID: <id42@guppylake.bellcore.com>
+
+ --42
+ Content-Type: message/external-body;
+ access-type=mail-server;
+ server="listserv@bogus.bitnet";
+ expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)"
+
+ Content-type: application/postscript
+ Content-ID: <id42@guppylake.bellcore.com>
+
+ get RFC-MIME.DOC
+
+ --42--
+
+
+
+Freed & Borenstein Standards Track [Page 39]
+
+RFC 2046 Media Types November 1996
+
+
+ Note that in the above examples, the default Content-transfer-
+ encoding of "7bit" is assumed for the external postscript data.
+
+ Like the "message/partial" type, the "message/external-body" media
+ type is intended to be transparent, that is, to convey the data type
+ in the external body rather than to convey a message with a body of
+ that type. Thus the headers on the outer and inner parts must be
+ merged using the same rules as for "message/partial". In particular,
+ this means that the Content-type and Subject fields are overridden,
+ but the From field is preserved.
+
+ Note that since the external bodies are not transported along with
+ the external body reference, they need not conform to transport
+ limitations that apply to the reference itself. In particular,
+ Internet mail transports may impose 7bit and line length limits, but
+ these do not automatically apply to binary external body references.
+ Thus a Content-Transfer-Encoding is not generally necessary, though
+ it is permitted.
+
+ Note that the body of a message of type "message/external-body" is
+ governed by the basic syntax for an RFC 822 message. In particular,
+ anything before the first consecutive pair of CRLFs is header
+ information, while anything after it is body information, which is
+ ignored for most access-types.
+
+5.2.4. Other Message Subtypes
+
+ MIME implementations must in general treat unrecognized subtypes of
+ "message" as being equivalent to "application/octet-stream".
+
+ Future subtypes of "message" intended for use with email should be
+ restricted to "7bit" encoding. A type other than "message" should be
+ used if restriction to "7bit" is not possible.
+
+6. Experimental Media Type Values
+
+ A media type value beginning with the characters "X-" is a private
+ value, to be used by consenting systems by mutual agreement. Any
+ format without a rigorous and public definition must be named with an
+ "X-" prefix, and publicly specified values shall never begin with
+ "X-". (Older versions of the widely used Andrew system use the "X-
+ BE2" name, so new systems should probably choose a different name.)
+
+ In general, the use of "X-" top-level types is strongly discouraged.
+ Implementors should invent subtypes of the existing types whenever
+ possible. In many cases, a subtype of "application" will be more
+ appropriate than a new top-level type.
+
+
+
+
+Freed & Borenstein Standards Track [Page 40]
+
+RFC 2046 Media Types November 1996
+
+
+7. Summary
+
+ The five discrete media types provide a standardized
+ mechanism for tagging entities as "audio", "image", or several other
+ kinds of data. The composite "multipart" and "message" media types
+ allow mixing and hierarchical structuring of entities of different
+ types in a single message. A distinguished parameter syntax allows
+ further specification of data format details, particularly the
+ specification of alternate character sets. Additional optional
+ header fields provide mechanisms for certain extensions deemed
+ desirable by many implementors. Finally, a number of useful media
+ types are defined for general use by consenting user agents, notably
+ "message/partial" and "message/external-body".
+
+8. Security Considerations
+
+ Security issues are discussed in the context of the
+ "application/postscript" type, the "message/external-body" type, and
+ in RFC 2048. Implementors should pay special attention to the
+ security implications of any media types that can cause the remote
+ execution of any actions in the recipient's environment. In such
+ cases, the discussion of the "application/postscript" type may serve
+ as a model for considering other media types with remote execution
+ capabilities.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 41]
+
+RFC 2046 Media Types November 1996
+
+
+9. Authors' Addresses
+
+ For more information, the authors of this document are best contacted
+ via Internet mail:
+
+ Ned Freed
+ Innosoft International, Inc.
+ 1050 East Garvey Avenue South
+ West Covina, CA 91790
+ USA
+
+ Phone: +1 818 919 3600
+ Fax: +1 818 919 3614
+ EMail: ned@innosoft.com
+
+
+ Nathaniel S. Borenstein
+ First Virtual Holdings
+ 25 Washington Avenue
+ Morristown, NJ 07960
+ USA
+
+ Phone: +1 201 540 8967
+ Fax: +1 201 993 3032
+ EMail: nsb@nsb.fv.com
+
+
+ MIME is a result of the work of the Internet Engineering Task Force
+ Working Group on RFC 822 Extensions. The chairman of that group,
+ Greg Vaudreuil, may be reached at:
+
+ Gregory M. Vaudreuil
+ Octel Network Services
+ 17080 Dallas Parkway
+ Dallas, TX 75248-1905
+ USA
+
+ EMail: Greg.Vaudreuil@Octel.Com
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 42]
+
+RFC 2046 Media Types November 1996
+
+
+Appendix A -- Collected Grammar
+
+ This appendix contains the complete BNF grammar for all the syntax
+ specified by this document.
+
+ By itself, however, this grammar is incomplete. It refers by name to
+ several syntax rules that are defined by RFC 822. Rather than
+ reproduce those definitions here, and risk unintentional differences
+ between the two, this document simply refers the reader to RFC 822
+ for the remaining definitions. Wherever a term is undefined, it
+ refers to the RFC 822 definition.
+
+ boundary := 0*69<bchars> bcharsnospace
+
+ bchars := bcharsnospace / " "
+
+ bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
+ "+" / "_" / "," / "-" / "." /
+ "/" / ":" / "=" / "?"
+
+ body-part := <"message" as defined in RFC 822, with all
+ header fields optional, not starting with the
+ specified dash-boundary, and with the
+ delimiter not occurring anywhere in the
+ body part. Note that the semantics of a
+ part differ from the semantics of a message,
+ as described in the text.>
+
+ close-delimiter := delimiter "--"
+
+ dash-boundary := "--" boundary
+ ; boundary taken from the value of
+ ; boundary parameter of the
+ ; Content-Type field.
+
+ delimiter := CRLF dash-boundary
+
+ discard-text := *(*text CRLF)
+ ; May be ignored or discarded.
+
+ encapsulation := delimiter transport-padding
+ CRLF body-part
+
+ epilogue := discard-text
+
+ multipart-body := [preamble CRLF]
+ dash-boundary transport-padding CRLF
+ body-part *encapsulation
+
+
+
+Freed & Borenstein Standards Track [Page 43]
+
+RFC 2046 Media Types November 1996
+
+
+ close-delimiter transport-padding
+ [CRLF epilogue]
+
+ preamble := discard-text
+
+ transport-padding := *LWSP-char
+ ; Composers MUST NOT generate
+ ; non-zero length transport
+ ; padding, but receivers MUST
+ ; be able to handle padding
+ ; added by message transports.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein Standards Track [Page 44]
+
diff --git a/filter.c b/filter.c
@@ -19,10 +19,10 @@ int main(int argc, char **argv) {
for (i = filter; i < argc; i++)
if (!strcmp (argv[i], "-e"))
edit = i;
- }
- for (i = 0; i < argc; i++) {
- strncpy (argv2[i], argv[i], 1023);
- argv2[i][1023] = '\0';
+ for (i = 0; i < argc; i++) {
+ strncpy (argv2[i], argv[i], 1023);
+ argv2[i][1023] = '\0';
+ }
}
memset (b, '\0', 1024);
/* Headers */
@@ -33,7 +33,7 @@ int main(int argc, char **argv) {
if (!strncmp (b, argv[i], strlen(argv[i])) || argv[i][0] == ':') {
/* Edit/Remove Headers */
print = 1;
- for (j = edit + 1; j < argc && argv[j]; j++)
+ for (j = edit + 1; !value && j < argc && argv[j]; j++)
if ((ptr = strchr (argv[j], ':')) &&
!strncmp (b, argv[j], ptr - argv[j] + 1)) {
if (ptr[1] != '\0' && argv2[j][0])
diff --git a/mbox.c b/mbox.c
@@ -8,7 +8,6 @@
FILE *fd;
static char word[1024];
-// XXX maybe so many [1024] stuff. can this cause truncated mails?
static void mbox_ls() {
char b[1024], from[1024], subject[1024], date[1024], *ptr;
int m = 0, headers = 1;