dmc

dynamic mail client
git clone git://git.suckless.org/dmc
Log | Files | Refs | README | LICENSE

commit 4a53c1f65beb951587ed0664a107e5339b3f8c22
parent 60bfa2edb213e82738037473393e80c144982b31
Author: nibble <unknown>
Date:   Mon,  9 Nov 2009 19:28:21 +0100

* Added missing rfc's
* Minor optimization in dmc-filter when using -v
Diffstat:
doc/imf-rfc5322.txt | 3195+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
doc/mbox-rfc4155.txt | 507+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
doc/mime-p1-rfc2045.txt | 1739+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
doc/mime-p2-rfc2046.txt | 2467++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
filter.c | 10+++++-----
mbox.c | 1-
6 files changed, 7913 insertions(+), 6 deletions(-)

diff --git a/doc/imf-rfc5322.txt b/doc/imf-rfc5322.txt @@ -0,0 +1,3195 @@ + + + + + + +Network Working Group P. Resnick, Ed. +Request for Comments: 5322 Qualcomm Incorporated +Obsoletes: 2822 October 2008 +Updates: 4021 +Category: Standards Track + + + Internet Message Format + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Abstract + + This document specifies the Internet Message Format (IMF), a syntax + for text messages that are sent between computer users, within the + framework of "electronic mail" messages. This specification is a + revision of Request For Comments (RFC) 2822, which itself superseded + Request For Comments (RFC) 822, "Standard for the Format of ARPA + Internet Text Messages", updating it to reflect current practice and + incorporating incremental changes that were specified in other RFCs. + + + + + + + + + + + + + + + + + + + + + + + + + +Resnick Standards Track [Page 1] + +RFC 5322 Internet Message Format October 2008 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4 + 1.1. Scope . . . . . . . . . . . . . . . . . . . . . . . . . . 4 + 1.2. Notational Conventions . . . . . . . . . . . . . . . . . . 5 + 1.2.1. Requirements Notation . . . . . . . . . . . . . . . . 5 + 1.2.2. Syntactic Notation . . . . . . . . . . . . . . . . . . 5 + 1.2.3. Structure of This Document . . . . . . . . . . . . . . 5 + 2. Lexical Analysis of Messages . . . . . . . . . . . . . . . . . 6 + 2.1. General Description . . . . . . . . . . . . . . . . . . . 6 + 2.1.1. Line Length Limits . . . . . . . . . . . . . . . . . . 7 + 2.2. Header Fields . . . . . . . . . . . . . . . . . . . . . . 8 + 2.2.1. Unstructured Header Field Bodies . . . . . . . . . . . 8 + 2.2.2. Structured Header Field Bodies . . . . . . . . . . . . 8 + 2.2.3. Long Header Fields . . . . . . . . . . . . . . . . . . 8 + 2.3. Body . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 + 3. Syntax . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10 + 3.1. Introduction . . . . . . . . . . . . . . . . . . . . . . . 10 + 3.2. Lexical Tokens . . . . . . . . . . . . . . . . . . . . . . 10 + 3.2.1. Quoted characters . . . . . . . . . . . . . . . . . . 10 + 3.2.2. Folding White Space and Comments . . . . . . . . . . . 11 + 3.2.3. Atom . . . . . . . . . . . . . . . . . . . . . . . . . 12 + 3.2.4. Quoted Strings . . . . . . . . . . . . . . . . . . . . 13 + 3.2.5. Miscellaneous Tokens . . . . . . . . . . . . . . . . . 14 + 3.3. Date and Time Specification . . . . . . . . . . . . . . . 14 + 3.4. Address Specification . . . . . . . . . . . . . . . . . . 16 + 3.4.1. Addr-Spec Specification . . . . . . . . . . . . . . . 17 + 3.5. Overall Message Syntax . . . . . . . . . . . . . . . . . . 18 + 3.6. Field Definitions . . . . . . . . . . . . . . . . . . . . 19 + 3.6.1. The Origination Date Field . . . . . . . . . . . . . . 22 + 3.6.2. Originator Fields . . . . . . . . . . . . . . . . . . 22 + 3.6.3. Destination Address Fields . . . . . . . . . . . . . . 23 + 3.6.4. Identification Fields . . . . . . . . . . . . . . . . 25 + 3.6.5. Informational Fields . . . . . . . . . . . . . . . . . 27 + 3.6.6. Resent Fields . . . . . . . . . . . . . . . . . . . . 28 + 3.6.7. Trace Fields . . . . . . . . . . . . . . . . . . . . . 30 + 3.6.8. Optional Fields . . . . . . . . . . . . . . . . . . . 30 + 4. Obsolete Syntax . . . . . . . . . . . . . . . . . . . . . . . 31 + 4.1. Miscellaneous Obsolete Tokens . . . . . . . . . . . . . . 32 + 4.2. Obsolete Folding White Space . . . . . . . . . . . . . . . 33 + 4.3. Obsolete Date and Time . . . . . . . . . . . . . . . . . . 33 + 4.4. Obsolete Addressing . . . . . . . . . . . . . . . . . . . 35 + 4.5. Obsolete Header Fields . . . . . . . . . . . . . . . . . . 35 + 4.5.1. Obsolete Origination Date Field . . . . . . . . . . . 36 + 4.5.2. Obsolete Originator Fields . . . . . . . . . . . . . . 36 + 4.5.3. Obsolete Destination Address Fields . . . . . . . . . 37 + 4.5.4. Obsolete Identification Fields . . . . . . . . . . . . 37 + 4.5.5. Obsolete Informational Fields . . . . . . . . . . . . 37 + + + +Resnick Standards Track [Page 2] + +RFC 5322 Internet Message Format October 2008 + + + 4.5.6. Obsolete Resent Fields . . . . . . . . . . . . . . . . 38 + 4.5.7. Obsolete Trace Fields . . . . . . . . . . . . . . . . 38 + 4.5.8. Obsolete optional fields . . . . . . . . . . . . . . . 38 + 5. Security Considerations . . . . . . . . . . . . . . . . . . . 38 + 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 39 + Appendix A. Example Messages . . . . . . . . . . . . . . . . . 43 + Appendix A.1. Addressing Examples . . . . . . . . . . . . . . . 44 + Appendix A.1.1. A Message from One Person to Another with + Simple Addressing . . . . . . . . . . . . . . . . 44 + Appendix A.1.2. Different Types of Mailboxes . . . . . . . . . . . 45 + Appendix A.1.3. Group Addresses . . . . . . . . . . . . . . . . . 45 + Appendix A.2. Reply Messages . . . . . . . . . . . . . . . . . . 46 + Appendix A.3. Resent Messages . . . . . . . . . . . . . . . . . 47 + Appendix A.4. Messages with Trace Fields . . . . . . . . . . . . 48 + Appendix A.5. White Space, Comments, and Other Oddities . . . . 49 + Appendix A.6. Obsoleted Forms . . . . . . . . . . . . . . . . . 50 + Appendix A.6.1. Obsolete Addressing . . . . . . . . . . . . . . . 50 + Appendix A.6.2. Obsolete Dates . . . . . . . . . . . . . . . . . . 50 + Appendix A.6.3. Obsolete White Space and Comments . . . . . . . . 51 + Appendix B. Differences from Earlier Specifications . . . . . 52 + Appendix C. Acknowledgements . . . . . . . . . . . . . . . . . 53 + 7. References . . . . . . . . . . . . . . . . . . . . . . . . . . 55 + 7.1. Normative References . . . . . . . . . . . . . . . . . . . 55 + 7.2. Informative References . . . . . . . . . . . . . . . . . . 55 + + + + + + + + + + + + + + + + + + + + + + + + + + + +Resnick Standards Track [Page 3] + +RFC 5322 Internet Message Format October 2008 + + +1. Introduction + +1.1. Scope + + This document specifies the Internet Message Format (IMF), a syntax + for text messages that are sent between computer users, within the + framework of "electronic mail" messages. This specification is an + update to [RFC2822], which itself superseded [RFC0822], updating it + to reflect current practice and incorporating incremental changes + that were specified in other RFCs such as [RFC1123]. + + This document specifies a syntax only for text messages. In + particular, it makes no provision for the transmission of images, + audio, or other sorts of structured data in electronic mail messages. + There are several extensions published, such as the MIME document + series ([RFC2045], [RFC2046], [RFC2049]), which describe mechanisms + for the transmission of such data through electronic mail, either by + extending the syntax provided here or by structuring such messages to + conform to this syntax. Those mechanisms are outside of the scope of + this specification. + + In the context of electronic mail, messages are viewed as having an + envelope and contents. The envelope contains whatever information is + needed to accomplish transmission and delivery. (See [RFC5321] for a + discussion of the envelope.) The contents comprise the object to be + delivered to the recipient. This specification applies only to the + format and some of the semantics of message contents. It contains no + specification of the information in the envelope. + + However, some message systems may use information from the contents + to create the envelope. It is intended that this specification + facilitate the acquisition of such information by programs. + + This specification is intended as a definition of what message + content format is to be passed between systems. Though some message + systems locally store messages in this format (which eliminates the + need for translation between formats) and others use formats that + differ from the one specified in this specification, local storage is + outside of the scope of this specification. + + Note: This specification is not intended to dictate the internal + formats used by sites, the specific message system features that + they are expected to support, or any of the characteristics of + user interface programs that create or read messages. In + addition, this document does not specify an encoding of the + characters for either transport or storage; that is, it does not + specify the number of bits used or how those bits are specifically + transferred over the wire or stored on disk. + + + +Resnick Standards Track [Page 4] + +RFC 5322 Internet Message Format October 2008 + + +1.2. Notational Conventions + +1.2.1. Requirements Notation + + This document occasionally uses terms that appear in capital letters. + When the terms "MUST", "SHOULD", "RECOMMENDED", "MUST NOT", "SHOULD + NOT", and "MAY" appear capitalized, they are being used to indicate + particular requirements of this specification. A discussion of the + meanings of these terms appears in [RFC2119]. + +1.2.2. Syntactic Notation + + This specification uses the Augmented Backus-Naur Form (ABNF) + [RFC5234] notation for the formal definitions of the syntax of + messages. Characters will be specified either by a decimal value + (e.g., the value %d65 for uppercase A and %d97 for lowercase A) or by + a case-insensitive literal value enclosed in quotation marks (e.g., + "A" for either uppercase or lowercase A). + +1.2.3. Structure of This Document + + This document is divided into several sections. + + This section, section 1, is a short introduction to the document. + + Section 2 lays out the general description of a message and its + constituent parts. This is an overview to help the reader understand + some of the general principles used in the later portions of this + document. Any examples in this section MUST NOT be taken as + specification of the formal syntax of any part of a message. + + Section 3 specifies formal ABNF rules for the structure of each part + of a message (the syntax) and describes the relationship between + those parts and their meaning in the context of a message (the + semantics). That is, it lays out the actual rules for the structure + of each part of a message (the syntax) as well as a description of + the parts and instructions for their interpretation (the semantics). + This includes analysis of the syntax and semantics of subparts of + messages that have specific structure. The syntax included in + section 3 represents messages as they MUST be created. There are + also notes in section 3 to indicate if any of the options specified + in the syntax SHOULD be used over any of the others. + + Both sections 2 and 3 describe messages that are legal to generate + for purposes of this specification. + + + + + + +Resnick Standards Track [Page 5] + +RFC 5322 Internet Message Format October 2008 + + + Section 4 of this document specifies an "obsolete" syntax. There are + references in section 3 to these obsolete syntactic elements. The + rules of the obsolete syntax are elements that have appeared in + earlier versions of this specification or have previously been widely + used in Internet messages. As such, these elements MUST be + interpreted by parsers of messages in order to be conformant to this + specification. However, since items in this syntax have been + determined to be non-interoperable or to cause significant problems + for recipients of messages, they MUST NOT be generated by creators of + conformant messages. + + Section 5 details security considerations to take into account when + implementing this specification. + + Appendix A lists examples of different sorts of messages. These + examples are not exhaustive of the types of messages that appear on + the Internet, but give a broad overview of certain syntactic forms. + + Appendix B lists the differences between this specification and + earlier specifications for Internet messages. + + Appendix C contains acknowledgements. + +2. Lexical Analysis of Messages + +2.1. General Description + + At the most basic level, a message is a series of characters. A + message that is conformant with this specification is composed of + characters with values in the range of 1 through 127 and interpreted + as US-ASCII [ANSI.X3-4.1986] characters. For brevity, this document + sometimes refers to this range of characters as simply "US-ASCII + characters". + + Note: This document specifies that messages are made up of + characters in the US-ASCII range of 1 through 127. There are + other documents, specifically the MIME document series ([RFC2045], + [RFC2046], [RFC2047], [RFC2049], [RFC4288], [RFC4289]), that + extend this specification to allow for values outside of that + range. Discussion of those mechanisms is not within the scope of + this specification. + + Messages are divided into lines of characters. A line is a series of + characters that is delimited with the two characters carriage-return + and line-feed; that is, the carriage return (CR) character (ASCII + value 13) followed immediately by the line feed (LF) character (ASCII + value 10). (The carriage return/line feed pair is usually written in + this document as "CRLF".) + + + +Resnick Standards Track [Page 6] + +RFC 5322 Internet Message Format October 2008 + + + A message consists of header fields (collectively called "the header + section of the message") followed, optionally, by a body. The header + section is a sequence of lines of characters with special syntax as + defined in this specification. The body is simply a sequence of + characters that follows the header section and is separated from the + header section by an empty line (i.e., a line with nothing preceding + the CRLF). + + Note: Common parlance and earlier versions of this specification + use the term "header" to either refer to the entire header section + or to refer to an individual header field. To avoid ambiguity, + this document does not use the terms "header" or "headers" in + isolation, but instead always uses "header field" to refer to the + individual field and "header section" to refer to the entire + collection. + +2.1.1. Line Length Limits + + There are two limits that this specification places on the number of + characters in a line. Each line of characters MUST be no more than + 998 characters, and SHOULD be no more than 78 characters, excluding + the CRLF. + + The 998 character limit is due to limitations in many implementations + that send, receive, or store IMF messages which simply cannot handle + more than 998 characters on a line. Receiving implementations would + do well to handle an arbitrarily large number of characters in a line + for robustness sake. However, there are so many implementations that + (in compliance with the transport requirements of [RFC5321]) do not + accept messages containing more than 1000 characters including the CR + and LF per line, it is important for implementations not to create + such messages. + + The more conservative 78 character recommendation is to accommodate + the many implementations of user interfaces that display these + messages which may truncate, or disastrously wrap, the display of + more than 78 characters per line, in spite of the fact that such + implementations are non-conformant to the intent of this + specification (and that of [RFC5321] if they actually cause + information to be lost). Again, even though this limitation is put + on messages, it is incumbent upon implementations that display + messages to handle an arbitrarily large number of characters in a + line (certainly at least up to the 998 character limit) for the sake + of robustness. + + + + + + + +Resnick Standards Track [Page 7] + +RFC 5322 Internet Message Format October 2008 + + +2.2. Header Fields + + Header fields are lines beginning with a field name, followed by a + colon (":"), followed by a field body, and terminated by CRLF. A + field name MUST be composed of printable US-ASCII characters (i.e., + characters that have values between 33 and 126, inclusive), except + colon. A field body may be composed of printable US-ASCII characters + as well as the space (SP, ASCII value 32) and horizontal tab (HTAB, + ASCII value 9) characters (together known as the white space + characters, WSP). A field body MUST NOT include CR and LF except + when used in "folding" and "unfolding", as described in section + 2.2.3. All field bodies MUST conform to the syntax described in + sections 3 and 4 of this specification. + +2.2.1. Unstructured Header Field Bodies + + Some field bodies in this specification are defined simply as + "unstructured" (which is specified in section 3.2.5 as any printable + US-ASCII characters plus white space characters) with no further + restrictions. These are referred to as unstructured field bodies. + Semantically, unstructured field bodies are simply to be treated as a + single line of characters with no further processing (except for + "folding" and "unfolding" as described in section 2.2.3). + +2.2.2. Structured Header Field Bodies + + Some field bodies in this specification have a syntax that is more + restrictive than the unstructured field bodies described above. + These are referred to as "structured" field bodies. Structured field + bodies are sequences of specific lexical tokens as described in + sections 3 and 4 of this specification. Many of these tokens are + allowed (according to their syntax) to be introduced or end with + comments (as described in section 3.2.2) as well as the white space + characters, and those white space characters are subject to "folding" + and "unfolding" as described in section 2.2.3. Semantic analysis of + structured field bodies is given along with their syntax. + +2.2.3. Long Header Fields + + Each header field is logically a single line of characters comprising + the field name, the colon, and the field body. For convenience + however, and to deal with the 998/78 character limitations per line, + the field body portion of a header field can be split into a + multiple-line representation; this is called "folding". The general + rule is that wherever this specification allows for folding white + space (not simply WSP characters), a CRLF may be inserted before any + WSP. + + + + +Resnick Standards Track [Page 8] + +RFC 5322 Internet Message Format October 2008 + + + For example, the header field: + + Subject: This is a test + + can be represented as: + + Subject: This + is a test + + Note: Though structured field bodies are defined in such a way + that folding can take place between many of the lexical tokens + (and even within some of the lexical tokens), folding SHOULD be + limited to placing the CRLF at higher-level syntactic breaks. For + instance, if a field body is defined as comma-separated values, it + is recommended that folding occur after the comma separating the + structured items in preference to other places where the field + could be folded, even if it is allowed elsewhere. + + The process of moving from this folded multiple-line representation + of a header field to its single line representation is called + "unfolding". Unfolding is accomplished by simply removing any CRLF + that is immediately followed by WSP. Each header field should be + treated in its unfolded form for further syntactic and semantic + evaluation. An unfolded header field has no length restriction and + therefore may be indeterminately long. + +2.3. Body + + The body of a message is simply lines of US-ASCII characters. The + only two limitations on the body are as follows: + + o CR and LF MUST only occur together as CRLF; they MUST NOT appear + independently in the body. + o Lines of characters in the body MUST be limited to 998 characters, + and SHOULD be limited to 78 characters, excluding the CRLF. + + Note: As was stated earlier, there are other documents, + specifically the MIME documents ([RFC2045], [RFC2046], [RFC2049], + [RFC4288], [RFC4289]), that extend (and limit) this specification + to allow for different sorts of message bodies. Again, these + mechanisms are beyond the scope of this document. + + + + + + + + + + +Resnick Standards Track [Page 9] + +RFC 5322 Internet Message Format October 2008 + + +3. Syntax + +3.1. Introduction + + The syntax as given in this section defines the legal syntax of + Internet messages. Messages that are conformant to this + specification MUST conform to the syntax in this section. If there + are options in this section where one option SHOULD be generated, + that is indicated either in the prose or in a comment next to the + syntax. + + For the defined expressions, a short description of the syntax and + use is given, followed by the syntax in ABNF, followed by a semantic + analysis. The following primitive tokens that are used but otherwise + unspecified are taken from the "Core Rules" of [RFC5234], Appendix + B.1: CR, LF, CRLF, HTAB, SP, WSP, DQUOTE, DIGIT, ALPHA, and VCHAR. + + In some of the definitions, there will be non-terminals whose names + start with "obs-". These "obs-" elements refer to tokens defined in + the obsolete syntax in section 4. In all cases, these productions + are to be ignored for the purposes of generating legal Internet + messages and MUST NOT be used as part of such a message. However, + when interpreting messages, these tokens MUST be honored as part of + the legal syntax. In this sense, section 3 defines a grammar for the + generation of messages, with "obs-" elements that are to be ignored, + while section 4 adds grammar for the interpretation of messages. + +3.2. Lexical Tokens + + The following rules are used to define an underlying lexical + analyzer, which feeds tokens to the higher-level parsers. This + section defines the tokens used in structured header field bodies. + + Note: Readers of this specification need to pay special attention + to how these lexical tokens are used in both the lower-level and + higher-level syntax later in the document. Particularly, the + white space tokens and the comment tokens defined in section 3.2.2 + get used in the lower-level tokens defined here, and those lower- + level tokens are in turn used as parts of the higher-level tokens + defined later. Therefore, white space and comments may be allowed + in the higher-level tokens even though they may not explicitly + appear in a particular definition. + +3.2.1. Quoted characters + + Some characters are reserved for special interpretation, such as + delimiting lexical tokens. To permit use of these characters as + uninterpreted data, a quoting mechanism is provided. + + + +Resnick Standards Track [Page 10] + +RFC 5322 Internet Message Format October 2008 + + + quoted-pair = ("\" (VCHAR / WSP)) / obs-qp + + Where any quoted-pair appears, it is to be interpreted as the + character alone. That is to say, the "\" character that appears as + part of a quoted-pair is semantically "invisible". + + Note: The "\" character may appear in a message where it is not + part of a quoted-pair. A "\" character that does not appear in a + quoted-pair is not semantically invisible. The only places in + this specification where quoted-pair currently appears are + ccontent, qcontent, and in obs-dtext in section 4. + +3.2.2. Folding White Space and Comments + + White space characters, including white space used in folding + (described in section 2.2.3), may appear between many elements in + header field bodies. Also, strings of characters that are treated as + comments may be included in structured field bodies as characters + enclosed in parentheses. The following defines the folding white + space (FWS) and comment constructs. + + Strings of characters enclosed in parentheses are considered comments + so long as they do not appear within a "quoted-string", as defined in + section 3.2.4. Comments may nest. + + There are several places in this specification where comments and FWS + may be freely inserted. To accommodate that syntax, an additional + token for "CFWS" is defined for places where comments and/or FWS can + occur. However, where CFWS occurs in this specification, it MUST NOT + be inserted in such a way that any line of a folded header field is + made up entirely of WSP characters and nothing else. + + FWS = ([*WSP CRLF] 1*WSP) / obs-FWS + ; Folding white space + + ctext = %d33-39 / ; Printable US-ASCII + %d42-91 / ; characters not including + %d93-126 / ; "(", ")", or "\" + obs-ctext + + ccontent = ctext / quoted-pair / comment + + comment = "(" *([FWS] ccontent) [FWS] ")" + + CFWS = (1*([FWS] comment) [FWS]) / FWS + + + + + + +Resnick Standards Track [Page 11] + +RFC 5322 Internet Message Format October 2008 + + + Throughout this specification, where FWS (the folding white space + token) appears, it indicates a place where folding, as discussed in + section 2.2.3, may take place. Wherever folding appears in a message + (that is, a header field body containing a CRLF followed by any WSP), + unfolding (removal of the CRLF) is performed before any further + semantic analysis is performed on that header field according to this + specification. That is to say, any CRLF that appears in FWS is + semantically "invisible". + + A comment is normally used in a structured field body to provide some + human-readable informational text. Since a comment is allowed to + contain FWS, folding is permitted within the comment. Also note that + since quoted-pair is allowed in a comment, the parentheses and + backslash characters may appear in a comment, so long as they appear + as a quoted-pair. Semantically, the enclosing parentheses are not + part of the comment; the comment is what is contained between the two + parentheses. As stated earlier, the "\" in any quoted-pair and the + CRLF in any FWS that appears within the comment are semantically + "invisible" and therefore not part of the comment either. + + Runs of FWS, comment, or CFWS that occur between lexical tokens in a + structured header field are semantically interpreted as a single + space character. + +3.2.3. Atom + + Several productions in structured header field bodies are simply + strings of certain basic characters. Such productions are called + atoms. + + Some of the structured header field bodies also allow the period + character (".", ASCII value 46) within runs of atext. An additional + "dot-atom" token is defined for those purposes. + + Note: The "specials" token does not appear anywhere else in this + specification. It is simply the visible (i.e., non-control, non- + white space) characters that do not appear in atext. It is + provided only because it is useful for implementers who use tools + that lexically analyze messages. Each of the characters in + specials can be used to indicate a tokenization point in lexical + analysis. + + + + + + + + + + +Resnick Standards Track [Page 12] + +RFC 5322 Internet Message Format October 2008 + + + atext = ALPHA / DIGIT / ; Printable US-ASCII + "!" / "#" / ; characters not including + "$" / "%" / ; specials. Used for atoms. + "&" / "'" / + "*" / "+" / + "-" / "/" / + "=" / "?" / + "^" / "_" / + "`" / "{" / + "|" / "}" / + "~" + + atom = [CFWS] 1*atext [CFWS] + + dot-atom-text = 1*atext *("." 1*atext) + + dot-atom = [CFWS] dot-atom-text [CFWS] + + specials = "(" / ")" / ; Special characters that do + "<" / ">" / ; not appear in atext + "[" / "]" / + ":" / ";" / + "@" / "\" / + "," / "." / + DQUOTE + + Both atom and dot-atom are interpreted as a single unit, comprising + the string of characters that make it up. Semantically, the optional + comments and FWS surrounding the rest of the characters are not part + of the atom; the atom is only the run of atext characters in an atom, + or the atext and "." characters in a dot-atom. + +3.2.4. Quoted Strings + + Strings of characters that include characters other than those + allowed in atoms can be represented in a quoted string format, where + the characters are surrounded by quote (DQUOTE, ASCII value 34) + characters. + + + + + + + + + + + + + +Resnick Standards Track [Page 13] + +RFC 5322 Internet Message Format October 2008 + + + qtext = %d33 / ; Printable US-ASCII + %d35-91 / ; characters not including + %d93-126 / ; "\" or the quote character + obs-qtext + + qcontent = qtext / quoted-pair + + quoted-string = [CFWS] + DQUOTE *([FWS] qcontent) [FWS] DQUOTE + [CFWS] + + A quoted-string is treated as a unit. That is, quoted-string is + identical to atom, semantically. Since a quoted-string is allowed to + contain FWS, folding is permitted. Also note that since quoted-pair + is allowed in a quoted-string, the quote and backslash characters may + appear in a quoted-string so long as they appear as a quoted-pair. + + Semantically, neither the optional CFWS outside of the quote + characters nor the quote characters themselves are part of the + quoted-string; the quoted-string is what is contained between the two + quote characters. As stated earlier, the "\" in any quoted-pair and + the CRLF in any FWS/CFWS that appears within the quoted-string are + semantically "invisible" and therefore not part of the quoted-string + either. + +3.2.5. Miscellaneous Tokens + + Three additional tokens are defined: word and phrase for combinations + of atoms and/or quoted-strings, and unstructured for use in + unstructured header fields and in some places within structured + header fields. + + word = atom / quoted-string + + phrase = 1*word / obs-phrase + + unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct + +3.3. Date and Time Specification + + Date and time values occur in several header fields. This section + specifies the syntax for a full date and time specification. Though + folding white space is permitted throughout the date-time + specification, it is RECOMMENDED that a single space be used in each + place that FWS appears (whether it is required or optional); some + older implementations will not interpret longer sequences of folding + white space correctly. + + + + +Resnick Standards Track [Page 14] + +RFC 5322 Internet Message Format October 2008 + + + date-time = [ day-of-week "," ] date time [CFWS] + + day-of-week = ([FWS] day-name) / obs-day-of-week + + day-name = "Mon" / "Tue" / "Wed" / "Thu" / + "Fri" / "Sat" / "Sun" + + date = day month year + + day = ([FWS] 1*2DIGIT FWS) / obs-day + + month = "Jan" / "Feb" / "Mar" / "Apr" / + "May" / "Jun" / "Jul" / "Aug" / + "Sep" / "Oct" / "Nov" / "Dec" + + year = (FWS 4*DIGIT FWS) / obs-year + + time = time-of-day zone + + time-of-day = hour ":" minute [ ":" second ] + + hour = 2DIGIT / obs-hour + + minute = 2DIGIT / obs-minute + + second = 2DIGIT / obs-second + + zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone + + The day is the numeric day of the month. The year is any numeric + year 1900 or later. + + The time-of-day specifies the number of hours, minutes, and + optionally seconds since midnight of the date indicated. + + The date and time-of-day SHOULD express local time. + + The zone specifies the offset from Coordinated Universal Time (UTC, + formerly referred to as "Greenwich Mean Time") that the date and + time-of-day represent. The "+" or "-" indicates whether the time-of- + day is ahead of (i.e., east of) or behind (i.e., west of) Universal + Time. The first two digits indicate the number of hours difference + from Universal Time, and the last two digits indicate the number of + additional minutes difference from Universal Time. (Hence, +hhmm + means +(hh * 60 + mm) minutes, and -hhmm means -(hh * 60 + mm) + minutes). The form "+0000" SHOULD be used to indicate a time zone at + Universal Time. Though "-0000" also indicates Universal Time, it is + + + + +Resnick Standards Track [Page 15] + +RFC 5322 Internet Message Format October 2008 + + + used to indicate that the time was generated on a system that may be + in a local time zone other than Universal Time and that the date-time + contains no information about the local time zone. + + A date-time specification MUST be semantically valid. That is, the + day-of-week (if included) MUST be the day implied by the date, the + numeric day-of-month MUST be between 1 and the number of days allowed + for the specified month (in the specified year), the time-of-day MUST + be in the range 00:00:00 through 23:59:60 (the number of seconds + allowing for a leap second; see [RFC1305]), and the last two digits + of the zone MUST be within the range 00 through 59. + +3.4. Address Specification + + Addresses occur in several message header fields to indicate senders + and recipients of messages. An address may either be an individual + mailbox, or a group of mailboxes. + + address = mailbox / group + + mailbox = name-addr / addr-spec + + name-addr = [display-name] angle-addr + + angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / + obs-angle-addr + + group = display-name ":" [group-list] ";" [CFWS] + + display-name = phrase + + mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list + + address-list = (address *("," address)) / obs-addr-list + + group-list = mailbox-list / CFWS / obs-group-list + + A mailbox receives mail. It is a conceptual entity that does not + necessarily pertain to file storage. For example, some sites may + choose to print mail on a printer and deliver the output to the + addressee's desk. + + Normally, a mailbox is composed of two parts: (1) an optional display + name that indicates the name of the recipient (which can be a person + or a system) that could be displayed to the user of a mail + application, and (2) an addr-spec address enclosed in angle brackets + + + + + +Resnick Standards Track [Page 16] + +RFC 5322 Internet Message Format October 2008 + + + ("<" and ">"). There is an alternate simple form of a mailbox where + the addr-spec address appears alone, without the recipient's name or + the angle brackets. The Internet addr-spec address is described in + section 3.4.1. + + Note: Some legacy implementations used the simple form where the + addr-spec appears without the angle brackets, but included the + name of the recipient in parentheses as a comment following the + addr-spec. Since the meaning of the information in a comment is + unspecified, implementations SHOULD use the full name-addr form of + the mailbox, instead of the legacy form, to specify the display + name associated with a mailbox. Also, because some legacy + implementations interpret the comment, comments generally SHOULD + NOT be used in address fields to avoid confusing such + implementations. + + When it is desirable to treat several mailboxes as a single unit + (i.e., in a distribution list), the group construct can be used. The + group construct allows the sender to indicate a named group of + recipients. This is done by giving a display name for the group, + followed by a colon, followed by a comma-separated list of any number + of mailboxes (including zero and one), and ending with a semicolon. + Because the list of mailboxes can be empty, using the group construct + is also a simple way to communicate to recipients that the message + was sent to one or more named sets of recipients, without actually + providing the individual mailbox address for any of those recipients. + +3.4.1. Addr-Spec Specification + + An addr-spec is a specific Internet identifier that contains a + locally interpreted string followed by the at-sign character ("@", + ASCII value 64) followed by an Internet domain. The locally + interpreted string is either a quoted-string or a dot-atom. If the + string can be represented as a dot-atom (that is, it contains no + characters other than atext characters or "." surrounded by atext + characters), then the dot-atom form SHOULD be used and the quoted- + string form SHOULD NOT be used. Comments and folding white space + SHOULD NOT be used around the "@" in the addr-spec. + + Note: A liberal syntax for the domain portion of addr-spec is + given here. However, the domain portion contains addressing + information specified by and used in other protocols (e.g., + [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore + incumbent upon implementations to conform to the syntax of + addresses for the context in which they are used. + + + + + + +Resnick Standards Track [Page 17] + +RFC 5322 Internet Message Format October 2008 + + + addr-spec = local-part "@" domain + + local-part = dot-atom / quoted-string / obs-local-part + + domain = dot-atom / domain-literal / obs-domain + + domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] + + dtext = %d33-90 / ; Printable US-ASCII + %d94-126 / ; characters not including + obs-dtext ; "[", "]", or "\" + + The domain portion identifies the point to which the mail is + delivered. In the dot-atom form, this is interpreted as an Internet + domain name (either a host name or a mail exchanger name) as + described in [RFC1034], [RFC1035], and [RFC1123]. In the domain- + literal form, the domain is interpreted as the literal Internet + address of the particular host. In both cases, how addressing is + used and how messages are transported to a particular host is covered + in separate documents, such as [RFC5321]. These mechanisms are + outside of the scope of this document. + + The local-part portion is a domain-dependent string. In addresses, + it is simply interpreted on the particular host as a name of a + particular mailbox. + +3.5. Overall Message Syntax + + A message consists of header fields, optionally followed by a message + body. Lines in a message MUST be a maximum of 998 characters + excluding the CRLF, but it is RECOMMENDED that lines be limited to 78 + characters excluding the CRLF. (See section 2.1.1 for explanation.) + In a message body, though all of the characters listed in the text + rule MAY be used, the use of US-ASCII control characters (values 1 + through 8, 11, 12, and 14 through 31) is discouraged since their + interpretation by receivers for display is not guaranteed. + + message = (fields / obs-fields) + [CRLF body] + + body = (*(*998text CRLF) *998text) / obs-body + + text = %d1-9 / ; Characters excluding CR + %d11 / ; and LF + %d12 / + %d14-127 + + + + + +Resnick Standards Track [Page 18] + +RFC 5322 Internet Message Format October 2008 + + + The header fields carry most of the semantic information and are + defined in section 3.6. The body is simply a series of lines of text + that are uninterpreted for the purposes of this specification. + +3.6. Field Definitions + + The header fields of a message are defined here. All header fields + have the same general syntactic structure: a field name, followed by + a colon, followed by the field body. The specific syntax for each + header field is defined in the subsequent sections. + + Note: In the ABNF syntax for each field in subsequent sections, + each field name is followed by the required colon. However, for + brevity, sometimes the colon is not referred to in the textual + description of the syntax. It is, nonetheless, required. + + It is important to note that the header fields are not guaranteed to + be in a particular order. They may appear in any order, and they + have been known to be reordered occasionally when transported over + the Internet. However, for the purposes of this specification, + header fields SHOULD NOT be reordered when a message is transported + or transformed. More importantly, the trace header fields and resent + header fields MUST NOT be reordered, and SHOULD be kept in blocks + prepended to the message. See sections 3.6.6 and 3.6.7 for more + information. + + The only required header fields are the origination date field and + the originator address field(s). All other header fields are + syntactically optional. More information is contained in the table + following this definition. + + + + + + + + + + + + + + + + + + + + + +Resnick Standards Track [Page 19] + +RFC 5322 Internet Message Format October 2008 + + + fields = *(trace + *optional-field / + *(resent-date / + resent-from / + resent-sender / + resent-to / + resent-cc / + resent-bcc / + resent-msg-id)) + *(orig-date / + from / + sender / + reply-to / + to / + cc / + bcc / + message-id / + in-reply-to / + references / + subject / + comments / + keywords / + optional-field) + + The following table indicates limits on the number of times each + field may occur in the header section of a message as well as any + special limitations on the use of those fields. An asterisk ("*") + next to a value in the minimum or maximum column indicates that a + special restriction appears in the Notes column. + + + + + + + + + + + + + + + + + + + + + + +Resnick Standards Track [Page 20] + +RFC 5322 Internet Message Format October 2008 + + + +----------------+--------+------------+----------------------------+ + | Field | Min | Max number | Notes | + | | number | | | + +----------------+--------+------------+----------------------------+ + | trace | 0 | unlimited | Block prepended - see | + | | | | 3.6.7 | + | resent-date | 0* | unlimited* | One per block, required if | + | | | | other resent fields are | + | | | | present - see 3.6.6 | + | resent-from | 0 | unlimited* | One per block - see 3.6.6 | + | resent-sender | 0* | unlimited* | One per block, MUST occur | + | | | | with multi-address | + | | | | resent-from - see 3.6.6 | + | resent-to | 0 | unlimited* | One per block - see 3.6.6 | + | resent-cc | 0 | unlimited* | One per block - see 3.6.6 | + | resent-bcc | 0 | unlimited* | One per block - see 3.6.6 | + | resent-msg-id | 0 | unlimited* | One per block - see 3.6.6 | + | orig-date | 1 | 1 | | + | from | 1 | 1 | See sender and 3.6.2 | + | sender | 0* | 1 | MUST occur with | + | | | | multi-address from - see | + | | | | 3.6.2 | + | reply-to | 0 | 1 | | + | to | 0 | 1 | | + | cc | 0 | 1 | | + | bcc | 0 | 1 | | + | message-id | 0* | 1 | SHOULD be present - see | + | | | | 3.6.4 | + | in-reply-to | 0* | 1 | SHOULD occur in some | + | | | | replies - see 3.6.4 | + | references | 0* | 1 | SHOULD occur in some | + | | | | replies - see 3.6.4 | + | subject | 0 | 1 | | + | comments | 0 | unlimited | | + | keywords | 0 | unlimited | | + | optional-field | 0 | unlimited | | + +----------------+--------+------------+----------------------------+ + + The exact interpretation of each field is described in subsequent + sections. + + + + + + + + + + + +Resnick Standards Track [Page 21] + +RFC 5322 Internet Message Format October 2008 + + +3.6.1. The Origination Date Field + + The origination date field consists of the field name "Date" followed + by a date-time specification. + + orig-date = "Date:" date-time CRLF + + The origination date specifies the date and time at which the creator + of the message indicated that the message was complete and ready to + enter the mail delivery system. For instance, this might be the time + that a user pushes the "send" or "submit" button in an application + program. In any case, it is specifically not intended to convey the + time that the message is actually transported, but rather the time at + which the human or other creator of the message has put the message + into its final form, ready for transport. (For example, a portable + computer user who is not connected to a network might queue a message + for delivery. The origination date is intended to contain the date + and time that the user queued the message, not the time when the user + connected to the network to send the message.) + +3.6.2. Originator Fields + + The originator fields of a message consist of the from field, the + sender field (when applicable), and optionally the reply-to field. + The from field consists of the field name "From" and a comma- + separated list of one or more mailbox specifications. If the from + field contains more than one mailbox specification in the mailbox- + list, then the sender field, containing the field name "Sender" and a + single mailbox specification, MUST appear in the message. In either + case, an optional reply-to field MAY also be included, which contains + the field name "Reply-To" and a comma-separated list of one or more + addresses. + + from = "From:" mailbox-list CRLF + + sender = "Sender:" mailbox CRLF + + reply-to = "Reply-To:" address-list CRLF + + The originator fields indicate the mailbox(es) of the source of the + message. The "From:" field specifies the author(s) of the message, + that is, the mailbox(es) of the person(s) or system(s) responsible + for the writing of the message. The "Sender:" field specifies the + mailbox of the agent responsible for the actual transmission of the + message. For example, if a secretary were to send a message for + another person, the mailbox of the secretary would appear in the + "Sender:" field and the mailbox of the actual author would appear in + the "From:" field. If the originator of the message can be indicated + + + +Resnick Standards Track [Page 22] + +RFC 5322 Internet Message Format October 2008 + + + by a single mailbox and the author and transmitter are identical, the + "Sender:" field SHOULD NOT be used. Otherwise, both fields SHOULD + appear. + + Note: The transmitter information is always present. The absence + of the "Sender:" field is sometimes mistakenly taken to mean that + the agent responsible for transmission of the message has not been + specified. This absence merely means that the transmitter is + identical to the author and is therefore not redundantly placed + into the "Sender:" field. + + The originator fields also provide the information required when + replying to a message. When the "Reply-To:" field is present, it + indicates the address(es) to which the author of the message suggests + that replies be sent. In the absence of the "Reply-To:" field, + replies SHOULD by default be sent to the mailbox(es) specified in the + "From:" field unless otherwise specified by the person composing the + reply. + + In all cases, the "From:" field SHOULD NOT contain any mailbox that + does not belong to the author(s) of the message. See also section + 3.6.3 for more information on forming the destination addresses for a + reply. + +3.6.3. Destination Address Fields + + The destination fields of a message consist of three possible fields, + each of the same form: the field name, which is either "To", "Cc", or + "Bcc", followed by a comma-separated list of one or more addresses + (either mailbox or group syntax). + + to = "To:" address-list CRLF + + cc = "Cc:" address-list CRLF + + bcc = "Bcc:" [address-list / CFWS] CRLF + + The destination fields specify the recipients of the message. Each + destination field may have one or more addresses, and the addresses + indicate the intended recipients of the message. The only difference + between the three fields is how each is used. + + The "To:" field contains the address(es) of the primary recipient(s) + of the message. + + + + + + + +Resnick Standards Track [Page 23] + +RFC 5322 Internet Message Format October 2008 + + + The "Cc:" field (where the "Cc" means "Carbon Copy" in the sense of + making a copy on a typewriter using carbon paper) contains the + addresses of others who are to receive the message, though the + content of the message may not be directed at them. + + The "Bcc:" field (where the "Bcc" means "Blind Carbon Copy") contains + addresses of recipients of the message whose addresses are not to be + revealed to other recipients of the message. There are three ways in + which the "Bcc:" field is used. In the first case, when a message + containing a "Bcc:" field is prepared to be sent, the "Bcc:" line is + removed even though all of the recipients (including those specified + in the "Bcc:" field) are sent a copy of the message. In the second + case, recipients specified in the "To:" and "Cc:" lines each are sent + a copy of the message with the "Bcc:" line removed as above, but the + recipients on the "Bcc:" line get a separate copy of the message + containing a "Bcc:" line. (When there are multiple recipient + addresses in the "Bcc:" field, some implementations actually send a + separate copy of the message to each recipient with a "Bcc:" + containing only the address of that particular recipient.) Finally, + since a "Bcc:" field may contain no addresses, a "Bcc:" field can be + sent without any addresses indicating to the recipients that blind + copies were sent to someone. Which method to use with "Bcc:" fields + is implementation dependent, but refer to the "Security + Considerations" section of this document for a discussion of each. + + When a message is a reply to another message, the mailboxes of the + authors of the original message (the mailboxes in the "From:" field) + or mailboxes specified in the "Reply-To:" field (if it exists) MAY + appear in the "To:" field of the reply since these would normally be + the primary recipients of the reply. If a reply is sent to a message + that has destination fields, it is often desirable to send a copy of + the reply to all of the recipients of the message, in addition to the + author. When such a reply is formed, addresses in the "To:" and + "Cc:" fields of the original message MAY appear in the "Cc:" field of + the reply, since these are normally secondary recipients of the + reply. If a "Bcc:" field is present in the original message, + addresses in that field MAY appear in the "Bcc:" field of the reply, + but they SHOULD NOT appear in the "To:" or "Cc:" fields. + + Note: Some mail applications have automatic reply commands that + include the destination addresses of the original message in the + destination addresses of the reply. How those reply commands + behave is implementation dependent and is beyond the scope of this + document. In particular, whether or not to include the original + destination addresses when the original message had a "Reply-To:" + field is not addressed here. + + + + + +Resnick Standards Track [Page 24] + +RFC 5322 Internet Message Format October 2008 + + +3.6.4. Identification Fields + + Though listed as optional in the table in section 3.6, every message + SHOULD have a "Message-ID:" field. Furthermore, reply messages + SHOULD have "In-Reply-To:" and "References:" fields as appropriate + and as described below. + + The "Message-ID:" field contains a single unique message identifier. + The "References:" and "In-Reply-To:" fields each contain one or more + unique message identifiers, optionally separated by CFWS. + + The message identifier (msg-id) syntax is a limited version of the + addr-spec construct enclosed in the angle bracket characters, "<" and + ">". Unlike addr-spec, this syntax only permits the dot-atom-text + form on the left-hand side of the "@" and does not have internal CFWS + anywhere in the message identifier. + + Note: As with addr-spec, a liberal syntax is given for the right- + hand side of the "@" in a msg-id. However, later in this section, + the use of a domain for the right-hand side of the "@" is + RECOMMENDED. Again, the syntax of domain constructs is specified + by and used in other protocols (e.g., [RFC1034], [RFC1035], + [RFC1123], [RFC5321]). It is therefore incumbent upon + implementations to conform to the syntax of addresses for the + context in which they are used. + + message-id = "Message-ID:" msg-id CRLF + + in-reply-to = "In-Reply-To:" 1*msg-id CRLF + + references = "References:" 1*msg-id CRLF + + msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] + + id-left = dot-atom-text / obs-id-left + + id-right = dot-atom-text / no-fold-literal / obs-id-right + + no-fold-literal = "[" *dtext "]" + + The "Message-ID:" field provides a unique message identifier that + refers to a particular version of a particular message. The + uniqueness of the message identifier is guaranteed by the host that + generates it (see below). This message identifier is intended to be + machine readable and not necessarily meaningful to humans. A message + identifier pertains to exactly one version of a particular message; + subsequent revisions to the message each receive new message + identifiers. + + + +Resnick Standards Track [Page 25] + +RFC 5322 Internet Message Format October 2008 + + + Note: There are many instances when messages are "changed", but + those changes do not constitute a new instantiation of that + message, and therefore the message would not get a new message + identifier. For example, when messages are introduced into the + transport system, they are often prepended with additional header + fields such as trace fields (described in section 3.6.7) and + resent fields (described in section 3.6.6). The addition of such + header fields does not change the identity of the message and + therefore the original "Message-ID:" field is retained. In all + cases, it is the meaning that the sender of the message wishes to + convey (i.e., whether this is the same message or a different + message) that determines whether or not the "Message-ID:" field + changes, not any particular syntactic difference that appears (or + does not appear) in the message. + + The "In-Reply-To:" and "References:" fields are used when creating a + reply to a message. They hold the message identifier of the original + message and the message identifiers of other messages (for example, + in the case of a reply to a message that was itself a reply). The + "In-Reply-To:" field may be used to identify the message (or + messages) to which the new message is a reply, while the + "References:" field may be used to identify a "thread" of + conversation. + + When creating a reply to a message, the "In-Reply-To:" and + "References:" fields of the resultant message are constructed as + follows: + + The "In-Reply-To:" field will contain the contents of the + "Message-ID:" field of the message to which this one is a reply (the + "parent message"). If there is more than one parent message, then + the "In-Reply-To:" field will contain the contents of all of the + parents' "Message-ID:" fields. If there is no "Message-ID:" field in + any of the parent messages, then the new message will have no "In- + Reply-To:" field. + + The "References:" field will contain the contents of the parent's + "References:" field (if any) followed by the contents of the parent's + "Message-ID:" field (if any). If the parent message does not contain + a "References:" field but does have an "In-Reply-To:" field + containing a single message identifier, then the "References:" field + will contain the contents of the parent's "In-Reply-To:" field + followed by the contents of the parent's "Message-ID:" field (if + any). If the parent has none of the "References:", "In-Reply-To:", + or "Message-ID:" fields, then the new message will have no + "References:" field. + + + + + +Resnick Standards Track [Page 26] + +RFC 5322 Internet Message Format October 2008 + + + Note: Some implementations parse the "References:" field to + display the "thread of the discussion". These implementations + assume that each new message is a reply to a single parent and + hence that they can walk backwards through the "References:" field + to find the parent of each message listed there. Therefore, + trying to form a "References:" field for a reply that has multiple + parents is discouraged; how to do so is not defined in this + document. + + The message identifier (msg-id) itself MUST be a globally unique + identifier for a message. The generator of the message identifier + MUST guarantee that the msg-id is unique. There are several + algorithms that can be used to accomplish this. Since the msg-id has + a similar syntax to addr-spec (identical except that quoted strings, + comments, and folding white space are not allowed), a good method is + to put the domain name (or a domain literal IP address) of the host + on which the message identifier was created on the right-hand side of + the "@" (since domain names and IP addresses are normally unique), + and put a combination of the current absolute date and time along + with some other currently unique (perhaps sequential) identifier + available on the system (for example, a process id number) on the + left-hand side. Though other algorithms will work, it is RECOMMENDED + that the right-hand side contain some domain identifier (either of + the host itself or otherwise) such that the generator of the message + identifier can guarantee the uniqueness of the left-hand side within + the scope of that domain. + + Semantically, the angle bracket characters are not part of the + msg-id; the msg-id is what is contained between the two angle bracket + characters. + +3.6.5. Informational Fields + + The informational fields are all optional. The "Subject:" and + "Comments:" fields are unstructured fields as defined in section + 2.2.1, and therefore may contain text or folding white space. The + "Keywords:" field contains a comma-separated list of one or more + words or quoted-strings. + + subject = "Subject:" unstructured CRLF + + comments = "Comments:" unstructured CRLF + + keywords = "Keywords:" phrase *("," phrase) CRLF + + These three fields are intended to have only human-readable content + with information about the message. The "Subject:" field is the most + common and contains a short string identifying the topic of the + + + +Resnick Standards Track [Page 27] + +RFC 5322 Internet Message Format October 2008 + + + message. When used in a reply, the field body MAY start with the + string "Re: " (an abbreviation of the Latin "in re", meaning "in the + matter of") followed by the contents of the "Subject:" field body of + the original message. If this is done, only one instance of the + literal string "Re: " ought to be used since use of other strings or + more than one instance can lead to undesirable consequences. The + "Comments:" field contains any additional comments on the text of the + body of the message. The "Keywords:" field contains a comma- + separated list of important words and phrases that might be useful + for the recipient. + +3.6.6. Resent Fields + + Resent fields SHOULD be added to any message that is reintroduced by + a user into the transport system. A separate set of resent fields + SHOULD be added each time this is done. All of the resent fields + corresponding to a particular resending of the message SHOULD be + grouped together. Each new set of resent fields is prepended to the + message; that is, the most recent set of resent fields appears + earlier in the message. No other fields in the message are changed + when resent fields are added. + + Each of the resent fields corresponds to a particular field elsewhere + in the syntax. For instance, the "Resent-Date:" field corresponds to + the "Date:" field and the "Resent-To:" field corresponds to the "To:" + field. In each case, the syntax for the field body is identical to + the syntax given previously for the corresponding field. + + When resent fields are used, the "Resent-From:" and "Resent-Date:" + fields MUST be sent. The "Resent-Message-ID:" field SHOULD be sent. + "Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be + identical to "Resent-From:". + + resent-date = "Resent-Date:" date-time CRLF + + resent-from = "Resent-From:" mailbox-list CRLF + + resent-sender = "Resent-Sender:" mailbox CRLF + + resent-to = "Resent-To:" address-list CRLF + + resent-cc = "Resent-Cc:" address-list CRLF + + resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF + + resent-msg-id = "Resent-Message-ID:" msg-id CRLF + + + + + +Resnick Standards Track [Page 28] + +RFC 5322 Internet Message Format October 2008 + + + Resent fields are used to identify a message as having been + reintroduced into the transport system by a user. The purpose of + using resent fields is to have the message appear to the final + recipient as if it were sent directly by the original sender, with + all of the original fields remaining the same. Each set of resent + fields correspond to a particular resending event. That is, if a + message is resent multiple times, each set of resent fields gives + identifying information for each individual time. Resent fields are + strictly informational. They MUST NOT be used in the normal + processing of replies or other such automatic actions on messages. + + Note: Reintroducing a message into the transport system and using + resent fields is a different operation from "forwarding". + "Forwarding" has two meanings: One sense of forwarding is that a + mail reading program can be told by a user to forward a copy of a + message to another person, making the forwarded message the body + of the new message. A forwarded message in this sense does not + appear to have come from the original sender, but is an entirely + new message from the forwarder of the message. Forwarding may + also mean that a mail transport program gets a message and + forwards it on to a different destination for final delivery. + Resent header fields are not intended for use with either type of + forwarding. + + The resent originator fields indicate the mailbox of the person(s) or + system(s) that resent the message. As with the regular originator + fields, there are two forms: a simple "Resent-From:" form, which + contains the mailbox of the individual doing the resending, and the + more complex form, when one individual (identified in the "Resent- + Sender:" field) resends a message on behalf of one or more others + (identified in the "Resent-From:" field). + + Note: When replying to a resent message, replies behave just as + they would with any other message, using the original "From:", + "Reply-To:", "Message-ID:", and other fields. The resent fields + are only informational and MUST NOT be used in the normal + processing of replies. + + The "Resent-Date:" indicates the date and time at which the resent + message is dispatched by the resender of the message. Like the + "Date:" field, it is not the date and time that the message was + actually transported. + + The "Resent-To:", "Resent-Cc:", and "Resent-Bcc:" fields function + identically to the "To:", "Cc:", and "Bcc:" fields, respectively, + except that they indicate the recipients of the resent message, not + the recipients of the original message. + + + + +Resnick Standards Track [Page 29] + +RFC 5322 Internet Message Format October 2008 + + + The "Resent-Message-ID:" field provides a unique identifier for the + resent message. + +3.6.7. Trace Fields + + The trace fields are a group of header fields consisting of an + optional "Return-Path:" field, and one or more "Received:" fields. + The "Return-Path:" header field contains a pair of angle brackets + that enclose an optional addr-spec. The "Received:" field contains a + (possibly empty) list of tokens followed by a semicolon and a date- + time specification. Each token must be a word, angle-addr, addr- + spec, or a domain. Further restrictions are applied to the syntax of + the trace fields by specifications that provide for their use, such + as [RFC5321]. + + trace = [return] + 1*received + + return = "Return-Path:" path CRLF + + path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS]) + + received = "Received:" *received-token ";" date-time CRLF + + received-token = word / angle-addr / addr-spec / domain + + A full discussion of the Internet mail use of trace fields is + contained in [RFC5321]. For the purposes of this specification, the + trace fields are strictly informational, and any formal + interpretation of them is outside of the scope of this document. + +3.6.8. Optional Fields + + Fields may appear in messages that are otherwise unspecified in this + document. They MUST conform to the syntax of an optional-field. + This is a field name, made up of the printable US-ASCII characters + except SP and colon, followed by a colon, followed by any text that + conforms to the unstructured syntax. + + The field names of any optional field MUST NOT be identical to any + field name specified elsewhere in this document. + + + + + + + + + + +Resnick Standards Track [Page 30] + +RFC 5322 Internet Message Format October 2008 + + + optional-field = field-name ":" unstructured CRLF + + field-name = 1*ftext + + ftext = %d33-57 / ; Printable US-ASCII + %d59-126 ; characters not including + ; ":". + + For the purposes of this specification, any optional field is + uninterpreted. + +4. Obsolete Syntax + + Earlier versions of this specification allowed for different (usually + more liberal) syntax than is allowed in this version. Also, there + have been syntactic elements used in messages on the Internet whose + interpretations have never been documented. Though these syntactic + forms MUST NOT be generated according to the grammar in section 3, + they MUST be accepted and parsed by a conformant receiver. This + section documents many of these syntactic elements. Taking the + grammar in section 3 and adding the definitions presented in this + section will result in the grammar to use for the interpretation of + messages. + + Note: This section identifies syntactic forms that any + implementation MUST reasonably interpret. However, there are + certainly Internet messages that do not conform to even the + additional syntax given in this section. The fact that a + particular form does not appear in any section of this document is + not justification for computer programs to crash or for malformed + data to be irretrievably lost by any implementation. It is up to + the implementation to deal with messages robustly. + + One important difference between the obsolete (interpreting) and the + current (generating) syntax is that in structured header field bodies + (i.e., between the colon and the CRLF of any structured header + field), white space characters, including folding white space, and + comments could be freely inserted between any syntactic tokens. This + allowed many complex forms that have proven difficult for some + implementations to parse. + + Another key difference between the obsolete and the current syntax is + that the rule in section 3.2.2 regarding lines composed entirely of + white space in comments and folding white space does not apply. See + the discussion of folding white space in section 4.2 below. + + Finally, certain characters that were formerly allowed in messages + appear in this section. The NUL character (ASCII value 0) was once + + + +Resnick Standards Track [Page 31] + +RFC 5322 Internet Message Format October 2008 + + + allowed, but is no longer for compatibility reasons. Similarly, US- + ASCII control characters other than CR, LF, SP, and HTAB (ASCII + values 1 through 8, 11, 12, 14 through 31, and 127) were allowed to + appear in header field bodies. CR and LF were allowed to appear in + messages other than as CRLF; this use is also shown here. + + Other differences in syntax and semantics are noted in the following + sections. + +4.1. Miscellaneous Obsolete Tokens + + These syntactic elements are used elsewhere in the obsolete syntax or + in the main syntax. Bare CR, bare LF, and NUL are added to obs-qp, + obs-body, and obs-unstruct. US-ASCII control characters are added to + obs-qp, obs-unstruct, obs-ctext, and obs-qtext. The period character + is added to obs-phrase. The obs-phrase-list provides for a + (potentially empty) comma-separated list of phrases that may include + "null" elements. That is, there could be two or more commas in such + a list with nothing in between them, or commas at the beginning or + end of the list. + + Note: The "period" (or "full stop") character (".") in obs-phrase + is not a form that was allowed in earlier versions of this or any + other specification. Period (nor any other character from + specials) was not allowed in phrase because it introduced a + parsing difficulty distinguishing between phrases and portions of + an addr-spec (see section 4.4). It appears here because the + period character is currently used in many messages in the + display-name portion of addresses, especially for initials in + names, and therefore must be interpreted properly. + + obs-NO-WS-CTL = %d1-8 / ; US-ASCII control + %d11 / ; characters that do not + %d12 / ; include the carriage + %d14-31 / ; return, line feed, and + %d127 ; white space characters + + obs-ctext = obs-NO-WS-CTL + + obs-qtext = obs-NO-WS-CTL + + obs-utext = %d0 / obs-NO-WS-CTL / VCHAR + + obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) + + obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) + + obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS) + + + +Resnick Standards Track [Page 32] + +RFC 5322 Internet Message Format October 2008 + + + obs-phrase = word *(word / "." / CFWS) + + obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS]) + + Bare CR and bare LF appear in messages with two different meanings. + In many cases, bare CR or bare LF are used improperly instead of CRLF + to indicate line separators. In other cases, bare CR and bare LF are + used simply as US-ASCII control characters with their traditional + ASCII meanings. + +4.2. Obsolete Folding White Space + + In the obsolete syntax, any amount of folding white space MAY be + inserted where the obs-FWS rule is allowed. This creates the + possibility of having two consecutive "folds" in a line, and + therefore the possibility that a line which makes up a folded header + field could be composed entirely of white space. + + obs-FWS = 1*WSP *(CRLF 1*WSP) + +4.3. Obsolete Date and Time + + The syntax for the obsolete date format allows a 2 digit year in the + date field and allows for a list of alphabetic time zone specifiers + that were used in earlier versions of this specification. It also + permits comments and folding white space between many of the tokens. + + obs-day-of-week = [CFWS] day-name [CFWS] + + obs-day = [CFWS] 1*2DIGIT [CFWS] + + obs-year = [CFWS] 2*DIGIT [CFWS] + + obs-hour = [CFWS] 2DIGIT [CFWS] + + obs-minute = [CFWS] 2DIGIT [CFWS] + + obs-second = [CFWS] 2DIGIT [CFWS] + + obs-zone = "UT" / "GMT" / ; Universal Time + ; North American UT + ; offsets + "EST" / "EDT" / ; Eastern: - 5/ - 4 + "CST" / "CDT" / ; Central: - 6/ - 5 + "MST" / "MDT" / ; Mountain: - 7/ - 6 + "PST" / "PDT" / ; Pacific: - 8/ - 7 + ; + + + + +Resnick Standards Track [Page 33] + +RFC 5322 Internet Message Format October 2008 + + + %d65-73 / ; Military zones - "A" + %d75-90 / ; through "I" and "K" + %d97-105 / ; through "Z", both + %d107-122 ; upper and lower case + + Where a two or three digit year occurs in a date, the year is to be + interpreted as follows: If a two digit year is encountered whose + value is between 00 and 49, the year is interpreted by adding 2000, + ending up with a value between 2000 and 2049. If a two digit year is + encountered with a value between 50 and 99, or any three digit year + is encountered, the year is interpreted by adding 1900. + + In the obsolete time zone, "UT" and "GMT" are indications of + "Universal Time" and "Greenwich Mean Time", respectively, and are + both semantically identical to "+0000". + + The remaining three character zones are the US time zones. The first + letter, "E", "C", "M", or "P" stands for "Eastern", "Central", + "Mountain", and "Pacific". The second letter is either "S" for + "Standard" time, or "D" for "Daylight Savings" (or summer) time. + Their interpretations are as follows: + + EDT is semantically equivalent to -0400 + EST is semantically equivalent to -0500 + CDT is semantically equivalent to -0500 + CST is semantically equivalent to -0600 + MDT is semantically equivalent to -0600 + MST is semantically equivalent to -0700 + PDT is semantically equivalent to -0700 + PST is semantically equivalent to -0800 + + The 1 character military time zones were defined in a non-standard + way in [RFC0822] and are therefore unpredictable in their meaning. + The original definitions of the military zones "A" through "I" are + equivalent to "+0100" through "+0900", respectively; "K", "L", and + "M" are equivalent to "+1000", "+1100", and "+1200", respectively; + "N" through "Y" are equivalent to "-0100" through "-1200". + respectively; and "Z" is equivalent to "+0000". However, because of + the error in [RFC0822], they SHOULD all be considered equivalent to + "-0000" unless there is out-of-band information confirming their + meaning. + + Other multi-character (usually between 3 and 5) alphabetic time zones + have been used in Internet messages. Any such time zone whose + meaning is not known SHOULD be considered equivalent to "-0000" + unless there is out-of-band information confirming their meaning. + + + + + +Resnick Standards Track [Page 34] + +RFC 5322 Internet Message Format October 2008 + + +4.4. Obsolete Addressing + + There are four primary differences in addressing. First, mailbox + addresses were allowed to have a route portion before the addr-spec + when enclosed in "<" and ">". The route is simply a comma-separated + list of domain names, each preceded by "@", and the list terminated + by a colon. Second, CFWS were allowed between the period-separated + elements of local-part and domain (i.e., dot-atom was not used). In + addition, local-part is allowed to contain quoted-string in addition + to just atom. Third, mailbox-list and address-list were allowed to + have "null" members. That is, there could be two or more commas in + such a list with nothing in between them, or commas at the beginning + or end of the list. Finally, US-ASCII control characters and quoted- + pairs were allowed in domain literals and are added here. + + obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS] + + obs-route = obs-domain-list ":" + + obs-domain-list = *(CFWS / ",") "@" domain + *("," [CFWS] ["@" domain]) + + obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS]) + + obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) + + obs-group-list = 1*([CFWS] ",") [CFWS] + + obs-local-part = word *("." word) + + obs-domain = atom *("." atom) + + obs-dtext = obs-NO-WS-CTL / quoted-pair + + When interpreting addresses, the route portion SHOULD be ignored. + +4.5. Obsolete Header Fields + + Syntactically, the primary difference in the obsolete field syntax is + that it allows multiple occurrences of any of the fields and they may + occur in any order. Also, any amount of white space is allowed + before the ":" at the end of the field name. + + + + + + + + + +Resnick Standards Track [Page 35] + +RFC 5322 Internet Message Format October 2008 + + + obs-fields = *(obs-return / + obs-received / + obs-orig-date / + obs-from / + obs-sender / + obs-reply-to / + obs-to / + obs-cc / + obs-bcc / + obs-message-id / + obs-in-reply-to / + obs-references / + obs-subject / + obs-comments / + obs-keywords / + obs-resent-date / + obs-resent-from / + obs-resent-send / + obs-resent-rply / + obs-resent-to / + obs-resent-cc / + obs-resent-bcc / + obs-resent-mid / + obs-optional) + + Except for destination address fields (described in section 4.5.3), + the interpretation of multiple occurrences of fields is unspecified. + Also, the interpretation of trace fields and resent fields that do + not occur in blocks prepended to the message is unspecified as well. + Unless otherwise noted in the following sections, interpretation of + other fields is identical to the interpretation of their non-obsolete + counterparts in section 3. + +4.5.1. Obsolete Origination Date Field + + obs-orig-date = "Date" *WSP ":" date-time CRLF + +4.5.2. Obsolete Originator Fields + + obs-from = "From" *WSP ":" mailbox-list CRLF + + obs-sender = "Sender" *WSP ":" mailbox CRLF + + obs-reply-to = "Reply-To" *WSP ":" address-list CRLF + + + + + + + +Resnick Standards Track [Page 36] + +RFC 5322 Internet Message Format October 2008 + + +4.5.3. Obsolete Destination Address Fields + + obs-to = "To" *WSP ":" address-list CRLF + + obs-cc = "Cc" *WSP ":" address-list CRLF + + obs-bcc = "Bcc" *WSP ":" + (address-list / (*([CFWS] ",") [CFWS])) CRLF + + When multiple occurrences of destination address fields occur in a + message, they SHOULD be treated as if the address list in the first + occurrence of the field is combined with the address lists of the + subsequent occurrences by adding a comma and concatenating. + +4.5.4. Obsolete Identification Fields + + The obsolete "In-Reply-To:" and "References:" fields differ from the + current syntax in that they allow phrase (words or quoted strings) to + appear. The obsolete forms of the left and right sides of msg-id + allow interspersed CFWS, making them syntactically identical to + local-part and domain, respectively. + + obs-message-id = "Message-ID" *WSP ":" msg-id CRLF + + obs-in-reply-to = "In-Reply-To" *WSP ":" *(phrase / msg-id) CRLF + + obs-references = "References" *WSP ":" *(phrase / msg-id) CRLF + + obs-id-left = local-part + + obs-id-right = domain + + For purposes of interpretation, the phrases in the "In-Reply-To:" and + "References:" fields are ignored. + + Semantically, none of the optional CFWS in the local-part and the + domain is part of the obs-id-left and obs-id-right, respectively. + +4.5.5. Obsolete Informational Fields + + obs-subject = "Subject" *WSP ":" unstructured CRLF + + obs-comments = "Comments" *WSP ":" unstructured CRLF + + obs-keywords = "Keywords" *WSP ":" obs-phrase-list CRLF + + + + + + +Resnick Standards Track [Page 37] + +RFC 5322 Internet Message Format October 2008 + + +4.5.6. Obsolete Resent Fields + + The obsolete syntax adds a "Resent-Reply-To:" field, which consists + of the field name, the optional comments and folding white space, the + colon, and a comma separated list of addresses. + + obs-resent-from = "Resent-From" *WSP ":" mailbox-list CRLF + + obs-resent-send = "Resent-Sender" *WSP ":" mailbox CRLF + + obs-resent-date = "Resent-Date" *WSP ":" date-time CRLF + + obs-resent-to = "Resent-To" *WSP ":" address-list CRLF + + obs-resent-cc = "Resent-Cc" *WSP ":" address-list CRLF + + obs-resent-bcc = "Resent-Bcc" *WSP ":" + (address-list / (*([CFWS] ",") [CFWS])) CRLF + + obs-resent-mid = "Resent-Message-ID" *WSP ":" msg-id CRLF + + obs-resent-rply = "Resent-Reply-To" *WSP ":" address-list CRLF + + As with other resent fields, the "Resent-Reply-To:" field is to be + treated as trace information only. + +4.5.7. Obsolete Trace Fields + + The obs-return and obs-received are again given here as template + definitions, just as return and received are in section 3. Their + full syntax is given in [RFC5321]. + + obs-return = "Return-Path" *WSP ":" path CRLF + + obs-received = "Received" *WSP ":" *received-token CRLF + +4.5.8. Obsolete optional fields + + obs-optional = field-name *WSP ":" unstructured CRLF + +5. Security Considerations + + Care needs to be taken when displaying messages on a terminal or + terminal emulator. Powerful terminals may act on escape sequences + and other combinations of US-ASCII control characters with a variety + of consequences. They can remap the keyboard or permit other + modifications to the terminal that could lead to denial of service or + even damaged data. They can trigger (sometimes programmable) + + + +Resnick Standards Track [Page 38] + +RFC 5322 Internet Message Format October 2008 + + + answerback messages that can allow a message to cause commands to be + issued on the recipient's behalf. They can also affect the operation + of terminal attached devices such as printers. Message viewers may + wish to strip potentially dangerous terminal escape sequences from + the message prior to display. However, other escape sequences appear + in messages for useful purposes (cf. [ISO.2022.1994], [RFC2045], + [RFC2046], [RFC2047], [RFC2049], [RFC4288], [RFC4289]) and therefore + should not be stripped indiscriminately. + + Transmission of non-text objects in messages raises additional + security issues. These issues are discussed in [RFC2045], [RFC2046], + [RFC2047], [RFC2049], [RFC4288], and [RFC4289]. + + Many implementations use the "Bcc:" (blind carbon copy) field, + described in section 3.6.3, to facilitate sending messages to + recipients without revealing the addresses of one or more of the + addressees to the other recipients. Mishandling this use of "Bcc:" + may disclose confidential information that could eventually lead to + security problems through knowledge of even the existence of a + particular mail address. For example, if using the first method + described in section 3.6.3, where the "Bcc:" line is removed from the + message, blind recipients have no explicit indication that they have + been sent a blind copy, except insofar as their address does not + appear in the header section of a message. Because of this, one of + the blind addressees could potentially send a reply to all of the + shown recipients and accidentally reveal that the message went to the + blind recipient. When the second method from section 3.6.3 is used, + the blind recipient's address appears in the "Bcc:" field of a + separate copy of the message. If the "Bcc:" field sent contains all + of the blind addressees, all of the "Bcc:" recipients will be seen by + each "Bcc:" recipient. Even if a separate message is sent to each + "Bcc:" recipient with only the individual's address, implementations + still need to be careful to process replies to the message as per + section 3.6.3 so as not to accidentally reveal the blind recipient to + other recipients. + +6. IANA Considerations + + This document updates the registrations that appeared in [RFC4021] + that referred to the definitions in [RFC2822]. IANA has updated the + Permanent Message Header Field Repository with the following header + fields, in accordance with the procedures set out in [RFC3864]. + + Header field name: Date + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.1) + + + +Resnick Standards Track [Page 39] + +RFC 5322 Internet Message Format October 2008 + + + Header field name: From + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.2) + + Header field name: Sender + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.2) + + Header field name: Reply-To + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.2) + + Header field name: To + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.3) + + Header field name: Cc + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.3) + + Header field name: Bcc + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.3) + + Header field name: Message-ID + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.4) + + Header field name: In-Reply-To + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.4) + + + + +Resnick Standards Track [Page 40] + +RFC 5322 Internet Message Format October 2008 + + + Header field name: References + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.4) + + Header field name: Subject + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.5) + + Header field name: Comments + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.5) + + Header field name: Keywords + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.5) + + Header field name: Resent-Date + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.6) + + Header field name: Resent-From + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.6) + + Header field name: Resent-Sender + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.6) + + Header field name: Resent-To + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.6) + + + + +Resnick Standards Track [Page 41] + +RFC 5322 Internet Message Format October 2008 + + + Header field name: Resent-Cc + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.6) + + Header field name: Resent-Bcc + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.6) + + Header field name: Resent-Reply-To + Applicable protocol: Mail + Status: obsolete + Author/Change controller: IETF + Specification document(s): This document (section 4.5.6) + + Header field name: Resent-Message-ID + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.6) + + Header field name: Return-Path + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.7) + + Header field name: Received + Applicable protocol: Mail + Status: standard + Author/Change controller: IETF + Specification document(s): This document (section 3.6.7) + Related information: [RFC5321] + + + + + + + + + + + + + + + +Resnick Standards Track [Page 42] + +RFC 5322 Internet Message Format October 2008 + + +Appendix A. Example Messages + + This section presents a selection of messages. These are intended to + assist in the implementation of this specification, but should not be + taken as normative; that is to say, although the examples in this + section were carefully reviewed, if there happens to be a conflict + between these examples and the syntax described in sections 3 and 4 + of this document, the syntax in those sections is to be taken as + correct. + + In the text version of this document, messages in this section are + delimited between lines of "----". The "----" lines are not part of + the message itself. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Resnick Standards Track [Page 43] + +RFC 5322 Internet Message Format October 2008 + + +Appendix A.1. Addressing Examples + + The following are examples of messages that might be sent between two + individuals. + +Appendix A.1.1. A Message from One Person to Another with Simple + Addressing + + This could be called a canonical message. It has a single author, + John Doe, a single recipient, Mary Smith, a subject, the date, a + message identifier, and a textual message in the body. + + ---- + From: John Doe <jdoe@machine.example> + To: Mary Smith <mary@example.net> + Subject: Saying Hello + Date: Fri, 21 Nov 1997 09:55:06 -0600 + Message-ID: <1234@local.machine.example> + + This is a message just to say hello. + So, "Hello". + ---- + + If John's secretary Michael actually sent the message, even though + John was the author and replies to this message should go back to + him, the sender field would be used: + + ---- + From: John Doe <jdoe@machine.example> + Sender: Michael Jones <mjones@machine.example> + To: Mary Smith <mary@example.net> + Subject: Saying Hello + Date: Fri, 21 Nov 1997 09:55:06 -0600 + Message-ID: <1234@local.machine.example> + + This is a message just to say hello. + So, "Hello". + ---- + + + + + + + + + + + + + +Resnick Standards Track [Page 44] + +RFC 5322 Internet Message Format October 2008 + + +Appendix A.1.2. Different Types of Mailboxes + + This message includes multiple addresses in the destination fields + and also uses several different forms of addresses. + + ---- + From: "Joe Q. Public" <john.q.public@example.com> + To: Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test> + Cc: <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net> + Date: Tue, 1 Jul 2003 10:52:37 +0200 + Message-ID: <5678.21-Nov-1997@example.com> + + Hi everyone. + ---- + + Note that the display names for Joe Q. Public and Giant; "Big" Box + needed to be enclosed in double-quotes because the former contains + the period and the latter contains both semicolon and double-quote + characters (the double-quote characters appearing as quoted-pair + constructs). Conversely, the display name for Who? could appear + without them because the question mark is legal in an atom. Notice + also that jdoe@example.org and boss@nil.test have no display names + associated with them at all, and jdoe@example.org uses the simpler + address form without the angle brackets. + +Appendix A.1.3. Group Addresses + + ---- + From: Pete <pete@silly.example> + To: A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>; + Cc: Undisclosed recipients:; + Date: Thu, 13 Feb 1969 23:32:54 -0330 + Message-ID: <testabcd.1234@silly.example> + + Testing. + ---- + + In this message, the "To:" field has a single group recipient named + "A Group", which contains 3 addresses, and a "Cc:" field with an + empty group recipient named Undisclosed recipients. + + + + + + + + + + + +Resnick Standards Track [Page 45] + +RFC 5322 Internet Message Format October 2008 + + +Appendix A.2. Reply Messages + + The following is a series of three messages that make up a + conversation thread between John and Mary. John first sends a + message to Mary, Mary then replies to John's message, and then John + replies to Mary's reply message. + + Note especially the "Message-ID:", "References:", and "In-Reply-To:" + fields in each message. + + ---- + From: John Doe <jdoe@machine.example> + To: Mary Smith <mary@example.net> + Subject: Saying Hello + Date: Fri, 21 Nov 1997 09:55:06 -0600 + Message-ID: <1234@local.machine.example> + + This is a message just to say hello. + So, "Hello". + ---- + + When sending replies, the Subject field is often retained, though + prepended with "Re: " as described in section 3.6.5. + + ---- + From: Mary Smith <mary@example.net> + To: John Doe <jdoe@machine.example> + Reply-To: "Mary Smith: Personal Account" <smith@home.example> + Subject: Re: Saying Hello + Date: Fri, 21 Nov 1997 10:01:10 -0600 + Message-ID: <3456@example.net> + In-Reply-To: <1234@local.machine.example> + References: <1234@local.machine.example> + + This is a reply to your hello. + ---- + + Note the "Reply-To:" field in the above message. When John replies + to Mary's message above, the reply should go to the address in the + "Reply-To:" field instead of the address in the "From:" field. + + + + + + + + + + + +Resnick Standards Track [Page 46] + +RFC 5322 Internet Message Format October 2008 + + + ---- + To: "Mary Smith: Personal Account" <smith@home.example> + From: John Doe <jdoe@machine.example> + Subject: Re: Saying Hello + Date: Fri, 21 Nov 1997 11:00:00 -0600 + Message-ID: <abcd.1234@local.machine.test> + In-Reply-To: <3456@example.net> + References: <1234@local.machine.example> <3456@example.net> + + This is a reply to your reply. + ---- + +Appendix A.3. Resent Messages + + Start with the message that has been used as an example several + times: + + ---- + From: John Doe <jdoe@machine.example> + To: Mary Smith <mary@example.net> + Subject: Saying Hello + Date: Fri, 21 Nov 1997 09:55:06 -0600 + Message-ID: <1234@local.machine.example> + + This is a message just to say hello. + So, "Hello". + ---- + + Say that Mary, upon receiving this message, wishes to send a copy of + the message to Jane such that (a) the message would appear to have + come straight from John; (b) if Jane replies to the message, the + reply should go back to John; and (c) all of the original + information, like the date the message was originally sent to Mary, + the message identifier, and the original addressee, is preserved. In + this case, resent fields are prepended to the message: + + + + + + + + + + + + + + + + +Resnick Standards Track [Page 47] + +RFC 5322 Internet Message Format October 2008 + + + ---- + Resent-From: Mary Smith <mary@example.net> + Resent-To: Jane Brown <j-brown@other.example> + Resent-Date: Mon, 24 Nov 1997 14:22:01 -0800 + Resent-Message-ID: <78910@example.net> + From: John Doe <jdoe@machine.example> + To: Mary Smith <mary@example.net> + Subject: Saying Hello + Date: Fri, 21 Nov 1997 09:55:06 -0600 + Message-ID: <1234@local.machine.example> + + This is a message just to say hello. + So, "Hello". + ---- + + If Jane, in turn, wished to resend this message to another person, + she would prepend her own set of resent header fields to the above + and send that. (Note that for brevity, trace fields are not shown.) + +Appendix A.4. Messages with Trace Fields + + As messages are sent through the transport system as described in + [RFC5321], trace fields are prepended to the message. The following + is an example of what those trace fields might look like. Note that + there is some folding white space in the first one since these lines + can be long. + + ---- + Received: from x.y.test + by example.net + via TCP + with ESMTP + id ABC12345 + for <mary@example.net>; 21 Nov 1997 10:05:43 -0600 + Received: from node.example by x.y.test; 21 Nov 1997 10:01:22 -0600 + From: John Doe <jdoe@node.example> + To: Mary Smith <mary@example.net> + Subject: Saying Hello + Date: Fri, 21 Nov 1997 09:55:06 -0600 + Message-ID: <1234@local.node.example> + + This is a message just to say hello. + So, "Hello". + ---- + + + + + + + +Resnick Standards Track [Page 48] + +RFC 5322 Internet Message Format October 2008 + + +Appendix A.5. White Space, Comments, and Other Oddities + + White space, including folding white space, and comments can be + inserted between many of the tokens of fields. Taking the example + from A.1.3, white space and comments can be inserted into all of the + fields. + + ---- + From: Pete(A nice \) chap) <pete(his account)@silly.test(his host)> + To:A Group(Some people) + :Chris Jones <c@(Chris's host.)public.example>, + joe@example.org, + John <jdoe@one.test> (my dear friend); (the end of the group) + Cc:(Empty list)(start)Hidden recipients :(nobody(that I know)) ; + Date: Thu, + 13 + Feb + 1969 + 23:32 + -0330 (Newfoundland Time) + Message-ID: <testabcd.1234@silly.test> + + Testing. + ---- + + The above example is aesthetically displeasing, but perfectly legal. + Note particularly (1) the comments in the "From:" field (including + one that has a ")" character appearing as part of a quoted-pair); (2) + the white space absent after the ":" in the "To:" field as well as + the comment and folding white space after the group name, the special + character (".") in the comment in Chris Jones's address, and the + folding white space before and after "joe@example.org,"; (3) the + multiple and nested comments in the "Cc:" field as well as the + comment immediately following the ":" after "Cc"; (4) the folding + white space (but no comments except at the end) and the missing + seconds in the time of the date field; and (5) the white space before + (but not within) the identifier in the "Message-ID:" field. + + + + + + + + + + + + + + +Resnick Standards Track [Page 49] + +RFC 5322 Internet Message Format October 2008 + + +Appendix A.6. Obsoleted Forms + + The following are examples of obsolete (that is, the "MUST NOT + generate") syntactic elements described in section 4 of this + document. + +Appendix A.6.1. Obsolete Addressing + + Note in the example below the lack of quotes around Joe Q. Public, + the route that appears in the address for Mary Smith, the two commas + that appear in the "To:" field, and the spaces that appear around the + "." in the jdoe address. + + ---- + From: Joe Q. Public <john.q.public@example.com> + To: Mary Smith <@node.test:mary@example.net>, , jdoe@test . example + Date: Tue, 1 Jul 2003 10:52:37 +0200 + Message-ID: <5678.21-Nov-1997@example.com> + + Hi everyone. + ---- + +Appendix A.6.2. Obsolete Dates + + The following message uses an obsolete date format, including a non- + numeric time zone and a two digit year. Note that although the day- + of-week is missing, that is not specific to the obsolete syntax; it + is optional in the current syntax as well. + + ---- + From: John Doe <jdoe@machine.example> + To: Mary Smith <mary@example.net> + Subject: Saying Hello + Date: 21 Nov 97 09:55:06 GMT + Message-ID: <1234@local.machine.example> + + This is a message just to say hello. + So, "Hello". + ---- + + + + + + + + + + + + +Resnick Standards Track [Page 50] + +RFC 5322 Internet Message Format October 2008 + + +Appendix A.6.3. Obsolete White Space and Comments + + White space and comments can appear between many more elements than + in the current syntax. Also, folding lines that are made up entirely + of white space are legal. + + ---- + From : John Doe <jdoe@machine(comment). example> + To : Mary Smith + __ + <mary@example.net> + Subject : Saying Hello + Date : Fri, 21 Nov 1997 09(comment): 55 : 06 -0600 + Message-ID : <1234 @ local(blah) .machine .example> + + This is a message just to say hello. + So, "Hello". + ---- + + Note especially the second line of the "To:" field. It starts with + two space characters. (Note that "__" represent blank spaces.) + Therefore, it is considered part of the folding, as described in + section 4.2. Also, the comments and white space throughout + addresses, dates, and message identifiers are all part of the + obsolete syntax. + + + + + + + + + + + + + + + + + + + + + + + + + + +Resnick Standards Track [Page 51] + +RFC 5322 Internet Message Format October 2008 + + +Appendix B. Differences from Earlier Specifications + + This appendix contains a list of changes that have been made in the + Internet Message Format from earlier specifications, specifically + [RFC0822], [RFC1123], and [RFC2822]. Items marked with an asterisk + (*) below are items which appear in section 4 of this document and + therefore can no longer be generated. + + The following are the changes made from [RFC0822] and [RFC1123] to + [RFC2822] that remain in this document: + + 1. Period allowed in obsolete form of phrase. + 2. ABNF moved out of document, now in [RFC5234]. + 3. Four or more digits allowed for year. + 4. Header field ordering (and lack thereof) made explicit. + 5. Encrypted header field removed. + 6. Specifically allow and give meaning to "-0000" time zone. + 7. Folding white space is not allowed between every token. + 8. Requirement for destinations removed. + 9. Forwarding and resending redefined. + 10. Extension header fields no longer specifically called out. + 11. ASCII 0 (null) removed.* + 12. Folding continuation lines cannot contain only white space.* + 13. Free insertion of comments not allowed in date.* + 14. Non-numeric time zones not allowed.* + 15. Two digit years not allowed.* + 16. Three digit years interpreted, but not allowed for generation.* + 17. Routes in addresses not allowed.* + 18. CFWS within local-parts and domains not allowed.* + 19. Empty members of address lists not allowed.* + 20. Folding white space between field name and colon not allowed.* + 21. Comments between field name and colon not allowed. + 22. Tightened syntax of in-reply-to and references.* + 23. CFWS within msg-id not allowed.* + 24. Tightened semantics of resent fields as informational only. + 25. Resent-Reply-To not allowed.* + 26. No multiple occurrences of fields (except resent and received).* + 27. Free CR and LF not allowed.* + 28. Line length limits specified. + 29. Bcc more clearly specified. + + + + + + + + + + + +Resnick Standards Track [Page 52] + +RFC 5322 Internet Message Format October 2008 + + + The following are changes from [RFC2822]. + 1. Assorted typographical/grammatical errors fixed and + clarifications made. + 2. Changed "standard" to "document" or "specification" throughout. + 3. Made distinction between "header field" and "header section". + 4. Removed NO-WS-CTL from ctext, qtext, dtext, and unstructured.* + 5. Moved discussion of specials to the "Atom" section. Moved text + to "Overall message syntax" section. + 6. Simplified CFWS syntax. + 7. Fixed unstructured syntax. + 8. Changed date and time syntax to deal with white space in + obsolete date syntax. + 9. Removed quoted-pair from domain literals and message + identifiers.* + 10. Clarified that other specifications limit domain syntax. + 11. Simplified "Bcc:" and "Resent-Bcc:" syntax. + 12. Allowed optional-field to appear within trace information. + 13. Removed no-fold-quote from msg-id. Clarified syntax + limitations. + 14. Generalized "Received:" syntax to fix bugs and move definition + out of this document. + 15. Simplified obs-qp. Fixed and simplified obs-utext (which now + only appears in the obsolete syntax). Removed obs-text and obs- + char, adding obs-body. + 16. Fixed obsolete date syntax to allow for more (or less) comments + and white space. + 17. Fixed all obsolete list syntax (obs-domain-list, obs-mbox-list, + obs-addr-list, obs-phrase-list, and the newly added obs-group- + list). + 18. Fixed obs-reply-to syntax. + 19. Fixed obs-bcc and obs-resent-bcc to allow empty lists. + 20. Removed obs-path. + +Appendix C. Acknowledgements + + Many people contributed to this document. They included folks who + participated in the Detailed Revision and Update of Messaging + Standards (DRUMS) Working Group of the Internet Engineering Task + Force (IETF), the chair of DRUMS, the Area Directors of the IETF, and + people who simply sent their comments in via email. The editor is + deeply indebted to them all and thanks them sincerely. The below + list includes everyone who sent email concerning both this document + and [RFC2822]. Hopefully, everyone who contributed is named here: + + +--------------------+----------------------+---------------------+ + | Matti Aarnio | Tanaka Akira | Russ Allbery | + | Eric Allman | Harald Alvestrand | Ran Atkinson | + | Jos Backus | Bruce Balden | Dave Barr | + + + +Resnick Standards Track [Page 53] + +RFC 5322 Internet Message Format October 2008 + + + | Alan Barrett | John Beck | J Robert von Behren | + | Jos den Bekker | D J Bernstein | James Berriman | + | Oliver Block | Norbert Bollow | Raj Bose | + | Antony Bowesman | Scott Bradner | Randy Bush | + | Tom Byrer | Bruce Campbell | Larry Campbell | + | W J Carpenter | Michael Chapman | Richard Clayton | + | Maurizio Codogno | Jim Conklin | R Kelley Cook | + | Nathan Coulter | Steve Coya | Mark Crispin | + | Dave Crocker | Matt Curtin | Michael D'Errico | + | Cyrus Daboo | Michael D Dean | Jutta Degener | + | Mark Delany | Steve Dorner | Harold A Driscoll | + | Michael Elkins | Frank Ellerman | Robert Elz | + | Johnny Eriksson | Erik E Fair | Roger Fajman | + | Patrik Faeltstroem | Claus Andre Faerber | Barry Finkel | + | Erik Forsberg | Chuck Foster | Paul Fox | + | Klaus M Frank | Ned Freed | Jochen Friedrich | + | Randall C Gellens | Sukvinder Singh Gill | Tim Goodwin | + | Philip Guenther | Arnt Gulbrandsen | Eric A Hall | + | Tony Hansen | John Hawkinson | Philip Hazel | + | Kai Henningsen | Robert Herriot | Paul Hethmon | + | Jim Hill | Alfred Hoenes | Paul E Hoffman | + | Steve Hole | Kari Hurtta | Marco S Hyman | + | Ofer Inbar | Olle Jarnefors | Kevin Johnson | + | Sudish Joseph | Maynard Kang | Prabhat Keni | + | John C Klensin | Graham Klyne | Brad Knowles | + | Shuhei Kobayashi | Peter Koch | Dan Kohn | + | Christian Kuhtz | Anand Kumria | Steen Larsen | + | Eliot Lear | Barry Leiba | Jay Levitt | + | Bruce Lilly | Lars-Johan Liman | Charles Lindsey | + | Pete Loshin | Simon Lyall | Bill Manning | + | John Martin | Mark Martinec | Larry Masinter | + | Denis McKeon | William P McQuillan | Alexey Melnikov | + | Perry E Metzger | Steven Miller | S Moonesamy | + | Keith Moore | John Gardiner Myers | Chris Newman | + | John W Noerenberg | Eric Norman | Mike O'Dell | + | Larry Osterman | Paul Overell | Jacob Palme | + | Michael A Patton | Uzi Paz | Michael A Quinlan | + | Robert Rapplean | Eric S Raymond | Sam Roberts | + | Hugh Sasse | Bart Schaefer | Tom Scola | + | Wolfgang Segmuller | Nick Shelness | John Stanley | + | Einar Stefferud | Jeff Stephenson | Bernard Stern | + | Peter Sylvester | Mark Symons | Eric Thomas | + | Lee Thompson | Karel De Vriendt | Matthew Wall | + | Rolf Weber | Brent B Welch | Dan Wing | + | Jack De Winter | Gregory J Woodhouse | Greg A Woods | + | Kazu Yamamoto | Alain Zahm | Jamie Zawinski | + | Timothy S Zurcher | | | + +--------------------+----------------------+---------------------+ + + + +Resnick Standards Track [Page 54] + +RFC 5322 Internet Message Format October 2008 + + +7. References + +7.1. Normative References + + [ANSI.X3-4.1986] American National Standards Institute, "Coded + Character Set - 7-bit American Standard Code for + Information Interchange", ANSI X3.4, 1986. + + [RFC1034] Mockapetris, P., "Domain names - concepts and + facilities", STD 13, RFC 1034, November 1987. + + [RFC1035] Mockapetris, P., "Domain names - implementation and + specification", STD 13, RFC 1035, November 1987. + + [RFC1123] Braden, R., "Requirements for Internet Hosts - + Application and Support", STD 3, RFC 1123, + October 1989. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC5234] Crocker, D. and P. Overell, "Augmented BNF for + Syntax Specifications: ABNF", STD 68, RFC 5234, + January 2008. + +7.2. Informative References + + [RFC0822] Crocker, D., "Standard for the format of ARPA + Internet text messages", STD 11, RFC 822, + August 1982. + + [RFC1305] Mills, D., "Network Time Protocol (Version 3) + Specification, Implementation", RFC 1305, + March 1992. + + [ISO.2022.1994] International Organization for Standardization, + "Information technology - Character code structure + and extension techniques", ISO Standard 2022, 1994. + + [RFC2045] Freed, N. and N. Borenstein, "Multipurpose Internet + Mail Extensions (MIME) Part One: Format of Internet + Message Bodies", RFC 2045, November 1996. + + [RFC2046] Freed, N. and N. Borenstein, "Multipurpose Internet + Mail Extensions (MIME) Part Two: Media Types", + RFC 2046, November 1996. + + + + + +Resnick Standards Track [Page 55] + +RFC 5322 Internet Message Format October 2008 + + + [RFC2047] Moore, K., "MIME (Multipurpose Internet Mail + Extensions) Part Three: Message Header Extensions + for Non-ASCII Text", RFC 2047, November 1996. + + [RFC2049] Freed, N. and N. Borenstein, "Multipurpose Internet + Mail Extensions (MIME) Part Five: Conformance + Criteria and Examples", RFC 2049, November 1996. + + [RFC2822] Resnick, P., "Internet Message Format", RFC 2822, + April 2001. + + [RFC3864] Klyne, G., Nottingham, M., and J. Mogul, + "Registration Procedures for Message Header + Fields", BCP 90, RFC 3864, September 2004. + + [RFC4021] Klyne, G. and J. Palme, "Registration of Mail and + MIME Header Fields", RFC 4021, March 2005. + + [RFC4288] Freed, N. and J. Klensin, "Media Type + Specifications and Registration Procedures", + BCP 13, RFC 4288, December 2005. + + [RFC4289] Freed, N. and J. Klensin, "Multipurpose Internet + Mail Extensions (MIME) Part Four: Registration + Procedures", BCP 13, RFC 4289, December 2005. + + [RFC5321] Klensin, J., "Simple Mail Transfer Protocol", + RFC 5321, October 2008. + +Author's Address + + Peter W. Resnick (editor) + Qualcomm Incorporated + 5775 Morehouse Drive + San Diego, CA 92121-1714 + US + + Phone: +1 858 651 4478 + EMail: presnick@qualcomm.com + URI: http://www.qualcomm.com/~presnick/ + + + + + + + + + + + +Resnick Standards Track [Page 56] + +RFC 5322 Internet Message Format October 2008 + + +Full Copyright Statement + + Copyright (C) The IETF Trust (2008). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY, THE IETF TRUST AND + THE INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF + THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at + ietf-ipr@ietf.org. + + + + + + + + + + + + +Resnick Standards Track [Page 57] + diff --git a/doc/mbox-rfc4155.txt b/doc/mbox-rfc4155.txt @@ -0,0 +1,507 @@ + + + + + + +Network Working Group E. Hall +Request for Comments: 4155 September 2005 +Category: Informational + + + The application/mbox Media Type + +Status of This Memo + + This memo provides information for the Internet community. It does + not specify an Internet standard of any kind. Distribution of this + memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2005). + +Abstract + + This memo requests that the application/mbox media type be authorized + for allocation by the IESG, according to the terms specified in RFC + 2048. This memo also defines a default format for the mbox database, + which must be supported by all conformant implementations. + +1. Background and Overview + + UNIX-like operating systems have historically made widespread use of + "mbox" database files for a variety of local email purposes. In the + common case, mbox files store linear sequences of one or more + electronic mail messages, with local email clients treating the + database as a logical folder of email messages. mbox databases are + also used by a variety of other messaging tools, such as mailing list + management programs, archiving and filtering utilities, messaging + servers, and other related applications. In recent years, mbox + databases have also become common on a large number of non-UNIX + computing platforms, for similar kinds of purposes. + + The increased pervasiveness of these files has led to an increased + demand for a standardized, network-wide interchange of these files as + discrete database objects. In turn, this dictates a need for a + general media type definition for mbox files, which is the subject + and purpose of this memo. + + + + + + + + + +Hall Informational [Page 1] + +RFC 4155 The application/mbox Media Type September 2005 + + +2. About the mbox Database + + The mbox database format is not documented in an authoritative + specification, but instead exists as a well-known output format that + is anecdotally documented, or which is only authoritatively + documented for a specific platform or tool. + + mbox databases typically contain a linear sequence of electronic mail + messages. Each message begins with a separator line that identifies + the message sender, and also identifies the date and time at which + the message was received by the final recipient (either the last-hop + system in the transfer path, or the system which serves as the + recipient's mailstore). Each message is typically terminated by an + empty line. The end of the database is usually recognized by either + the absence of any additional data, or by the presence of an explicit + end-of-file marker. + + The structure of the separator lines vary across implementations, but + usually contain the exact character sequence of "From", followed by a + single Space character (0x20), an email address of some kind, another + Space character, a timestamp sequence of some kind, and an end-of- + line marker. However, due to the lack of any authoritative + specification, each of these attributes are known to vary widely + across implementations. For example, the email address can reflect + any addressing syntax that has ever been used on any messaging system + in all of history (specifically including address forms that are not + compatible with Internet messages, as defined by RFC 2822 [RFC2822]). + Similarly, the timestamp sequences can also vary according to system + output, while the end-of-line sequences will often reflect platform- + specific requirements. Different data formats can even appear within + a single database as a result of multiple mbox files being + concatenated together, or because a single file was accessed by + multiple messaging clients, each of which has used its own syntax for + the separator line. + + Message data within mbox databases often reflects site-specific + peculiarities. For example, it is entirely possible for the message + body or headers in an mbox database to contain untagged eight-bit + character data that implicitly reflects a site-specific default + language or locale, or that reflects local defaults for timestamps + and email addresses; none of this data is widely portable beyond the + local scope. Similarly, message data can also contain unencoded + eight-bit binary data, or can use encoding formats that represent a + specific platform (e.g., BINHEX or UUENCODE sequences). + + + + + + + +Hall Informational [Page 2] + +RFC 4155 The application/mbox Media Type September 2005 + + + Many implementations are also known to escape message body lines that + begin with the character sequence of "From ", so as to prevent + confusion with overly-liberal parsers that do not search for full + separator lines. In the common case, a leading Greater-Than symbol + (0x3E) is used for this purpose (with "From " becoming ">From "). + However, other implementations are known not to escape such lines + unless they are immediately preceded by a blank line or if they also + appear to contain an email address and a timestamp. Other + implementations are also known to perform secondary escapes against + these lines if they are already escaped or quoted, while others + ignore these mechanisms altogether. + + A comprehensive description of mbox database files on UNIX-like + systems can be found at http://qmail.org./man/man5/mbox.html, which + should be treated as mostly authoritative for those variations that + are otherwise only documented in anecdotal form. However, readers + are advised that many other platforms and tools make use of mbox + databases, and that there are many more potential variations that can + be encountered in the wild. + + In order to mitigate errors that may arise from such vagaries, this + specification defines a "format" parameter to the application/mbox + media type declaration, which can be used to identify the specific + kind of mbox database that is being transferred. Furthermore, this + specification defines a "default" database format which MUST be + supported by implementations that claim to be compliant with this + specification, and which is to be used as the implicit format for + undeclared application/mbox data objects. Additional format types + are to be defined in subsequent specifications. Messaging systems + that receive an mbox database with an unknown format parameter value + SHOULD treat the data as an opaque binary object, as if the data had + been declared as application/octet-stream + + Refer to Appendix A for a description of the default mbox format. + + Note that RFC 2046 [RFC2046] defines the multipart/digest media type + for transferring platform-independent message files. Because that + specification defines a set of neutral and strict formatting rules, + the multipart/digest media type already facilitates highly- + predictable transfer and conversion operations; as such, implementers + are strongly encouraged to support and use that media type where + possible. + + + + + + + + + +Hall Informational [Page 3] + +RFC 4155 The application/mbox Media Type September 2005 + + +3. Prerequisites and Terminology + + Readers of this document are expected to be familiar with the + specification for MIME [RFC2045] and MIME-type registrations + [RFC2048]. + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +4. The application/mbox Media Type Registration + + This section provides the media type registration application (as per + [RFC2048]). + + MIME media type name: application + + MIME subtype name: mbox + + Required parameters: none + + Optional parameters: The "format" parameter identifies the format of + the mbox database and the messages contained therein. The default + value for the "format" parameter is "default", and refers to the + formatting rules defined in Appendix A of this memo. mbox databases + that do not have a "format" parameter SHOULD be interpreted as having + the implicit "format" value of "default". mbox databases that have + an unknown value for the "format" parameter SHOULD be treated as + opaque data objects, as if the media type had been specified as + application/octet-stream. Additional values for the format parameter + are to be defined in subsequent specifications, and registered with + IANA. + + Encoding considerations: If an email client receives an mbox database + as a message attachment, and then stores that attachment within a + local mbox database, the contents of the two database files may + become irreversibly intermingled, such that both databases are + rendered unrecognizable. In order to avoid these collisions, + messaging systems that support this specification MUST encode an mbox + database (or at a minimum, the separator lines) with non-transparent + transfer encoding (such as BASE64 or Quoted-Printable) whenever an + application/mbox object is transferred via messaging protocols. + Other transfer services are generally encouraged to adopt similar + encoding strategies in order to allow for any subsequent + retransmission that might occur, but this is not a requirement. + Implementers should also be prepared to encode mbox data locally if + non-compliant data is received. + + + + +Hall Informational [Page 4] + +RFC 4155 The application/mbox Media Type September 2005 + + + Security considerations: mbox data is passive, and does not generally + represent a unique or new security threat. However, there is risk in + sharing any kind of data, because unintentional information may be + exposed, and this risk certainly applies to mbox data as well. + + Interoperability considerations: Due to the lack of a single + authoritative specification for mbox databases, there are a large + number of variations between database formats (refer to the + introduction text for common examples), and it is expected that non- + conformant data will be erroneously tagged or exchanged. Although + the "default" format specified in this memo does not allow for these + kinds of vagaries, prior negotiation or agreement between humans may + sometimes be needed. + + Published specification: see Appendix A. + + Applications that use this media type: hundreds of messaging products + make use of the mbox database format, in one form or another. + + Magic number(s): mbox database files can be recognized by having a + leading character sequence of "From", followed by a single Space + character (0x20), followed by additional printable character data + (refer to the description in Appendix A for details). However, + implementers are cautioned that all such files will not be compliant + with all of the formatting rules, therefore implementers should treat + these files with an appropriate amount of circumspection. + + File extension(s): mbox database files sometimes have an ".mbox" + extension, but this is not required nor expected. As with magic + numbers, implementers should avoid reflexive assumptions about the + contents of such files. + + Macintosh File Type Code(s): None are known to be common. + + Person & email address to contact for further information: Eric A. + Hall (ehall@ntrg.com) + + Intended usage: COMMON + +5. Security Considerations + + See the discussion in section 4. + + + + + + + + + +Hall Informational [Page 5] + +RFC 4155 The application/mbox Media Type September 2005 + + +6. IANA Considerations + + The IANA has registered the application/mbox media type in the MIME + registry, using the application provided in section 4 above. + + Furthermore, IANA has established and will maintain a registry of + values for the "format" parameter as described in this memo. The + first registration is the "default" value, using the description + provided in Appendix A. Subsequent values for the "format" parameter + MUST be accompanied by some form of recognizable, complete, and + legitimate specification, such as an IESG-approved specification, or + some kind of authoritative vendor documentation. + +7. Normative References + + [RFC2045] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part One: Format of Internet Message + Bodies", RFC 2045, November 1996. + + [RFC2046] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part Two: Media Types", RFC 2046, + November 1996. + + [RFC2048] Freed, N., Klensin, J., and J. Postel, "Multipurpose + Internet Mail Extensions (MIME) Part Four: Registration + Procedures", BCP 13, RFC 2048, November 1996. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2822] Resnick, P., "Internet Message Format", RFC 2822, April + 2001. + + + + + + + + + + + + + + + + + + + +Hall Informational [Page 6] + +RFC 4155 The application/mbox Media Type September 2005 + + +Appendix A. The "default" mbox Database Format + + In order to improve interoperability among messaging systems, this + memo defines a "default" mbox database format, which MUST be + supported by all implementations that claim to be compliant with this + specification. + + The "default" mbox database format uses a linear sequence of Internet + messages, with each message being immediately prefaced by a separator + line, and being terminated by an empty line. More specifically: + + o Each message within the database MUST follow the syntax and + formatting rules defined in RFC 2822 [RFC2822] and its related + specifications, with the exception that the canonical mbox + database MUST use a single Line-Feed character (0x0A) as the + end-of-line sequence, and MUST NOT use a Carriage-Return/Line- + Feed pair (NB: this requirement only applies to the canonical + mbox database as transferred, and does not override any other + specifications). This usage represents the most common + historical representation of the mbox database format, and + allows for the least amount of conversion. + + o Messages within the default mbox database MUST consist of + seven-bit characters within an eight-bit stream. Eight-bit data + within the stream MUST be converted to a seven-bit form (using + appropriate, standardized encoding) and appropriately tagged + (with the correct header fields) before the database is + transferred. + + o Message headers and data in the default mbox database MUST be + fully-qualified, as per the relevant specification(s). For + example, email addresses in the various header fields MUST have + legitimate domain names (as per RFC 2822), while extended + characters and encodings MUST be specified in the appropriate + location (as per the appropriate MIME specifications), and so + forth. + + o Each message in the mbox database MUST be immediately preceded + by a single separator line, which MUST conform to the following + syntax: + + The exact character sequence of "From"; + + a single Space character (0x20); + + the email address of the message sender (as obtained from the + message envelope or other authoritative source), conformant + with the "addr-spec" syntax from RFC 2822; + + + +Hall Informational [Page 7] + +RFC 4155 The application/mbox Media Type September 2005 + + + a single Space character; + + a timestamp indicating the UTC date and time when the message + was originally received, conformant with the syntax of the + traditional UNIX 'ctime' output sans timezone (note that the + use of UTC precludes the need for a timezone indicator); + + an end-of-line marker. + + o Each message in the database MUST be terminated by an empty + line, containing a single end-of-line marker. + + Note that the first message in an mbox database will only be prefaced + by a separator line, while every other message will begin with two + end-of-line sequences (one at the end of the message itself, and + another to mark the end of the message within the mbox database file + stream) and a separator line (marking the new message). The end of + the database is implicitly reached when no more message data or + separator lines are found. + + Also note that this specification does not prescribe any escape + syntax for message body lines that begin with the character sequence + of "From ". Recipient systems are expected to parse full separator + lines as they are documented above. + +Author's Address + + Eric A. Hall + + EMail: ehall@ntrg.com + + + + + + + + + + + + + + + + + + + + + +Hall Informational [Page 8] + +RFC 4155 The application/mbox Media Type September 2005 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2005). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at ietf- + ipr@ietf.org. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + +Hall Informational [Page 9] + diff --git a/doc/mime-p1-rfc2045.txt b/doc/mime-p1-rfc2045.txt @@ -0,0 +1,1739 @@ + + + + + + +Network Working Group N. Freed +Request for Comments: 2045 Innosoft +Obsoletes: 1521, 1522, 1590 N. Borenstein +Category: Standards Track First Virtual + November 1996 + + + Multipurpose Internet Mail Extensions + (MIME) Part One: + Format of Internet Message Bodies + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Abstract + + STD 11, RFC 822, defines a message representation protocol specifying + considerable detail about US-ASCII message headers, and leaves the + message content, or message body, as flat US-ASCII text. This set of + documents, collectively called the Multipurpose Internet Mail + Extensions, or MIME, redefines the format of messages to allow for + + (1) textual message bodies in character sets other than + US-ASCII, + + (2) an extensible set of different formats for non-textual + message bodies, + + (3) multi-part message bodies, and + + (4) textual header information in character sets other than + US-ASCII. + + These documents are based on earlier work documented in RFC 934, STD + 11, and RFC 1049, but extends and revises them. Because RFC 822 said + so little about message bodies, these documents are largely + orthogonal to (rather than a revision of) RFC 822. + + This initial document specifies the various headers used to describe + the structure of MIME messages. The second document, RFC 2046, + defines the general structure of the MIME media typing system and + defines an initial set of media types. The third document, RFC 2047, + describes extensions to RFC 822 to allow non-US-ASCII text data in + + + +Freed & Borenstein Standards Track [Page 1] + +RFC 2045 Internet Message Bodies November 1996 + + + Internet mail header fields. The fourth document, RFC 2048, specifies + various IANA registration procedures for MIME-related facilities. The + fifth and final document, RFC 2049, describes MIME conformance + criteria as well as providing some illustrative examples of MIME + message formats, acknowledgements, and the bibliography. + + These documents are revisions of RFCs 1521, 1522, and 1590, which + themselves were revisions of RFCs 1341 and 1342. An appendix in RFC + 2049 describes differences and changes from previous versions. + +Table of Contents + + 1. Introduction ......................................... 3 + 2. Definitions, Conventions, and Generic BNF Grammar .... 5 + 2.1 CRLF ................................................ 5 + 2.2 Character Set ....................................... 6 + 2.3 Message ............................................. 6 + 2.4 Entity .............................................. 6 + 2.5 Body Part ........................................... 7 + 2.6 Body ................................................ 7 + 2.7 7bit Data ........................................... 7 + 2.8 8bit Data ........................................... 7 + 2.9 Binary Data ......................................... 7 + 2.10 Lines .............................................. 7 + 3. MIME Header Fields ................................... 8 + 4. MIME-Version Header Field ............................ 8 + 5. Content-Type Header Field ............................ 10 + 5.1 Syntax of the Content-Type Header Field ............. 12 + 5.2 Content-Type Defaults ............................... 14 + 6. Content-Transfer-Encoding Header Field ............... 14 + 6.1 Content-Transfer-Encoding Syntax .................... 14 + 6.2 Content-Transfer-Encodings Semantics ................ 15 + 6.3 New Content-Transfer-Encodings ...................... 16 + 6.4 Interpretation and Use .............................. 16 + 6.5 Translating Encodings ............................... 18 + 6.6 Canonical Encoding Model ............................ 19 + 6.7 Quoted-Printable Content-Transfer-Encoding .......... 19 + 6.8 Base64 Content-Transfer-Encoding .................... 24 + 7. Content-ID Header Field .............................. 26 + 8. Content-Description Header Field ..................... 27 + 9. Additional MIME Header Fields ........................ 27 + 10. Summary ............................................. 27 + 11. Security Considerations ............................. 27 + 12. Authors' Addresses .................................. 28 + A. Collected Grammar .................................... 29 + + + + + + +Freed & Borenstein Standards Track [Page 2] + +RFC 2045 Internet Message Bodies November 1996 + + +1. Introduction + + Since its publication in 1982, RFC 822 has defined the standard + format of textual mail messages on the Internet. Its success has + been such that the RFC 822 format has been adopted, wholly or + partially, well beyond the confines of the Internet and the Internet + SMTP transport defined by RFC 821. As the format has seen wider use, + a number of limitations have proven increasingly restrictive for the + user community. + + RFC 822 was intended to specify a format for text messages. As such, + non-text messages, such as multimedia messages that might include + audio or images, are simply not mentioned. Even in the case of text, + however, RFC 822 is inadequate for the needs of mail users whose + languages require the use of character sets richer than US-ASCII. + Since RFC 822 does not specify mechanisms for mail containing audio, + video, Asian language text, or even text in most European languages, + additional specifications are needed. + + One of the notable limitations of RFC 821/822 based mail systems is + the fact that they limit the contents of electronic mail messages to + relatively short lines (e.g. 1000 characters or less [RFC-821]) of + 7bit US-ASCII. This forces users to convert any non-textual data + that they may wish to send into seven-bit bytes representable as + printable US-ASCII characters before invoking a local mail UA (User + Agent, a program with which human users send and receive mail). + Examples of such encodings currently used in the Internet include + pure hexadecimal, uuencode, the 3-in-4 base 64 scheme specified in + RFC 1421, the Andrew Toolkit Representation [ATK], and many others. + + The limitations of RFC 822 mail become even more apparent as gateways + are designed to allow for the exchange of mail messages between RFC + 822 hosts and X.400 hosts. X.400 [X400] specifies mechanisms for the + inclusion of non-textual material within electronic mail messages. + The current standards for the mapping of X.400 messages to RFC 822 + messages specify either that X.400 non-textual material must be + converted to (not encoded in) IA5Text format, or that they must be + discarded, notifying the RFC 822 user that discarding has occurred. + This is clearly undesirable, as information that a user may wish to + receive is lost. Even though a user agent may not have the + capability of dealing with the non-textual material, the user might + have some mechanism external to the UA that can extract useful + information from the material. Moreover, it does not allow for the + fact that the message may eventually be gatewayed back into an X.400 + message handling system (i.e., the X.400 message is "tunneled" + through Internet mail), where the non-textual information would + definitely become useful again. + + + + +Freed & Borenstein Standards Track [Page 3] + +RFC 2045 Internet Message Bodies November 1996 + + + This document describes several mechanisms that combine to solve most + of these problems without introducing any serious incompatibilities + with the existing world of RFC 822 mail. In particular, it + describes: + + (1) A MIME-Version header field, which uses a version + number to declare a message to be conformant with MIME + and allows mail processing agents to distinguish + between such messages and those generated by older or + non-conformant software, which are presumed to lack + such a field. + + (2) A Content-Type header field, generalized from RFC 1049, + which can be used to specify the media type and subtype + of data in the body of a message and to fully specify + the native representation (canonical form) of such + data. + + (3) A Content-Transfer-Encoding header field, which can be + used to specify both the encoding transformation that + was applied to the body and the domain of the result. + Encoding transformations other than the identity + transformation are usually applied to data in order to + allow it to pass through mail transport mechanisms + which may have data or character set limitations. + + (4) Two additional header fields that can be used to + further describe the data in a body, the Content-ID and + Content-Description header fields. + + All of the header fields defined in this document are subject to the + general syntactic rules for header fields specified in RFC 822. In + particular, all of these header fields except for Content-Disposition + can include RFC 822 comments, which have no semantic content and + should be ignored during MIME processing. + + Finally, to specify and promote interoperability, RFC 2049 provides a + basic applicability statement for a subset of the above mechanisms + that defines a minimal level of "conformance" with this document. + + HISTORICAL NOTE: Several of the mechanisms described in this set of + documents may seem somewhat strange or even baroque at first reading. + It is important to note that compatibility with existing standards + AND robustness across existing practice were two of the highest + priorities of the working group that developed this set of documents. + In particular, compatibility was always favored over elegance. + + + + + +Freed & Borenstein Standards Track [Page 4] + +RFC 2045 Internet Message Bodies November 1996 + + + Please refer to the current edition of the "Internet Official + Protocol Standards" for the standardization state and status of this + protocol. RFC 822 and STD 3, RFC 1123 also provide essential + background for MIME since no conforming implementation of MIME can + violate them. In addition, several other informational RFC documents + will be of interest to the MIME implementor, in particular RFC 1344, + RFC 1345, and RFC 1524. + +2. Definitions, Conventions, and Generic BNF Grammar + + Although the mechanisms specified in this set of documents are all + described in prose, most are also described formally in the augmented + BNF notation of RFC 822. Implementors will need to be familiar with + this notation in order to understand this set of documents, and are + referred to RFC 822 for a complete explanation of the augmented BNF + notation. + + Some of the augmented BNF in this set of documents makes named + references to syntax rules defined in RFC 822. A complete formal + grammar, then, is obtained by combining the collected grammar + appendices in each document in this set with the BNF of RFC 822 plus + the modifications to RFC 822 defined in RFC 1123 (which specifically + changes the syntax for `return', `date' and `mailbox'). + + All numeric and octet values are given in decimal notation in this + set of documents. All media type values, subtype values, and + parameter names as defined are case-insensitive. However, parameter + values are case-sensitive unless otherwise specified for the specific + parameter. + + FORMATTING NOTE: Notes, such at this one, provide additional + nonessential information which may be skipped by the reader without + missing anything essential. The primary purpose of these non- + essential notes is to convey information about the rationale of this + set of documents, or to place these documents in the proper + historical or evolutionary context. Such information may in + particular be skipped by those who are focused entirely on building a + conformant implementation, but may be of use to those who wish to + understand why certain design choices were made. + +2.1. CRLF + + The term CRLF, in this set of documents, refers to the sequence of + octets corresponding to the two US-ASCII characters CR (decimal value + 13) and LF (decimal value 10) which, taken together, in this order, + denote a line break in RFC 822 mail. + + + + + +Freed & Borenstein Standards Track [Page 5] + +RFC 2045 Internet Message Bodies November 1996 + + +2.2. Character Set + + The term "character set" is used in MIME to refer to a method of + converting a sequence of octets into a sequence of characters. Note + that unconditional and unambiguous conversion in the other direction + is not required, in that not all characters may be representable by a + given character set and a character set may provide more than one + sequence of octets to represent a particular sequence of characters. + + This definition is intended to allow various kinds of character + encodings, from simple single-table mappings such as US-ASCII to + complex table switching methods such as those that use ISO 2022's + techniques, to be used as character sets. However, the definition + associated with a MIME character set name must fully specify the + mapping to be performed. In particular, use of external profiling + information to determine the exact mapping is not permitted. + + NOTE: The term "character set" was originally to describe such + straightforward schemes as US-ASCII and ISO-8859-1 which have a + simple one-to-one mapping from single octets to single characters. + Multi-octet coded character sets and switching techniques make the + situation more complex. For example, some communities use the term + "character encoding" for what MIME calls a "character set", while + using the phrase "coded character set" to denote an abstract mapping + from integers (not octets) to characters. + +2.3. Message + + The term "message", when not further qualified, means either a + (complete or "top-level") RFC 822 message being transferred on a + network, or a message encapsulated in a body of type "message/rfc822" + or "message/partial". + +2.4. Entity + + The term "entity", refers specifically to the MIME-defined header + fields and contents of either a message or one of the parts in the + body of a multipart entity. The specification of such entities is + the essence of MIME. Since the contents of an entity are often + called the "body", it makes sense to speak about the body of an + entity. Any sort of field may be present in the header of an entity, + but only those fields whose names begin with "content-" actually have + any MIME-related meaning. Note that this does NOT imply thay they + have no meaning at all -- an entity that is also a message has non- + MIME header fields whose meanings are defined by RFC 822. + + + + + + +Freed & Borenstein Standards Track [Page 6] + +RFC 2045 Internet Message Bodies November 1996 + + +2.5. Body Part + + The term "body part" refers to an entity inside of a multipart + entity. + +2.6. Body + + The term "body", when not further qualified, means the body of an + entity, that is, the body of either a message or of a body part. + + NOTE: The previous four definitions are clearly circular. This is + unavoidable, since the overall structure of a MIME message is indeed + recursive. + +2.7. 7bit Data + + "7bit data" refers to data that is all represented as relatively + short lines with 998 octets or less between CRLF line separation + sequences [RFC-821]. No octets with decimal values greater than 127 + are allowed and neither are NULs (octets with decimal value 0). CR + (decimal value 13) and LF (decimal value 10) octets only occur as + part of CRLF line separation sequences. + +2.8. 8bit Data + + "8bit data" refers to data that is all represented as relatively + short lines with 998 octets or less between CRLF line separation + sequences [RFC-821]), but octets with decimal values greater than 127 + may be used. As with "7bit data" CR and LF octets only occur as part + of CRLF line separation sequences and no NULs are allowed. + +2.9. Binary Data + + "Binary data" refers to data where any sequence of octets whatsoever + is allowed. + +2.10. Lines + + "Lines" are defined as sequences of octets separated by a CRLF + sequences. This is consistent with both RFC 821 and RFC 822. + "Lines" only refers to a unit of data in a message, which may or may + not correspond to something that is actually displayed by a user + agent. + + + + + + + + +Freed & Borenstein Standards Track [Page 7] + +RFC 2045 Internet Message Bodies November 1996 + + +3. MIME Header Fields + + MIME defines a number of new RFC 822 header fields that are used to + describe the content of a MIME entity. These header fields occur in + at least two contexts: + + (1) As part of a regular RFC 822 message header. + + (2) In a MIME body part header within a multipart + construct. + + The formal definition of these header fields is as follows: + + entity-headers := [ content CRLF ] + [ encoding CRLF ] + [ id CRLF ] + [ description CRLF ] + *( MIME-extension-field CRLF ) + + MIME-message-headers := entity-headers + fields + version CRLF + ; The ordering of the header + ; fields implied by this BNF + ; definition should be ignored. + + MIME-part-headers := entity-headers + [ fields ] + ; Any field not beginning with + ; "content-" can have no defined + ; meaning and may be ignored. + ; The ordering of the header + ; fields implied by this BNF + ; definition should be ignored. + + The syntax of the various specific MIME header fields will be + described in the following sections. + +4. MIME-Version Header Field + + Since RFC 822 was published in 1982, there has really been only one + format standard for Internet messages, and there has been little + perceived need to declare the format standard in use. This document + is an independent specification that complements RFC 822. Although + the extensions in this document have been defined in such a way as to + be compatible with RFC 822, there are still circumstances in which it + might be desirable for a mail-processing agent to know whether a + message was composed with the new standard in mind. + + + +Freed & Borenstein Standards Track [Page 8] + +RFC 2045 Internet Message Bodies November 1996 + + + Therefore, this document defines a new header field, "MIME-Version", + which is to be used to declare the version of the Internet message + body format standard in use. + + Messages composed in accordance with this document MUST include such + a header field, with the following verbatim text: + + MIME-Version: 1.0 + + The presence of this header field is an assertion that the message + has been composed in compliance with this document. + + Since it is possible that a future document might extend the message + format standard again, a formal BNF is given for the content of the + MIME-Version field: + + version := "MIME-Version" ":" 1*DIGIT "." 1*DIGIT + + Thus, future format specifiers, which might replace or extend "1.0", + are constrained to be two integer fields, separated by a period. If + a message is received with a MIME-version value other than "1.0", it + cannot be assumed to conform with this document. + + Note that the MIME-Version header field is required at the top level + of a message. It is not required for each body part of a multipart + entity. It is required for the embedded headers of a body of type + "message/rfc822" or "message/partial" if and only if the embedded + message is itself claimed to be MIME-conformant. + + It is not possible to fully specify how a mail reader that conforms + with MIME as defined in this document should treat a message that + might arrive in the future with some value of MIME-Version other than + "1.0". + + It is also worth noting that version control for specific media types + is not accomplished using the MIME-Version mechanism. In particular, + some formats (such as application/postscript) have version numbering + conventions that are internal to the media format. Where such + conventions exist, MIME does nothing to supersede them. Where no + such conventions exist, a MIME media type might use a "version" + parameter in the content-type field if necessary. + + + + + + + + + + +Freed & Borenstein Standards Track [Page 9] + +RFC 2045 Internet Message Bodies November 1996 + + + NOTE TO IMPLEMENTORS: When checking MIME-Version values any RFC 822 + comment strings that are present must be ignored. In particular, the + following four MIME-Version fields are equivalent: + + MIME-Version: 1.0 + + MIME-Version: 1.0 (produced by MetaSend Vx.x) + + MIME-Version: (produced by MetaSend Vx.x) 1.0 + + MIME-Version: 1.(produced by MetaSend Vx.x)0 + + In the absence of a MIME-Version field, a receiving mail user agent + (whether conforming to MIME requirements or not) may optionally + choose to interpret the body of the message according to local + conventions. Many such conventions are currently in use and it + should be noted that in practice non-MIME messages can contain just + about anything. + + It is impossible to be certain that a non-MIME mail message is + actually plain text in the US-ASCII character set since it might well + be a message that, using some set of nonstandard local conventions + that predate MIME, includes text in another character set or non- + textual data presented in a manner that cannot be automatically + recognized (e.g., a uuencoded compressed UNIX tar file). + +5. Content-Type Header Field + + The purpose of the Content-Type field is to describe the data + contained in the body fully enough that the receiving user agent can + pick an appropriate agent or mechanism to present the data to the + user, or otherwise deal with the data in an appropriate manner. The + value in this field is called a media type. + + HISTORICAL NOTE: The Content-Type header field was first defined in + RFC 1049. RFC 1049 used a simpler and less powerful syntax, but one + that is largely compatible with the mechanism given here. + + The Content-Type header field specifies the nature of the data in the + body of an entity by giving media type and subtype identifiers, and + by providing auxiliary information that may be required for certain + media types. After the media type and subtype names, the remainder + of the header field is simply a set of parameters, specified in an + attribute=value notation. The ordering of parameters is not + significant. + + + + + + +Freed & Borenstein Standards Track [Page 10] + +RFC 2045 Internet Message Bodies November 1996 + + + In general, the top-level media type is used to declare the general + type of data, while the subtype specifies a specific format for that + type of data. Thus, a media type of "image/xyz" is enough to tell a + user agent that the data is an image, even if the user agent has no + knowledge of the specific image format "xyz". Such information can + be used, for example, to decide whether or not to show a user the raw + data from an unrecognized subtype -- such an action might be + reasonable for unrecognized subtypes of text, but not for + unrecognized subtypes of image or audio. For this reason, registered + subtypes of text, image, audio, and video should not contain embedded + information that is really of a different type. Such compound + formats should be represented using the "multipart" or "application" + types. + + Parameters are modifiers of the media subtype, and as such do not + fundamentally affect the nature of the content. The set of + meaningful parameters depends on the media type and subtype. Most + parameters are associated with a single specific subtype. However, a + given top-level media type may define parameters which are applicable + to any subtype of that type. Parameters may be required by their + defining content type or subtype or they may be optional. MIME + implementations must ignore any parameters whose names they do not + recognize. + + For example, the "charset" parameter is applicable to any subtype of + "text", while the "boundary" parameter is required for any subtype of + the "multipart" media type. + + There are NO globally-meaningful parameters that apply to all media + types. Truly global mechanisms are best addressed, in the MIME + model, by the definition of additional Content-* header fields. + + An initial set of seven top-level media types is defined in RFC 2046. + Five of these are discrete types whose content is essentially opaque + as far as MIME processing is concerned. The remaining two are + composite types whose contents require additional handling by MIME + processors. + + This set of top-level media types is intended to be substantially + complete. It is expected that additions to the larger set of + supported types can generally be accomplished by the creation of new + subtypes of these initial types. In the future, more top-level types + may be defined only by a standards-track extension to this standard. + If another top-level type is to be used for any reason, it must be + given a name starting with "X-" to indicate its non-standard status + and to avoid a potential conflict with a future official name. + + + + + +Freed & Borenstein Standards Track [Page 11] + +RFC 2045 Internet Message Bodies November 1996 + + +5.1. Syntax of the Content-Type Header Field + + In the Augmented BNF notation of RFC 822, a Content-Type header field + value is defined as follows: + + content := "Content-Type" ":" type "/" subtype + *(";" parameter) + ; Matching of media type and subtype + ; is ALWAYS case-insensitive. + + type := discrete-type / composite-type + + discrete-type := "text" / "image" / "audio" / "video" / + "application" / extension-token + + composite-type := "message" / "multipart" / extension-token + + extension-token := ietf-token / x-token + + ietf-token := <An extension token defined by a + standards-track RFC and registered + with IANA.> + + x-token := <The two characters "X-" or "x-" followed, with + no intervening white space, by any token> + + subtype := extension-token / iana-token + + iana-token := <A publicly-defined extension token. Tokens + of this form must be registered with IANA + as specified in RFC 2048.> + + parameter := attribute "=" value + + attribute := token + ; Matching of attributes + ; is ALWAYS case-insensitive. + + value := token / quoted-string + + token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, + or tspecials> + + tspecials := "(" / ")" / "<" / ">" / "@" / + "," / ";" / ":" / "\" / <"> + "/" / "[" / "]" / "?" / "=" + ; Must be in quoted-string, + ; to use within parameter values + + + +Freed & Borenstein Standards Track [Page 12] + +RFC 2045 Internet Message Bodies November 1996 + + + Note that the definition of "tspecials" is the same as the RFC 822 + definition of "specials" with the addition of the three characters + "/", "?", and "=", and the removal of ".". + + Note also that a subtype specification is MANDATORY -- it may not be + omitted from a Content-Type header field. As such, there are no + default subtypes. + + The type, subtype, and parameter names are not case sensitive. For + example, TEXT, Text, and TeXt are all equivalent top-level media + types. Parameter values are normally case sensitive, but sometimes + are interpreted in a case-insensitive fashion, depending on the + intended use. (For example, multipart boundaries are case-sensitive, + but the "access-type" parameter for message/External-body is not + case-sensitive.) + + Note that the value of a quoted string parameter does not include the + quotes. That is, the quotation marks in a quoted-string are not a + part of the value of the parameter, but are merely used to delimit + that parameter value. In addition, comments are allowed in + accordance with RFC 822 rules for structured header fields. Thus the + following two forms + + Content-type: text/plain; charset=us-ascii (Plain text) + + Content-type: text/plain; charset="us-ascii" + + are completely equivalent. + + Beyond this syntax, the only syntactic constraint on the definition + of subtype names is the desire that their uses must not conflict. + That is, it would be undesirable to have two different communities + using "Content-Type: application/foobar" to mean two different + things. The process of defining new media subtypes, then, is not + intended to be a mechanism for imposing restrictions, but simply a + mechanism for publicizing their definition and usage. There are, + therefore, two acceptable mechanisms for defining new media subtypes: + + (1) Private values (starting with "X-") may be defined + bilaterally between two cooperating agents without + outside registration or standardization. Such values + cannot be registered or standardized. + + (2) New standard values should be registered with IANA as + described in RFC 2048. + + The second document in this set, RFC 2046, defines the initial set of + media types for MIME. + + + +Freed & Borenstein Standards Track [Page 13] + +RFC 2045 Internet Message Bodies November 1996 + + +5.2. Content-Type Defaults + + Default RFC 822 messages without a MIME Content-Type header are taken + by this protocol to be plain text in the US-ASCII character set, + which can be explicitly specified as: + + Content-type: text/plain; charset=us-ascii + + This default is assumed if no Content-Type header field is specified. + It is also recommend that this default be assumed when a + syntactically invalid Content-Type header field is encountered. In + the presence of a MIME-Version header field and the absence of any + Content-Type header field, a receiving User Agent can also assume + that plain US-ASCII text was the sender's intent. Plain US-ASCII + text may still be assumed in the absence of a MIME-Version or the + presence of an syntactically invalid Content-Type header field, but + the sender's intent might have been otherwise. + +6. Content-Transfer-Encoding Header Field + + Many media types which could be usefully transported via email are + represented, in their "natural" format, as 8bit character or binary + data. Such data cannot be transmitted over some transfer protocols. + For example, RFC 821 (SMTP) restricts mail messages to 7bit US-ASCII + data with lines no longer than 1000 characters including any trailing + CRLF line separator. + + It is necessary, therefore, to define a standard mechanism for + encoding such data into a 7bit short line format. Proper labelling + of unencoded material in less restrictive formats for direct use over + less restrictive transports is also desireable. This document + specifies that such encodings will be indicated by a new "Content- + Transfer-Encoding" header field. This field has not been defined by + any previous standard. + +6.1. Content-Transfer-Encoding Syntax + + The Content-Transfer-Encoding field's value is a single token + specifying the type of encoding, as enumerated below. Formally: + + encoding := "Content-Transfer-Encoding" ":" mechanism + + mechanism := "7bit" / "8bit" / "binary" / + "quoted-printable" / "base64" / + ietf-token / x-token + + These values are not case sensitive -- Base64 and BASE64 and bAsE64 + are all equivalent. An encoding type of 7BIT requires that the body + + + +Freed & Borenstein Standards Track [Page 14] + +RFC 2045 Internet Message Bodies November 1996 + + + is already in a 7bit mail-ready representation. This is the default + value -- that is, "Content-Transfer-Encoding: 7BIT" is assumed if the + Content-Transfer-Encoding header field is not present. + +6.2. Content-Transfer-Encodings Semantics + + This single Content-Transfer-Encoding token actually provides two + pieces of information. It specifies what sort of encoding + transformation the body was subjected to and hence what decoding + operation must be used to restore it to its original form, and it + specifies what the domain of the result is. + + The transformation part of any Content-Transfer-Encodings specifies, + either explicitly or implicitly, a single, well-defined decoding + algorithm, which for any sequence of encoded octets either transforms + it to the original sequence of octets which was encoded, or shows + that it is illegal as an encoded sequence. Content-Transfer- + Encodings transformations never depend on any additional external + profile information for proper operation. Note that while decoders + must produce a single, well-defined output for a valid encoding no + such restrictions exist for encoders: Encoding a given sequence of + octets to different, equivalent encoded sequences is perfectly legal. + + Three transformations are currently defined: identity, the "quoted- + printable" encoding, and the "base64" encoding. The domains are + "binary", "8bit" and "7bit". + + The Content-Transfer-Encoding values "7bit", "8bit", and "binary" all + mean that the identity (i.e. NO) encoding transformation has been + performed. As such, they serve simply as indicators of the domain of + the body data, and provide useful information about the sort of + encoding that might be needed for transmission in a given transport + system. The terms "7bit data", "8bit data", and "binary data" are + all defined in Section 2. + + The quoted-printable and base64 encodings transform their input from + an arbitrary domain into material in the "7bit" range, thus making it + safe to carry over restricted transports. The specific definition of + the transformations are given below. + + The proper Content-Transfer-Encoding label must always be used. + Labelling unencoded data containing 8bit characters as "7bit" is not + allowed, nor is labelling unencoded non-line-oriented data as + anything other than "binary" allowed. + + Unlike media subtypes, a proliferation of Content-Transfer-Encoding + values is both undesirable and unnecessary. However, establishing + only a single transformation into the "7bit" domain does not seem + + + +Freed & Borenstein Standards Track [Page 15] + +RFC 2045 Internet Message Bodies November 1996 + + + possible. There is a tradeoff between the desire for a compact and + efficient encoding of largely- binary data and the desire for a + somewhat readable encoding of data that is mostly, but not entirely, + 7bit. For this reason, at least two encoding mechanisms are + necessary: a more or less readable encoding (quoted-printable) and a + "dense" or "uniform" encoding (base64). + + Mail transport for unencoded 8bit data is defined in RFC 1652. As of + the initial publication of this document, there are no standardized + Internet mail transports for which it is legitimate to include + unencoded binary data in mail bodies. Thus there are no + circumstances in which the "binary" Content-Transfer-Encoding is + actually valid in Internet mail. However, in the event that binary + mail transport becomes a reality in Internet mail, or when MIME is + used in conjunction with any other binary-capable mail transport + mechanism, binary bodies must be labelled as such using this + mechanism. + + NOTE: The five values defined for the Content-Transfer-Encoding field + imply nothing about the media type other than the algorithm by which + it was encoded or the transport system requirements if unencoded. + +6.3. New Content-Transfer-Encodings + + Implementors may, if necessary, define private Content-Transfer- + Encoding values, but must use an x-token, which is a name prefixed by + "X-", to indicate its non-standard status, e.g., "Content-Transfer- + Encoding: x-my-new-encoding". Additional standardized Content- + Transfer-Encoding values must be specified by a standards-track RFC. + The requirements such specifications must meet are given in RFC 2048. + As such, all content-transfer-encoding namespace except that + beginning with "X-" is explicitly reserved to the IETF for future + use. + + Unlike media types and subtypes, the creation of new Content- + Transfer-Encoding values is STRONGLY discouraged, as it seems likely + to hinder interoperability with little potential benefit + +6.4. Interpretation and Use + + If a Content-Transfer-Encoding header field appears as part of a + message header, it applies to the entire body of that message. If a + Content-Transfer-Encoding header field appears as part of an entity's + headers, it applies only to the body of that entity. If an entity is + of type "multipart" the Content-Transfer-Encoding is not permitted to + have any value other than "7bit", "8bit" or "binary". Even more + severe restrictions apply to some subtypes of the "message" type. + + + + +Freed & Borenstein Standards Track [Page 16] + +RFC 2045 Internet Message Bodies November 1996 + + + It should be noted that most media types are defined in terms of + octets rather than bits, so that the mechanisms described here are + mechanisms for encoding arbitrary octet streams, not bit streams. If + a bit stream is to be encoded via one of these mechanisms, it must + first be converted to an 8bit byte stream using the network standard + bit order ("big-endian"), in which the earlier bits in a stream + become the higher-order bits in a 8bit byte. A bit stream not ending + at an 8bit boundary must be padded with zeroes. RFC 2046 provides a + mechanism for noting the addition of such padding in the case of the + application/octet-stream media type, which has a "padding" parameter. + + The encoding mechanisms defined here explicitly encode all data in + US-ASCII. Thus, for example, suppose an entity has header fields + such as: + + Content-Type: text/plain; charset=ISO-8859-1 + Content-transfer-encoding: base64 + + This must be interpreted to mean that the body is a base64 US-ASCII + encoding of data that was originally in ISO-8859-1, and will be in + that character set again after decoding. + + Certain Content-Transfer-Encoding values may only be used on certain + media types. In particular, it is EXPRESSLY FORBIDDEN to use any + encodings other than "7bit", "8bit", or "binary" with any composite + media type, i.e. one that recursively includes other Content-Type + fields. Currently the only composite media types are "multipart" and + "message". All encodings that are desired for bodies of type + multipart or message must be done at the innermost level, by encoding + the actual body that needs to be encoded. + + It should also be noted that, by definition, if a composite entity + has a transfer-encoding value such as "7bit", but one of the enclosed + entities has a less restrictive value such as "8bit", then either the + outer "7bit" labelling is in error, because 8bit data are included, + or the inner "8bit" labelling placed an unnecessarily high demand on + the transport system because the actual included data were actually + 7bit-safe. + + NOTE ON ENCODING RESTRICTIONS: Though the prohibition against using + content-transfer-encodings on composite body data may seem overly + restrictive, it is necessary to prevent nested encodings, in which + data are passed through an encoding algorithm multiple times, and + must be decoded multiple times in order to be properly viewed. + Nested encodings add considerable complexity to user agents: Aside + from the obvious efficiency problems with such multiple encodings, + they can obscure the basic structure of a message. In particular, + they can imply that several decoding operations are necessary simply + + + +Freed & Borenstein Standards Track [Page 17] + +RFC 2045 Internet Message Bodies November 1996 + + + to find out what types of bodies a message contains. Banning nested + encodings may complicate the job of certain mail gateways, but this + seems less of a problem than the effect of nested encodings on user + agents. + + Any entity with an unrecognized Content-Transfer-Encoding must be + treated as if it has a Content-Type of "application/octet-stream", + regardless of what the Content-Type header field actually says. + + NOTE ON THE RELATIONSHIP BETWEEN CONTENT-TYPE AND CONTENT-TRANSFER- + ENCODING: It may seem that the Content-Transfer-Encoding could be + inferred from the characteristics of the media that is to be encoded, + or, at the very least, that certain Content-Transfer-Encodings could + be mandated for use with specific media types. There are several + reasons why this is not the case. First, given the varying types of + transports used for mail, some encodings may be appropriate for some + combinations of media types and transports but not for others. (For + example, in an 8bit transport, no encoding would be required for text + in certain character sets, while such encodings are clearly required + for 7bit SMTP.) + + Second, certain media types may require different types of transfer + encoding under different circumstances. For example, many PostScript + bodies might consist entirely of short lines of 7bit data and hence + require no encoding at all. Other PostScript bodies (especially + those using Level 2 PostScript's binary encoding mechanism) may only + be reasonably represented using a binary transport encoding. + Finally, since the Content-Type field is intended to be an open-ended + specification mechanism, strict specification of an association + between media types and encodings effectively couples the + specification of an application protocol with a specific lower-level + transport. This is not desirable since the developers of a media + type should not have to be aware of all the transports in use and + what their limitations are. + +6.5. Translating Encodings + + The quoted-printable and base64 encodings are designed so that + conversion between them is possible. The only issue that arises in + such a conversion is the handling of hard line breaks in quoted- + printable encoding output. When converting from quoted-printable to + base64 a hard line break in the quoted-printable form represents a + CRLF sequence in the canonical form of the data. It must therefore be + converted to a corresponding encoded CRLF in the base64 form of the + data. Similarly, a CRLF sequence in the canonical form of the data + obtained after base64 decoding must be converted to a quoted- + printable hard line break, but ONLY when converting text data. + + + + +Freed & Borenstein Standards Track [Page 18] + +RFC 2045 Internet Message Bodies November 1996 + + +6.6. Canonical Encoding Model + + There was some confusion, in the previous versions of this RFC, + regarding the model for when email data was to be converted to + canonical form and encoded, and in particular how this process would + affect the treatment of CRLFs, given that the representation of + newlines varies greatly from system to system, and the relationship + between content-transfer-encodings and character sets. A canonical + model for encoding is presented in RFC 2049 for this reason. + +6.7. Quoted-Printable Content-Transfer-Encoding + + The Quoted-Printable encoding is intended to represent data that + largely consists of octets that correspond to printable characters in + the US-ASCII character set. It encodes the data in such a way that + the resulting octets are unlikely to be modified by mail transport. + If the data being encoded are mostly US-ASCII text, the encoded form + of the data remains largely recognizable by humans. A body which is + entirely US-ASCII may also be encoded in Quoted-Printable to ensure + the integrity of the data should the message pass through a + character-translating, and/or line-wrapping gateway. + + In this encoding, octets are to be represented as determined by the + following rules: + + (1) (General 8bit representation) Any octet, except a CR or + LF that is part of a CRLF line break of the canonical + (standard) form of the data being encoded, may be + represented by an "=" followed by a two digit + hexadecimal representation of the octet's value. The + digits of the hexadecimal alphabet, for this purpose, + are "0123456789ABCDEF". Uppercase letters must be + used; lowercase letters are not allowed. Thus, for + example, the decimal value 12 (US-ASCII form feed) can + be represented by "=0C", and the decimal value 61 (US- + ASCII EQUAL SIGN) can be represented by "=3D". This + rule must be followed except when the following rules + allow an alternative encoding. + + (2) (Literal representation) Octets with decimal values of + 33 through 60 inclusive, and 62 through 126, inclusive, + MAY be represented as the US-ASCII characters which + correspond to those octets (EXCLAMATION POINT through + LESS THAN, and GREATER THAN through TILDE, + respectively). + + (3) (White Space) Octets with values of 9 and 32 MAY be + represented as US-ASCII TAB (HT) and SPACE characters, + + + +Freed & Borenstein Standards Track [Page 19] + +RFC 2045 Internet Message Bodies November 1996 + + + respectively, but MUST NOT be so represented at the end + of an encoded line. Any TAB (HT) or SPACE characters + on an encoded line MUST thus be followed on that line + by a printable character. In particular, an "=" at the + end of an encoded line, indicating a soft line break + (see rule #5) may follow one or more TAB (HT) or SPACE + characters. It follows that an octet with decimal + value 9 or 32 appearing at the end of an encoded line + must be represented according to Rule #1. This rule is + necessary because some MTAs (Message Transport Agents, + programs which transport messages from one user to + another, or perform a portion of such transfers) are + known to pad lines of text with SPACEs, and others are + known to remove "white space" characters from the end + of a line. Therefore, when decoding a Quoted-Printable + body, any trailing white space on a line must be + deleted, as it will necessarily have been added by + intermediate transport agents. + + (4) (Line Breaks) A line break in a text body, represented + as a CRLF sequence in the text canonical form, must be + represented by a (RFC 822) line break, which is also a + CRLF sequence, in the Quoted-Printable encoding. Since + the canonical representation of media types other than + text do not generally include the representation of + line breaks as CRLF sequences, no hard line breaks + (i.e. line breaks that are intended to be meaningful + and to be displayed to the user) can occur in the + quoted-printable encoding of such types. Sequences + like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely + appear in non-text data represented in quoted- + printable, of course. + + Note that many implementations may elect to encode the + local representation of various content types directly + rather than converting to canonical form first, + encoding, and then converting back to local + representation. In particular, this may apply to plain + text material on systems that use newline conventions + other than a CRLF terminator sequence. Such an + implementation optimization is permissible, but only + when the combined canonicalization-encoding step is + equivalent to performing the three steps separately. + + (5) (Soft Line Breaks) The Quoted-Printable encoding + REQUIRES that encoded lines be no more than 76 + characters long. If longer lines are to be encoded + with the Quoted-Printable encoding, "soft" line breaks + + + +Freed & Borenstein Standards Track [Page 20] + +RFC 2045 Internet Message Bodies November 1996 + + + must be used. An equal sign as the last character on a + encoded line indicates such a non-significant ("soft") + line break in the encoded text. + + Thus if the "raw" form of the line is a single unencoded line that + says: + + Now's the time for all folk to come to the aid of their country. + + This can be represented, in the Quoted-Printable encoding, as: + + Now's the time = + for all folk to come= + to the aid of their country. + + This provides a mechanism with which long lines are encoded in such a + way as to be restored by the user agent. The 76 character limit does + not count the trailing CRLF, but counts all other characters, + including any equal signs. + + Since the hyphen character ("-") may be represented as itself in the + Quoted-Printable encoding, care must be taken, when encapsulating a + quoted-printable encoded body inside one or more multipart entities, + to ensure that the boundary delimiter does not appear anywhere in the + encoded body. (A good strategy is to choose a boundary that includes + a character sequence such as "=_" which can never appear in a + quoted-printable body. See the definition of multipart messages in + RFC 2046.) + + NOTE: The quoted-printable encoding represents something of a + compromise between readability and reliability in transport. Bodies + encoded with the quoted-printable encoding will work reliably over + most mail gateways, but may not work perfectly over a few gateways, + notably those involving translation into EBCDIC. A higher level of + confidence is offered by the base64 Content-Transfer-Encoding. A way + to get reasonably reliable transport through EBCDIC gateways is to + also quote the US-ASCII characters + + !"#$@[\]^`{|}~ + + according to rule #1. + + Because quoted-printable data is generally assumed to be line- + oriented, it is to be expected that the representation of the breaks + between the lines of quoted-printable data may be altered in + transport, in the same manner that plain text mail has always been + altered in Internet mail when passing between systems with differing + newline conventions. If such alterations are likely to constitute a + + + +Freed & Borenstein Standards Track [Page 21] + +RFC 2045 Internet Message Bodies November 1996 + + + corruption of the data, it is probably more sensible to use the + base64 encoding rather than the quoted-printable encoding. + + NOTE: Several kinds of substrings cannot be generated according to + the encoding rules for the quoted-printable content-transfer- + encoding, and hence are formally illegal if they appear in the output + of a quoted-printable encoder. This note enumerates these cases and + suggests ways to handle such illegal substrings if any are + encountered in quoted-printable data that is to be decoded. + + (1) An "=" followed by two hexadecimal digits, one or both + of which are lowercase letters in "abcdef", is formally + illegal. A robust implementation might choose to + recognize them as the corresponding uppercase letters. + + (2) An "=" followed by a character that is neither a + hexadecimal digit (including "abcdef") nor the CR + character of a CRLF pair is illegal. This case can be + the result of US-ASCII text having been included in a + quoted-printable part of a message without itself + having been subjected to quoted-printable encoding. A + reasonable approach by a robust implementation might be + to include the "=" character and the following + character in the decoded data without any + transformation and, if possible, indicate to the user + that proper decoding was not possible at this point in + the data. + + (3) An "=" cannot be the ultimate or penultimate character + in an encoded object. This could be handled as in case + (2) above. + + (4) Control characters other than TAB, or CR and LF as + parts of CRLF pairs, must not appear. The same is true + for octets with decimal values greater than 126. If + found in incoming quoted-printable data by a decoder, a + robust implementation might exclude them from the + decoded data and warn the user that illegal characters + were discovered. + + (5) Encoded lines must not be longer than 76 characters, + not counting the trailing CRLF. If longer lines are + found in incoming, encoded data, a robust + implementation might nevertheless decode the lines, and + might report the erroneous encoding to the user. + + + + + + +Freed & Borenstein Standards Track [Page 22] + +RFC 2045 Internet Message Bodies November 1996 + + + WARNING TO IMPLEMENTORS: If binary data is encoded in quoted- + printable, care must be taken to encode CR and LF characters as "=0D" + and "=0A", respectively. In particular, a CRLF sequence in binary + data should be encoded as "=0D=0A". Otherwise, if CRLF were + represented as a hard line break, it might be incorrectly decoded on + platforms with different line break conventions. + + For formalists, the syntax of quoted-printable data is described by + the following grammar: + + quoted-printable := qp-line *(CRLF qp-line) + + qp-line := *(qp-segment transport-padding CRLF) + qp-part transport-padding + + qp-part := qp-section + ; Maximum length of 76 characters + + qp-segment := qp-section *(SPACE / TAB) "=" + ; Maximum length of 76 characters + + qp-section := [*(ptext / SPACE / TAB) ptext] + + ptext := hex-octet / safe-char + + safe-char := <any octet with decimal value of 33 through + 60 inclusive, and 62 through 126> + ; Characters not listed as "mail-safe" in + ; RFC 2049 are also not recommended. + + hex-octet := "=" 2(DIGIT / "A" / "B" / "C" / "D" / "E" / "F") + ; Octet must be used for characters > 127, =, + ; SPACEs or TABs at the ends of lines, and is + ; recommended for any character not listed in + ; RFC 2049 as "mail-safe". + + transport-padding := *LWSP-char + ; Composers MUST NOT generate + ; non-zero length transport + ; padding, but receivers MUST + ; be able to handle padding + ; added by message transports. + + IMPORTANT: The addition of LWSP between the elements shown in this + BNF is NOT allowed since this BNF does not specify a structured + header field. + + + + + +Freed & Borenstein Standards Track [Page 23] + +RFC 2045 Internet Message Bodies November 1996 + + +6.8. Base64 Content-Transfer-Encoding + + The Base64 Content-Transfer-Encoding is designed to represent + arbitrary sequences of octets in a form that need not be humanly + readable. The encoding and decoding algorithms are simple, but the + encoded data are consistently only about 33 percent larger than the + unencoded data. This encoding is virtually identical to the one used + in Privacy Enhanced Mail (PEM) applications, as defined in RFC 1421. + + A 65-character subset of US-ASCII is used, enabling 6 bits to be + represented per printable character. (The extra 65th character, "=", + is used to signify a special processing function.) + + NOTE: This subset has the important property that it is represented + identically in all versions of ISO 646, including US-ASCII, and all + characters in the subset are also represented identically in all + versions of EBCDIC. Other popular encodings, such as the encoding + used by the uuencode utility, Macintosh binhex 4.0 [RFC-1741], and + the base85 encoding specified as part of Level 2 PostScript, do not + share these properties, and thus do not fulfill the portability + requirements a binary transport encoding for mail must meet. + + The encoding process represents 24-bit groups of input bits as output + strings of 4 encoded characters. Proceeding from left to right, a + 24-bit input group is formed by concatenating 3 8bit input groups. + These 24 bits are then treated as 4 concatenated 6-bit groups, each + of which is translated into a single digit in the base64 alphabet. + When encoding a bit stream via the base64 encoding, the bit stream + must be presumed to be ordered with the most-significant-bit first. + That is, the first bit in the stream will be the high-order bit in + the first 8bit byte, and the eighth bit will be the low-order bit in + the first 8bit byte, and so on. + + Each 6-bit group is used as an index into an array of 64 printable + characters. The character referenced by the index is placed in the + output string. These characters, identified in Table 1, below, are + selected so as to be universally representable, and the set excludes + characters with particular significance to SMTP (e.g., ".", CR, LF) + and to the multipart boundary delimiters defined in RFC 2046 (e.g., + "-"). + + + + + + + + + + + +Freed & Borenstein Standards Track [Page 24] + +RFC 2045 Internet Message Bodies November 1996 + + + Table 1: The Base64 Alphabet + + Value Encoding Value Encoding Value Encoding Value Encoding + 0 A 17 R 34 i 51 z + 1 B 18 S 35 j 52 0 + 2 C 19 T 36 k 53 1 + 3 D 20 U 37 l 54 2 + 4 E 21 V 38 m 55 3 + 5 F 22 W 39 n 56 4 + 6 G 23 X 40 o 57 5 + 7 H 24 Y 41 p 58 6 + 8 I 25 Z 42 q 59 7 + 9 J 26 a 43 r 60 8 + 10 K 27 b 44 s 61 9 + 11 L 28 c 45 t 62 + + 12 M 29 d 46 u 63 / + 13 N 30 e 47 v + 14 O 31 f 48 w (pad) = + 15 P 32 g 49 x + 16 Q 33 h 50 y + + The encoded output stream must be represented in lines of no more + than 76 characters each. All line breaks or other characters not + found in Table 1 must be ignored by decoding software. In base64 + data, characters other than those in Table 1, line breaks, and other + white space probably indicate a transmission error, about which a + warning message or even a message rejection might be appropriate + under some circumstances. + + Special processing is performed if fewer than 24 bits are available + at the end of the data being encoded. A full encoding quantum is + always completed at the end of a body. When fewer than 24 input bits + are available in an input group, zero bits are added (on the right) + to form an integral number of 6-bit groups. Padding at the end of + the data is performed using the "=" character. Since all base64 + input is an integral number of octets, only the following cases can + arise: (1) the final quantum of encoding input is an integral + multiple of 24 bits; here, the final unit of encoded output will be + an integral multiple of 4 characters with no "=" padding, (2) the + final quantum of encoding input is exactly 8 bits; here, the final + unit of encoded output will be two characters followed by two "=" + padding characters, or (3) the final quantum of encoding input is + exactly 16 bits; here, the final unit of encoded output will be three + characters followed by one "=" padding character. + + Because it is used only for padding at the end of the data, the + occurrence of any "=" characters may be taken as evidence that the + end of the data has been reached (without truncation in transit). No + + + +Freed & Borenstein Standards Track [Page 25] + +RFC 2045 Internet Message Bodies November 1996 + + + such assurance is possible, however, when the number of octets + transmitted was a multiple of three and no "=" characters are + present. + + Any characters outside of the base64 alphabet are to be ignored in + base64-encoded data. + + Care must be taken to use the proper octets for line breaks if base64 + encoding is applied directly to text material that has not been + converted to canonical form. In particular, text line breaks must be + converted into CRLF sequences prior to base64 encoding. The + important thing to note is that this may be done directly by the + encoder rather than in a prior canonicalization step in some + implementations. + + NOTE: There is no need to worry about quoting potential boundary + delimiters within base64-encoded bodies within multipart entities + because no hyphen characters are used in the base64 encoding. + +7. Content-ID Header Field + + In constructing a high-level user agent, it may be desirable to allow + one body to make reference to another. Accordingly, bodies may be + labelled using the "Content-ID" header field, which is syntactically + identical to the "Message-ID" header field: + + id := "Content-ID" ":" msg-id + + Like the Message-ID values, Content-ID values must be generated to be + world-unique. + + The Content-ID value may be used for uniquely identifying MIME + entities in several contexts, particularly for caching data + referenced by the message/external-body mechanism. Although the + Content-ID header is generally optional, its use is MANDATORY in + implementations which generate data of the optional MIME media type + "message/external-body". That is, each message/external-body entity + must have a Content-ID field to permit caching of such data. + + It is also worth noting that the Content-ID value has special + semantics in the case of the multipart/alternative media type. This + is explained in the section of RFC 2046 dealing with + multipart/alternative. + + + + + + + + +Freed & Borenstein Standards Track [Page 26] + +RFC 2045 Internet Message Bodies November 1996 + + +8. Content-Description Header Field + + The ability to associate some descriptive information with a given + body is often desirable. For example, it may be useful to mark an + "image" body as "a picture of the Space Shuttle Endeavor." Such text + may be placed in the Content-Description header field. This header + field is always optional. + + description := "Content-Description" ":" *text + + The description is presumed to be given in the US-ASCII character + set, although the mechanism specified in RFC 2047 may be used for + non-US-ASCII Content-Description values. + +9. Additional MIME Header Fields + + Future documents may elect to define additional MIME header fields + for various purposes. Any new header field that further describes + the content of a message should begin with the string "Content-" to + allow such fields which appear in a message header to be + distinguished from ordinary RFC 822 message header fields. + + MIME-extension-field := <Any RFC 822 header field which + begins with the string + "Content-"> + +10. Summary + + Using the MIME-Version, Content-Type, and Content-Transfer-Encoding + header fields, it is possible to include, in a standardized way, + arbitrary types of data with RFC 822 conformant mail messages. No + restrictions imposed by either RFC 821 or RFC 822 are violated, and + care has been taken to avoid problems caused by additional + restrictions imposed by the characteristics of some Internet mail + transport mechanisms (see RFC 2049). + + The next document in this set, RFC 2046, specifies the initial set of + media types that can be labelled and transported using these headers. + +11. Security Considerations + + Security issues are discussed in the second document in this set, RFC + 2046. + + + + + + + + +Freed & Borenstein Standards Track [Page 27] + +RFC 2045 Internet Message Bodies November 1996 + + +12. Authors' Addresses + + For more information, the authors of this document are best contacted + via Internet mail: + + Ned Freed + Innosoft International, Inc. + 1050 East Garvey Avenue South + West Covina, CA 91790 + USA + + Phone: +1 818 919 3600 + Fax: +1 818 919 3614 + EMail: ned@innosoft.com + + + Nathaniel S. Borenstein + First Virtual Holdings + 25 Washington Avenue + Morristown, NJ 07960 + USA + + Phone: +1 201 540 8967 + Fax: +1 201 993 3032 + EMail: nsb@nsb.fv.com + + + MIME is a result of the work of the Internet Engineering Task Force + Working Group on RFC 822 Extensions. The chairman of that group, + Greg Vaudreuil, may be reached at: + + Gregory M. Vaudreuil + Octel Network Services + 17080 Dallas Parkway + Dallas, TX 75248-1905 + USA + + EMail: Greg.Vaudreuil@Octel.Com + + + + + + + + + + + + + +Freed & Borenstein Standards Track [Page 28] + +RFC 2045 Internet Message Bodies November 1996 + + +Appendix A -- Collected Grammar + + This appendix contains the complete BNF grammar for all the syntax + specified by this document. + + By itself, however, this grammar is incomplete. It refers by name to + several syntax rules that are defined by RFC 822. Rather than + reproduce those definitions here, and risk unintentional differences + between the two, this document simply refers the reader to RFC 822 + for the remaining definitions. Wherever a term is undefined, it + refers to the RFC 822 definition. + + attribute := token + ; Matching of attributes + ; is ALWAYS case-insensitive. + + composite-type := "message" / "multipart" / extension-token + + content := "Content-Type" ":" type "/" subtype + *(";" parameter) + ; Matching of media type and subtype + ; is ALWAYS case-insensitive. + + description := "Content-Description" ":" *text + + discrete-type := "text" / "image" / "audio" / "video" / + "application" / extension-token + + encoding := "Content-Transfer-Encoding" ":" mechanism + + entity-headers := [ content CRLF ] + [ encoding CRLF ] + [ id CRLF ] + [ description CRLF ] + *( MIME-extension-field CRLF ) + + extension-token := ietf-token / x-token + + hex-octet := "=" 2(DIGIT / "A" / "B" / "C" / "D" / "E" / "F") + ; Octet must be used for characters > 127, =, + ; SPACEs or TABs at the ends of lines, and is + ; recommended for any character not listed in + ; RFC 2049 as "mail-safe". + + iana-token := <A publicly-defined extension token. Tokens + of this form must be registered with IANA + as specified in RFC 2048.> + + + + +Freed & Borenstein Standards Track [Page 29] + +RFC 2045 Internet Message Bodies November 1996 + + + ietf-token := <An extension token defined by a + standards-track RFC and registered + with IANA.> + + id := "Content-ID" ":" msg-id + + mechanism := "7bit" / "8bit" / "binary" / + "quoted-printable" / "base64" / + ietf-token / x-token + + MIME-extension-field := <Any RFC 822 header field which + begins with the string + "Content-"> + + MIME-message-headers := entity-headers + fields + version CRLF + ; The ordering of the header + ; fields implied by this BNF + ; definition should be ignored. + + MIME-part-headers := entity-headers + [fields] + ; Any field not beginning with + ; "content-" can have no defined + ; meaning and may be ignored. + ; The ordering of the header + ; fields implied by this BNF + ; definition should be ignored. + + parameter := attribute "=" value + + ptext := hex-octet / safe-char + + qp-line := *(qp-segment transport-padding CRLF) + qp-part transport-padding + + qp-part := qp-section + ; Maximum length of 76 characters + + qp-section := [*(ptext / SPACE / TAB) ptext] + + qp-segment := qp-section *(SPACE / TAB) "=" + ; Maximum length of 76 characters + + quoted-printable := qp-line *(CRLF qp-line) + + + + + +Freed & Borenstein Standards Track [Page 30] + +RFC 2045 Internet Message Bodies November 1996 + + + safe-char := <any octet with decimal value of 33 through + 60 inclusive, and 62 through 126> + ; Characters not listed as "mail-safe" in + ; RFC 2049 are also not recommended. + + subtype := extension-token / iana-token + + token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, + or tspecials> + + transport-padding := *LWSP-char + ; Composers MUST NOT generate + ; non-zero length transport + ; padding, but receivers MUST + ; be able to handle padding + ; added by message transports. + + tspecials := "(" / ")" / "<" / ">" / "@" / + "," / ";" / ":" / "\" / <"> + "/" / "[" / "]" / "?" / "=" + ; Must be in quoted-string, + ; to use within parameter values + + type := discrete-type / composite-type + + value := token / quoted-string + + version := "MIME-Version" ":" 1*DIGIT "." 1*DIGIT + + x-token := <The two characters "X-" or "x-" followed, with + no intervening white space, by any token> + + + + + + + + + + + + + + + + + + + + +Freed & Borenstein Standards Track [Page 31] + diff --git a/doc/mime-p2-rfc2046.txt b/doc/mime-p2-rfc2046.txt @@ -0,0 +1,2467 @@ + + + + + + +Network Working Group N. Freed +Request for Comments: 2046 Innosoft +Obsoletes: 1521, 1522, 1590 N. Borenstein +Category: Standards Track First Virtual + November 1996 + + + Multipurpose Internet Mail Extensions + (MIME) Part Two: + Media Types + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Abstract + + STD 11, RFC 822 defines a message representation protocol specifying + considerable detail about US-ASCII message headers, but which leaves + the message content, or message body, as flat US-ASCII text. This + set of documents, collectively called the Multipurpose Internet Mail + Extensions, or MIME, redefines the format of messages to allow for + + (1) textual message bodies in character sets other than + US-ASCII, + + (2) an extensible set of different formats for non-textual + message bodies, + + (3) multi-part message bodies, and + + (4) textual header information in character sets other than + US-ASCII. + + These documents are based on earlier work documented in RFC 934, STD + 11, and RFC 1049, but extends and revises them. Because RFC 822 said + so little about message bodies, these documents are largely + orthogonal to (rather than a revision of) RFC 822. + + The initial document in this set, RFC 2045, specifies the various + headers used to describe the structure of MIME messages. This second + document defines the general structure of the MIME media typing + system and defines an initial set of media types. The third document, + RFC 2047, describes extensions to RFC 822 to allow non-US-ASCII text + + + +Freed & Borenstein Standards Track [Page 1] + +RFC 2046 Media Types November 1996 + + + data in Internet mail header fields. The fourth document, RFC 2048, + specifies various IANA registration procedures for MIME-related + facilities. The fifth and final document, RFC 2049, describes MIME + conformance criteria as well as providing some illustrative examples + of MIME message formats, acknowledgements, and the bibliography. + + These documents are revisions of RFCs 1521 and 1522, which themselves + were revisions of RFCs 1341 and 1342. An appendix in RFC 2049 + describes differences and changes from previous versions. + +Table of Contents + + 1. Introduction ......................................... 3 + 2. Definition of a Top-Level Media Type ................. 4 + 3. Overview Of The Initial Top-Level Media Types ........ 4 + 4. Discrete Media Type Values ........................... 6 + 4.1 Text Media Type ..................................... 6 + 4.1.1 Representation of Line Breaks ..................... 7 + 4.1.2 Charset Parameter ................................. 7 + 4.1.3 Plain Subtype ..................................... 11 + 4.1.4 Unrecognized Subtypes ............................. 11 + 4.2 Image Media Type .................................... 11 + 4.3 Audio Media Type .................................... 11 + 4.4 Video Media Type .................................... 12 + 4.5 Application Media Type .............................. 12 + 4.5.1 Octet-Stream Subtype .............................. 13 + 4.5.2 PostScript Subtype ................................ 14 + 4.5.3 Other Application Subtypes ........................ 17 + 5. Composite Media Type Values .......................... 17 + 5.1 Multipart Media Type ................................ 17 + 5.1.1 Common Syntax ..................................... 19 + 5.1.2 Handling Nested Messages and Multiparts ........... 24 + 5.1.3 Mixed Subtype ..................................... 24 + 5.1.4 Alternative Subtype ............................... 24 + 5.1.5 Digest Subtype .................................... 26 + 5.1.6 Parallel Subtype .................................. 27 + 5.1.7 Other Multipart Subtypes .......................... 28 + 5.2 Message Media Type .................................. 28 + 5.2.1 RFC822 Subtype .................................... 28 + 5.2.2 Partial Subtype ................................... 29 + 5.2.2.1 Message Fragmentation and Reassembly ............ 30 + 5.2.2.2 Fragmentation and Reassembly Example ............ 31 + 5.2.3 External-Body Subtype ............................. 33 + 5.2.4 Other Message Subtypes ............................ 40 + 6. Experimental Media Type Values ....................... 40 + 7. Summary .............................................. 41 + 8. Security Considerations .............................. 41 + 9. Authors' Addresses ................................... 42 + + + +Freed & Borenstein Standards Track [Page 2] + +RFC 2046 Media Types November 1996 + + + A. Collected Grammar .................................... 43 + +1. Introduction + + The first document in this set, RFC 2045, defines a number of header + fields, including Content-Type. The Content-Type field is used to + specify the nature of the data in the body of a MIME entity, by + giving media type and subtype identifiers, and by providing auxiliary + information that may be required for certain media types. After the + type and subtype names, the remainder of the header field is simply a + set of parameters, specified in an attribute/value notation. The + ordering of parameters is not significant. + + In general, the top-level media type is used to declare the general + type of data, while the subtype specifies a specific format for that + type of data. Thus, a media type of "image/xyz" is enough to tell a + user agent that the data is an image, even if the user agent has no + knowledge of the specific image format "xyz". Such information can + be used, for example, to decide whether or not to show a user the raw + data from an unrecognized subtype -- such an action might be + reasonable for unrecognized subtypes of "text", but not for + unrecognized subtypes of "image" or "audio". For this reason, + registered subtypes of "text", "image", "audio", and "video" should + not contain embedded information that is really of a different type. + Such compound formats should be represented using the "multipart" or + "application" types. + + Parameters are modifiers of the media subtype, and as such do not + fundamentally affect the nature of the content. The set of + meaningful parameters depends on the media type and subtype. Most + parameters are associated with a single specific subtype. However, a + given top-level media type may define parameters which are applicable + to any subtype of that type. Parameters may be required by their + defining media type or subtype or they may be optional. MIME + implementations must also ignore any parameters whose names they do + not recognize. + + MIME's Content-Type header field and media type mechanism has been + carefully designed to be extensible, and it is expected that the set + of media type/subtype pairs and their associated parameters will grow + significantly over time. Several other MIME facilities, such as + transfer encodings and "message/external-body" access types, are + likely to have new values defined over time. In order to ensure that + the set of such values is developed in an orderly, well-specified, + and public manner, MIME sets up a registration process which uses the + Internet Assigned Numbers Authority (IANA) as a central registry for + MIME's various areas of extensibility. The registration process for + these areas is described in a companion document, RFC 2048. + + + +Freed & Borenstein Standards Track [Page 3] + +RFC 2046 Media Types November 1996 + + + The initial seven standard top-level media type are defined and + described in the remainder of this document. + +2. Definition of a Top-Level Media Type + + The definition of a top-level media type consists of: + + (1) a name and a description of the type, including + criteria for whether a particular type would qualify + under that type, + + (2) the names and definitions of parameters, if any, which + are defined for all subtypes of that type (including + whether such parameters are required or optional), + + (3) how a user agent and/or gateway should handle unknown + subtypes of this type, + + (4) general considerations on gatewaying entities of this + top-level type, if any, and + + (5) any restrictions on content-transfer-encodings for + entities of this top-level type. + +3. Overview Of The Initial Top-Level Media Types + + The five discrete top-level media types are: + + (1) text -- textual information. The subtype "plain" in + particular indicates plain text containing no + formatting commands or directives of any sort. Plain + text is intended to be displayed "as-is". No special + software is required to get the full meaning of the + text, aside from support for the indicated character + set. Other subtypes are to be used for enriched text in + forms where application software may enhance the + appearance of the text, but such software must not be + required in order to get the general idea of the + content. Possible subtypes of "text" thus include any + word processor format that can be read without + resorting to software that understands the format. In + particular, formats that employ embeddded binary + formatting information are not considered directly + readable. A very simple and portable subtype, + "richtext", was defined in RFC 1341, with a further + revision in RFC 1896 under the name "enriched". + + + + + +Freed & Borenstein Standards Track [Page 4] + +RFC 2046 Media Types November 1996 + + + (2) image -- image data. "Image" requires a display device + (such as a graphical display, a graphics printer, or a + FAX machine) to view the information. An initial + subtype is defined for the widely-used image format + JPEG. . subtypes are defined for two widely-used image + formats, jpeg and gif. + + (3) audio -- audio data. "Audio" requires an audio output + device (such as a speaker or a telephone) to "display" + the contents. An initial subtype "basic" is defined in + this document. + + (4) video -- video data. "Video" requires the capability + to display moving images, typically including + specialized hardware and software. An initial subtype + "mpeg" is defined in this document. + + (5) application -- some other kind of data, typically + either uninterpreted binary data or information to be + processed by an application. The subtype "octet- + stream" is to be used in the case of uninterpreted + binary data, in which case the simplest recommended + action is to offer to write the information into a file + for the user. The "PostScript" subtype is also defined + for the transport of PostScript material. Other + expected uses for "application" include spreadsheets, + data for mail-based scheduling systems, and languages + for "active" (computational) messaging, and word + processing formats that are not directly readable. + Note that security considerations may exist for some + types of application data, most notably + "application/PostScript" and any form of active + messaging. These issues are discussed later in this + document. + + The two composite top-level media types are: + + (1) multipart -- data consisting of multiple entities of + independent data types. Four subtypes are initially + defined, including the basic "mixed" subtype specifying + a generic mixed set of parts, "alternative" for + representing the same data in multiple formats, + "parallel" for parts intended to be viewed + simultaneously, and "digest" for multipart entities in + which each part has a default type of "message/rfc822". + + + + + + +Freed & Borenstein Standards Track [Page 5] + +RFC 2046 Media Types November 1996 + + + (2) message -- an encapsulated message. A body of media + type "message" is itself all or a portion of some kind + of message object. Such objects may or may not in turn + contain other entities. The "rfc822" subtype is used + when the encapsulated content is itself an RFC 822 + message. The "partial" subtype is defined for partial + RFC 822 messages, to permit the fragmented transmission + of bodies that are thought to be too large to be passed + through transport facilities in one piece. Another + subtype, "external-body", is defined for specifying + large bodies by reference to an external data source. + + It should be noted that the list of media type values given here may + be augmented in time, via the mechanisms described above, and that + the set of subtypes is expected to grow substantially. + +4. Discrete Media Type Values + + Five of the seven initial media type values refer to discrete bodies. + The content of these types must be handled by non-MIME mechanisms; + they are opaque to MIME processors. + +4.1. Text Media Type + + The "text" media type is intended for sending material which is + principally textual in form. A "charset" parameter may be used to + indicate the character set of the body text for "text" subtypes, + notably including the subtype "text/plain", which is a generic + subtype for plain text. Plain text does not provide for or allow + formatting commands, font attribute specifications, processing + instructions, interpretation directives, or content markup. Plain + text is seen simply as a linear sequence of characters, possibly + interrupted by line breaks or page breaks. Plain text may allow the + stacking of several characters in the same position in the text. + Plain text in scripts like Arabic and Hebrew may also include + facilitites that allow the arbitrary mixing of text segments with + opposite writing directions. + + Beyond plain text, there are many formats for representing what might + be known as "rich text". An interesting characteristic of many such + representations is that they are to some extent readable even without + the software that interprets them. It is useful, then, to + distinguish them, at the highest level, from such unreadable data as + images, audio, or text represented in an unreadable form. In the + absence of appropriate interpretation software, it is reasonable to + show subtypes of "text" to the user, while it is not reasonable to do + so with most nontextual data. Such formatted textual data should be + represented using subtypes of "text". + + + +Freed & Borenstein Standards Track [Page 6] + +RFC 2046 Media Types November 1996 + + +4.1.1. Representation of Line Breaks + + The canonical form of any MIME "text" subtype MUST always represent a + line break as a CRLF sequence. Similarly, any occurrence of CRLF in + MIME "text" MUST represent a line break. Use of CR and LF outside of + line break sequences is also forbidden. + + This rule applies regardless of format or character set or sets + involved. + + NOTE: The proper interpretation of line breaks when a body is + displayed depends on the media type. In particular, while it is + appropriate to treat a line break as a transition to a new line when + displaying a "text/plain" body, this treatment is actually incorrect + for other subtypes of "text" like "text/enriched" [RFC-1896]. + Similarly, whether or not line breaks should be added during display + operations is also a function of the media type. It should not be + necessary to add any line breaks to display "text/plain" correctly, + whereas proper display of "text/enriched" requires the appropriate + addition of line breaks. + + NOTE: Some protocols defines a maximum line length. E.g. SMTP [RFC- + 821] allows a maximum of 998 octets before the next CRLF sequence. + To be transported by such protocols, data which includes too long + segments without CRLF sequences must be encoded with a suitable + content-transfer-encoding. + +4.1.2. Charset Parameter + + A critical parameter that may be specified in the Content-Type field + for "text/plain" data is the character set. This is specified with a + "charset" parameter, as in: + + Content-type: text/plain; charset=iso-8859-1 + + Unlike some other parameter values, the values of the charset + parameter are NOT case sensitive. The default character set, which + must be assumed in the absence of a charset parameter, is US-ASCII. + + The specification for any future subtypes of "text" must specify + whether or not they will also utilize a "charset" parameter, and may + possibly restrict its values as well. For other subtypes of "text" + than "text/plain", the semantics of the "charset" parameter should be + defined to be identical to those specified here for "text/plain", + i.e., the body consists entirely of characters in the given charset. + In particular, definers of future "text" subtypes should pay close + attention to the implications of multioctet character sets for their + subtype definitions. + + + +Freed & Borenstein Standards Track [Page 7] + +RFC 2046 Media Types November 1996 + + + The charset parameter for subtypes of "text" gives a name of a + character set, as "character set" is defined in RFC 2045. The rules + regarding line breaks detailed in the previous section must also be + observed -- a character set whose definition does not conform to + these rules cannot be used in a MIME "text" subtype. + + An initial list of predefined character set names can be found at the + end of this section. Additional character sets may be registered + with IANA. + + Other media types than subtypes of "text" might choose to employ the + charset parameter as defined here, but with the CRLF/line break + restriction removed. Therefore, all character sets that conform to + the general definition of "character set" in RFC 2045 can be + registered for MIME use. + + Note that if the specified character set includes 8-bit characters + and such characters are used in the body, a Content-Transfer-Encoding + header field and a corresponding encoding on the data are required in + order to transmit the body via some mail transfer protocols, such as + SMTP [RFC-821]. + + The default character set, US-ASCII, has been the subject of some + confusion and ambiguity in the past. Not only were there some + ambiguities in the definition, there have been wide variations in + practice. In order to eliminate such ambiguity and variations in the + future, it is strongly recommended that new user agents explicitly + specify a character set as a media type parameter in the Content-Type + header field. "US-ASCII" does not indicate an arbitrary 7-bit + character set, but specifies that all octets in the body must be + interpreted as characters according to the US-ASCII character set. + National and application-oriented versions of ISO 646 [ISO-646] are + usually NOT identical to US-ASCII, and in that case their use in + Internet mail is explicitly discouraged. The omission of the ISO 646 + character set from this document is deliberate in this regard. The + character set name of "US-ASCII" explicitly refers to the character + set defined in ANSI X3.4-1986 [US- ASCII]. The new international + reference version (IRV) of the 1991 edition of ISO 646 is identical + to US-ASCII. The character set name "ASCII" is reserved and must not + be used for any purpose. + + NOTE: RFC 821 explicitly specifies "ASCII", and references an earlier + version of the American Standard. Insofar as one of the purposes of + specifying a media type and character set is to permit the receiver + to unambiguously determine how the sender intended the coded message + to be interpreted, assuming anything other than "strict ASCII" as the + default would risk unintentional and incompatible changes to the + semantics of messages now being transmitted. This also implies that + + + +Freed & Borenstein Standards Track [Page 8] + +RFC 2046 Media Types November 1996 + + + messages containing characters coded according to other versions of + ISO 646 than US-ASCII and the 1991 IRV, or using code-switching + procedures (e.g., those of ISO 2022), as well as 8bit or multiple + octet character encodings MUST use an appropriate character set + specification to be consistent with MIME. + + The complete US-ASCII character set is listed in ANSI X3.4- 1986. + Note that the control characters including DEL (0-31, 127) have no + defined meaning in apart from the combination CRLF (US-ASCII values + 13 and 10) indicating a new line. Two of the characters have de + facto meanings in wide use: FF (12) often means "start subsequent + text on the beginning of a new page"; and TAB or HT (9) often (though + not always) means "move the cursor to the next available column after + the current position where the column number is a multiple of 8 + (counting the first column as column 0)." Aside from these + conventions, any use of the control characters or DEL in a body must + either occur + + (1) because a subtype of text other than "plain" + specifically assigns some additional meaning, or + + (2) within the context of a private agreement between the + sender and recipient. Such private agreements are + discouraged and should be replaced by the other + capabilities of this document. + + NOTE: An enormous proliferation of character sets exist beyond US- + ASCII. A large number of partially or totally overlapping character + sets is NOT a good thing. A SINGLE character set that can be used + universally for representing all of the world's languages in Internet + mail would be preferrable. Unfortunately, existing practice in + several communities seems to point to the continued use of multiple + character sets in the near future. A small number of standard + character sets are, therefore, defined for Internet use in this + document. + + The defined charset values are: + + (1) US-ASCII -- as defined in ANSI X3.4-1986 [US-ASCII]. + + (2) ISO-8859-X -- where "X" is to be replaced, as + necessary, for the parts of ISO-8859 [ISO-8859]. Note + that the ISO 646 character sets have deliberately been + omitted in favor of their 8859 replacements, which are + the designated character sets for Internet mail. As of + the publication of this document, the legitimate values + for "X" are the digits 1 through 10. + + + + +Freed & Borenstein Standards Track [Page 9] + +RFC 2046 Media Types November 1996 + + + Characters in the range 128-159 has no assigned meaning in ISO-8859- + X. Characters with values below 128 in ISO-8859-X have the same + assigned meaning as they do in US-ASCII. + + Part 6 of ISO 8859 (Latin/Arabic alphabet) and part 8 (Latin/Hebrew + alphabet) includes both characters for which the normal writing + direction is right to left and characters for which it is left to + right, but do not define a canonical ordering method for representing + bi-directional text. The charset values "ISO-8859-6" and "ISO-8859- + 8", however, specify that the visual method is used [RFC-1556]. + + All of these character sets are used as pure 7bit or 8bit sets + without any shift or escape functions. The meaning of shift and + escape sequences in these character sets is not defined. + + The character sets specified above are the ones that were relatively + uncontroversial during the drafting of MIME. This document does not + endorse the use of any particular character set other than US-ASCII, + and recognizes that the future evolution of world character sets + remains unclear. + + Note that the character set used, if anything other than US- ASCII, + must always be explicitly specified in the Content-Type field. + + No character set name other than those defined above may be used in + Internet mail without the publication of a formal specification and + its registration with IANA, or by private agreement, in which case + the character set name must begin with "X-". + + Implementors are discouraged from defining new character sets unless + absolutely necessary. + + The "charset" parameter has been defined primarily for the purpose of + textual data, and is described in this section for that reason. + However, it is conceivable that non-textual data might also wish to + specify a charset value for some purpose, in which case the same + syntax and values should be used. + + In general, composition software should always use the "lowest common + denominator" character set possible. For example, if a body contains + only US-ASCII characters, it SHOULD be marked as being in the US- + ASCII character set, not ISO-8859-1, which, like all the ISO-8859 + family of character sets, is a superset of US-ASCII. More generally, + if a widely-used character set is a subset of another character set, + and a body contains only characters in the widely-used subset, it + should be labelled as being in that subset. This will increase the + chances that the recipient will be able to view the resulting entity + correctly. + + + +Freed & Borenstein Standards Track [Page 10] + +RFC 2046 Media Types November 1996 + + +4.1.3. Plain Subtype + + The simplest and most important subtype of "text" is "plain". This + indicates plain text that does not contain any formatting commands or + directives. Plain text is intended to be displayed "as-is", that is, + no interpretation of embedded formatting commands, font attribute + specifications, processing instructions, interpretation directives, + or content markup should be necessary for proper display. The + default media type of "text/plain; charset=us-ascii" for Internet + mail describes existing Internet practice. That is, it is the type + of body defined by RFC 822. + + No other "text" subtype is defined by this document. + +4.1.4. Unrecognized Subtypes + + Unrecognized subtypes of "text" should be treated as subtype "plain" + as long as the MIME implementation knows how to handle the charset. + Unrecognized subtypes which also specify an unrecognized charset + should be treated as "application/octet- stream". + +4.2. Image Media Type + + A media type of "image" indicates that the body contains an image. + The subtype names the specific image format. These names are not + case sensitive. An initial subtype is "jpeg" for the JPEG format + using JFIF encoding [JPEG]. + + The list of "image" subtypes given here is neither exclusive nor + exhaustive, and is expected to grow as more types are registered with + IANA, as described in RFC 2048. + + Unrecognized subtypes of "image" should at a miniumum be treated as + "application/octet-stream". Implementations may optionally elect to + pass subtypes of "image" that they do not specifically recognize to a + secure and robust general-purpose image viewing application, if such + an application is available. + + NOTE: Using of a generic-purpose image viewing application this way + inherits the security problems of the most dangerous type supported + by the application. + +4.3. Audio Media Type + + A media type of "audio" indicates that the body contains audio data. + Although there is not yet a consensus on an "ideal" audio format for + use with computers, there is a pressing need for a format capable of + providing interoperable behavior. + + + +Freed & Borenstein Standards Track [Page 11] + +RFC 2046 Media Types November 1996 + + + The initial subtype of "basic" is specified to meet this requirement + by providing an absolutely minimal lowest common denominator audio + format. It is expected that richer formats for higher quality and/or + lower bandwidth audio will be defined by a later document. + + The content of the "audio/basic" subtype is single channel audio + encoded using 8bit ISDN mu-law [PCM] at a sample rate of 8000 Hz. + + Unrecognized subtypes of "audio" should at a miniumum be treated as + "application/octet-stream". Implementations may optionally elect to + pass subtypes of "audio" that they do not specifically recognize to a + robust general-purpose audio playing application, if such an + application is available. + +4.4. Video Media Type + + A media type of "video" indicates that the body contains a time- + varying-picture image, possibly with color and coordinated sound. + The term 'video' is used in its most generic sense, rather than with + reference to any particular technology or format, and is not meant to + preclude subtypes such as animated drawings encoded compactly. The + subtype "mpeg" refers to video coded according to the MPEG standard + [MPEG]. + + Note that although in general this document strongly discourages the + mixing of multiple media in a single body, it is recognized that many + so-called video formats include a representation for synchronized + audio, and this is explicitly permitted for subtypes of "video". + + Unrecognized subtypes of "video" should at a minumum be treated as + "application/octet-stream". Implementations may optionally elect to + pass subtypes of "video" that they do not specifically recognize to a + robust general-purpose video display application, if such an + application is available. + +4.5. Application Media Type + + The "application" media type is to be used for discrete data which do + not fit in any of the other categories, and particularly for data to + be processed by some type of application program. This is + information which must be processed by an application before it is + viewable or usable by a user. Expected uses for the "application" + media type include file transfer, spreadsheets, data for mail-based + scheduling systems, and languages for "active" (computational) + material. (The latter, in particular, can pose security problems + which must be understood by implementors, and are considered in + detail in the discussion of the "application/PostScript" media type.) + + + + +Freed & Borenstein Standards Track [Page 12] + +RFC 2046 Media Types November 1996 + + + For example, a meeting scheduler might define a standard + representation for information about proposed meeting dates. An + intelligent user agent would use this information to conduct a dialog + with the user, and might then send additional material based on that + dialog. More generally, there have been several "active" messaging + languages developed in which programs in a suitably specialized + language are transported to a remote location and automatically run + in the recipient's environment. + + Such applications may be defined as subtypes of the "application" + media type. This document defines two subtypes: + + octet-stream, and PostScript. + + The subtype of "application" will often be either the name or include + part of the name of the application for which the data are intended. + This does not mean, however, that any application program name may be + used freely as a subtype of "application". + +4.5.1. Octet-Stream Subtype + + The "octet-stream" subtype is used to indicate that a body contains + arbitrary binary data. The set of currently defined parameters is: + + (1) TYPE -- the general type or category of binary data. + This is intended as information for the human recipient + rather than for any automatic processing. + + (2) PADDING -- the number of bits of padding that were + appended to the bit-stream comprising the actual + contents to produce the enclosed 8bit byte-oriented + data. This is useful for enclosing a bit-stream in a + body when the total number of bits is not a multiple of + 8. + + Both of these parameters are optional. + + An additional parameter, "CONVERSIONS", was defined in RFC 1341 but + has since been removed. RFC 1341 also defined the use of a "NAME" + parameter which gave a suggested file name to be used if the data + were to be written to a file. This has been deprecated in + anticipation of a separate Content-Disposition header field, to be + defined in a subsequent RFC. + + The recommended action for an implementation that receives an + "application/octet-stream" entity is to simply offer to put the data + in a file, with any Content-Transfer-Encoding undone, or perhaps to + use it as input to a user-specified process. + + + +Freed & Borenstein Standards Track [Page 13] + +RFC 2046 Media Types November 1996 + + + To reduce the danger of transmitting rogue programs, it is strongly + recommended that implementations NOT implement a path-search + mechanism whereby an arbitrary program named in the Content-Type + parameter (e.g., an "interpreter=" parameter) is found and executed + using the message body as input. + +4.5.2. PostScript Subtype + + A media type of "application/postscript" indicates a PostScript + program. Currently two variants of the PostScript language are + allowed; the original level 1 variant is described in [POSTSCRIPT] + and the more recent level 2 variant is described in [POSTSCRIPT2]. + + PostScript is a registered trademark of Adobe Systems, Inc. Use of + the MIME media type "application/postscript" implies recognition of + that trademark and all the rights it entails. + + The PostScript language definition provides facilities for internal + labelling of the specific language features a given program uses. + This labelling, called the PostScript document structuring + conventions, or DSC, is very general and provides substantially more + information than just the language level. The use of document + structuring conventions, while not required, is strongly recommended + as an aid to interoperability. Documents which lack proper + structuring conventions cannot be tested to see whether or not they + will work in a given environment. As such, some systems may assume + the worst and refuse to process unstructured documents. + + The execution of general-purpose PostScript interpreters entails + serious security risks, and implementors are discouraged from simply + sending PostScript bodies to "off- the-shelf" interpreters. While it + is usually safe to send PostScript to a printer, where the potential + for harm is greatly constrained by typical printer environments, + implementors should consider all of the following before they add + interactive display of PostScript bodies to their MIME readers. + + The remainder of this section outlines some, though probably not all, + of the possible problems with the transport of PostScript entities. + + (1) Dangerous operations in the PostScript language + include, but may not be limited to, the PostScript + operators "deletefile", "renamefile", "filenameforall", + and "file". "File" is only dangerous when applied to + something other than standard input or output. + Implementations may also define additional nonstandard + file operators; these may also pose a threat to + security. "Filenameforall", the wildcard file search + operator, may appear at first glance to be harmless. + + + +Freed & Borenstein Standards Track [Page 14] + +RFC 2046 Media Types November 1996 + + + Note, however, that this operator has the potential to + reveal information about what files the recipient has + access to, and this information may itself be + sensitive. Message senders should avoid the use of + potentially dangerous file operators, since these + operators are quite likely to be unavailable in secure + PostScript implementations. Message receiving and + displaying software should either completely disable + all potentially dangerous file operators or take + special care not to delegate any special authority to + their operation. These operators should be viewed as + being done by an outside agency when interpreting + PostScript documents. Such disabling and/or checking + should be done completely outside of the reach of the + PostScript language itself; care should be taken to + insure that no method exists for re-enabling full- + function versions of these operators. + + (2) The PostScript language provides facilities for exiting + the normal interpreter, or server, loop. Changes made + in this "outer" environment are customarily retained + across documents, and may in some cases be retained + semipermanently in nonvolatile memory. The operators + associated with exiting the interpreter loop have the + potential to interfere with subsequent document + processing. As such, their unrestrained use + constitutes a threat of service denial. PostScript + operators that exit the interpreter loop include, but + may not be limited to, the exitserver and startjob + operators. Message sending software should not + generate PostScript that depends on exiting the + interpreter loop to operate, since the ability to exit + will probably be unavailable in secure PostScript + implementations. Message receiving and displaying + software should completely disable the ability to make + retained changes to the PostScript environment by + eliminating or disabling the "startjob" and + "exitserver" operations. If these operations cannot be + eliminated or completely disabled the password + associated with them should at least be set to a hard- + to-guess value. + + (3) PostScript provides operators for setting system-wide + and device-specific parameters. These parameter + settings may be retained across jobs and may + potentially pose a threat to the correct operation of + the interpreter. The PostScript operators that set + system and device parameters include, but may not be + + + +Freed & Borenstein Standards Track [Page 15] + +RFC 2046 Media Types November 1996 + + + limited to, the "setsystemparams" and "setdevparams" + operators. Message sending software should not + generate PostScript that depends on the setting of + system or device parameters to operate correctly. The + ability to set these parameters will probably be + unavailable in secure PostScript implementations. + Message receiving and displaying software should + disable the ability to change system and device + parameters. If these operators cannot be completely + disabled the password associated with them should at + least be set to a hard-to-guess value. + + (4) Some PostScript implementations provide nonstandard + facilities for the direct loading and execution of + machine code. Such facilities are quite obviously open + to substantial abuse. Message sending software should + not make use of such features. Besides being totally + hardware-specific, they are also likely to be + unavailable in secure implementations of PostScript. + Message receiving and displaying software should not + allow such operators to be used if they exist. + + (5) PostScript is an extensible language, and many, if not + most, implementations of it provide a number of their + own extensions. This document does not deal with such + extensions explicitly since they constitute an unknown + factor. Message sending software should not make use + of nonstandard extensions; they are likely to be + missing from some implementations. Message receiving + and displaying software should make sure that any + nonstandard PostScript operators are secure and don't + present any kind of threat. + + (6) It is possible to write PostScript that consumes huge + amounts of various system resources. It is also + possible to write PostScript programs that loop + indefinitely. Both types of programs have the + potential to cause damage if sent to unsuspecting + recipients. Message-sending software should avoid the + construction and dissemination of such programs, which + is antisocial. Message receiving and displaying + software should provide appropriate mechanisms to abort + processing after a reasonable amount of time has + elapsed. In addition, PostScript interpreters should be + limited to the consumption of only a reasonable amount + of any given system resource. + + + + + +Freed & Borenstein Standards Track [Page 16] + +RFC 2046 Media Types November 1996 + + + (7) It is possible to include raw binary information inside + PostScript in various forms. This is not recommended + for use in Internet mail, both because it is not + supported by all PostScript interpreters and because it + significantly complicates the use of a MIME Content- + Transfer-Encoding. (Without such binary, PostScript + may typically be viewed as line-oriented data. The + treatment of CRLF sequences becomes extremely + problematic if binary and line-oriented data are mixed + in a single Postscript data stream.) + + (8) Finally, bugs may exist in some PostScript interpreters + which could possibly be exploited to gain unauthorized + access to a recipient's system. Apart from noting this + possibility, there is no specific action to take to + prevent this, apart from the timely correction of such + bugs if any are found. + +4.5.3. Other Application Subtypes + + It is expected that many other subtypes of "application" will be + defined in the future. MIME implementations must at a minimum treat + any unrecognized subtypes as being equivalent to "application/octet- + stream". + +5. Composite Media Type Values + + The remaining two of the seven initial Content-Type values refer to + composite entities. Composite entities are handled using MIME + mechanisms -- a MIME processor typically handles the body directly. + +5.1. Multipart Media Type + + In the case of multipart entities, in which one or more different + sets of data are combined in a single body, a "multipart" media type + field must appear in the entity's header. The body must then contain + one or more body parts, each preceded by a boundary delimiter line, + and the last one followed by a closing boundary delimiter line. + After its boundary delimiter line, each body part then consists of a + header area, a blank line, and a body area. Thus a body part is + similar to an RFC 822 message in syntax, but different in meaning. + + A body part is an entity and hence is NOT to be interpreted as + actually being an RFC 822 message. To begin with, NO header fields + are actually required in body parts. A body part that starts with a + blank line, therefore, is allowed and is a body part for which all + default values are to be assumed. In such a case, the absence of a + Content-Type header usually indicates that the corresponding body has + + + +Freed & Borenstein Standards Track [Page 17] + +RFC 2046 Media Types November 1996 + + + a content-type of "text/plain; charset=US-ASCII". + + The only header fields that have defined meaning for body parts are + those the names of which begin with "Content-". All other header + fields may be ignored in body parts. Although they should generally + be retained if at all possible, they may be discarded by gateways if + necessary. Such other fields are permitted to appear in body parts + but must not be depended on. "X-" fields may be created for + experimental or private purposes, with the recognition that the + information they contain may be lost at some gateways. + + NOTE: The distinction between an RFC 822 message and a body part is + subtle, but important. A gateway between Internet and X.400 mail, + for example, must be able to tell the difference between a body part + that contains an image and a body part that contains an encapsulated + message, the body of which is a JPEG image. In order to represent + the latter, the body part must have "Content-Type: message/rfc822", + and its body (after the blank line) must be the encapsulated message, + with its own "Content-Type: image/jpeg" header field. The use of + similar syntax facilitates the conversion of messages to body parts, + and vice versa, but the distinction between the two must be + understood by implementors. (For the special case in which parts + actually are messages, a "digest" subtype is also defined.) + + As stated previously, each body part is preceded by a boundary + delimiter line that contains the boundary delimiter. The boundary + delimiter MUST NOT appear inside any of the encapsulated parts, on a + line by itself or as the prefix of any line. This implies that it is + crucial that the composing agent be able to choose and specify a + unique boundary parameter value that does not contain the boundary + parameter value of an enclosing multipart as a prefix. + + All present and future subtypes of the "multipart" type must use an + identical syntax. Subtypes may differ in their semantics, and may + impose additional restrictions on syntax, but must conform to the + required syntax for the "multipart" type. This requirement ensures + that all conformant user agents will at least be able to recognize + and separate the parts of any multipart entity, even those of an + unrecognized subtype. + + As stated in the definition of the Content-Transfer-Encoding field + [RFC 2045], no encoding other than "7bit", "8bit", or "binary" is + permitted for entities of type "multipart". The "multipart" boundary + delimiters and header fields are always represented as 7bit US-ASCII + in any case (though the header fields may encode non-US-ASCII header + text as per RFC 2047) and data within the body parts can be encoded + on a part-by-part basis, with Content-Transfer-Encoding fields for + each appropriate body part. + + + +Freed & Borenstein Standards Track [Page 18] + +RFC 2046 Media Types November 1996 + + +5.1.1. Common Syntax + + This section defines a common syntax for subtypes of "multipart". + All subtypes of "multipart" must use this syntax. A simple example + of a multipart message also appears in this section. An example of a + more complex multipart message is given in RFC 2049. + + The Content-Type field for multipart entities requires one parameter, + "boundary". The boundary delimiter line is then defined as a line + consisting entirely of two hyphen characters ("-", decimal value 45) + followed by the boundary parameter value from the Content-Type header + field, optional linear whitespace, and a terminating CRLF. + + NOTE: The hyphens are for rough compatibility with the earlier RFC + 934 method of message encapsulation, and for ease of searching for + the boundaries in some implementations. However, it should be noted + that multipart messages are NOT completely compatible with RFC 934 + encapsulations; in particular, they do not obey RFC 934 quoting + conventions for embedded lines that begin with hyphens. This + mechanism was chosen over the RFC 934 mechanism because the latter + causes lines to grow with each level of quoting. The combination of + this growth with the fact that SMTP implementations sometimes wrap + long lines made the RFC 934 mechanism unsuitable for use in the event + that deeply-nested multipart structuring is ever desired. + + WARNING TO IMPLEMENTORS: The grammar for parameters on the Content- + type field is such that it is often necessary to enclose the boundary + parameter values in quotes on the Content-type line. This is not + always necessary, but never hurts. Implementors should be sure to + study the grammar carefully in order to avoid producing invalid + Content-type fields. Thus, a typical "multipart" Content-Type header + field might look like this: + + Content-Type: multipart/mixed; boundary=gc0p4Jq0M2Yt08j34c0p + + But the following is not valid: + + Content-Type: multipart/mixed; boundary=gc0pJq0M:08jU534c0p + + (because of the colon) and must instead be represented as + + Content-Type: multipart/mixed; boundary="gc0pJq0M:08jU534c0p" + + This Content-Type value indicates that the content consists of one or + more parts, each with a structure that is syntactically identical to + an RFC 822 message, except that the header area is allowed to be + completely empty, and that the parts are each preceded by the line + + + + +Freed & Borenstein Standards Track [Page 19] + +RFC 2046 Media Types November 1996 + + + --gc0pJq0M:08jU534c0p + + The boundary delimiter MUST occur at the beginning of a line, i.e., + following a CRLF, and the initial CRLF is considered to be attached + to the boundary delimiter line rather than part of the preceding + part. The boundary may be followed by zero or more characters of + linear whitespace. It is then terminated by either another CRLF and + the header fields for the next part, or by two CRLFs, in which case + there are no header fields for the next part. If no Content-Type + field is present it is assumed to be "message/rfc822" in a + "multipart/digest" and "text/plain" otherwise. + + NOTE: The CRLF preceding the boundary delimiter line is conceptually + attached to the boundary so that it is possible to have a part that + does not end with a CRLF (line break). Body parts that must be + considered to end with line breaks, therefore, must have two CRLFs + preceding the boundary delimiter line, the first of which is part of + the preceding body part, and the second of which is part of the + encapsulation boundary. + + Boundary delimiters must not appear within the encapsulated material, + and must be no longer than 70 characters, not counting the two + leading hyphens. + + The boundary delimiter line following the last body part is a + distinguished delimiter that indicates that no further body parts + will follow. Such a delimiter line is identical to the previous + delimiter lines, with the addition of two more hyphens after the + boundary parameter value. + + --gc0pJq0M:08jU534c0p-- + + NOTE TO IMPLEMENTORS: Boundary string comparisons must compare the + boundary value with the beginning of each candidate line. An exact + match of the entire candidate line is not required; it is sufficient + that the boundary appear in its entirety following the CRLF. + + There appears to be room for additional information prior to the + first boundary delimiter line and following the final boundary + delimiter line. These areas should generally be left blank, and + implementations must ignore anything that appears before the first + boundary delimiter line or after the last one. + + NOTE: These "preamble" and "epilogue" areas are generally not used + because of the lack of proper typing of these parts and the lack of + clear semantics for handling these areas at gateways, particularly + X.400 gateways. However, rather than leaving the preamble area + blank, many MIME implementations have found this to be a convenient + + + +Freed & Borenstein Standards Track [Page 20] + +RFC 2046 Media Types November 1996 + + + place to insert an explanatory note for recipients who read the + message with pre-MIME software, since such notes will be ignored by + MIME-compliant software. + + NOTE: Because boundary delimiters must not appear in the body parts + being encapsulated, a user agent must exercise care to choose a + unique boundary parameter value. The boundary parameter value in the + example above could have been the result of an algorithm designed to + produce boundary delimiters with a very low probability of already + existing in the data to be encapsulated without having to prescan the + data. Alternate algorithms might result in more "readable" boundary + delimiters for a recipient with an old user agent, but would require + more attention to the possibility that the boundary delimiter might + appear at the beginning of some line in the encapsulated part. The + simplest boundary delimiter line possible is something like "---", + with a closing boundary delimiter line of "-----". + + As a very simple example, the following multipart message has two + parts, both of them plain text, one of them explicitly typed and one + of them implicitly typed: + + From: Nathaniel Borenstein <nsb@bellcore.com> + To: Ned Freed <ned@innosoft.com> + Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST) + Subject: Sample message + MIME-Version: 1.0 + Content-type: multipart/mixed; boundary="simple boundary" + + This is the preamble. It is to be ignored, though it + is a handy place for composition agents to include an + explanatory note to non-MIME conformant readers. + + --simple boundary + + This is implicitly typed plain US-ASCII text. + It does NOT end with a linebreak. + --simple boundary + Content-type: text/plain; charset=us-ascii + + This is explicitly typed plain US-ASCII text. + It DOES end with a linebreak. + + --simple boundary-- + + This is the epilogue. It is also to be ignored. + + + + + + +Freed & Borenstein Standards Track [Page 21] + +RFC 2046 Media Types November 1996 + + + The use of a media type of "multipart" in a body part within another + "multipart" entity is explicitly allowed. In such cases, for obvious + reasons, care must be taken to ensure that each nested "multipart" + entity uses a different boundary delimiter. See RFC 2049 for an + example of nested "multipart" entities. + + The use of the "multipart" media type with only a single body part + may be useful in certain contexts, and is explicitly permitted. + + NOTE: Experience has shown that a "multipart" media type with a + single body part is useful for sending non-text media types. It has + the advantage of providing the preamble as a place to include + decoding instructions. In addition, a number of SMTP gateways move + or remove the MIME headers, and a clever MIME decoder can take a good + guess at multipart boundaries even in the absence of the Content-Type + header and thereby successfully decode the message. + + The only mandatory global parameter for the "multipart" media type is + the boundary parameter, which consists of 1 to 70 characters from a + set of characters known to be very robust through mail gateways, and + NOT ending with white space. (If a boundary delimiter line appears to + end with white space, the white space must be presumed to have been + added by a gateway, and must be deleted.) It is formally specified + by the following BNF: + + boundary := 0*69<bchars> bcharsnospace + + bchars := bcharsnospace / " " + + bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / + "+" / "_" / "," / "-" / "." / + "/" / ":" / "=" / "?" + + Overall, the body of a "multipart" entity may be specified as + follows: + + dash-boundary := "--" boundary + ; boundary taken from the value of + ; boundary parameter of the + ; Content-Type field. + + multipart-body := [preamble CRLF] + dash-boundary transport-padding CRLF + body-part *encapsulation + close-delimiter transport-padding + [CRLF epilogue] + + + + + +Freed & Borenstein Standards Track [Page 22] + +RFC 2046 Media Types November 1996 + + + transport-padding := *LWSP-char + ; Composers MUST NOT generate + ; non-zero length transport + ; padding, but receivers MUST + ; be able to handle padding + ; added by message transports. + + encapsulation := delimiter transport-padding + CRLF body-part + + delimiter := CRLF dash-boundary + + close-delimiter := delimiter "--" + + preamble := discard-text + + epilogue := discard-text + + discard-text := *(*text CRLF) *text + ; May be ignored or discarded. + + body-part := MIME-part-headers [CRLF *OCTET] + ; Lines in a body-part must not start + ; with the specified dash-boundary and + ; the delimiter must not appear anywhere + ; in the body part. Note that the + ; semantics of a body-part differ from + ; the semantics of a message, as + ; described in the text. + + OCTET := <any 0-255 octet value> + + IMPORTANT: The free insertion of linear-white-space and RFC 822 + comments between the elements shown in this BNF is NOT allowed since + this BNF does not specify a structured header field. + + NOTE: In certain transport enclaves, RFC 822 restrictions such as + the one that limits bodies to printable US-ASCII characters may not + be in force. (That is, the transport domains may exist that resemble + standard Internet mail transport as specified in RFC 821 and assumed + by RFC 822, but without certain restrictions.) The relaxation of + these restrictions should be construed as locally extending the + definition of bodies, for example to include octets outside of the + US-ASCII range, as long as these extensions are supported by the + transport and adequately documented in the Content- Transfer-Encoding + header field. However, in no event are headers (either message + headers or body part headers) allowed to contain anything other than + US-ASCII characters. + + + +Freed & Borenstein Standards Track [Page 23] + +RFC 2046 Media Types November 1996 + + + NOTE: Conspicuously missing from the "multipart" type is a notion of + structured, related body parts. It is recommended that those wishing + to provide more structured or integrated multipart messaging + facilities should define subtypes of multipart that are syntactically + identical but define relationships between the various parts. For + example, subtypes of multipart could be defined that include a + distinguished part which in turn is used to specify the relationships + between the other parts, probably referring to them by their + Content-ID field. Old implementations will not recognize the new + subtype if this approach is used, but will treat it as + multipart/mixed and will thus be able to show the user the parts that + are recognized. + +5.1.2. Handling Nested Messages and Multiparts + + The "message/rfc822" subtype defined in a subsequent section of this + document has no terminating condition other than running out of data. + Similarly, an improperly truncated "multipart" entity may not have + any terminating boundary marker, and can turn up operationally due to + mail system malfunctions. + + It is essential that such entities be handled correctly when they are + themselves imbedded inside of another "multipart" structure. MIME + implementations are therefore required to recognize outer level + boundary markers at ANY level of inner nesting. It is not sufficient + to only check for the next expected marker or other terminating + condition. + +5.1.3. Mixed Subtype + + The "mixed" subtype of "multipart" is intended for use when the body + parts are independent and need to be bundled in a particular order. + Any "multipart" subtypes that an implementation does not recognize + must be treated as being of subtype "mixed". + +5.1.4. Alternative Subtype + + The "multipart/alternative" type is syntactically identical to + "multipart/mixed", but the semantics are different. In particular, + each of the body parts is an "alternative" version of the same + information. + + Systems should recognize that the content of the various parts are + interchangeable. Systems should choose the "best" type based on the + local environment and references, in some cases even through user + interaction. As with "multipart/mixed", the order of body parts is + significant. In this case, the alternatives appear in an order of + increasing faithfulness to the original content. In general, the + + + +Freed & Borenstein Standards Track [Page 24] + +RFC 2046 Media Types November 1996 + + + best choice is the LAST part of a type supported by the recipient + system's local environment. + + "Multipart/alternative" may be used, for example, to send a message + in a fancy text format in such a way that it can easily be displayed + anywhere: + + From: Nathaniel Borenstein <nsb@bellcore.com> + To: Ned Freed <ned@innosoft.com> + Date: Mon, 22 Mar 1993 09:41:09 -0800 (PST) + Subject: Formatted text mail + MIME-Version: 1.0 + Content-Type: multipart/alternative; boundary=boundary42 + + --boundary42 + Content-Type: text/plain; charset=us-ascii + + ... plain text version of message goes here ... + + --boundary42 + Content-Type: text/enriched + + ... RFC 1896 text/enriched version of same message + goes here ... + + --boundary42 + Content-Type: application/x-whatever + + ... fanciest version of same message goes here ... + + --boundary42-- + + In this example, users whose mail systems understood the + "application/x-whatever" format would see only the fancy version, + while other users would see only the enriched or plain text version, + depending on the capabilities of their system. + + In general, user agents that compose "multipart/alternative" entities + must place the body parts in increasing order of preference, that is, + with the preferred format last. For fancy text, the sending user + agent should put the plainest format first and the richest format + last. Receiving user agents should pick and display the last format + they are capable of displaying. In the case where one of the + alternatives is itself of type "multipart" and contains unrecognized + sub-parts, the user agent may choose either to show that alternative, + an earlier alternative, or both. + + + + + +Freed & Borenstein Standards Track [Page 25] + +RFC 2046 Media Types November 1996 + + + NOTE: From an implementor's perspective, it might seem more sensible + to reverse this ordering, and have the plainest alternative last. + However, placing the plainest alternative first is the friendliest + possible option when "multipart/alternative" entities are viewed + using a non-MIME-conformant viewer. While this approach does impose + some burden on conformant MIME viewers, interoperability with older + mail readers was deemed to be more important in this case. + + It may be the case that some user agents, if they can recognize more + than one of the formats, will prefer to offer the user the choice of + which format to view. This makes sense, for example, if a message + includes both a nicely- formatted image version and an easily-edited + text version. What is most critical, however, is that the user not + automatically be shown multiple versions of the same data. Either + the user should be shown the last recognized version or should be + given the choice. + + THE SEMANTICS OF CONTENT-ID IN MULTIPART/ALTERNATIVE: Each part of a + "multipart/alternative" entity represents the same data, but the + mappings between the two are not necessarily without information + loss. For example, information is lost when translating ODA to + PostScript or plain text. It is recommended that each part should + have a different Content-ID value in the case where the information + content of the two parts is not identical. And when the information + content is identical -- for example, where several parts of type + "message/external-body" specify alternate ways to access the + identical data -- the same Content-ID field value should be used, to + optimize any caching mechanisms that might be present on the + recipient's end. However, the Content-ID values used by the parts + should NOT be the same Content-ID value that describes the + "multipart/alternative" as a whole, if there is any such Content-ID + field. That is, one Content-ID value will refer to the + "multipart/alternative" entity, while one or more other Content-ID + values will refer to the parts inside it. + +5.1.5. Digest Subtype + + This document defines a "digest" subtype of the "multipart" Content- + Type. This type is syntactically identical to "multipart/mixed", but + the semantics are different. In particular, in a digest, the default + Content-Type value for a body part is changed from "text/plain" to + "message/rfc822". This is done to allow a more readable digest + format that is largely compatible (except for the quoting convention) + with RFC 934. + + Note: Though it is possible to specify a Content-Type value for a + body part in a digest which is other than "message/rfc822", such as a + "text/plain" part containing a description of the material in the + + + +Freed & Borenstein Standards Track [Page 26] + +RFC 2046 Media Types November 1996 + + + digest, actually doing so is undesireble. The "multipart/digest" + Content-Type is intended to be used to send collections of messages. + If a "text/plain" part is needed, it should be included as a seperate + part of a "multipart/mixed" message. + + A digest in this format might, then, look something like this: + + From: Moderator-Address + To: Recipient-List + Date: Mon, 22 Mar 1994 13:34:51 +0000 + Subject: Internet Digest, volume 42 + MIME-Version: 1.0 + Content-Type: multipart/mixed; + boundary="---- main boundary ----" + + ------ main boundary ---- + + ...Introductory text or table of contents... + + ------ main boundary ---- + Content-Type: multipart/digest; + boundary="---- next message ----" + + ------ next message ---- + + From: someone-else + Date: Fri, 26 Mar 1993 11:13:32 +0200 + Subject: my opinion + + ...body goes here ... + + ------ next message ---- + + From: someone-else-again + Date: Fri, 26 Mar 1993 10:07:13 -0500 + Subject: my different opinion + + ... another body goes here ... + + ------ next message ------ + + ------ main boundary ------ + +5.1.6. Parallel Subtype + + This document defines a "parallel" subtype of the "multipart" + Content-Type. This type is syntactically identical to + "multipart/mixed", but the semantics are different. In particular, + + + +Freed & Borenstein Standards Track [Page 27] + +RFC 2046 Media Types November 1996 + + + in a parallel entity, the order of body parts is not significant. + + A common presentation of this type is to display all of the parts + simultaneously on hardware and software that are capable of doing so. + However, composing agents should be aware that many mail readers will + lack this capability and will show the parts serially in any event. + +5.1.7. Other Multipart Subtypes + + Other "multipart" subtypes are expected in the future. MIME + implementations must in general treat unrecognized subtypes of + "multipart" as being equivalent to "multipart/mixed". + +5.2. Message Media Type + + It is frequently desirable, in sending mail, to encapsulate another + mail message. A special media type, "message", is defined to + facilitate this. In particular, the "rfc822" subtype of "message" is + used to encapsulate RFC 822 messages. + + NOTE: It has been suggested that subtypes of "message" might be + defined for forwarded or rejected messages. However, forwarded and + rejected messages can be handled as multipart messages in which the + first part contains any control or descriptive information, and a + second part, of type "message/rfc822", is the forwarded or rejected + message. Composing rejection and forwarding messages in this manner + will preserve the type information on the original message and allow + it to be correctly presented to the recipient, and hence is strongly + encouraged. + + Subtypes of "message" often impose restrictions on what encodings are + allowed. These restrictions are described in conjunction with each + specific subtype. + + Mail gateways, relays, and other mail handling agents are commonly + known to alter the top-level header of an RFC 822 message. In + particular, they frequently add, remove, or reorder header fields. + These operations are explicitly forbidden for the encapsulated + headers embedded in the bodies of messages of type "message." + +5.2.1. RFC822 Subtype + + A media type of "message/rfc822" indicates that the body contains an + encapsulated message, with the syntax of an RFC 822 message. + However, unlike top-level RFC 822 messages, the restriction that each + "message/rfc822" body must include a "From", "Date", and at least one + destination header is removed and replaced with the requirement that + at least one of "From", "Subject", or "Date" must be present. + + + +Freed & Borenstein Standards Track [Page 28] + +RFC 2046 Media Types November 1996 + + + It should be noted that, despite the use of the numbers "822", a + "message/rfc822" entity isn't restricted to material in strict + conformance to RFC822, nor are the semantics of "message/rfc822" + objects restricted to the semantics defined in RFC822. More + specifically, a "message/rfc822" message could well be a News article + or a MIME message. + + No encoding other than "7bit", "8bit", or "binary" is permitted for + the body of a "message/rfc822" entity. The message header fields are + always US-ASCII in any case, and data within the body can still be + encoded, in which case the Content-Transfer-Encoding header field in + the encapsulated message will reflect this. Non-US-ASCII text in the + headers of an encapsulated message can be specified using the + mechanisms described in RFC 2047. + +5.2.2. Partial Subtype + + The "partial" subtype is defined to allow large entities to be + delivered as several separate pieces of mail and automatically + reassembled by a receiving user agent. (The concept is similar to IP + fragmentation and reassembly in the basic Internet Protocols.) This + mechanism can be used when intermediate transport agents limit the + size of individual messages that can be sent. The media type + "message/partial" thus indicates that the body contains a fragment of + a larger entity. + + Because data of type "message" may never be encoded in base64 or + quoted-printable, a problem might arise if "message/partial" entities + are constructed in an environment that supports binary or 8bit + transport. The problem is that the binary data would be split into + multiple "message/partial" messages, each of them requiring binary + transport. If such messages were encountered at a gateway into a + 7bit transport environment, there would be no way to properly encode + them for the 7bit world, aside from waiting for all of the fragments, + reassembling the inner message, and then encoding the reassembled + data in base64 or quoted-printable. Since it is possible that + different fragments might go through different gateways, even this is + not an acceptable solution. For this reason, it is specified that + entities of type "message/partial" must always have a content- + transfer-encoding of 7bit (the default). In particular, even in + environments that support binary or 8bit transport, the use of a + content- transfer-encoding of "8bit" or "binary" is explicitly + prohibited for MIME entities of type "message/partial". This in turn + implies that the inner message must not use "8bit" or "binary" + encoding. + + + + + + +Freed & Borenstein Standards Track [Page 29] + +RFC 2046 Media Types November 1996 + + + Because some message transfer agents may choose to automatically + fragment large messages, and because such agents may use very + different fragmentation thresholds, it is possible that the pieces of + a partial message, upon reassembly, may prove themselves to comprise + a partial message. This is explicitly permitted. + + Three parameters must be specified in the Content-Type field of type + "message/partial": The first, "id", is a unique identifier, as close + to a world-unique identifier as possible, to be used to match the + fragments together. (In general, the identifier is essentially a + message-id; if placed in double quotes, it can be ANY message-id, in + accordance with the BNF for "parameter" given in RFC 2045.) The + second, "number", an integer, is the fragment number, which indicates + where this fragment fits into the sequence of fragments. The third, + "total", another integer, is the total number of fragments. This + third subfield is required on the final fragment, and is optional + (though encouraged) on the earlier fragments. Note also that these + parameters may be given in any order. + + Thus, the second piece of a 3-piece message may have either of the + following header fields: + + Content-Type: Message/Partial; number=2; total=3; + id="oc=jpbe0M2Yt4s@thumper.bellcore.com" + + Content-Type: Message/Partial; + id="oc=jpbe0M2Yt4s@thumper.bellcore.com"; + number=2 + + But the third piece MUST specify the total number of fragments: + + Content-Type: Message/Partial; number=3; total=3; + id="oc=jpbe0M2Yt4s@thumper.bellcore.com" + + Note that fragment numbering begins with 1, not 0. + + When the fragments of an entity broken up in this manner are put + together, the result is always a complete MIME entity, which may have + its own Content-Type header field, and thus may contain any other + data type. + +5.2.2.1. Message Fragmentation and Reassembly + + The semantics of a reassembled partial message must be those of the + "inner" message, rather than of a message containing the inner + message. This makes it possible, for example, to send a large audio + message as several partial messages, and still have it appear to the + recipient as a simple audio message rather than as an encapsulated + + + +Freed & Borenstein Standards Track [Page 30] + +RFC 2046 Media Types November 1996 + + + message containing an audio message. That is, the encapsulation of + the message is considered to be "transparent". + + When generating and reassembling the pieces of a "message/partial" + message, the headers of the encapsulated message must be merged with + the headers of the enclosing entities. In this process the following + rules must be observed: + + (1) Fragmentation agents must split messages at line + boundaries only. This restriction is imposed because + splits at points other than the ends of lines in turn + depends on message transports being able to preserve + the semantics of messages that don't end with a CRLF + sequence. Many transports are incapable of preserving + such semantics. + + (2) All of the header fields from the initial enclosing + message, except those that start with "Content-" and + the specific header fields "Subject", "Message-ID", + "Encrypted", and "MIME-Version", must be copied, in + order, to the new message. + + (3) The header fields in the enclosed message which start + with "Content-", plus the "Subject", "Message-ID", + "Encrypted", and "MIME-Version" fields, must be + appended, in order, to the header fields of the new + message. Any header fields in the enclosed message + which do not start with "Content-" (except for the + "Subject", "Message-ID", "Encrypted", and "MIME- + Version" fields) will be ignored and dropped. + + (4) All of the header fields from the second and any + subsequent enclosing messages are discarded by the + reassembly process. + +5.2.2.2. Fragmentation and Reassembly Example + + If an audio message is broken into two pieces, the first piece might + look something like this: + + X-Weird-Header-1: Foo + From: Bill@host.com + To: joe@otherhost.com + Date: Fri, 26 Mar 1993 12:59:38 -0500 (EST) + Subject: Audio mail (part 1 of 2) + Message-ID: <id1@host.com> + MIME-Version: 1.0 + Content-type: message/partial; id="ABC@host.com"; + + + +Freed & Borenstein Standards Track [Page 31] + +RFC 2046 Media Types November 1996 + + + number=1; total=2 + + X-Weird-Header-1: Bar + X-Weird-Header-2: Hello + Message-ID: <anotherid@foo.com> + Subject: Audio mail + MIME-Version: 1.0 + Content-type: audio/basic + Content-transfer-encoding: base64 + + ... first half of encoded audio data goes here ... + + and the second half might look something like this: + + From: Bill@host.com + To: joe@otherhost.com + Date: Fri, 26 Mar 1993 12:59:38 -0500 (EST) + Subject: Audio mail (part 2 of 2) + MIME-Version: 1.0 + Message-ID: <id2@host.com> + Content-type: message/partial; + id="ABC@host.com"; number=2; total=2 + + ... second half of encoded audio data goes here ... + + Then, when the fragmented message is reassembled, the resulting + message to be displayed to the user should look something like this: + + X-Weird-Header-1: Foo + From: Bill@host.com + To: joe@otherhost.com + Date: Fri, 26 Mar 1993 12:59:38 -0500 (EST) + Subject: Audio mail + Message-ID: <anotherid@foo.com> + MIME-Version: 1.0 + Content-type: audio/basic + Content-transfer-encoding: base64 + + ... first half of encoded audio data goes here ... + ... second half of encoded audio data goes here ... + + The inclusion of a "References" field in the headers of the second + and subsequent pieces of a fragmented message that references the + Message-Id on the previous piece may be of benefit to mail readers + that understand and track references. However, the generation of + such "References" fields is entirely optional. + + + + + +Freed & Borenstein Standards Track [Page 32] + +RFC 2046 Media Types November 1996 + + + Finally, it should be noted that the "Encrypted" header field has + been made obsolete by Privacy Enhanced Messaging (PEM) [RFC-1421, + RFC-1422, RFC-1423, RFC-1424], but the rules above are nevertheless + believed to describe the correct way to treat it if it is encountered + in the context of conversion to and from "message/partial" fragments. + +5.2.3. External-Body Subtype + + The external-body subtype indicates that the actual body data are not + included, but merely referenced. In this case, the parameters + describe a mechanism for accessing the external data. + + When a MIME entity is of type "message/external-body", it consists of + a header, two consecutive CRLFs, and the message header for the + encapsulated message. If another pair of consecutive CRLFs appears, + this of course ends the message header for the encapsulated message. + However, since the encapsulated message's body is itself external, it + does NOT appear in the area that follows. For example, consider the + following message: + + Content-type: message/external-body; + access-type=local-file; + name="/u/nsb/Me.jpeg" + + Content-type: image/jpeg + Content-ID: <id42@guppylake.bellcore.com> + Content-Transfer-Encoding: binary + + THIS IS NOT REALLY THE BODY! + + The area at the end, which might be called the "phantom body", is + ignored for most external-body messages. However, it may be used to + contain auxiliary information for some such messages, as indeed it is + when the access-type is "mail- server". The only access-type defined + in this document that uses the phantom body is "mail-server", but + other access-types may be defined in the future in other + specifications that use this area. + + The encapsulated headers in ALL "message/external-body" entities MUST + include a Content-ID header field to give a unique identifier by + which to reference the data. This identifier may be used for caching + mechanisms, and for recognizing the receipt of the data when the + access-type is "mail-server". + + Note that, as specified here, the tokens that describe external-body + data, such as file names and mail server commands, are required to be + in the US-ASCII character set. + + + + +Freed & Borenstein Standards Track [Page 33] + +RFC 2046 Media Types November 1996 + + + If this proves problematic in practice, a new mechanism may be + required as a future extension to MIME, either as newly defined + access-types for "message/external-body" or by some other mechanism. + + As with "message/partial", MIME entities of type "message/external- + body" MUST have a content-transfer-encoding of 7bit (the default). + In particular, even in environments that support binary or 8bit + transport, the use of a content- transfer-encoding of "8bit" or + "binary" is explicitly prohibited for entities of type + "message/external-body". + +5.2.3.1. General External-Body Parameters + + The parameters that may be used with any "message/external- body" + are: + + (1) ACCESS-TYPE -- A word indicating the supported access + mechanism by which the file or data may be obtained. + This word is not case sensitive. Values include, but + are not limited to, "FTP", "ANON-FTP", "TFTP", "LOCAL- + FILE", and "MAIL-SERVER". Future values, except for + experimental values beginning with "X-", must be + registered with IANA, as described in RFC 2048. + This parameter is unconditionally mandatory and MUST be + present on EVERY "message/external-body". + + (2) EXPIRATION -- The date (in the RFC 822 "date-time" + syntax, as extended by RFC 1123 to permit 4 digits in + the year field) after which the existence of the + external data is not guaranteed. This parameter may be + used with ANY access-type and is ALWAYS optional. + + (3) SIZE -- The size (in octets) of the data. The intent + of this parameter is to help the recipient decide + whether or not to expend the necessary resources to + retrieve the external data. Note that this describes + the size of the data in its canonical form, that is, + before any Content-Transfer-Encoding has been applied + or after the data have been decoded. This parameter + may be used with ANY access-type and is ALWAYS + optional. + + (4) PERMISSION -- A case-insensitive field that indicates + whether or not it is expected that clients might also + attempt to overwrite the data. By default, or if + permission is "read", the assumption is that they are + not, and that if the data is retrieved once, it is + never needed again. If PERMISSION is "read-write", + + + +Freed & Borenstein Standards Track [Page 34] + +RFC 2046 Media Types November 1996 + + + this assumption is invalid, and any local copy must be + considered no more than a cache. "Read" and "Read- + write" are the only defined values of permission. This + parameter may be used with ANY access-type and is + ALWAYS optional. + + The precise semantics of the access-types defined here are described + in the sections that follow. + +5.2.3.2. The 'ftp' and 'tftp' Access-Types + + An access-type of FTP or TFTP indicates that the message body is + accessible as a file using the FTP [RFC-959] or TFTP [RFC- 783] + protocols, respectively. For these access-types, the following + additional parameters are mandatory: + + (1) NAME -- The name of the file that contains the actual + body data. + + (2) SITE -- A machine from which the file may be obtained, + using the given protocol. This must be a fully + qualified domain name, not a nickname. + + (3) Before any data are retrieved, using FTP, the user will + generally need to be asked to provide a login id and a + password for the machine named by the site parameter. + For security reasons, such an id and password are not + specified as content-type parameters, but must be + obtained from the user. + + In addition, the following parameters are optional: + + (1) DIRECTORY -- A directory from which the data named by + NAME should be retrieved. + + (2) MODE -- A case-insensitive string indicating the mode + to be used when retrieving the information. The valid + values for access-type "TFTP" are "NETASCII", "OCTET", + and "MAIL", as specified by the TFTP protocol [RFC- + 783]. The valid values for access-type "FTP" are + "ASCII", "EBCDIC", "IMAGE", and "LOCALn" where "n" is a + decimal integer, typically 8. These correspond to the + representation types "A" "E" "I" and "L n" as specified + by the FTP protocol [RFC-959]. Note that "BINARY" and + "TENEX" are not valid values for MODE and that "OCTET" + or "IMAGE" or "LOCAL8" should be used instead. IF MODE + is not specified, the default value is "NETASCII" for + TFTP and "ASCII" otherwise. + + + +Freed & Borenstein Standards Track [Page 35] + +RFC 2046 Media Types November 1996 + + +5.2.3.3. The 'anon-ftp' Access-Type + + The "anon-ftp" access-type is identical to the "ftp" access type, + except that the user need not be asked to provide a name and password + for the specified site. Instead, the ftp protocol will be used with + login "anonymous" and a password that corresponds to the user's mail + address. + +5.2.3.4. The 'local-file' Access-Type + + An access-type of "local-file" indicates that the actual body is + accessible as a file on the local machine. Two additional parameters + are defined for this access type: + + (1) NAME -- The name of the file that contains the actual + body data. This parameter is mandatory for the + "local-file" access-type. + + (2) SITE -- A domain specifier for a machine or set of + machines that are known to have access to the data + file. This optional parameter is used to describe the + locality of reference for the data, that is, the site + or sites at which the file is expected to be visible. + Asterisks may be used for wildcard matching to a part + of a domain name, such as "*.bellcore.com", to indicate + a set of machines on which the data should be directly + visible, while a single asterisk may be used to + indicate a file that is expected to be universally + available, e.g., via a global file system. + +5.2.3.5. The 'mail-server' Access-Type + + The "mail-server" access-type indicates that the actual body is + available from a mail server. Two additional parameters are defined + for this access-type: + + (1) SERVER -- The addr-spec of the mail server from which + the actual body data can be obtained. This parameter + is mandatory for the "mail-server" access-type. + + (2) SUBJECT -- The subject that is to be used in the mail + that is sent to obtain the data. Note that keying mail + servers on Subject lines is NOT recommended, but such + mail servers are known to exist. This is an optional + parameter. + + + + + + +Freed & Borenstein Standards Track [Page 36] + +RFC 2046 Media Types November 1996 + + + Because mail servers accept a variety of syntaxes, some of which is + multiline, the full command to be sent to a mail server is not + included as a parameter in the content-type header field. Instead, + it is provided as the "phantom body" when the media type is + "message/external-body" and the access-type is mail-server. + + Note that MIME does not define a mail server syntax. Rather, it + allows the inclusion of arbitrary mail server commands in the phantom + body. Implementations must include the phantom body in the body of + the message it sends to the mail server address to retrieve the + relevant data. + + Unlike other access-types, mail-server access is asynchronous and + will happen at an unpredictable time in the future. For this reason, + it is important that there be a mechanism by which the returned data + can be matched up with the original "message/external-body" entity. + MIME mail servers must use the same Content-ID field on the returned + message that was used in the original "message/external-body" + entities, to facilitate such matching. + +5.2.3.6. External-Body Security Issues + + "Message/external-body" entities give rise to two important security + issues: + + (1) Accessing data via a "message/external-body" reference + effectively results in the message recipient performing + an operation that was specified by the message + originator. It is therefore possible for the message + originator to trick a recipient into doing something + they would not have done otherwise. For example, an + originator could specify a action that attempts + retrieval of material that the recipient is not + authorized to obtain, causing the recipient to + unwittingly violate some security policy. For this + reason, user agents capable of resolving external + references must always take steps to describe the + action they are to take to the recipient and ask for + explicit permisssion prior to performing it. + + The 'mail-server' access-type is particularly + vulnerable, in that it causes the recipient to send a + new message whose contents are specified by the + original message's originator. Given the potential for + abuse, any such request messages that are constructed + should contain a clear indication that they were + generated automatically (e.g. in a Comments: header + field) in an attempt to resolve a MIME + + + +Freed & Borenstein Standards Track [Page 37] + +RFC 2046 Media Types November 1996 + + + "message/external-body" reference. + + (2) MIME will sometimes be used in environments that + provide some guarantee of message integrity and + authenticity. If present, such guarantees may apply + only to the actual direct content of messages -- they + may or may not apply to data accessed through MIME's + "message/external-body" mechanism. In particular, it + may be possible to subvert certain access mechanisms + even when the messaging system itself is secure. + + It should be noted that this problem exists either with + or without the availabilty of MIME mechanisms. A + casual reference to an FTP site containing a document + in the text of a secure message brings up similar + issues -- the only difference is that MIME provides for + automatic retrieval of such material, and users may + place unwarranted trust is such automatic retrieval + mechanisms. + +5.2.3.7. Examples and Further Explanations + + When the external-body mechanism is used in conjunction with the + "multipart/alternative" media type it extends the functionality of + "multipart/alternative" to include the case where the same entity is + provided in the same format but via different accces mechanisms. + When this is done the originator of the message must order the parts + first in terms of preferred formats and then by preferred access + mechanisms. The recipient's viewer should then evaluate the list + both in terms of format and access mechanisms. + + With the emerging possibility of very wide-area file systems, it + becomes very hard to know in advance the set of machines where a file + will and will not be accessible directly from the file system. + Therefore it may make sense to provide both a file name, to be tried + directly, and the name of one or more sites from which the file is + known to be accessible. An implementation can try to retrieve remote + files using FTP or any other protocol, using anonymous file retrieval + or prompting the user for the necessary name and password. If an + external body is accessible via multiple mechanisms, the sender may + include multiple entities of type "message/external-body" within the + body parts of an enclosing "multipart/alternative" entity. + + However, the external-body mechanism is not intended to be limited to + file retrieval, as shown by the mail-server access-type. Beyond + this, one can imagine, for example, using a video server for external + references to video clips. + + + + +Freed & Borenstein Standards Track [Page 38] + +RFC 2046 Media Types November 1996 + + + The embedded message header fields which appear in the body of the + "message/external-body" data must be used to declare the media type + of the external body if it is anything other than plain US-ASCII + text, since the external body does not have a header section to + declare its type. Similarly, any Content-transfer-encoding other + than "7bit" must also be declared here. Thus a complete + "message/external-body" message, referring to an object in PostScript + format, might look like this: + + From: Whomever + To: Someone + Date: Whenever + Subject: whatever + MIME-Version: 1.0 + Message-ID: <id1@host.com> + Content-Type: multipart/alternative; boundary=42 + Content-ID: <id001@guppylake.bellcore.com> + + --42 + Content-Type: message/external-body; name="BodyFormats.ps"; + site="thumper.bellcore.com"; mode="image"; + access-type=ANON-FTP; directory="pub"; + expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)" + + Content-type: application/postscript + Content-ID: <id42@guppylake.bellcore.com> + + --42 + Content-Type: message/external-body; access-type=local-file; + name="/u/nsb/writing/rfcs/RFC-MIME.ps"; + site="thumper.bellcore.com"; + expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)" + + Content-type: application/postscript + Content-ID: <id42@guppylake.bellcore.com> + + --42 + Content-Type: message/external-body; + access-type=mail-server + server="listserv@bogus.bitnet"; + expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)" + + Content-type: application/postscript + Content-ID: <id42@guppylake.bellcore.com> + + get RFC-MIME.DOC + + --42-- + + + +Freed & Borenstein Standards Track [Page 39] + +RFC 2046 Media Types November 1996 + + + Note that in the above examples, the default Content-transfer- + encoding of "7bit" is assumed for the external postscript data. + + Like the "message/partial" type, the "message/external-body" media + type is intended to be transparent, that is, to convey the data type + in the external body rather than to convey a message with a body of + that type. Thus the headers on the outer and inner parts must be + merged using the same rules as for "message/partial". In particular, + this means that the Content-type and Subject fields are overridden, + but the From field is preserved. + + Note that since the external bodies are not transported along with + the external body reference, they need not conform to transport + limitations that apply to the reference itself. In particular, + Internet mail transports may impose 7bit and line length limits, but + these do not automatically apply to binary external body references. + Thus a Content-Transfer-Encoding is not generally necessary, though + it is permitted. + + Note that the body of a message of type "message/external-body" is + governed by the basic syntax for an RFC 822 message. In particular, + anything before the first consecutive pair of CRLFs is header + information, while anything after it is body information, which is + ignored for most access-types. + +5.2.4. Other Message Subtypes + + MIME implementations must in general treat unrecognized subtypes of + "message" as being equivalent to "application/octet-stream". + + Future subtypes of "message" intended for use with email should be + restricted to "7bit" encoding. A type other than "message" should be + used if restriction to "7bit" is not possible. + +6. Experimental Media Type Values + + A media type value beginning with the characters "X-" is a private + value, to be used by consenting systems by mutual agreement. Any + format without a rigorous and public definition must be named with an + "X-" prefix, and publicly specified values shall never begin with + "X-". (Older versions of the widely used Andrew system use the "X- + BE2" name, so new systems should probably choose a different name.) + + In general, the use of "X-" top-level types is strongly discouraged. + Implementors should invent subtypes of the existing types whenever + possible. In many cases, a subtype of "application" will be more + appropriate than a new top-level type. + + + + +Freed & Borenstein Standards Track [Page 40] + +RFC 2046 Media Types November 1996 + + +7. Summary + + The five discrete media types provide provide a standardized + mechanism for tagging entities as "audio", "image", or several other + kinds of data. The composite "multipart" and "message" media types + allow mixing and hierarchical structuring of entities of different + types in a single message. A distinguished parameter syntax allows + further specification of data format details, particularly the + specification of alternate character sets. Additional optional + header fields provide mechanisms for certain extensions deemed + desirable by many implementors. Finally, a number of useful media + types are defined for general use by consenting user agents, notably + "message/partial" and "message/external-body". + +9. Security Considerations + + Security issues are discussed in the context of the + "application/postscript" type, the "message/external-body" type, and + in RFC 2048. Implementors should pay special attention to the + security implications of any media types that can cause the remote + execution of any actions in the recipient's environment. In such + cases, the discussion of the "application/postscript" type may serve + as a model for considering other media types with remote execution + capabilities. + + + + + + + + + + + + + + + + + + + + + + + + + + + +Freed & Borenstein Standards Track [Page 41] + +RFC 2046 Media Types November 1996 + + +9. Authors' Addresses + + For more information, the authors of this document are best contacted + via Internet mail: + + Ned Freed + Innosoft International, Inc. + 1050 East Garvey Avenue South + West Covina, CA 91790 + USA + + Phone: +1 818 919 3600 + Fax: +1 818 919 3614 + EMail: ned@innosoft.com + + + Nathaniel S. Borenstein + First Virtual Holdings + 25 Washington Avenue + Morristown, NJ 07960 + USA + + Phone: +1 201 540 8967 + Fax: +1 201 993 3032 + EMail: nsb@nsb.fv.com + + + MIME is a result of the work of the Internet Engineering Task Force + Working Group on RFC 822 Extensions. The chairman of that group, + Greg Vaudreuil, may be reached at: + + Gregory M. Vaudreuil + Octel Network Services + 17080 Dallas Parkway + Dallas, TX 75248-1905 + USA + + EMail: Greg.Vaudreuil@Octel.Com + + + + + + + + + + + + + +Freed & Borenstein Standards Track [Page 42] + +RFC 2046 Media Types November 1996 + + +Appendix A -- Collected Grammar + + This appendix contains the complete BNF grammar for all the syntax + specified by this document. + + By itself, however, this grammar is incomplete. It refers by name to + several syntax rules that are defined by RFC 822. Rather than + reproduce those definitions here, and risk unintentional differences + between the two, this document simply refers the reader to RFC 822 + for the remaining definitions. Wherever a term is undefined, it + refers to the RFC 822 definition. + + boundary := 0*69<bchars> bcharsnospace + + bchars := bcharsnospace / " " + + bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / + "+" / "_" / "," / "-" / "." / + "/" / ":" / "=" / "?" + + body-part := <"message" as defined in RFC 822, with all + header fields optional, not starting with the + specified dash-boundary, and with the + delimiter not occurring anywhere in the + body part. Note that the semantics of a + part differ from the semantics of a message, + as described in the text.> + + close-delimiter := delimiter "--" + + dash-boundary := "--" boundary + ; boundary taken from the value of + ; boundary parameter of the + ; Content-Type field. + + delimiter := CRLF dash-boundary + + discard-text := *(*text CRLF) + ; May be ignored or discarded. + + encapsulation := delimiter transport-padding + CRLF body-part + + epilogue := discard-text + + multipart-body := [preamble CRLF] + dash-boundary transport-padding CRLF + body-part *encapsulation + + + +Freed & Borenstein Standards Track [Page 43] + +RFC 2046 Media Types November 1996 + + + close-delimiter transport-padding + [CRLF epilogue] + + preamble := discard-text + + transport-padding := *LWSP-char + ; Composers MUST NOT generate + ; non-zero length transport + ; padding, but receivers MUST + ; be able to handle padding + ; added by message transports. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Freed & Borenstein Standards Track [Page 44] + diff --git a/filter.c b/filter.c @@ -19,10 +19,10 @@ int main(int argc, char **argv) { for (i = filter; i < argc; i++) if (!strcmp (argv[i], "-e")) edit = i; - } - for (i = 0; i < argc; i++) { - strncpy (argv2[i], argv[i], 1023); - argv2[i][1023] = '\0'; + for (i = 0; i < argc; i++) { + strncpy (argv2[i], argv[i], 1023); + argv2[i][1023] = '\0'; + } } memset (b, '\0', 1024); /* Headers */ @@ -33,7 +33,7 @@ int main(int argc, char **argv) { if (!strncmp (b, argv[i], strlen(argv[i])) || argv[i][0] == ':') { /* Edit/Remove Headers */ print = 1; - for (j = edit + 1; j < argc && argv[j]; j++) + for (j = edit + 1; !value && j < argc && argv[j]; j++) if ((ptr = strchr (argv[j], ':')) && !strncmp (b, argv[j], ptr - argv[j] + 1)) { if (ptr[1] != '\0' && argv2[j][0]) diff --git a/mbox.c b/mbox.c @@ -8,7 +8,6 @@ FILE *fd; static char word[1024]; -// XXX maybe so many [1024] stuff. can this cause truncated mails? static void mbox_ls() { char b[1024], from[1024], subject[1024], date[1024], *ptr; int m = 0, headers = 1;