* Added missing rfc's * Minor optimization in dmc-filter when using -v - dmc

commit 4a53c1f65beb951587ed0664a107e5339b3f8c22
parent 60bfa2edb213e82738037473393e80c144982b31
Author: nibble <unknown>
Date:   Mon,  9 Nov 2009 19:28:21 +0100

* Added missing rfc's
* Minor optimization in dmc-filter when using -v
Diffstat:
doc/imf-rfc5322.txt  | 3195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
doc/mbox-rfc4155.txt  | 507 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
doc/mime-p1-rfc2045.txt  | 1739 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
doc/mime-p2-rfc2046.txt  | 2467 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
filter.c  | 10 +++++-----
mbox.c  | 1 -

6 files changed, 7913 insertions(+), 6 deletions(-)
diff --git a/doc/imf-rfc5322.txt b/doc/imf-rfc5322.txt
@@ -0,0 +1,3195 @@
+
+
+
+
+
+
+Network Working Group                                    P. Resnick, Ed.
+Request for Comments: 5322                         Qualcomm Incorporated
+Obsoletes: 2822                                             October 2008
+Updates: 4021
+Category: Standards Track
+
+
+                        Internet Message Format
+
+Status of This Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Abstract
+
+   This document specifies the Internet Message Format (IMF), a syntax
+   for text messages that are sent between computer users, within the
+   framework of "electronic mail" messages.  This specification is a
+   revision of Request For Comments (RFC) 2822, which itself superseded
+   Request For Comments (RFC) 822, "Standard for the Format of ARPA
+   Internet Text Messages", updating it to reflect current practice and
+   incorporating incremental changes that were specified in other RFCs.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                     [Page 1]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Table of Contents
+
+   1.  Introduction . . . . . . . . . . . . . . . . . . . . . . . . .  4
+     1.1.  Scope  . . . . . . . . . . . . . . . . . . . . . . . . . .  4
+     1.2.  Notational Conventions . . . . . . . . . . . . . . . . . .  5
+       1.2.1.  Requirements Notation  . . . . . . . . . . . . . . . .  5
+       1.2.2.  Syntactic Notation . . . . . . . . . . . . . . . . . .  5
+       1.2.3.  Structure of This Document . . . . . . . . . . . . . .  5
+   2.  Lexical Analysis of Messages . . . . . . . . . . . . . . . . .  6
+     2.1.  General Description  . . . . . . . . . . . . . . . . . . .  6
+       2.1.1.  Line Length Limits . . . . . . . . . . . . . . . . . .  7
+     2.2.  Header Fields  . . . . . . . . . . . . . . . . . . . . . .  8
+       2.2.1.  Unstructured Header Field Bodies . . . . . . . . . . .  8
+       2.2.2.  Structured Header Field Bodies . . . . . . . . . . . .  8
+       2.2.3.  Long Header Fields . . . . . . . . . . . . . . . . . .  8
+     2.3.  Body . . . . . . . . . . . . . . . . . . . . . . . . . . .  9
+   3.  Syntax . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10
+     3.1.  Introduction . . . . . . . . . . . . . . . . . . . . . . . 10
+     3.2.  Lexical Tokens . . . . . . . . . . . . . . . . . . . . . . 10
+       3.2.1.  Quoted characters  . . . . . . . . . . . . . . . . . . 10
+       3.2.2.  Folding White Space and Comments . . . . . . . . . . . 11
+       3.2.3.  Atom . . . . . . . . . . . . . . . . . . . . . . . . . 12
+       3.2.4.  Quoted Strings . . . . . . . . . . . . . . . . . . . . 13
+       3.2.5.  Miscellaneous Tokens . . . . . . . . . . . . . . . . . 14
+     3.3.  Date and Time Specification  . . . . . . . . . . . . . . . 14
+     3.4.  Address Specification  . . . . . . . . . . . . . . . . . . 16
+       3.4.1.  Addr-Spec Specification  . . . . . . . . . . . . . . . 17
+     3.5.  Overall Message Syntax . . . . . . . . . . . . . . . . . . 18
+     3.6.  Field Definitions  . . . . . . . . . . . . . . . . . . . . 19
+       3.6.1.  The Origination Date Field . . . . . . . . . . . . . . 22
+       3.6.2.  Originator Fields  . . . . . . . . . . . . . . . . . . 22
+       3.6.3.  Destination Address Fields . . . . . . . . . . . . . . 23
+       3.6.4.  Identification Fields  . . . . . . . . . . . . . . . . 25
+       3.6.5.  Informational Fields . . . . . . . . . . . . . . . . . 27
+       3.6.6.  Resent Fields  . . . . . . . . . . . . . . . . . . . . 28
+       3.6.7.  Trace Fields . . . . . . . . . . . . . . . . . . . . . 30
+       3.6.8.  Optional Fields  . . . . . . . . . . . . . . . . . . . 30
+   4.  Obsolete Syntax  . . . . . . . . . . . . . . . . . . . . . . . 31
+     4.1.  Miscellaneous Obsolete Tokens  . . . . . . . . . . . . . . 32
+     4.2.  Obsolete Folding White Space . . . . . . . . . . . . . . . 33
+     4.3.  Obsolete Date and Time . . . . . . . . . . . . . . . . . . 33
+     4.4.  Obsolete Addressing  . . . . . . . . . . . . . . . . . . . 35
+     4.5.  Obsolete Header Fields . . . . . . . . . . . . . . . . . . 35
+       4.5.1.  Obsolete Origination Date Field  . . . . . . . . . . . 36
+       4.5.2.  Obsolete Originator Fields . . . . . . . . . . . . . . 36
+       4.5.3.  Obsolete Destination Address Fields  . . . . . . . . . 37
+       4.5.4.  Obsolete Identification Fields . . . . . . . . . . . . 37
+       4.5.5.  Obsolete Informational Fields  . . . . . . . . . . . . 37
+
+
+
+Resnick                     Standards Track                     [Page 2]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+       4.5.6.  Obsolete Resent Fields . . . . . . . . . . . . . . . . 38
+       4.5.7.  Obsolete Trace Fields  . . . . . . . . . . . . . . . . 38
+       4.5.8.  Obsolete optional fields . . . . . . . . . . . . . . . 38
+   5.  Security Considerations  . . . . . . . . . . . . . . . . . . . 38
+   6.  IANA Considerations  . . . . . . . . . . . . . . . . . . . . . 39
+   Appendix A.     Example Messages . . . . . . . . . . . . . . . . . 43
+   Appendix A.1.   Addressing Examples  . . . . . . . . . . . . . . . 44
+   Appendix A.1.1. A Message from One Person to Another with
+                   Simple Addressing  . . . . . . . . . . . . . . . . 44
+   Appendix A.1.2. Different Types of Mailboxes . . . . . . . . . . . 45
+   Appendix A.1.3. Group Addresses  . . . . . . . . . . . . . . . . . 45
+   Appendix A.2.   Reply Messages . . . . . . . . . . . . . . . . . . 46
+   Appendix A.3.   Resent Messages  . . . . . . . . . . . . . . . . . 47
+   Appendix A.4.   Messages with Trace Fields . . . . . . . . . . . . 48
+   Appendix A.5.   White Space, Comments, and Other Oddities  . . . . 49
+   Appendix A.6.   Obsoleted Forms  . . . . . . . . . . . . . . . . . 50
+   Appendix A.6.1. Obsolete Addressing  . . . . . . . . . . . . . . . 50
+   Appendix A.6.2. Obsolete Dates . . . . . . . . . . . . . . . . . . 50
+   Appendix A.6.3. Obsolete White Space and Comments  . . . . . . . . 51
+   Appendix B.     Differences from Earlier Specifications  . . . . . 52
+   Appendix C.     Acknowledgements . . . . . . . . . . . . . . . . . 53
+   7.  References . . . . . . . . . . . . . . . . . . . . . . . . . . 55
+     7.1.  Normative References . . . . . . . . . . . . . . . . . . . 55
+     7.2.  Informative References . . . . . . . . . . . . . . . . . . 55
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                     [Page 3]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+1.  Introduction
+
+1.1.  Scope
+
+   This document specifies the Internet Message Format (IMF), a syntax
+   for text messages that are sent between computer users, within the
+   framework of "electronic mail" messages.  This specification is an
+   update to [RFC2822], which itself superseded [RFC0822], updating it
+   to reflect current practice and incorporating incremental changes
+   that were specified in other RFCs such as [RFC1123].
+
+   This document specifies a syntax only for text messages.  In
+   particular, it makes no provision for the transmission of images,
+   audio, or other sorts of structured data in electronic mail messages.
+   There are several extensions published, such as the MIME document
+   series ([RFC2045], [RFC2046], [RFC2049]), which describe mechanisms
+   for the transmission of such data through electronic mail, either by
+   extending the syntax provided here or by structuring such messages to
+   conform to this syntax.  Those mechanisms are outside of the scope of
+   this specification.
+
+   In the context of electronic mail, messages are viewed as having an
+   envelope and contents.  The envelope contains whatever information is
+   needed to accomplish transmission and delivery.  (See [RFC5321] for a
+   discussion of the envelope.)  The contents comprise the object to be
+   delivered to the recipient.  This specification applies only to the
+   format and some of the semantics of message contents.  It contains no
+   specification of the information in the envelope.
+
+   However, some message systems may use information from the contents
+   to create the envelope.  It is intended that this specification
+   facilitate the acquisition of such information by programs.
+
+   This specification is intended as a definition of what message
+   content format is to be passed between systems.  Though some message
+   systems locally store messages in this format (which eliminates the
+   need for translation between formats) and others use formats that
+   differ from the one specified in this specification, local storage is
+   outside of the scope of this specification.
+
+      Note: This specification is not intended to dictate the internal
+      formats used by sites, the specific message system features that
+      they are expected to support, or any of the characteristics of
+      user interface programs that create or read messages.  In
+      addition, this document does not specify an encoding of the
+      characters for either transport or storage; that is, it does not
+      specify the number of bits used or how those bits are specifically
+      transferred over the wire or stored on disk.
+
+
+
+Resnick                     Standards Track                     [Page 4]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+1.2.  Notational Conventions
+
+1.2.1.  Requirements Notation
+
+   This document occasionally uses terms that appear in capital letters.
+   When the terms "MUST", "SHOULD", "RECOMMENDED", "MUST NOT", "SHOULD
+   NOT", and "MAY" appear capitalized, they are being used to indicate
+   particular requirements of this specification.  A discussion of the
+   meanings of these terms appears in [RFC2119].
+
+1.2.2.  Syntactic Notation
+
+   This specification uses the Augmented Backus-Naur Form (ABNF)
+   [RFC5234] notation for the formal definitions of the syntax of
+   messages.  Characters will be specified either by a decimal value
+   (e.g., the value %d65 for uppercase A and %d97 for lowercase A) or by
+   a case-insensitive literal value enclosed in quotation marks (e.g.,
+   "A" for either uppercase or lowercase A).
+
+1.2.3.  Structure of This Document
+
+   This document is divided into several sections.
+
+   This section, section 1, is a short introduction to the document.
+
+   Section 2 lays out the general description of a message and its
+   constituent parts.  This is an overview to help the reader understand
+   some of the general principles used in the later portions of this
+   document.  Any examples in this section MUST NOT be taken as
+   specification of the formal syntax of any part of a message.
+
+   Section 3 specifies formal ABNF rules for the structure of each part
+   of a message (the syntax) and describes the relationship between
+   those parts and their meaning in the context of a message (the
+   semantics).  That is, it lays out the actual rules for the structure
+   of each part of a message (the syntax) as well as a description of
+   the parts and instructions for their interpretation (the semantics).
+   This includes analysis of the syntax and semantics of subparts of
+   messages that have specific structure.  The syntax included in
+   section 3 represents messages as they MUST be created.  There are
+   also notes in section 3 to indicate if any of the options specified
+   in the syntax SHOULD be used over any of the others.
+
+   Both sections 2 and 3 describe messages that are legal to generate
+   for purposes of this specification.
+
+
+
+
+
+
+Resnick                     Standards Track                     [Page 5]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   Section 4 of this document specifies an "obsolete" syntax.  There are
+   references in section 3 to these obsolete syntactic elements.  The
+   rules of the obsolete syntax are elements that have appeared in
+   earlier versions of this specification or have previously been widely
+   used in Internet messages.  As such, these elements MUST be
+   interpreted by parsers of messages in order to be conformant to this
+   specification.  However, since items in this syntax have been
+   determined to be non-interoperable or to cause significant problems
+   for recipients of messages, they MUST NOT be generated by creators of
+   conformant messages.
+
+   Section 5 details security considerations to take into account when
+   implementing this specification.
+
+   Appendix A lists examples of different sorts of messages.  These
+   examples are not exhaustive of the types of messages that appear on
+   the Internet, but give a broad overview of certain syntactic forms.
+
+   Appendix B lists the differences between this specification and
+   earlier specifications for Internet messages.
+
+   Appendix C contains acknowledgements.
+
+2.  Lexical Analysis of Messages
+
+2.1.  General Description
+
+   At the most basic level, a message is a series of characters.  A
+   message that is conformant with this specification is composed of
+   characters with values in the range of 1 through 127 and interpreted
+   as US-ASCII [ANSI.X3-4.1986] characters.  For brevity, this document
+   sometimes refers to this range of characters as simply "US-ASCII
+   characters".
+
+      Note: This document specifies that messages are made up of
+      characters in the US-ASCII range of 1 through 127.  There are
+      other documents, specifically the MIME document series ([RFC2045],
+      [RFC2046], [RFC2047], [RFC2049], [RFC4288], [RFC4289]), that
+      extend this specification to allow for values outside of that
+      range.  Discussion of those mechanisms is not within the scope of
+      this specification.
+
+   Messages are divided into lines of characters.  A line is a series of
+   characters that is delimited with the two characters carriage-return
+   and line-feed; that is, the carriage return (CR) character (ASCII
+   value 13) followed immediately by the line feed (LF) character (ASCII
+   value 10).  (The carriage return/line feed pair is usually written in
+   this document as "CRLF".)
+
+
+
+Resnick                     Standards Track                     [Page 6]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   A message consists of header fields (collectively called "the header
+   section of the message") followed, optionally, by a body.  The header
+   section is a sequence of lines of characters with special syntax as
+   defined in this specification.  The body is simply a sequence of
+   characters that follows the header section and is separated from the
+   header section by an empty line (i.e., a line with nothing preceding
+   the CRLF).
+
+      Note: Common parlance and earlier versions of this specification
+      use the term "header" to either refer to the entire header section
+      or to refer to an individual header field.  To avoid ambiguity,
+      this document does not use the terms "header" or "headers" in
+      isolation, but instead always uses "header field" to refer to the
+      individual field and "header section" to refer to the entire
+      collection.
+
+2.1.1.  Line Length Limits
+
+   There are two limits that this specification places on the number of
+   characters in a line.  Each line of characters MUST be no more than
+   998 characters, and SHOULD be no more than 78 characters, excluding
+   the CRLF.
+
+   The 998 character limit is due to limitations in many implementations
+   that send, receive, or store IMF messages which simply cannot handle
+   more than 998 characters on a line.  Receiving implementations would
+   do well to handle an arbitrarily large number of characters in a line
+   for robustness sake.  However, there are so many implementations that
+   (in compliance with the transport requirements of [RFC5321]) do not
+   accept messages containing more than 1000 characters including the CR
+   and LF per line that it is important for implementations not to
+   create such messages.
+
+   The more conservative 78 character recommendation is to accommodate
+   the many implementations of user interfaces that display these
+   messages which may truncate, or disastrously wrap, the display of
+   more than 78 characters per line, in spite of the fact that such
+   implementations are non-conformant to the intent of this
+   specification (and that of [RFC5321] if they actually cause
+   information to be lost).  Again, even though this limitation is put
+   on messages, it is incumbent upon implementations that display
+   messages to handle an arbitrarily large number of characters in a
+   line (certainly at least up to the 998 character limit) for the sake
+   of robustness.
+
+
+
+
+
+
+
+Resnick                     Standards Track                     [Page 7]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+2.2.  Header Fields
+
+   Header fields are lines beginning with a field name, followed by a
+   colon (":"), followed by a field body, and terminated by CRLF.  A
+   field name MUST be composed of printable US-ASCII characters (i.e.,
+   characters that have values between 33 and 126, inclusive), except
+   colon.  A field body may be composed of printable US-ASCII characters
+   as well as the space (SP, ASCII value 32) and horizontal tab (HTAB,
+   ASCII value 9) characters (together known as the white space
+   characters, WSP).  A field body MUST NOT include CR and LF except
+   when used in "folding" and "unfolding", as described in section
+   2.2.3.  All field bodies MUST conform to the syntax described in
+   sections 3 and 4 of this specification.
+
+2.2.1.  Unstructured Header Field Bodies
+
+   Some field bodies in this specification are defined simply as
+   "unstructured" (which is specified in section 3.2.5 as any printable
+   US-ASCII characters plus white space characters) with no further
+   restrictions.  These are referred to as unstructured field bodies.
+   Semantically, unstructured field bodies are simply to be treated as a
+   single line of characters with no further processing (except for
+   "folding" and "unfolding" as described in section 2.2.3).
+
+2.2.2.  Structured Header Field Bodies
+
+   Some field bodies in this specification have a syntax that is more
+   restrictive than the unstructured field bodies described above.
+   These are referred to as "structured" field bodies.  Structured field
+   bodies are sequences of specific lexical tokens as described in
+   sections 3 and 4 of this specification.  Many of these tokens are
+   allowed (according to their syntax) to be introduced by or end with
+   comments (as described in section 3.2.2) as well as the white space
+   characters, and those white space characters are subject to "folding"
+   and "unfolding" as described in section 2.2.3.  Semantic analysis of
+   structured field bodies is given along with their syntax.
+
+2.2.3.  Long Header Fields
+
+   Each header field is logically a single line of characters comprising
+   the field name, the colon, and the field body.  For convenience
+   however, and to deal with the 998/78 character limitations per line,
+   the field body portion of a header field can be split into a
+   multiple-line representation; this is called "folding".  The general
+   rule is that wherever this specification allows for folding white
+   space (not simply WSP characters), a CRLF may be inserted before any
+   WSP.
+
+
+
+
+Resnick                     Standards Track                     [Page 8]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   For example, the header field:
+
+   Subject: This is a test
+
+   can be represented as:
+
+   Subject: This
+    is a test
+
+      Note: Though structured field bodies are defined in such a way
+      that folding can take place between many of the lexical tokens
+      (and even within some of the lexical tokens), folding SHOULD be
+      limited to placing the CRLF at higher-level syntactic breaks.  For
+      instance, if a field body is defined as comma-separated values, it
+      is recommended that folding occur after the comma separating the
+      structured items in preference to other places where the field
+      could be folded, even if it is allowed elsewhere.
+
+   The process of moving from this folded multiple-line representation
+   of a header field to its single line representation is called
+   "unfolding".  Unfolding is accomplished by simply removing any CRLF
+   that is immediately followed by WSP.  Each header field should be
+   treated in its unfolded form for further syntactic and semantic
+   evaluation.  An unfolded header field has no length restriction and
+   therefore may be indeterminately long.
+
+2.3.  Body
+
+   The body of a message is simply lines of US-ASCII characters.  The
+   only two limitations on the body are as follows:
+
+   o  CR and LF MUST only occur together as CRLF; they MUST NOT appear
+      independently in the body.
+   o  Lines of characters in the body MUST be limited to 998 characters,
+      and SHOULD be limited to 78 characters, excluding the CRLF.
+
+      Note: As was stated earlier, there are other documents,
+      specifically the MIME documents ([RFC2045], [RFC2046], [RFC2049],
+      [RFC4288], [RFC4289]), that extend (and limit) this specification
+      to allow for different sorts of message bodies.  Again, these
+      mechanisms are beyond the scope of this document.
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                     [Page 9]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+3.  Syntax
+
+3.1.  Introduction
+
+   The syntax as given in this section defines the legal syntax of
+   Internet messages.  Messages that are conformant to this
+   specification MUST conform to the syntax in this section.  If there
+   are options in this section where one option SHOULD be generated,
+   that is indicated either in the prose or in a comment next to the
+   syntax.
+
+   For the defined expressions, a short description of the syntax and
+   use is given, followed by the syntax in ABNF, followed by a semantic
+   analysis.  The following primitive tokens that are used but otherwise
+   unspecified are taken from the "Core Rules" of [RFC5234], Appendix
+   B.1: CR, LF, CRLF, HTAB, SP, WSP, DQUOTE, DIGIT, ALPHA, and VCHAR.
+
+   In some of the definitions, there will be non-terminals whose names
+   start with "obs-".  These "obs-" elements refer to tokens defined in
+   the obsolete syntax in section 4.  In all cases, these productions
+   are to be ignored for the purposes of generating legal Internet
+   messages and MUST NOT be used as part of such a message.  However,
+   when interpreting messages, these tokens MUST be honored as part of
+   the legal syntax.  In this sense, section 3 defines a grammar for the
+   generation of messages, with "obs-" elements that are to be ignored,
+   while section 4 adds grammar for the interpretation of messages.
+
+3.2.  Lexical Tokens
+
+   The following rules are used to define an underlying lexical
+   analyzer, which feeds tokens to the higher-level parsers.  This
+   section defines the tokens used in structured header field bodies.
+
+      Note: Readers of this specification need to pay special attention
+      to how these lexical tokens are used in both the lower-level and
+      higher-level syntax later in the document.  Particularly, the
+      white space tokens and the comment tokens defined in section 3.2.2
+      get used in the lower-level tokens defined here, and those lower-
+      level tokens are in turn used as parts of the higher-level tokens
+      defined later.  Therefore, white space and comments may be allowed
+      in the higher-level tokens even though they may not explicitly
+      appear in a particular definition.
+
+3.2.1.  Quoted characters
+
+   Some characters are reserved for special interpretation, such as
+   delimiting lexical tokens.  To permit use of these characters as
+   uninterpreted data, a quoting mechanism is provided.
+
+
+
+Resnick                     Standards Track                    [Page 10]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   quoted-pair     =   ("\" (VCHAR / WSP)) / obs-qp
+
+   Where any quoted-pair appears, it is to be interpreted as the
+   character alone.  That is to say, the "\" character that appears as
+   part of a quoted-pair is semantically "invisible".
+
+      Note: The "\" character may appear in a message where it is not
+      part of a quoted-pair.  A "\" character that does not appear in a
+      quoted-pair is not semantically invisible.  The only places in
+      this specification where quoted-pair currently appears are
+      ccontent, qcontent, and in obs-dtext in section 4.
+
+3.2.2.  Folding White Space and Comments
+
+   White space characters, including white space used in folding
+   (described in section 2.2.3), may appear between many elements in
+   header field bodies.  Also, strings of characters that are treated as
+   comments may be included in structured field bodies as characters
+   enclosed in parentheses.  The following defines the folding white
+   space (FWS) and comment constructs.
+
+   Strings of characters enclosed in parentheses are considered comments
+   so long as they do not appear within a "quoted-string", as defined in
+   section 3.2.4.  Comments may nest.
+
+   There are several places in this specification where comments and FWS
+   may be freely inserted.  To accommodate that syntax, an additional
+   token for "CFWS" is defined for places where comments and/or FWS can
+   occur.  However, where CFWS occurs in this specification, it MUST NOT
+   be inserted in such a way that any line of a folded header field is
+   made up entirely of WSP characters and nothing else.
+
+   FWS             =   ([*WSP CRLF] 1*WSP) /  obs-FWS
+                                          ; Folding white space
+
+   ctext           =   %d33-39 /          ; Printable US-ASCII
+                       %d42-91 /          ;  characters not including
+                       %d93-126 /         ;  "(", ")", or "\"
+                       obs-ctext
+
+   ccontent        =   ctext / quoted-pair / comment
+
+   comment         =   "(" *([FWS] ccontent) [FWS] ")"
+
+   CFWS            =   (1*([FWS] comment) [FWS]) / FWS
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 11]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   Throughout this specification, where FWS (the folding white space
+   token) appears, it indicates a place where folding, as discussed in
+   section 2.2.3, may take place.  Wherever folding appears in a message
+   (that is, a header field body containing a CRLF followed by any WSP),
+   unfolding (removal of the CRLF) is performed before any further
+   semantic analysis is performed on that header field according to this
+   specification.  That is to say, any CRLF that appears in FWS is
+   semantically "invisible".
+
+   A comment is normally used in a structured field body to provide some
+   human-readable informational text.  Since a comment is allowed to
+   contain FWS, folding is permitted within the comment.  Also note that
+   since quoted-pair is allowed in a comment, the parentheses and
+   backslash characters may appear in a comment, so long as they appear
+   as a quoted-pair.  Semantically, the enclosing parentheses are not
+   part of the comment; the comment is what is contained between the two
+   parentheses.  As stated earlier, the "\" in any quoted-pair and the
+   CRLF in any FWS that appears within the comment are semantically
+   "invisible" and therefore not part of the comment either.
+
+   Runs of FWS, comment, or CFWS that occur between lexical tokens in a
+   structured header field are semantically interpreted as a single
+   space character.
+
+3.2.3.  Atom
+
+   Several productions in structured header field bodies are simply
+   strings of certain basic characters.  Such productions are called
+   atoms.
+
+   Some of the structured header field bodies also allow the period
+   character (".", ASCII value 46) within runs of atext.  An additional
+   "dot-atom" token is defined for those purposes.
+
+      Note: The "specials" token does not appear anywhere else in this
+      specification.  It is simply the visible (i.e., non-control, non-
+      white space) characters that do not appear in atext.  It is
+      provided only because it is useful for implementers who use tools
+      that lexically analyze messages.  Each of the characters in
+      specials can be used to indicate a tokenization point in lexical
+      analysis.
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 12]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   atext           =   ALPHA / DIGIT /    ; Printable US-ASCII
+                       "!" / "#" /        ;  characters not including
+                       "$" / "%" /        ;  specials.  Used for atoms.
+                       "&" / "'" /
+                       "*" / "+" /
+                       "-" / "/" /
+                       "=" / "?" /
+                       "^" / "_" /
+                       "`" / "{" /
+                       "|" / "}" /
+                       "~"
+
+   atom            =   [CFWS] 1*atext [CFWS]
+
+   dot-atom-text   =   1*atext *("." 1*atext)
+
+   dot-atom        =   [CFWS] dot-atom-text [CFWS]
+
+   specials        =   "(" / ")" /        ; Special characters that do
+                       "<" / ">" /        ;  not appear in atext
+                       "[" / "]" /
+                       ":" / ";" /
+                       "@" / "\" /
+                       "," / "." /
+                       DQUOTE
+
+   Both atom and dot-atom are interpreted as a single unit, comprising
+   the string of characters that make it up.  Semantically, the optional
+   comments and FWS surrounding the rest of the characters are not part
+   of the atom; the atom is only the run of atext characters in an atom,
+   or the atext and "." characters in a dot-atom.
+
+3.2.4.  Quoted Strings
+
+   Strings of characters that include characters other than those
+   allowed in atoms can be represented in a quoted string format, where
+   the characters are surrounded by quote (DQUOTE, ASCII value 34)
+   characters.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 13]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   qtext           =   %d33 /             ; Printable US-ASCII
+                       %d35-91 /          ;  characters not including
+                       %d93-126 /         ;  "\" or the quote character
+                       obs-qtext
+
+   qcontent        =   qtext / quoted-pair
+
+   quoted-string   =   [CFWS]
+                       DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+                       [CFWS]
+
+   A quoted-string is treated as a unit.  That is, quoted-string is
+   identical to atom, semantically.  Since a quoted-string is allowed to
+   contain FWS, folding is permitted.  Also note that since quoted-pair
+   is allowed in a quoted-string, the quote and backslash characters may
+   appear in a quoted-string so long as they appear as a quoted-pair.
+
+   Semantically, neither the optional CFWS outside of the quote
+   characters nor the quote characters themselves are part of the
+   quoted-string; the quoted-string is what is contained between the two
+   quote characters.  As stated earlier, the "\" in any quoted-pair and
+   the CRLF in any FWS/CFWS that appears within the quoted-string are
+   semantically "invisible" and therefore not part of the quoted-string
+   either.
+
+3.2.5.  Miscellaneous Tokens
+
+   Three additional tokens are defined: word and phrase for combinations
+   of atoms and/or quoted-strings, and unstructured for use in
+   unstructured header fields and in some places within structured
+   header fields.
+
+   word            =   atom / quoted-string
+
+   phrase          =   1*word / obs-phrase
+
+   unstructured    =   (*([FWS] VCHAR) *WSP) / obs-unstruct
+
+3.3.  Date and Time Specification
+
+   Date and time values occur in several header fields.  This section
+   specifies the syntax for a full date and time specification.  Though
+   folding white space is permitted throughout the date-time
+   specification, it is RECOMMENDED that a single space be used in each
+   place that FWS appears (whether it is required or optional); some
+   older implementations will not interpret longer sequences of folding
+   white space correctly.
+
+
+
+
+Resnick                     Standards Track                    [Page 14]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   date-time       =   [ day-of-week "," ] date time [CFWS]
+
+   day-of-week     =   ([FWS] day-name) / obs-day-of-week
+
+   day-name        =   "Mon" / "Tue" / "Wed" / "Thu" /
+                       "Fri" / "Sat" / "Sun"
+
+   date            =   day month year
+
+   day             =   ([FWS] 1*2DIGIT FWS) / obs-day
+
+   month           =   "Jan" / "Feb" / "Mar" / "Apr" /
+                       "May" / "Jun" / "Jul" / "Aug" /
+                       "Sep" / "Oct" / "Nov" / "Dec"
+
+   year            =   (FWS 4*DIGIT FWS) / obs-year
+
+   time            =   time-of-day zone
+
+   time-of-day     =   hour ":" minute [ ":" second ]
+
+   hour            =   2DIGIT / obs-hour
+
+   minute          =   2DIGIT / obs-minute
+
+   second          =   2DIGIT / obs-second
+
+   zone            =   (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
+
+   The day is the numeric day of the month.  The year is any numeric
+   year 1900 or later.
+
+   The time-of-day specifies the number of hours, minutes, and
+   optionally seconds since midnight of the date indicated.
+
+   The date and time-of-day SHOULD express local time.
+
+   The zone specifies the offset from Coordinated Universal Time (UTC,
+   formerly referred to as "Greenwich Mean Time") that the date and
+   time-of-day represent.  The "+" or "-" indicates whether the time-of-
+   day is ahead of (i.e., east of) or behind (i.e., west of) Universal
+   Time.  The first two digits indicate the number of hours difference
+   from Universal Time, and the last two digits indicate the number of
+   additional minutes difference from Universal Time.  (Hence, +hhmm
+   means +(hh * 60 + mm) minutes, and -hhmm means -(hh * 60 + mm)
+   minutes).  The form "+0000" SHOULD be used to indicate a time zone at
+   Universal Time.  Though "-0000" also indicates Universal Time, it is
+
+
+
+
+Resnick                     Standards Track                    [Page 15]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   used to indicate that the time was generated on a system that may be
+   in a local time zone other than Universal Time and that the date-time
+   contains no information about the local time zone.
+
+   A date-time specification MUST be semantically valid.  That is, the
+   day-of-week (if included) MUST be the day implied by the date, the
+   numeric day-of-month MUST be between 1 and the number of days allowed
+   for the specified month (in the specified year), the time-of-day MUST
+   be in the range 00:00:00 through 23:59:60 (the number of seconds
+   allowing for a leap second; see [RFC1305]), and the last two digits
+   of the zone MUST be within the range 00 through 59.
+
+3.4.  Address Specification
+
+   Addresses occur in several message header fields to indicate senders
+   and recipients of messages.  An address may either be an individual
+   mailbox, or a group of mailboxes.
+
+   address         =   mailbox / group
+
+   mailbox         =   name-addr / addr-spec
+
+   name-addr       =   [display-name] angle-addr
+
+   angle-addr      =   [CFWS] "<" addr-spec ">" [CFWS] /
+                       obs-angle-addr
+
+   group           =   display-name ":" [group-list] ";" [CFWS]
+
+   display-name    =   phrase
+
+   mailbox-list    =   (mailbox *("," mailbox)) / obs-mbox-list
+
+   address-list    =   (address *("," address)) / obs-addr-list
+
+   group-list      =   mailbox-list / CFWS / obs-group-list
+
+   A mailbox receives mail.  It is a conceptual entity that does not
+   necessarily pertain to file storage.  For example, some sites may
+   choose to print mail on a printer and deliver the output to the
+   addressee's desk.
+
+   Normally, a mailbox is composed of two parts: (1) an optional display
+   name that indicates the name of the recipient (which can be a person
+   or a system) that could be displayed to the user of a mail
+   application, and (2) an addr-spec address enclosed in angle brackets
+
+
+
+
+
+Resnick                     Standards Track                    [Page 16]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   ("<" and ">").  There is an alternate simple form of a mailbox where
+   the addr-spec address appears alone, without the recipient's name or
+   the angle brackets.  The Internet addr-spec address is described in
+   section 3.4.1.
+
+      Note: Some legacy implementations used the simple form where the
+      addr-spec appears without the angle brackets, but included the
+      name of the recipient in parentheses as a comment following the
+      addr-spec.  Since the meaning of the information in a comment is
+      unspecified, implementations SHOULD use the full name-addr form of
+      the mailbox, instead of the legacy form, to specify the display
+      name associated with a mailbox.  Also, because some legacy
+      implementations interpret the comment, comments generally SHOULD
+      NOT be used in address fields to avoid confusing such
+      implementations.
+
+   When it is desirable to treat several mailboxes as a single unit
+   (i.e., in a distribution list), the group construct can be used.  The
+   group construct allows the sender to indicate a named group of
+   recipients.  This is done by giving a display name for the group,
+   followed by a colon, followed by a comma-separated list of any number
+   of mailboxes (including zero and one), and ending with a semicolon.
+   Because the list of mailboxes can be empty, using the group construct
+   is also a simple way to communicate to recipients that the message
+   was sent to one or more named sets of recipients, without actually
+   providing the individual mailbox address for any of those recipients.
+
+3.4.1.  Addr-Spec Specification
+
+   An addr-spec is a specific Internet identifier that contains a
+   locally interpreted string followed by the at-sign character ("@",
+   ASCII value 64) followed by an Internet domain.  The locally
+   interpreted string is either a quoted-string or a dot-atom.  If the
+   string can be represented as a dot-atom (that is, it contains no
+   characters other than atext characters or "." surrounded by atext
+   characters), then the dot-atom form SHOULD be used and the quoted-
+   string form SHOULD NOT be used.  Comments and folding white space
+   SHOULD NOT be used around the "@" in the addr-spec.
+
+      Note: A liberal syntax for the domain portion of addr-spec is
+      given here.  However, the domain portion contains addressing
+      information specified by and used in other protocols (e.g.,
+      [RFC1034], [RFC1035], [RFC1123], [RFC5321]).  It is therefore
+      incumbent upon implementations to conform to the syntax of
+      addresses for the context in which they are used.
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 17]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   addr-spec       =   local-part "@" domain
+
+   local-part      =   dot-atom / quoted-string / obs-local-part
+
+   domain          =   dot-atom / domain-literal / obs-domain
+
+   domain-literal  =   [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
+
+   dtext           =   %d33-90 /          ; Printable US-ASCII
+                       %d94-126 /         ;  characters not including
+                       obs-dtext          ;  "[", "]", or "\"
+
+   The domain portion identifies the point to which the mail is
+   delivered.  In the dot-atom form, this is interpreted as an Internet
+   domain name (either a host name or a mail exchanger name) as
+   described in [RFC1034], [RFC1035], and [RFC1123].  In the domain-
+   literal form, the domain is interpreted as the literal Internet
+   address of the particular host.  In both cases, how addressing is
+   used and how messages are transported to a particular host is covered
+   in separate documents, such as [RFC5321].  These mechanisms are
+   outside of the scope of this document.
+
+   The local-part portion is a domain-dependent string.  In addresses,
+   it is simply interpreted on the particular host as a name of a
+   particular mailbox.
+
+3.5.  Overall Message Syntax
+
+   A message consists of header fields, optionally followed by a message
+   body.  Lines in a message MUST be a maximum of 998 characters
+   excluding the CRLF, but it is RECOMMENDED that lines be limited to 78
+   characters excluding the CRLF.  (See section 2.1.1 for explanation.)
+   In a message body, though all of the characters listed in the text
+   rule MAY be used, the use of US-ASCII control characters (values 1
+   through 8, 11, 12, and 14 through 31) is discouraged since their
+   interpretation by receivers for display is not guaranteed.
+
+   message         =   (fields / obs-fields)
+                       [CRLF body]
+
+   body            =   (*(*998text CRLF) *998text) / obs-body
+
+   text            =   %d1-9 /            ; Characters excluding CR
+                       %d11 /             ;  and LF
+                       %d12 /
+                       %d14-127
+
+
+
+
+
+Resnick                     Standards Track                    [Page 18]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   The header fields carry most of the semantic information and are
+   defined in section 3.6.  The body is simply a series of lines of text
+   that are uninterpreted for the purposes of this specification.
+
+3.6.  Field Definitions
+
+   The header fields of a message are defined here.  All header fields
+   have the same general syntactic structure: a field name, followed by
+   a colon, followed by the field body.  The specific syntax for each
+   header field is defined in the subsequent sections.
+
+      Note: In the ABNF syntax for each field in subsequent sections,
+      each field name is followed by the required colon.  However, for
+      brevity, sometimes the colon is not referred to in the textual
+      description of the syntax.  It is, nonetheless, required.
+
+   It is important to note that the header fields are not guaranteed to
+   be in a particular order.  They may appear in any order, and they
+   have been known to be reordered occasionally when transported over
+   the Internet.  However, for the purposes of this specification,
+   header fields SHOULD NOT be reordered when a message is transported
+   or transformed.  More importantly, the trace header fields and resent
+   header fields MUST NOT be reordered, and SHOULD be kept in blocks
+   prepended to the message.  See sections 3.6.6 and 3.6.7 for more
+   information.
+
+   The only required header fields are the origination date field and
+   the originator address field(s).  All other header fields are
+   syntactically optional.  More information is contained in the table
+   following this definition.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 19]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   fields          =   *(trace
+                         *optional-field /
+                         *(resent-date /
+                          resent-from /
+                          resent-sender /
+                          resent-to /
+                          resent-cc /
+                          resent-bcc /
+                          resent-msg-id))
+                       *(orig-date /
+                       from /
+                       sender /
+                       reply-to /
+                       to /
+                       cc /
+                       bcc /
+                       message-id /
+                       in-reply-to /
+                       references /
+                       subject /
+                       comments /
+                       keywords /
+                       optional-field)
+
+   The following table indicates limits on the number of times each
+   field may occur in the header section of a message as well as any
+   special limitations on the use of those fields.  An asterisk ("*")
+   next to a value in the minimum or maximum column indicates that a
+   special restriction appears in the Notes column.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 20]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   +----------------+--------+------------+----------------------------+
+   | Field          | Min    | Max number | Notes                      |
+   |                | number |            |                            |
+   +----------------+--------+------------+----------------------------+
+   | trace          | 0      | unlimited  | Block prepended - see      |
+   |                |        |            | 3.6.7                      |
+   | resent-date    | 0*     | unlimited* | One per block, required if |
+   |                |        |            | other resent fields are    |
+   |                |        |            | present - see 3.6.6        |
+   | resent-from    | 0      | unlimited* | One per block - see 3.6.6  |
+   | resent-sender  | 0*     | unlimited* | One per block, MUST occur  |
+   |                |        |            | with multi-address         |
+   |                |        |            | resent-from - see 3.6.6    |
+   | resent-to      | 0      | unlimited* | One per block - see 3.6.6  |
+   | resent-cc      | 0      | unlimited* | One per block - see 3.6.6  |
+   | resent-bcc     | 0      | unlimited* | One per block - see 3.6.6  |
+   | resent-msg-id  | 0      | unlimited* | One per block - see 3.6.6  |
+   | orig-date      | 1      | 1          |                            |
+   | from           | 1      | 1          | See sender and 3.6.2       |
+   | sender         | 0*     | 1          | MUST occur with            |
+   |                |        |            | multi-address from - see   |
+   |                |        |            | 3.6.2                      |
+   | reply-to       | 0      | 1          |                            |
+   | to             | 0      | 1          |                            |
+   | cc             | 0      | 1          |                            |
+   | bcc            | 0      | 1          |                            |
+   | message-id     | 0*     | 1          | SHOULD be present - see    |
+   |                |        |            | 3.6.4                      |
+   | in-reply-to    | 0*     | 1          | SHOULD occur in some       |
+   |                |        |            | replies - see 3.6.4        |
+   | references     | 0*     | 1          | SHOULD occur in some       |
+   |                |        |            | replies - see 3.6.4        |
+   | subject        | 0      | 1          |                            |
+   | comments       | 0      | unlimited  |                            |
+   | keywords       | 0      | unlimited  |                            |
+   | optional-field | 0      | unlimited  |                            |
+   +----------------+--------+------------+----------------------------+
+
+   The exact interpretation of each field is described in subsequent
+   sections.
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 21]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+3.6.1.  The Origination Date Field
+
+   The origination date field consists of the field name "Date" followed
+   by a date-time specification.
+
+   orig-date       =   "Date:" date-time CRLF
+
+   The origination date specifies the date and time at which the creator
+   of the message indicated that the message was complete and ready to
+   enter the mail delivery system.  For instance, this might be the time
+   that a user pushes the "send" or "submit" button in an application
+   program.  In any case, it is specifically not intended to convey the
+   time that the message is actually transported, but rather the time at
+   which the human or other creator of the message has put the message
+   into its final form, ready for transport.  (For example, a portable
+   computer user who is not connected to a network might queue a message
+   for delivery.  The origination date is intended to contain the date
+   and time that the user queued the message, not the time when the user
+   connected to the network to send the message.)
+
+3.6.2.  Originator Fields
+
+   The originator fields of a message consist of the from field, the
+   sender field (when applicable), and optionally the reply-to field.
+   The from field consists of the field name "From" and a comma-
+   separated list of one or more mailbox specifications.  If the from
+   field contains more than one mailbox specification in the mailbox-
+   list, then the sender field, containing the field name "Sender" and a
+   single mailbox specification, MUST appear in the message.  In either
+   case, an optional reply-to field MAY also be included, which contains
+   the field name "Reply-To" and a comma-separated list of one or more
+   addresses.
+
+   from            =   "From:" mailbox-list CRLF
+
+   sender          =   "Sender:" mailbox CRLF
+
+   reply-to        =   "Reply-To:" address-list CRLF
+
+   The originator fields indicate the mailbox(es) of the source of the
+   message.  The "From:" field specifies the author(s) of the message,
+   that is, the mailbox(es) of the person(s) or system(s) responsible
+   for the writing of the message.  The "Sender:" field specifies the
+   mailbox of the agent responsible for the actual transmission of the
+   message.  For example, if a secretary were to send a message for
+   another person, the mailbox of the secretary would appear in the
+   "Sender:" field and the mailbox of the actual author would appear in
+   the "From:" field.  If the originator of the message can be indicated
+
+
+
+Resnick                     Standards Track                    [Page 22]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   by a single mailbox and the author and transmitter are identical, the
+   "Sender:" field SHOULD NOT be used.  Otherwise, both fields SHOULD
+   appear.
+
+      Note: The transmitter information is always present.  The absence
+      of the "Sender:" field is sometimes mistakenly taken to mean that
+      the agent responsible for transmission of the message has not been
+      specified.  This absence merely means that the transmitter is
+      identical to the author and is therefore not redundantly placed
+      into the "Sender:" field.
+
+   The originator fields also provide the information required when
+   replying to a message.  When the "Reply-To:" field is present, it
+   indicates the address(es) to which the author of the message suggests
+   that replies be sent.  In the absence of the "Reply-To:" field,
+   replies SHOULD by default be sent to the mailbox(es) specified in the
+   "From:" field unless otherwise specified by the person composing the
+   reply.
+
+   In all cases, the "From:" field SHOULD NOT contain any mailbox that
+   does not belong to the author(s) of the message.  See also section
+   3.6.3 for more information on forming the destination addresses for a
+   reply.
+
+3.6.3.  Destination Address Fields
+
+   The destination fields of a message consist of three possible fields,
+   each of the same form: the field name, which is either "To", "Cc", or
+   "Bcc", followed by a comma-separated list of one or more addresses
+   (either mailbox or group syntax).  As the syntax below shows, the
+   address list of the "Bcc" field (and only that field) may be empty.
+
+   to              =   "To:" address-list CRLF
+
+   cc              =   "Cc:" address-list CRLF
+
+   bcc             =   "Bcc:" [address-list / CFWS] CRLF
+
+   The destination fields specify the recipients of the message.  Each
+   destination field may have one or more addresses, and the addresses
+   indicate the intended recipients of the message.  The only difference
+   between the three fields is how each is used.
+
+   The "To:" field contains the address(es) of the primary recipient(s)
+   of the message.
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 23]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   The "Cc:" field (where the "Cc" means "Carbon Copy" in the sense of
+   making a copy on a typewriter using carbon paper) contains the
+   addresses of others who are to receive the message, though the
+   content of the message may not be directed at them.
+
+   The "Bcc:" field (where the "Bcc" means "Blind Carbon Copy") contains
+   addresses of recipients of the message whose addresses are not to be
+   revealed to other recipients of the message.  There are three ways in
+   which the "Bcc:" field is used.  In the first case, when a message
+   containing a "Bcc:" field is prepared to be sent, the "Bcc:" line is
+   removed even though all of the recipients (including those specified
+   in the "Bcc:" field) are sent a copy of the message.  In the second
+   case, recipients specified in the "To:" and "Cc:" lines each are sent
+   a copy of the message with the "Bcc:" line removed as above, but the
+   recipients on the "Bcc:" line get a separate copy of the message
+   containing a "Bcc:" line.  (When there are multiple recipient
+   addresses in the "Bcc:" field, some implementations actually send a
+   separate copy of the message to each recipient with a "Bcc:"
+   containing only the address of that particular recipient.)  Finally,
+   since a "Bcc:" field may contain no addresses, a "Bcc:" field can be
+   sent without any addresses indicating to the recipients that blind
+   copies were sent to someone.  Which method to use with "Bcc:" fields
+   is implementation dependent, but refer to the "Security
+   Considerations" section of this document for a discussion of each.
+
+   When a message is a reply to another message, the mailboxes of the
+   authors of the original message (the mailboxes in the "From:" field)
+   or mailboxes specified in the "Reply-To:" field (if it exists) MAY
+   appear in the "To:" field of the reply since these would normally be
+   the primary recipients of the reply.  If a reply is sent to a message
+   that has destination fields, it is often desirable to send a copy of
+   the reply to all of the recipients of the message, in addition to the
+   author.  When such a reply is formed, addresses in the "To:" and
+   "Cc:" fields of the original message MAY appear in the "Cc:" field of
+   the reply, since these are normally secondary recipients of the
+   reply.  If a "Bcc:" field is present in the original message,
+   addresses in that field MAY appear in the "Bcc:" field of the reply,
+   but they SHOULD NOT appear in the "To:" or "Cc:" fields.
+
+      Note: Some mail applications have automatic reply commands that
+      include the destination addresses of the original message in the
+      destination addresses of the reply.  How those reply commands
+      behave is implementation dependent and is beyond the scope of this
+      document.  In particular, whether or not to include the original
+      destination addresses when the original message had a "Reply-To:"
+      field is not addressed here.
+
+
+
+
+
+Resnick                     Standards Track                    [Page 24]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+3.6.4.  Identification Fields
+
+   Though listed as optional in the table in section 3.6, every message
+   SHOULD have a "Message-ID:" field.  Furthermore, reply messages
+   SHOULD have "In-Reply-To:" and "References:" fields as appropriate
+   and as described below.
+
+   The "Message-ID:" field contains a single unique message identifier.
+   The "References:" and "In-Reply-To:" fields each contain one or more
+   unique message identifiers, optionally separated by CFWS.
+
+   The message identifier (msg-id) syntax is a limited version of the
+   addr-spec construct enclosed in the angle bracket characters, "<" and
+   ">".  Unlike addr-spec, this syntax only permits the dot-atom-text
+   form on the left-hand side of the "@" and does not have internal CFWS
+   anywhere in the message identifier.
+
+      Note: As with addr-spec, a liberal syntax is given for the right-
+      hand side of the "@" in a msg-id.  However, later in this section,
+      the use of a domain for the right-hand side of the "@" is
+      RECOMMENDED.  Again, the syntax of domain constructs is specified
+      by and used in other protocols (e.g., [RFC1034], [RFC1035],
+      [RFC1123], [RFC5321]).  It is therefore incumbent upon
+      implementations to conform to the syntax of addresses for the
+      context in which they are used.
+
+   message-id      =   "Message-ID:" msg-id CRLF
+
+   in-reply-to     =   "In-Reply-To:" 1*msg-id CRLF
+
+   references      =   "References:" 1*msg-id CRLF
+
+   msg-id          =   [CFWS] "<" id-left "@" id-right ">" [CFWS]
+
+   id-left         =   dot-atom-text / obs-id-left
+
+   id-right        =   dot-atom-text / no-fold-literal / obs-id-right
+
+   no-fold-literal =   "[" *dtext "]"
+
+   The "Message-ID:" field provides a unique message identifier that
+   refers to a particular version of a particular message.  The
+   uniqueness of the message identifier is guaranteed by the host that
+   generates it (see below).  This message identifier is intended to be
+   machine readable and not necessarily meaningful to humans.  A message
+   identifier pertains to exactly one version of a particular message;
+   subsequent revisions to the message each receive new message
+   identifiers.
+
+
+
+Resnick                     Standards Track                    [Page 25]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+      Note: There are many instances when messages are "changed", but
+      those changes do not constitute a new instantiation of that
+      message, and therefore the message would not get a new message
+      identifier.  For example, when messages are introduced into the
+      transport system, they are often prepended with additional header
+      fields such as trace fields (described in section 3.6.7) and
+      resent fields (described in section 3.6.6).  The addition of such
+      header fields does not change the identity of the message and
+      therefore the original "Message-ID:" field is retained.  In all
+      cases, it is the meaning that the sender of the message wishes to
+      convey (i.e., whether this is the same message or a different
+      message) that determines whether or not the "Message-ID:" field
+      changes, not any particular syntactic difference that appears (or
+      does not appear) in the message.
+
+   The "In-Reply-To:" and "References:" fields are used when creating a
+   reply to a message.  They hold the message identifier of the original
+   message and the message identifiers of other messages (for example,
+   in the case of a reply to a message that was itself a reply).  The
+   "In-Reply-To:" field may be used to identify the message (or
+   messages) to which the new message is a reply, while the
+   "References:" field may be used to identify a "thread" of
+   conversation.
+
+   When creating a reply to a message, the "In-Reply-To:" and
+   "References:" fields of the resultant message are constructed as
+   follows:
+
+   The "In-Reply-To:" field will contain the contents of the
+   "Message-ID:" field of the message to which this one is a reply (the
+   "parent message").  If there is more than one parent message, then
+   the "In-Reply-To:" field will contain the contents of all of the
+   parents' "Message-ID:" fields.  If there is no "Message-ID:" field in
+   any of the parent messages, then the new message will have no "In-
+   Reply-To:" field.
+
+   The "References:" field will contain the contents of the parent's
+   "References:" field (if any) followed by the contents of the parent's
+   "Message-ID:" field (if any).  If the parent message does not contain
+   a "References:" field but does have an "In-Reply-To:" field
+   containing a single message identifier, then the "References:" field
+   will contain the contents of the parent's "In-Reply-To:" field
+   followed by the contents of the parent's "Message-ID:" field (if
+   any).  If the parent has none of the "References:", "In-Reply-To:",
+   or "Message-ID:" fields, then the new message will have no
+   "References:" field.
+
+
+
+
+
+Resnick                     Standards Track                    [Page 26]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+      Note: Some implementations parse the "References:" field to
+      display the "thread of the discussion".  These implementations
+      assume that each new message is a reply to a single parent and
+      hence that they can walk backwards through the "References:" field
+      to find the parent of each message listed there.  Therefore,
+      trying to form a "References:" field for a reply that has multiple
+      parents is discouraged; how to do so is not defined in this
+      document.
+
+   The message identifier (msg-id) itself MUST be a globally unique
+   identifier for a message.  The generator of the message identifier
+   MUST guarantee that the msg-id is unique.  There are several
+   algorithms that can be used to accomplish this.  Since the msg-id has
+   a similar syntax to addr-spec (identical except that quoted strings,
+   comments, and folding white space are not allowed), a good method is
+   to put the domain name (or a domain literal IP address) of the host
+   on which the message identifier was created on the right-hand side of
+   the "@" (since domain names and IP addresses are normally unique),
+   and put a combination of the current absolute date and time along
+   with some other currently unique (perhaps sequential) identifier
+   available on the system (for example, a process id number) on the
+   left-hand side.  Though other algorithms will work, it is RECOMMENDED
+   that the right-hand side contain some domain identifier (either of
+   the host itself or otherwise) such that the generator of the message
+   identifier can guarantee the uniqueness of the left-hand side within
+   the scope of that domain.
+
+   Semantically, the angle bracket characters are not part of the
+   msg-id; the msg-id is what is contained between the two angle bracket
+   characters.
+
+3.6.5.  Informational Fields
+
+   The informational fields are all optional.  The "Subject:" and
+   "Comments:" fields are unstructured fields as defined in section
+   2.2.1, and therefore may contain text or folding white space.  The
+   "Keywords:" field contains a comma-separated list of one or more
+   words or quoted-strings.
+
+   subject         =   "Subject:" unstructured CRLF
+
+   comments        =   "Comments:" unstructured CRLF
+
+   keywords        =   "Keywords:" phrase *("," phrase) CRLF
+
+   These three fields are intended to have only human-readable content
+   with information about the message.  The "Subject:" field is the most
+   common and contains a short string identifying the topic of the
+
+
+
+Resnick                     Standards Track                    [Page 27]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   message.  When used in a reply, the field body MAY start with the
+   string "Re: " (an abbreviation of the Latin "in re", meaning "in the
+   matter of") followed by the contents of the "Subject:" field body of
+   the original message.  If this is done, only one instance of the
+   literal string "Re: " ought to be used since use of other strings or
+   more than one instance can lead to undesirable consequences.  The
+   "Comments:" field contains any additional comments on the text of the
+   body of the message.  The "Keywords:" field contains a comma-
+   separated list of important words and phrases that might be useful
+   for the recipient.
+
+3.6.6.  Resent Fields
+
+   Resent fields SHOULD be added to any message that is reintroduced by
+   a user into the transport system.  A separate set of resent fields
+   SHOULD be added each time this is done.  All of the resent fields
+   corresponding to a particular resending of the message SHOULD be
+   grouped together.  Each new set of resent fields is prepended to the
+   message; that is, the most recent set of resent fields appears
+   earlier in the message.  No other fields in the message are changed
+   when resent fields are added.
+
+   Each of the resent fields corresponds to a particular field elsewhere
+   in the syntax.  For instance, the "Resent-Date:" field corresponds to
+   the "Date:" field and the "Resent-To:" field corresponds to the "To:"
+   field.  In each case, the syntax for the field body is identical to
+   the syntax given previously for the corresponding field.
+
+   When resent fields are used, the "Resent-From:" and "Resent-Date:"
+   fields MUST be sent.  The "Resent-Message-ID:" field SHOULD be sent.
+   "Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be
+   identical to "Resent-From:".
+
+   resent-date     =   "Resent-Date:" date-time CRLF
+
+   resent-from     =   "Resent-From:" mailbox-list CRLF
+
+   resent-sender   =   "Resent-Sender:" mailbox CRLF
+
+   resent-to       =   "Resent-To:" address-list CRLF
+
+   resent-cc       =   "Resent-Cc:" address-list CRLF
+
+   resent-bcc      =   "Resent-Bcc:" [address-list / CFWS] CRLF
+
+   resent-msg-id   =   "Resent-Message-ID:" msg-id CRLF
+
+
+
+
+
+Resnick                     Standards Track                    [Page 28]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   Resent fields are used to identify a message as having been
+   reintroduced into the transport system by a user.  The purpose of
+   using resent fields is to have the message appear to the final
+   recipient as if it were sent directly by the original sender, with
+   all of the original fields remaining the same.  Each set of resent
+   fields corresponds to a particular resending event.  That is, if a
+   message is resent multiple times, each set of resent fields gives
+   identifying information for each individual time.  Resent fields are
+   strictly informational.  They MUST NOT be used in the normal
+   processing of replies or other such automatic actions on messages.
+
+      Note: Reintroducing a message into the transport system and using
+      resent fields is a different operation from "forwarding".
+      "Forwarding" has two meanings: One sense of forwarding is that a
+      mail reading program can be told by a user to forward a copy of a
+      message to another person, making the forwarded message the body
+      of the new message.  A forwarded message in this sense does not
+      appear to have come from the original sender, but is an entirely
+      new message from the forwarder of the message.  Forwarding may
+      also mean that a mail transport program gets a message and
+      forwards it on to a different destination for final delivery.
+      Resent header fields are not intended for use with either type of
+      forwarding.
+
+   The resent originator fields indicate the mailbox of the person(s) or
+   system(s) that resent the message.  As with the regular originator
+   fields, there are two forms: a simple "Resent-From:" form, which
+   contains the mailbox of the individual doing the resending, and the
+   more complex form, when one individual (identified in the "Resent-
+   Sender:" field) resends a message on behalf of one or more others
+   (identified in the "Resent-From:" field).
+
+      Note: When replying to a resent message, replies behave just as
+      they would with any other message, using the original "From:",
+      "Reply-To:", "Message-ID:", and other fields.  The resent fields
+      are only informational and MUST NOT be used in the normal
+      processing of replies.
+
+   The "Resent-Date:" indicates the date and time at which the resent
+   message is dispatched by the resender of the message.  Like the
+   "Date:" field, it is not the date and time that the message was
+   actually transported.
+
+   The "Resent-To:", "Resent-Cc:", and "Resent-Bcc:" fields function
+   identically to the "To:", "Cc:", and "Bcc:" fields, respectively,
+   except that they indicate the recipients of the resent message, not
+   the recipients of the original message.
+
+
+
+
+Resnick                     Standards Track                    [Page 29]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   The "Resent-Message-ID:" field provides a unique identifier for the
+   resent message.
+
+3.6.7.  Trace Fields
+
+   The trace fields are a group of header fields consisting of an
+   optional "Return-Path:" field, and one or more "Received:" fields.
+   The "Return-Path:" header field contains a pair of angle brackets
+   that enclose an optional addr-spec.  The "Received:" field contains a
+   (possibly empty) list of tokens followed by a semicolon and a date-
+   time specification.  Each token must be a word, angle-addr, addr-
+   spec, or a domain.  Further restrictions are applied to the syntax of
+   the trace fields by specifications that provide for their use, such
+   as [RFC5321].
+
+   trace           =   [return]
+                       1*received
+
+   return          =   "Return-Path:" path CRLF
+
+   path            =   angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS])
+
+   received        =   "Received:" *received-token ";" date-time CRLF
+
+   received-token  =   word / angle-addr / addr-spec / domain
+
+   A full discussion of the Internet mail use of trace fields is
+   contained in [RFC5321].  For the purposes of this specification, the
+   trace fields are strictly informational, and any formal
+   interpretation of them is outside of the scope of this document.
+
+3.6.8.  Optional Fields
+
+   Fields may appear in messages that are otherwise unspecified in this
+   document.  They MUST conform to the syntax of an optional-field.
+   This is a field name, made up of the printable US-ASCII characters
+   except SP and colon, followed by a colon, followed by any text that
+   conforms to the unstructured syntax.
+
+   The field names of any optional field MUST NOT be identical to any
+   field name specified elsewhere in this document.
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 30]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   optional-field  =   field-name ":" unstructured CRLF
+
+   field-name      =   1*ftext
+
+   ftext           =   %d33-57 /          ; Printable US-ASCII
+                       %d59-126           ;  characters not including
+                                          ;  ":".
+
+   For the purposes of this specification, any optional field is
+   uninterpreted.
+
+4.  Obsolete Syntax
+
+   Earlier versions of this specification allowed for different (usually
+   more liberal) syntax than is allowed in this version.  Also, there
+   have been syntactic elements used in messages on the Internet whose
+   interpretations have never been documented.  Though these syntactic
+   forms MUST NOT be generated according to the grammar in section 3,
+   they MUST be accepted and parsed by a conformant receiver.  This
+   section documents many of these syntactic elements.  Taking the
+   grammar in section 3 and adding the definitions presented in this
+   section will result in the grammar to use for the interpretation of
+   messages.
+
+      Note: This section identifies syntactic forms that any
+      implementation MUST reasonably interpret.  However, there are
+      certainly Internet messages that do not conform to even the
+      additional syntax given in this section.  The fact that a
+      particular form does not appear in any section of this document is
+      not justification for computer programs to crash or for malformed
+      data to be irretrievably lost by any implementation.  It is up to
+      the implementation to deal with messages robustly.
+
+   One important difference between the obsolete (interpreting) and the
+   current (generating) syntax is that in structured header field bodies
+   (i.e., between the colon and the CRLF of any structured header
+   field), white space characters, including folding white space, and
+   comments could be freely inserted between any syntactic tokens.  This
+   allowed many complex forms that have proven difficult for some
+   implementations to parse.
+
+   Another key difference between the obsolete and the current syntax is
+   that the rule in section 3.2.2 regarding lines composed entirely of
+   white space in comments and folding white space does not apply.  See
+   the discussion of folding white space in section 4.2 below.
+
+   Finally, certain characters that were formerly allowed in messages
+   appear in this section.  The NUL character (ASCII value 0) was once
+
+
+
+Resnick                     Standards Track                    [Page 31]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   allowed, but is no longer for compatibility reasons.  Similarly, US-
+   ASCII control characters other than CR, LF, SP, and HTAB (ASCII
+   values 1 through 8, 11, 12, 14 through 31, and 127) were allowed to
+   appear in header field bodies.  CR and LF were allowed to appear in
+   messages other than as CRLF; this use is also shown here.
+
+   Other differences in syntax and semantics are noted in the following
+   sections.
+
+4.1.  Miscellaneous Obsolete Tokens
+
+   These syntactic elements are used elsewhere in the obsolete syntax or
+   in the main syntax.  Bare CR, bare LF, and NUL are added to obs-qp,
+   obs-body, and obs-unstruct.  US-ASCII control characters are added to
+   obs-qp, obs-unstruct, obs-ctext, and obs-qtext.  The period character
+   is added to obs-phrase.  The obs-phrase-list provides for a
+   (potentially empty) comma-separated list of phrases that may include
+   "null" elements.  That is, there could be two or more commas in such
+   a list with nothing in between them, or commas at the beginning or
+   end of the list.
+
+      Note: The "period" (or "full stop") character (".") in obs-phrase
+      is not a form that was allowed in earlier versions of this or any
+      other specification.  Neither period nor any other character
+      from specials was allowed in phrase because it introduced a
+      parsing difficulty distinguishing between phrases and portions of
+      an addr-spec (see section 4.4).  It appears here because the
+      period character is currently used in many messages in the
+      display-name portion of addresses, especially for initials in
+      names, and therefore must be interpreted properly.
+
+   obs-NO-WS-CTL   =   %d1-8 /            ; US-ASCII control
+                       %d11 /             ;  characters that do not
+                       %d12 /             ;  include the carriage
+                       %d14-31 /          ;  return, line feed, and
+                       %d127              ;  white space characters
+
+   obs-ctext       =   obs-NO-WS-CTL
+
+   obs-qtext       =   obs-NO-WS-CTL
+
+   obs-utext       =   %d0 / obs-NO-WS-CTL / VCHAR
+
+   obs-qp          =   "\" (%d0 / obs-NO-WS-CTL / LF / CR)
+
+   obs-body        =   *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
+
+   obs-unstruct    =   *((*LF *CR *(obs-utext *LF *CR)) / FWS)
+
+
+
+Resnick                     Standards Track                    [Page 32]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   obs-phrase      =   word *(word / "." / CFWS)
+
+   obs-phrase-list =   [phrase / CFWS] *("," [phrase / CFWS])
+
+   Bare CR and bare LF appear in messages with two different meanings.
+   In many cases, bare CR or bare LF are used improperly instead of CRLF
+   to indicate line separators.  In other cases, bare CR and bare LF are
+   used simply as US-ASCII control characters with their traditional
+   ASCII meanings.
+
+4.2.  Obsolete Folding White Space
+
+   In the obsolete syntax, any amount of folding white space MAY be
+   inserted where the obs-FWS rule is allowed.  This creates the
+   possibility of having two consecutive "folds" in a line, and
+   therefore the possibility that a line which makes up a folded header
+   field could be composed entirely of white space.
+
+   obs-FWS         =   1*WSP *(CRLF 1*WSP)
+
+4.3.  Obsolete Date and Time
+
+   The syntax for the obsolete date format allows a 2 digit year in the
+   date field and allows for a list of alphabetic time zone specifiers
+   that were used in earlier versions of this specification.  It also
+   permits comments and folding white space between many of the tokens.
+
+   obs-day-of-week =   [CFWS] day-name [CFWS]
+
+   obs-day         =   [CFWS] 1*2DIGIT [CFWS]
+
+   obs-year        =   [CFWS] 2*DIGIT [CFWS]
+
+   obs-hour        =   [CFWS] 2DIGIT [CFWS]
+
+   obs-minute      =   [CFWS] 2DIGIT [CFWS]
+
+   obs-second      =   [CFWS] 2DIGIT [CFWS]
+
+   obs-zone        =   "UT" / "GMT" /     ; Universal Time
+                                          ; North American UT
+                                          ; offsets
+                       "EST" / "EDT" /    ; Eastern:  - 5/ - 4
+                       "CST" / "CDT" /    ; Central:  - 6/ - 5
+                       "MST" / "MDT" /    ; Mountain: - 7/ - 6
+                       "PST" / "PDT" /    ; Pacific:  - 8/ - 7
+                                          ;
+
+
+
+
+Resnick                     Standards Track                    [Page 33]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+                       %d65-73 /          ; Military zones - "A"
+                       %d75-90 /          ; through "I" and "K"
+                       %d97-105 /         ; through "Z", both
+                       %d107-122          ; upper and lower case
+
+   Where a two or three digit year occurs in a date, the year is to be
+   interpreted as follows: If a two digit year is encountered whose
+   value is between 00 and 49, the year is interpreted by adding 2000,
+   ending up with a value between 2000 and 2049.  If a two digit year is
+   encountered with a value between 50 and 99, or any three digit year
+   is encountered, the year is interpreted by adding 1900.
+
+   In the obsolete time zone, "UT" and "GMT" are indications of
+   "Universal Time" and "Greenwich Mean Time", respectively, and are
+   both semantically identical to "+0000".
+
+   The remaining three character zones are the US time zones.  The first
+   letter, "E", "C", "M", or "P" stands for "Eastern", "Central",
+   "Mountain", and "Pacific".  The second letter is either "S" for
+   "Standard" time, or "D" for "Daylight Savings" (or summer) time.
+   Their interpretations are as follows:
+
+      EDT is semantically equivalent to -0400
+      EST is semantically equivalent to -0500
+      CDT is semantically equivalent to -0500
+      CST is semantically equivalent to -0600
+      MDT is semantically equivalent to -0600
+      MST is semantically equivalent to -0700
+      PDT is semantically equivalent to -0700
+      PST is semantically equivalent to -0800
+
+   The 1 character military time zones were defined in a non-standard
+   way in [RFC0822] and are therefore unpredictable in their meaning.
+   The original definitions of the military zones "A" through "I" are
+   equivalent to "+0100" through "+0900", respectively; "K", "L", and
+   "M" are equivalent to "+1000", "+1100", and "+1200", respectively;
+   "N" through "Y" are equivalent to "-0100" through "-1200",
+   respectively; and "Z" is equivalent to "+0000".  However, because of
+   the error in [RFC0822], they SHOULD all be considered equivalent to
+   "-0000" unless there is out-of-band information confirming their
+   meaning.
+
+   Other multi-character (usually between 3 and 5) alphabetic time zones
+   have been used in Internet messages.  Any such time zone whose
+   meaning is not known SHOULD be considered equivalent to "-0000"
+   unless there is out-of-band information confirming their meaning.
+
+
+
+
+
+Resnick                     Standards Track                    [Page 34]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+4.4.  Obsolete Addressing
+
+   There are four primary differences in addressing.  First, mailbox
+   addresses were allowed to have a route portion before the addr-spec
+   when enclosed in "<" and ">".  The route is simply a comma-separated
+   list of domain names, each preceded by "@", and the list terminated
+   by a colon.  Second, CFWS were allowed between the period-separated
+   elements of local-part and domain (i.e., dot-atom was not used).  In
+   addition, local-part is allowed to contain quoted-string in addition
+   to just atom.  Third, mailbox-list and address-list were allowed to
+   have "null" members.  That is, there could be two or more commas in
+   such a list with nothing in between them, or commas at the beginning
+   or end of the list.  Finally, US-ASCII control characters and quoted-
+   pairs were allowed in domain literals and are added here.
+
+   obs-angle-addr  =   [CFWS] "<" obs-route addr-spec ">" [CFWS]
+
+   obs-route       =   obs-domain-list ":"
+
+   obs-domain-list =   *(CFWS / ",") "@" domain
+                       *("," [CFWS] ["@" domain])
+
+   obs-mbox-list   =   *([CFWS] ",") mailbox *("," [mailbox / CFWS])
+
+   obs-addr-list   =   *([CFWS] ",") address *("," [address / CFWS])
+
+   obs-group-list  =   1*([CFWS] ",") [CFWS]
+
+   obs-local-part  =   word *("." word)
+
+   obs-domain      =   atom *("." atom)
+
+   obs-dtext       =   obs-NO-WS-CTL / quoted-pair
+
+   When interpreting addresses, the route portion SHOULD be ignored.
+
+4.5.  Obsolete Header Fields
+
+   Syntactically, the primary difference in the obsolete field syntax is
+   that it allows multiple occurrences of any of the fields and they may
+   occur in any order.  Also, any amount of white space is allowed
+   before the ":" at the end of the field name.
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 35]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   obs-fields      =   *(obs-return /
+                       obs-received /
+                       obs-orig-date /
+                       obs-from /
+                       obs-sender /
+                       obs-reply-to /
+                       obs-to /
+                       obs-cc /
+                       obs-bcc /
+                       obs-message-id /
+                       obs-in-reply-to /
+                       obs-references /
+                       obs-subject /
+                       obs-comments /
+                       obs-keywords /
+                       obs-resent-date /
+                       obs-resent-from /
+                       obs-resent-send /
+                       obs-resent-rply /
+                       obs-resent-to /
+                       obs-resent-cc /
+                       obs-resent-bcc /
+                       obs-resent-mid /
+                       obs-optional)
+
+   Except for destination address fields (described in section 4.5.3),
+   the interpretation of multiple occurrences of fields is unspecified.
+   Also, the interpretation of trace fields and resent fields that do
+   not occur in blocks prepended to the message is unspecified as well.
+   Unless otherwise noted in the following sections, interpretation of
+   other fields is identical to the interpretation of their non-obsolete
+   counterparts in section 3.
+
+4.5.1.  Obsolete Origination Date Field
+
+   obs-orig-date   =   "Date" *WSP ":" date-time CRLF
+
+4.5.2.  Obsolete Originator Fields
+
+   obs-from        =   "From" *WSP ":" mailbox-list CRLF
+
+   obs-sender      =   "Sender" *WSP ":" mailbox CRLF
+
+   obs-reply-to    =   "Reply-To" *WSP ":" address-list CRLF
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 36]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+4.5.3.  Obsolete Destination Address Fields
+
+   obs-to          =   "To" *WSP ":" address-list CRLF
+
+   obs-cc          =   "Cc" *WSP ":" address-list CRLF
+
+   obs-bcc         =   "Bcc" *WSP ":"
+                       (address-list / (*([CFWS] ",") [CFWS])) CRLF
+
+   When multiple occurrences of destination address fields occur in a
+   message, they SHOULD be treated as if the address list in the first
+   occurrence of the field is combined with the address lists of the
+   subsequent occurrences by adding a comma and concatenating.
+
+4.5.4.  Obsolete Identification Fields
+
+   The obsolete "In-Reply-To:" and "References:" fields differ from the
+   current syntax in that they allow phrase (words or quoted strings) to
+   appear.  The obsolete forms of the left and right sides of msg-id
+   allow interspersed CFWS, making them syntactically identical to
+   local-part and domain, respectively.
+
+   obs-message-id  =   "Message-ID" *WSP ":" msg-id CRLF
+
+   obs-in-reply-to =   "In-Reply-To" *WSP ":" *(phrase / msg-id) CRLF
+
+   obs-references  =   "References" *WSP ":" *(phrase / msg-id) CRLF
+
+   obs-id-left     =   local-part
+
+   obs-id-right    =   domain
+
+   For purposes of interpretation, the phrases in the "In-Reply-To:" and
+   "References:" fields are ignored.
+
+   Semantically, none of the optional CFWS in the local-part and the
+   domain is part of the obs-id-left and obs-id-right, respectively.
+
+4.5.5.  Obsolete Informational Fields
+
+   obs-subject     =   "Subject" *WSP ":" unstructured CRLF
+
+   obs-comments    =   "Comments" *WSP ":" unstructured CRLF
+
+   obs-keywords    =   "Keywords" *WSP ":" obs-phrase-list CRLF
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 37]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+4.5.6.  Obsolete Resent Fields
+
+   The obsolete syntax adds a "Resent-Reply-To:" field, which consists
+   of the field name, the optional comments and folding white space, the
+   colon, and a comma-separated list of addresses.
+
+   obs-resent-from =   "Resent-From" *WSP ":" mailbox-list CRLF
+
+   obs-resent-send =   "Resent-Sender" *WSP ":" mailbox CRLF
+
+   obs-resent-date =   "Resent-Date" *WSP ":" date-time CRLF
+
+   obs-resent-to   =   "Resent-To" *WSP ":" address-list CRLF
+
+   obs-resent-cc   =   "Resent-Cc" *WSP ":" address-list CRLF
+
+   obs-resent-bcc  =   "Resent-Bcc" *WSP ":"
+                       (address-list / (*([CFWS] ",") [CFWS])) CRLF
+
+   obs-resent-mid  =   "Resent-Message-ID" *WSP ":" msg-id CRLF
+
+   obs-resent-rply =   "Resent-Reply-To" *WSP ":" address-list CRLF
+
+   As with other resent fields, the "Resent-Reply-To:" field is to be
+   treated as trace information only.
+
+4.5.7.  Obsolete Trace Fields
+
+   The obs-return and obs-received are again given here as template
+   definitions, just as return and received are in section 3.  Their
+   full syntax is given in [RFC5321].
+
+   obs-return      =   "Return-Path" *WSP ":" path CRLF
+
+   obs-received    =   "Received" *WSP ":" *received-token CRLF
+
+4.5.8.  Obsolete optional fields
+
+   obs-optional    =   field-name *WSP ":" unstructured CRLF
+
+5.  Security Considerations
+
+   Care needs to be taken when displaying messages on a terminal or
+   terminal emulator.  Powerful terminals may act on escape sequences
+   and other combinations of US-ASCII control characters with a variety
+   of consequences.  They can remap the keyboard or permit other
+   modifications to the terminal that could lead to denial of service or
+   even damaged data.  They can trigger (sometimes programmable)
+
+
+
+Resnick                     Standards Track                    [Page 38]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   answerback messages that can allow a message to cause commands to be
+   issued on the recipient's behalf.  They can also affect the operation
+   of terminal attached devices such as printers.  Message viewers may
+   wish to strip potentially dangerous terminal escape sequences from
+   the message prior to display.  However, other escape sequences appear
+   in messages for useful purposes (cf. [ISO.2022.1994], [RFC2045],
+   [RFC2046], [RFC2047], [RFC2049], [RFC4288], [RFC4289]) and therefore
+   should not be stripped indiscriminately.
+
+   Transmission of non-text objects in messages raises additional
+   security issues.  These issues are discussed in [RFC2045], [RFC2046],
+   [RFC2047], [RFC2049], [RFC4288], and [RFC4289].
+
+   Many implementations use the "Bcc:" (blind carbon copy) field,
+   described in section 3.6.3, to facilitate sending messages to
+   recipients without revealing the addresses of one or more of the
+   addressees to the other recipients.  Mishandling this use of "Bcc:"
+   may disclose confidential information that could eventually lead to
+   security problems through knowledge of even the existence of a
+   particular mail address.  For example, if using the first method
+   described in section 3.6.3, where the "Bcc:" line is removed from the
+   message, blind recipients have no explicit indication that they have
+   been sent a blind copy, except insofar as their address does not
+   appear in the header section of a message.  Because of this, one of
+   the blind addressees could potentially send a reply to all of the
+   shown recipients and accidentally reveal that the message went to the
+   blind recipient.  When the second method from section 3.6.3 is used,
+   the blind recipient's address appears in the "Bcc:" field of a
+   separate copy of the message.  If the "Bcc:" field sent contains all
+   of the blind addressees, all of the "Bcc:" recipients will be seen by
+   each "Bcc:" recipient.  Even if a separate message is sent to each
+   "Bcc:" recipient with only the individual's address, implementations
+   still need to be careful to process replies to the message as per
+   section 3.6.3 so as not to accidentally reveal the blind recipient to
+   other recipients.
+
+6.  IANA Considerations
+
+   This document updates the registrations that appeared in [RFC4021]
+   that referred to the definitions in [RFC2822].  IANA has updated the
+   Permanent Message Header Field Repository with the following header
+   fields, in accordance with the procedures set out in [RFC3864].
+
+   Header field name:  Date
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.1)
+
+
+
+Resnick                     Standards Track                    [Page 39]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   Header field name:  From
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.2)
+
+   Header field name:  Sender
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.2)
+
+   Header field name:  Reply-To
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.2)
+
+   Header field name:  To
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.3)
+
+   Header field name:  Cc
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.3)
+
+   Header field name:  Bcc
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.3)
+
+   Header field name:  Message-ID
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.4)
+
+   Header field name:  In-Reply-To
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.4)
+
+
+
+
+Resnick                     Standards Track                    [Page 40]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   Header field name:  References
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.4)
+
+   Header field name:  Subject
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.5)
+
+   Header field name:  Comments
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.5)
+
+   Header field name:  Keywords
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.5)
+
+   Header field name:  Resent-Date
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.6)
+
+   Header field name:  Resent-From
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.6)
+
+   Header field name:  Resent-Sender
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.6)
+
+   Header field name:  Resent-To
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.6)
+
+
+
+
+Resnick                     Standards Track                    [Page 41]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   Header field name:  Resent-Cc
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.6)
+
+   Header field name:  Resent-Bcc
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.6)
+
+   Header field name:  Resent-Reply-To
+   Applicable protocol:  Mail
+   Status:  obsolete
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 4.5.6)
+
+   Header field name:  Resent-Message-ID
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.6)
+
+   Header field name:  Return-Path
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.7)
+
+   Header field name:  Received
+   Applicable protocol:  Mail
+   Status:  standard
+   Author/Change controller:  IETF
+   Specification document(s):  This document (section 3.6.7)
+   Related information:  [RFC5321]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 42]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Appendix A.  Example Messages
+
+   This section presents a selection of messages.  These are intended to
+   assist in the implementation of this specification, but should not be
+   taken as normative; that is to say, although the examples in this
+   section were carefully reviewed, if there happens to be a conflict
+   between these examples and the syntax described in sections 3 and 4
+   of this document, the syntax in those sections is to be taken as
+   correct.
+
+   In the text version of this document, messages in this section are
+   delimited between lines of "----".  The "----" lines are not part of
+   the message itself.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 43]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Appendix A.1.  Addressing Examples
+
+   The following are examples of messages that might be sent between two
+   individuals.
+
+Appendix A.1.1.  A Message from One Person to Another with Simple
+                 Addressing
+
+   This could be called a canonical message.  It has a single author,
+   John Doe, a single recipient, Mary Smith, a subject, the date, a
+   message identifier, and a textual message in the body.
+
+   ----
+   From: John Doe <jdoe@machine.example>
+   To: Mary Smith <mary@example.net>
+   Subject: Saying Hello
+   Date: Fri, 21 Nov 1997 09:55:06 -0600
+   Message-ID: <1234@local.machine.example>
+
+   This is a message just to say hello.
+   So, "Hello".
+   ----
+
+   If John's secretary Michael actually sent the message, even though
+   John was the author and replies to this message should go back to
+   him, the sender field would be used:
+
+   ----
+   From: John Doe <jdoe@machine.example>
+   Sender: Michael Jones <mjones@machine.example>
+   To: Mary Smith <mary@example.net>
+   Subject: Saying Hello
+   Date: Fri, 21 Nov 1997 09:55:06 -0600
+   Message-ID: <1234@local.machine.example>
+
+   This is a message just to say hello.
+   So, "Hello".
+   ----
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 44]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Appendix A.1.2.  Different Types of Mailboxes
+
+   This message includes multiple addresses in the destination fields
+   and also uses several different forms of addresses.
+
+   ----
+   From: "Joe Q. Public" <john.q.public@example.com>
+   To: Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>
+   Cc: <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>
+   Date: Tue, 1 Jul 2003 10:52:37 +0200
+   Message-ID: <5678.21-Nov-1997@example.com>
+
+   Hi everyone.
+   ----
+
+   Note that the display names for Joe Q. Public and Giant; "Big" Box
+   needed to be enclosed in double-quotes because the former contains
+   the period and the latter contains both semicolon and double-quote
+   characters (the double-quote characters appearing as quoted-pair
+   constructs).  Conversely, the display name for Who? could appear
+   without them because the question mark is legal in an atom.  Notice
+   also that jdoe@example.org and boss@nil.test have no display names
+   associated with them at all, and jdoe@example.org uses the simpler
+   address form without the angle brackets.
+
+Appendix A.1.3.  Group Addresses
+
+   ----
+   From: Pete <pete@silly.example>
+   To: A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;
+   Cc: Undisclosed recipients:;
+   Date: Thu, 13 Feb 1969 23:32:54 -0330
+   Message-ID: <testabcd.1234@silly.example>
+
+   Testing.
+   ----
+
+   In this message, the "To:" field has a single group recipient named
+   "A Group", which contains 3 addresses, and the "Cc:" field has an
+   empty group recipient named "Undisclosed recipients".
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 45]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Appendix A.2.  Reply Messages
+
+   The following is a series of three messages that make up a
+   conversation thread between John and Mary.  John first sends a
+   message to Mary, Mary then replies to John's message, and then John
+   replies to Mary's reply message.
+
+   Note especially the "Message-ID:", "References:", and "In-Reply-To:"
+   fields in each message.
+
+   ----
+   From: John Doe <jdoe@machine.example>
+   To: Mary Smith <mary@example.net>
+   Subject: Saying Hello
+   Date: Fri, 21 Nov 1997 09:55:06 -0600
+   Message-ID: <1234@local.machine.example>
+
+   This is a message just to say hello.
+   So, "Hello".
+   ----
+
+   When sending replies, the Subject field is often retained, though
+   prepended with "Re: " as described in section 3.6.5.
+
+   ----
+   From: Mary Smith <mary@example.net>
+   To: John Doe <jdoe@machine.example>
+   Reply-To: "Mary Smith: Personal Account" <smith@home.example>
+   Subject: Re: Saying Hello
+   Date: Fri, 21 Nov 1997 10:01:10 -0600
+   Message-ID: <3456@example.net>
+   In-Reply-To: <1234@local.machine.example>
+   References: <1234@local.machine.example>
+
+   This is a reply to your hello.
+   ----
+
+   Note the "Reply-To:" field in the above message.  When John replies
+   to Mary's message above, the reply should go to the address in the
+   "Reply-To:" field instead of the address in the "From:" field.
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 46]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   ----
+   To: "Mary Smith: Personal Account" <smith@home.example>
+   From: John Doe <jdoe@machine.example>
+   Subject: Re: Saying Hello
+   Date: Fri, 21 Nov 1997 11:00:00 -0600
+   Message-ID: <abcd.1234@local.machine.test>
+   In-Reply-To: <3456@example.net>
+   References: <1234@local.machine.example> <3456@example.net>
+
+   This is a reply to your reply.
+   ----
+
+Appendix A.3.  Resent Messages
+
+   Start with the message that has been used as an example several
+   times:
+
+   ----
+   From: John Doe <jdoe@machine.example>
+   To: Mary Smith <mary@example.net>
+   Subject: Saying Hello
+   Date: Fri, 21 Nov 1997 09:55:06 -0600
+   Message-ID: <1234@local.machine.example>
+
+   This is a message just to say hello.
+   So, "Hello".
+   ----
+
+   Say that Mary, upon receiving this message, wishes to send a copy of
+   the message to Jane such that (a) the message would appear to have
+   come straight from John; (b) if Jane replies to the message, the
+   reply should go back to John; and (c) all of the original
+   information, like the date the message was originally sent to Mary,
+   the message identifier, and the original addressee, is preserved.  In
+   this case, resent fields are prepended to the message:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 47]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   ----
+   Resent-From: Mary Smith <mary@example.net>
+   Resent-To: Jane Brown <j-brown@other.example>
+   Resent-Date: Mon, 24 Nov 1997 14:22:01 -0800
+   Resent-Message-ID: <78910@example.net>
+   From: John Doe <jdoe@machine.example>
+   To: Mary Smith <mary@example.net>
+   Subject: Saying Hello
+   Date: Fri, 21 Nov 1997 09:55:06 -0600
+   Message-ID: <1234@local.machine.example>
+
+   This is a message just to say hello.
+   So, "Hello".
+   ----
+
+   If Jane, in turn, wished to resend this message to another person,
+   she would prepend her own set of resent header fields to the above
+   and send that.  (Note that for brevity, trace fields are not shown.)
+
+Appendix A.4.  Messages with Trace Fields
+
+   As messages are sent through the transport system as described in
+   [RFC5321], trace fields are prepended to the message.  The following
+   is an example of what those trace fields might look like.  Note that
+   there is some folding white space in the first one since these lines
+   can be long.
+
+   ----
+   Received: from x.y.test
+      by example.net
+      via TCP
+      with ESMTP
+      id ABC12345
+      for <mary@example.net>;  21 Nov 1997 10:05:43 -0600
+   Received: from node.example by x.y.test; 21 Nov 1997 10:01:22 -0600
+   From: John Doe <jdoe@node.example>
+   To: Mary Smith <mary@example.net>
+   Subject: Saying Hello
+   Date: Fri, 21 Nov 1997 09:55:06 -0600
+   Message-ID: <1234@local.node.example>
+
+   This is a message just to say hello.
+   So, "Hello".
+   ----
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 48]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Appendix A.5.  White Space, Comments, and Other Oddities
+
+   White space, including folding white space, and comments can be
+   inserted between many of the tokens of fields.  Taking the example
+   from A.1.3, white space and comments can be inserted into all of the
+   fields.
+
+   ----
+   From: Pete(A nice \) chap) <pete(his account)@silly.test(his host)>
+   To:A Group(Some people)
+        :Chris Jones <c@(Chris's host.)public.example>,
+            joe@example.org,
+     John <jdoe@one.test> (my dear friend); (the end of the group)
+   Cc:(Empty list)(start)Hidden recipients  :(nobody(that I know))  ;
+   Date: Thu,
+         13
+           Feb
+             1969
+         23:32
+                  -0330 (Newfoundland Time)
+   Message-ID:              <testabcd.1234@silly.test>
+
+   Testing.
+   ----
+
+   The above example is aesthetically displeasing, but perfectly legal.
+   Note particularly (1) the comments in the "From:" field (including
+   one that has a ")" character appearing as part of a quoted-pair); (2)
+   the white space absent after the ":" in the "To:" field as well as
+   the comment and folding white space after the group name, the special
+   character (".") in the comment in Chris Jones's address, and the
+   folding white space before and after "joe@example.org,"; (3) the
+   multiple and nested comments in the "Cc:" field as well as the
+   comment immediately following the ":" after "Cc"; (4) the folding
+   white space (but no comments except at the end) and the missing
+   seconds in the time of the date field; and (5) the white space before
+   (but not within) the identifier in the "Message-ID:" field.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 49]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Appendix A.6.  Obsoleted Forms
+
+   The following are examples of obsolete (that is, the "MUST NOT
+   generate") syntactic elements described in section 4 of this
+   document.
+
+Appendix A.6.1.  Obsolete Addressing
+
+   Note in the example below the lack of quotes around Joe Q. Public,
+   the route that appears in the address for Mary Smith, the two commas
+   that appear in the "To:" field, and the spaces that appear around the
+   "." in the jdoe address.
+
+   ----
+   From: Joe Q. Public <john.q.public@example.com>
+   To: Mary Smith <@node.test:mary@example.net>, , jdoe@test  . example
+   Date: Tue, 1 Jul 2003 10:52:37 +0200
+   Message-ID: <5678.21-Nov-1997@example.com>
+
+   Hi everyone.
+   ----
+
+Appendix A.6.2.  Obsolete Dates
+
+   The following message uses an obsolete date format, including a non-
+   numeric time zone and a two digit year.  Note that although the day-
+   of-week is missing, that is not specific to the obsolete syntax; it
+   is optional in the current syntax as well.
+
+   ----
+   From: John Doe <jdoe@machine.example>
+   To: Mary Smith <mary@example.net>
+   Subject: Saying Hello
+   Date: 21 Nov 97 09:55:06 GMT
+   Message-ID: <1234@local.machine.example>
+
+   This is a message just to say hello.
+   So, "Hello".
+   ----
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 50]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Appendix A.6.3.  Obsolete White Space and Comments
+
+   White space and comments can appear between many more elements than
+   in the current syntax.  Also, folding lines that are made up entirely
+   of white space are legal.
+
+   ----
+   From  : John Doe <jdoe@machine(comment).  example>
+   To    : Mary Smith
+   __
+             <mary@example.net>
+   Subject     : Saying Hello
+   Date  : Fri, 21 Nov 1997 09(comment):   55  :  06 -0600
+   Message-ID  : <1234   @   local(blah)  .machine .example>
+
+   This is a message just to say hello.
+   So, "Hello".
+   ----
+
+   Note especially the second line of the "To:" field.  It starts with
+   two space characters.  (Note that "__" represents blank spaces.)
+   Therefore, it is considered part of the folding, as described in
+   section 4.2.  Also, the comments and white space throughout
+   addresses, dates, and message identifiers are all part of the
+   obsolete syntax.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 51]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Appendix B.  Differences from Earlier Specifications
+
+   This appendix contains a list of changes that have been made in the
+   Internet Message Format from earlier specifications, specifically
+   [RFC0822], [RFC1123], and [RFC2822].  Items marked with an asterisk
+   (*) below are items which appear in section 4 of this document and
+   therefore can no longer be generated.
+
+   The following are the changes made from [RFC0822] and [RFC1123] to
+   [RFC2822] that remain in this document:
+
+   1.   Period allowed in obsolete form of phrase.
+   2.   ABNF moved out of document, now in [RFC5234].
+   3.   Four or more digits allowed for year.
+   4.   Header field ordering (and lack thereof) made explicit.
+   5.   Encrypted header field removed.
+   6.   Specifically allow and give meaning to "-0000" time zone.
+   7.   Folding white space is not allowed between every token.
+   8.   Requirement for destinations removed.
+   9.   Forwarding and resending redefined.
+   10.  Extension header fields no longer specifically called out.
+   11.  ASCII 0 (null) removed.*
+   12.  Folding continuation lines cannot contain only white space.*
+   13.  Free insertion of comments not allowed in date.*
+   14.  Non-numeric time zones not allowed.*
+   15.  Two digit years not allowed.*
+   16.  Three digit years interpreted, but not allowed for generation.*
+   17.  Routes in addresses not allowed.*
+   18.  CFWS within local-parts and domains not allowed.*
+   19.  Empty members of address lists not allowed.*
+   20.  Folding white space between field name and colon not allowed.*
+   21.  Comments between field name and colon not allowed.
+   22.  Tightened syntax of in-reply-to and references.*
+   23.  CFWS within msg-id not allowed.*
+   24.  Tightened semantics of resent fields as informational only.
+   25.  Resent-Reply-To not allowed.*
+   26.  No multiple occurrences of fields (except resent and received).*
+   27.  Free CR and LF not allowed.*
+   28.  Line length limits specified.
+   29.  Bcc more clearly specified.
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 52]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   The following are changes from [RFC2822].
+   1.   Assorted typographical/grammatical errors fixed and
+        clarifications made.
+   2.   Changed "standard" to "document" or "specification" throughout.
+   3.   Made distinction between "header field" and "header section".
+   4.   Removed NO-WS-CTL from ctext, qtext, dtext, and unstructured.*
+   5.   Moved discussion of specials to the "Atom" section.  Moved text
+        to "Overall message syntax" section.
+   6.   Simplified CFWS syntax.
+   7.   Fixed unstructured syntax.
+   8.   Changed date and time syntax to deal with white space in
+        obsolete date syntax.
+   9.   Removed quoted-pair from domain literals and message
+        identifiers.*
+   10.  Clarified that other specifications limit domain syntax.
+   11.  Simplified "Bcc:" and "Resent-Bcc:" syntax.
+   12.  Allowed optional-field to appear within trace information.
+   13.  Removed no-fold-quote from msg-id.  Clarified syntax
+        limitations.
+   14.  Generalized "Received:" syntax to fix bugs and move definition
+        out of this document.
+   15.  Simplified obs-qp.  Fixed and simplified obs-utext (which now
+        only appears in the obsolete syntax).  Removed obs-text and obs-
+        char, adding obs-body.
+   16.  Fixed obsolete date syntax to allow for more (or less) comments
+        and white space.
+   17.  Fixed all obsolete list syntax (obs-domain-list, obs-mbox-list,
+        obs-addr-list, obs-phrase-list, and the newly added obs-group-
+        list).
+   18.  Fixed obs-reply-to syntax.
+   19.  Fixed obs-bcc and obs-resent-bcc to allow empty lists.
+   20.  Removed obs-path.
+
+Appendix C.  Acknowledgements
+
+   Many people contributed to this document.  They included folks who
+   participated in the Detailed Revision and Update of Messaging
+   Standards (DRUMS) Working Group of the Internet Engineering Task
+   Force (IETF), the chair of DRUMS, the Area Directors of the IETF, and
+   people who simply sent their comments in via email.  The editor is
+   deeply indebted to them all and thanks them sincerely.  The below
+   list includes everyone who sent email concerning both this document
+   and [RFC2822].  Hopefully, everyone who contributed is named here:
+
+   +--------------------+----------------------+---------------------+
+   | Matti Aarnio       | Tanaka Akira         | Russ Allbery        |
+   | Eric Allman        | Harald Alvestrand    | Ran Atkinson        |
+   | Jos Backus         | Bruce Balden         | Dave Barr           |
+
+
+
+Resnick                     Standards Track                    [Page 53]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   | Alan Barrett       | John Beck            | J Robert von Behren |
+   | Jos den Bekker     | D J Bernstein        | James Berriman      |
+   | Oliver Block       | Norbert Bollow       | Raj Bose            |
+   | Antony Bowesman    | Scott Bradner        | Randy Bush          |
+   | Tom Byrer          | Bruce Campbell       | Larry Campbell      |
+   | W J Carpenter      | Michael Chapman      | Richard Clayton     |
+   | Maurizio Codogno   | Jim Conklin          | R Kelley Cook       |
+   | Nathan Coulter     | Steve Coya           | Mark Crispin        |
+   | Dave Crocker       | Matt Curtin          | Michael D'Errico    |
+   | Cyrus Daboo        | Michael D Dean       | Jutta Degener       |
+   | Mark Delany        | Steve Dorner         | Harold A Driscoll   |
+   | Michael Elkins     | Frank Ellerman       | Robert Elz          |
+   | Johnny Eriksson    | Erik E Fair          | Roger Fajman        |
+   | Patrik Faeltstroem | Claus Andre Faerber  | Barry Finkel        |
+   | Erik Forsberg      | Chuck Foster         | Paul Fox            |
+   | Klaus M Frank      | Ned Freed            | Jochen Friedrich    |
+   | Randall C Gellens  | Sukvinder Singh Gill | Tim Goodwin         |
+   | Philip Guenther    | Arnt Gulbrandsen     | Eric A Hall         |
+   | Tony Hansen        | John Hawkinson       | Philip Hazel        |
+   | Kai Henningsen     | Robert Herriot       | Paul Hethmon        |
+   | Jim Hill           | Alfred Hoenes        | Paul E Hoffman      |
+   | Steve Hole         | Kari Hurtta          | Marco S Hyman       |
+   | Ofer Inbar         | Olle Jarnefors       | Kevin Johnson       |
+   | Sudish Joseph      | Maynard Kang         | Prabhat Keni        |
+   | John C Klensin     | Graham Klyne         | Brad Knowles        |
+   | Shuhei Kobayashi   | Peter Koch           | Dan Kohn            |
+   | Christian Kuhtz    | Anand Kumria         | Steen Larsen        |
+   | Eliot Lear         | Barry Leiba          | Jay Levitt          |
+   | Bruce Lilly        | Lars-Johan Liman     | Charles Lindsey     |
+   | Pete Loshin        | Simon Lyall          | Bill Manning        |
+   | John Martin        | Mark Martinec        | Larry Masinter      |
+   | Denis McKeon       | William P McQuillan  | Alexey Melnikov     |
+   | Perry E Metzger    | Steven Miller        | S Moonesamy         |
+   | Keith Moore        | John Gardiner Myers  | Chris Newman        |
+   | John W Noerenberg  | Eric Norman          | Mike O'Dell         |
+   | Larry Osterman     | Paul Overell         | Jacob Palme         |
+   | Michael A Patton   | Uzi Paz              | Michael A Quinlan   |
+   | Robert Rapplean    | Eric S Raymond       | Sam Roberts         |
+   | Hugh Sasse         | Bart Schaefer        | Tom Scola           |
+   | Wolfgang Segmuller | Nick Shelness        | John Stanley        |
+   | Einar Stefferud    | Jeff Stephenson      | Bernard Stern       |
+   | Peter Sylvester    | Mark Symons          | Eric Thomas         |
+   | Lee Thompson       | Karel De Vriendt     | Matthew Wall        |
+   | Rolf Weber         | Brent B Welch        | Dan Wing            |
+   | Jack De Winter     | Gregory J Woodhouse  | Greg A Woods        |
+   | Kazu Yamamoto      | Alain Zahm           | Jamie Zawinski      |
+   | Timothy S Zurcher  |                      |                     |
+   +--------------------+----------------------+---------------------+
+
+
+
+Resnick                     Standards Track                    [Page 54]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+7.  References
+
+7.1.  Normative References
+
+   [ANSI.X3-4.1986]  American National Standards Institute, "Coded
+                     Character Set - 7-bit American Standard Code for
+                     Information Interchange", ANSI X3.4, 1986.
+
+   [RFC1034]         Mockapetris, P., "Domain names - concepts and
+                     facilities", STD 13, RFC 1034, November 1987.
+
+   [RFC1035]         Mockapetris, P., "Domain names - implementation and
+                     specification", STD 13, RFC 1035, November 1987.
+
+   [RFC1123]         Braden, R., "Requirements for Internet Hosts -
+                     Application and Support", STD 3, RFC 1123,
+                     October 1989.
+
+   [RFC2119]         Bradner, S., "Key words for use in RFCs to Indicate
+                     Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [RFC5234]         Crocker, D. and P. Overell, "Augmented BNF for
+                     Syntax Specifications: ABNF", STD 68, RFC 5234,
+                     January 2008.
+
+7.2.  Informative References
+
+   [RFC0822]         Crocker, D., "Standard for the format of ARPA
+                     Internet text messages", STD 11, RFC 822,
+                     August 1982.
+
+   [RFC1305]         Mills, D., "Network Time Protocol (Version 3)
+                     Specification, Implementation", RFC 1305,
+                     March 1992.
+
+   [ISO.2022.1994]   International Organization for Standardization,
+                     "Information technology - Character code structure
+                     and extension techniques", ISO Standard 2022, 1994.
+
+   [RFC2045]         Freed, N. and N. Borenstein, "Multipurpose Internet
+                     Mail Extensions (MIME) Part One: Format of Internet
+                     Message Bodies", RFC 2045, November 1996.
+
+   [RFC2046]         Freed, N. and N. Borenstein, "Multipurpose Internet
+                     Mail Extensions (MIME) Part Two: Media Types",
+                     RFC 2046, November 1996.
+
+
+
+
+
+Resnick                     Standards Track                    [Page 55]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+   [RFC2047]         Moore, K., "MIME (Multipurpose Internet Mail
+                     Extensions) Part Three: Message Header Extensions
+                     for Non-ASCII Text", RFC 2047, November 1996.
+
+   [RFC2049]         Freed, N. and N. Borenstein, "Multipurpose Internet
+                     Mail Extensions (MIME) Part Five: Conformance
+                     Criteria and Examples", RFC 2049, November 1996.
+
+   [RFC2822]         Resnick, P., "Internet Message Format", RFC 2822,
+                     April 2001.
+
+   [RFC3864]         Klyne, G., Nottingham, M., and J. Mogul,
+                     "Registration Procedures for Message Header
+                     Fields", BCP 90, RFC 3864, September 2004.
+
+   [RFC4021]         Klyne, G. and J. Palme, "Registration of Mail and
+                     MIME Header Fields", RFC 4021, March 2005.
+
+   [RFC4288]         Freed, N. and J. Klensin, "Media Type
+                     Specifications and Registration Procedures",
+                     BCP 13, RFC 4288, December 2005.
+
+   [RFC4289]         Freed, N. and J. Klensin, "Multipurpose Internet
+                     Mail Extensions (MIME) Part Four: Registration
+                     Procedures", BCP 13, RFC 4289, December 2005.
+
+   [RFC5321]         Klensin, J., "Simple Mail Transfer Protocol",
+                     RFC 5321, October 2008.
+
+Author's Address
+
+   Peter W. Resnick (editor)
+   Qualcomm Incorporated
+   5775 Morehouse Drive
+   San Diego, CA  92121-1714
+   US
+
+   Phone: +1 858 651 4478
+   EMail: presnick@qualcomm.com
+   URI:   http://www.qualcomm.com/~presnick/
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 56]
+
+RFC 5322                Internet Message Format             October 2008
+
+
+Full Copyright Statement
+
+   Copyright (C) The IETF Trust (2008).
+
+   This document is subject to the rights, licenses and restrictions
+   contained in BCP 78, and except as set forth therein, the authors
+   retain all their rights.
+
+   This document and the information contained herein are provided on an
+   "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+   OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY, THE IETF TRUST AND
+   THE INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF
+   THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+   WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+   The IETF takes no position regarding the validity or scope of any
+   Intellectual Property Rights or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; nor does it represent that it has
+   made any independent effort to identify any such rights.  Information
+   on the procedures with respect to rights in RFC documents can be
+   found in BCP 78 and BCP 79.
+
+   Copies of IPR disclosures made to the IETF Secretariat and any
+   assurances of licenses to be made available, or the result of an
+   attempt made to obtain a general license or permission for the use of
+   such proprietary rights by implementers or users of this
+   specification can be obtained from the IETF on-line IPR repository at
+   http://www.ietf.org/ipr.
+
+   The IETF invites any interested party to bring to its attention any
+   copyrights, patents or patent applications, or other proprietary
+   rights that may cover technology that may be required to implement
+   this standard.  Please address the information to the IETF at
+   ietf-ipr@ietf.org.
+
+
+
+
+
+
+
+
+
+
+
+
+Resnick                     Standards Track                    [Page 57]
+
diff --git a/doc/mbox-rfc4155.txt b/doc/mbox-rfc4155.txt
@@ -0,0 +1,507 @@
+
+
+
+
+
+
+Network Working Group                                            E. Hall
+Request for Comments: 4155                                September 2005
+Category: Informational
+
+
+                    The application/mbox Media Type
+
+Status of This Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard of any kind.  Distribution of this
+   memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2005).
+
+Abstract
+
+   This memo requests that the application/mbox media type be authorized
+   for allocation by the IESG, according to the terms specified in RFC
+   2048.  This memo also defines a default format for the mbox database,
+   which must be supported by all conformant implementations.
+
+1.  Background and Overview
+
+   UNIX-like operating systems have historically made widespread use of
+   "mbox" database files for a variety of local email purposes.  In the
+   common case, mbox files store linear sequences of one or more
+   electronic mail messages, with local email clients treating the
+   database as a logical folder of email messages.  mbox databases are
+   also used by a variety of other messaging tools, such as mailing list
+   management programs, archiving and filtering utilities, messaging
+   servers, and other related applications.  In recent years, mbox
+   databases have also become common on a large number of non-UNIX
+   computing platforms, for similar kinds of purposes.
+
+   The increased pervasiveness of these files has led to an increased
+   demand for a standardized, network-wide interchange of these files as
+   discrete database objects.  In turn, this dictates a need for a
+   general media type definition for mbox files, which is the subject
+   and purpose of this memo.
+
+
+
+
+
+
+
+
+
+Hall                         Informational                      [Page 1]
+
+RFC 4155            The application/mbox Media Type       September 2005
+
+
+2.  About the mbox Database
+
+   The mbox database format is not documented in an authoritative
+   specification, but instead exists as a well-known output format that
+   is anecdotally documented, or which is only authoritatively
+   documented for a specific platform or tool.
+
+   mbox databases typically contain a linear sequence of electronic mail
+   messages.  Each message begins with a separator line that identifies
+   the message sender, and also identifies the date and time at which
+   the message was received by the final recipient (either the last-hop
+   system in the transfer path, or the system which serves as the
+   recipient's mailstore).  Each message is typically terminated by an
+   empty line.  The end of the database is usually recognized by either
+   the absence of any additional data, or by the presence of an explicit
+   end-of-file marker.
+
+   The structure of the separator lines vary across implementations, but
+   usually contain the exact character sequence of "From", followed by a
+   single Space character (0x20), an email address of some kind, another
+   Space character, a timestamp sequence of some kind, and an end-of-
+   line marker.  However, due to the lack of any authoritative
+   specification, each of these attributes is known to vary widely
+   across implementations.  For example, the email address can reflect
+   any addressing syntax that has ever been used on any messaging system
+   in all of history (specifically including address forms that are not
+   compatible with Internet messages, as defined by RFC 2822 [RFC2822]).
+   Similarly, the timestamp sequences can also vary according to system
+   output, while the end-of-line sequences will often reflect platform-
+   specific requirements.  Different data formats can even appear within
+   a single database as a result of multiple mbox files being
+   concatenated together, or because a single file was accessed by
+   multiple messaging clients, each of which has used its own syntax for
+   the separator line.
+
+   Message data within mbox databases often reflects site-specific
+   peculiarities.  For example, it is entirely possible for the message
+   body or headers in an mbox database to contain untagged eight-bit
+   character data that implicitly reflects a site-specific default
+   language or locale, or that reflects local defaults for timestamps
+   and email addresses; none of this data is widely portable beyond the
+   local scope.  Similarly, message data can also contain unencoded
+   eight-bit binary data, or can use encoding formats that represent a
+   specific platform (e.g., BINHEX or UUENCODE sequences).
+
+
+
+
+
+
+
+Hall                         Informational                      [Page 2]
+
+RFC 4155            The application/mbox Media Type       September 2005
+
+
+   Many implementations are also known to escape message body lines that
+   begin with the character sequence of "From ", so as to prevent
+   confusion with overly-liberal parsers that do not search for full
+   separator lines.  In the common case, a leading Greater-Than symbol
+   (0x3E) is used for this purpose (with "From " becoming ">From ").
+   However, other implementations are known not to escape such lines
+   unless they are immediately preceded by a blank line or if they also
+   appear to contain an email address and a timestamp.  Other
+   implementations are also known to perform secondary escapes against
+   these lines if they are already escaped or quoted, while others
+   ignore these mechanisms altogether.
+
+   A comprehensive description of mbox database files on UNIX-like
+   systems can be found at http://qmail.org./man/man5/mbox.html, which
+   should be treated as mostly authoritative for those variations that
+   are otherwise only documented in anecdotal form.  However, readers
+   are advised that many other platforms and tools make use of mbox
+   databases, and that there are many more potential variations that can
+   be encountered in the wild.
+
+   In order to mitigate errors that may arise from such vagaries, this
+   specification defines a "format" parameter to the application/mbox
+   media type declaration, which can be used to identify the specific
+   kind of mbox database that is being transferred.  Furthermore, this
+   specification defines a "default" database format which MUST be
+   supported by implementations that claim to be compliant with this
+   specification, and which is to be used as the implicit format for
+   undeclared application/mbox data objects.  Additional format types
+   are to be defined in subsequent specifications.  Messaging systems
+   that receive an mbox database with an unknown format parameter value
+   SHOULD treat the data as an opaque binary object, as if the data had
+   been declared as application/octet-stream.
+
+   Refer to Appendix A for a description of the default mbox format.
+
+   Note that RFC 2046 [RFC2046] defines the multipart/digest media type
+   for transferring platform-independent message files.  Because that
+   specification defines a set of neutral and strict formatting rules,
+   the multipart/digest media type already facilitates highly-
+   predictable transfer and conversion operations; as such, implementers
+   are strongly encouraged to support and use that media type where
+   possible.
+
+
+
+
+
+
+
+
+
+Hall                         Informational                      [Page 3]
+
+RFC 4155            The application/mbox Media Type       September 2005
+
+
+3.  Prerequisites and Terminology
+
+   Readers of this document are expected to be familiar with the
+   specification for MIME [RFC2045] and MIME-type registrations
+   [RFC2048].
+
+   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+   document are to be interpreted as described in RFC 2119 [RFC2119].
+
+4.  The application/mbox Media Type Registration
+
+   This section provides the media type registration application (as per
+   [RFC2048]).
+
+   MIME media type name: application
+
+   MIME subtype name: mbox
+
+   Required parameters: none
+
+   Optional parameters: The "format" parameter identifies the format of
+   the mbox database and the messages contained therein.  The default
+   value for the "format" parameter is "default", and refers to the
+   formatting rules defined in Appendix A of this memo.  mbox databases
+   that do not have a "format" parameter SHOULD be interpreted as having
+   the implicit "format" value of "default".  mbox databases that have
+   an unknown value for the "format" parameter SHOULD be treated as
+   opaque data objects, as if the media type had been specified as
+   application/octet-stream.  Additional values for the format parameter
+   are to be defined in subsequent specifications, and registered with
+   IANA.
+
+   Encoding considerations: If an email client receives an mbox database
+   as a message attachment, and then stores that attachment within a
+   local mbox database, the contents of the two database files may
+   become irreversibly intermingled, such that both databases are
+   rendered unrecognizable.  In order to avoid these collisions,
+   messaging systems that support this specification MUST encode an mbox
+   database (or at a minimum, the separator lines) with non-transparent
+   transfer encoding (such as BASE64 or Quoted-Printable) whenever an
+   application/mbox object is transferred via messaging protocols.
+   Other transfer services are generally encouraged to adopt similar
+   encoding strategies in order to allow for any subsequent
+   retransmission that might occur, but this is not a requirement.
+   Implementers should also be prepared to encode mbox data locally if
+   non-compliant data is received.
+
+
+
+
+Hall                         Informational                      [Page 4]
+
+RFC 4155            The application/mbox Media Type       September 2005
+
+
+   Security considerations: mbox data is passive, and does not generally
+   represent a unique or new security threat.  However, there is risk in
+   sharing any kind of data, because unintentional information may be
+   exposed, and this risk certainly applies to mbox data as well.
+
+   Interoperability considerations: Due to the lack of a single
+   authoritative specification for mbox databases, there are a large
+   number of variations between database formats (refer to the
+   introduction text for common examples), and it is expected that non-
+   conformant data will be erroneously tagged or exchanged.  Although
+   the "default" format specified in this memo does not allow for these
+   kinds of vagaries, prior negotiation or agreement between humans may
+   sometimes be needed.
+
+   Published specification: see Appendix A.
+
+   Applications that use this media type: hundreds of messaging products
+   make use of the mbox database format, in one form or another.
+
+   Magic number(s): mbox database files can be recognized by having a
+   leading character sequence of "From", followed by a single Space
+   character (0x20), followed by additional printable character data
+   (refer to the description in Appendix A for details).  However,
+   implementers are cautioned that not all such files will be compliant
+   with all of the formatting rules, therefore implementers should treat
+   these files with an appropriate amount of circumspection.
+
+   File extension(s): mbox database files sometimes have an ".mbox"
+   extension, but this is not required nor expected.  As with magic
+   numbers, implementers should avoid reflexive assumptions about the
+   contents of such files.
+
+   Macintosh File Type Code(s): None are known to be common.
+
+   Person & email address to contact for further information: Eric A.
+   Hall (ehall@ntrg.com)
+
+   Intended usage: COMMON
+
+5.  Security Considerations
+
+   See the discussion in section 4.
+
+
+
+
+
+
+
+
+
+Hall                         Informational                      [Page 5]
+
+RFC 4155            The application/mbox Media Type       September 2005
+
+
+6.  IANA Considerations
+
+   The IANA has registered the application/mbox media type in the MIME
+   registry, using the application provided in section 4 above.
+
+   Furthermore, IANA has established and will maintain a registry of
+   values for the "format" parameter as described in this memo.  The
+   first registration is the "default" value, using the description
+   provided in Appendix A.  Subsequent values for the "format" parameter
+   MUST be accompanied by some form of recognizable, complete, and
+   legitimate specification, such as an IESG-approved specification, or
+   some kind of authoritative vendor documentation.
+
+7.  Normative References
+
+   [RFC2045]   Freed, N. and N. Borenstein, "Multipurpose Internet Mail
+               Extensions (MIME) Part One: Format of Internet Message
+               Bodies", RFC 2045, November 1996.
+
+   [RFC2046]   Freed, N. and N. Borenstein, "Multipurpose Internet Mail
+               Extensions (MIME) Part Two: Media Types", RFC 2046,
+               November 1996.
+
+   [RFC2048]   Freed, N., Klensin, J., and J. Postel, "Multipurpose
+               Internet Mail Extensions (MIME) Part Four: Registration
+               Procedures", BCP 13, RFC 2048, November 1996.
+
+   [RFC2119]   Bradner, S., "Key words for use in RFCs to Indicate
+               Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [RFC2822]   Resnick, P., "Internet Message Format", RFC 2822, April
+               2001.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hall                         Informational                      [Page 6]
+
+RFC 4155            The application/mbox Media Type       September 2005
+
+
+Appendix A.  The "default" mbox Database Format
+
+   In order to improve interoperability among messaging systems, this
+   memo defines a "default" mbox database format, which MUST be
+   supported by all implementations that claim to be compliant with this
+   specification.
+
+   The "default" mbox database format uses a linear sequence of Internet
+   messages, with each message being immediately prefaced by a separator
+   line, and being terminated by an empty line.  More specifically:
+
+      o Each message within the database MUST follow the syntax and
+        formatting rules defined in RFC 2822 [RFC2822] and its related
+        specifications, with the exception that the canonical mbox
+        database MUST use a single Line-Feed character (0x0A) as the
+        end-of-line sequence, and MUST NOT use a Carriage-Return/Line-
+        Feed pair (NB: this requirement only applies to the canonical
+        mbox database as transferred, and does not override any other
+        specifications).  This usage represents the most common
+        historical representation of the mbox database format, and
+        allows for the least amount of conversion.
+
+      o Messages within the default mbox database MUST consist of
+        seven-bit characters within an eight-bit stream.  Eight-bit data
+        within the stream MUST be converted to a seven-bit form (using
+        appropriate, standardized encoding) and appropriately tagged
+        (with the correct header fields) before the database is
+        transferred.
+
+      o Message headers and data in the default mbox database MUST be
+        fully-qualified, as per the relevant specification(s).  For
+        example, email addresses in the various header fields MUST have
+        legitimate domain names (as per RFC 2822), while extended
+        characters and encodings MUST be specified in the appropriate
+        location (as per the appropriate MIME specifications), and so
+        forth.
+
+      o Each message in the mbox database MUST be immediately preceded
+        by a single separator line, which MUST conform to the following
+        syntax:
+
+           The exact character sequence of "From";
+
+           a single Space character (0x20);
+
+           the email address of the message sender (as obtained from the
+           message envelope or other authoritative source), conformant
+           with the "addr-spec" syntax from RFC 2822;
+
+
+
+Hall                         Informational                      [Page 7]
+
+RFC 4155            The application/mbox Media Type       September 2005
+
+
+           a single Space character;
+
+           a timestamp indicating the UTC date and time when the message
+           was originally received, conformant with the syntax of the
+           traditional UNIX 'ctime' output sans timezone (note that the
+           use of UTC precludes the need for a timezone indicator);
+
+           an end-of-line marker.
+
+      o Each message in the database MUST be terminated by an empty
+        line, containing a single end-of-line marker.
+
+   Note that the first message in an mbox database will only be prefaced
+   by a separator line, while every other message will begin with two
+   end-of-line sequences (one at the end of the message itself, and
+   another to mark the end of the message within the mbox database file
+   stream) and a separator line (marking the new message).  The end of
+   the database is implicitly reached when no more message data or
+   separator lines are found.
+
+   Also note that this specification does not prescribe any escape
+   syntax for message body lines that begin with the character sequence
+   of "From ".  Recipient systems are expected to parse full separator
+   lines as they are documented above.
+
+Author's Address
+
+   Eric A. Hall
+
+   EMail: ehall@ntrg.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hall                         Informational                      [Page 8]
+
+RFC 4155            The application/mbox Media Type       September 2005
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2005).
+
+   This document is subject to the rights, licenses and restrictions
+   contained in BCP 78, and except as set forth therein, the authors
+   retain all their rights.
+
+   This document and the information contained herein are provided on an
+   "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+   OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
+   ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
+   INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
+   INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+   WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+   The IETF takes no position regarding the validity or scope of any
+   Intellectual Property Rights or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; nor does it represent that it has
+   made any independent effort to identify any such rights.  Information
+   on the procedures with respect to rights in RFC documents can be
+   found in BCP 78 and BCP 79.
+
+   Copies of IPR disclosures made to the IETF Secretariat and any
+   assurances of licenses to be made available, or the result of an
+   attempt made to obtain a general license or permission for the use of
+   such proprietary rights by implementers or users of this
+   specification can be obtained from the IETF on-line IPR repository at
+   http://www.ietf.org/ipr.
+
+   The IETF invites any interested party to bring to its attention any
+   copyrights, patents or patent applications, or other proprietary
+   rights that may cover technology that may be required to implement
+   this standard.  Please address the information to the IETF at ietf-
+   ipr@ietf.org.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+Hall                         Informational                      [Page 9]
+
diff --git a/doc/mime-p1-rfc2045.txt b/doc/mime-p1-rfc2045.txt
@@ -0,0 +1,1739 @@
+
+
+
+
+
+
+Network Working Group                                          N. Freed
+Request for Comments: 2045                                     Innosoft
+Obsoletes: 1521, 1522, 1590                               N. Borenstein
+Category: Standards Track                                 First Virtual
+                                                          November 1996
+
+
+                 Multipurpose Internet Mail Extensions
+                            (MIME) Part One:
+                   Format of Internet Message Bodies
+
+Status of this Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Abstract
+
+   STD 11, RFC 822, defines a message representation protocol specifying
+   considerable detail about US-ASCII message headers, and leaves the
+   message content, or message body, as flat US-ASCII text.  This set of
+   documents, collectively called the Multipurpose Internet Mail
+   Extensions, or MIME, redefines the format of messages to allow for
+
+    (1)   textual message bodies in character sets other than
+          US-ASCII,
+
+    (2)   an extensible set of different formats for non-textual
+          message bodies,
+
+    (3)   multi-part message bodies, and
+
+    (4)   textual header information in character sets other than
+          US-ASCII.
+
+   These documents are based on earlier work documented in RFC 934, STD
+   11, and RFC 1049, but extend and revise them.  Because RFC 822 said
+   so little about message bodies, these documents are largely
+   orthogonal to (rather than a revision of) RFC 822.
+
+   This initial document specifies the various headers used to describe
+   the structure of MIME messages. The second document, RFC 2046,
+   defines the general structure of the MIME media typing system and
+   defines an initial set of media types. The third document, RFC 2047,
+   describes extensions to RFC 822 to allow non-US-ASCII text data in
+
+
+
+Freed & Borenstein          Standards Track                     [Page 1]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   Internet mail header fields. The fourth document, RFC 2048, specifies
+   various IANA registration procedures for MIME-related facilities. The
+   fifth and final document, RFC 2049, describes MIME conformance
+   criteria as well as providing some illustrative examples of MIME
+   message formats, acknowledgements, and the bibliography.
+
+   These documents are revisions of RFCs 1521, 1522, and 1590, which
+   themselves were revisions of RFCs 1341 and 1342.  An appendix in RFC
+   2049 describes differences and changes from previous versions.
+
+Table of Contents
+
+   1. Introduction .........................................    3
+   2. Definitions, Conventions, and Generic BNF Grammar ....    5
+   2.1 CRLF ................................................    5
+   2.2 Character Set .......................................    6
+   2.3 Message .............................................    6
+   2.4 Entity ..............................................    6
+   2.5 Body Part ...........................................    7
+   2.6 Body ................................................    7
+   2.7 7bit Data ...........................................    7
+   2.8 8bit Data ...........................................    7
+   2.9 Binary Data .........................................    7
+   2.10 Lines ..............................................    7
+   3. MIME Header Fields ...................................    8
+   4. MIME-Version Header Field ............................    8
+   5. Content-Type Header Field ............................   10
+   5.1 Syntax of the Content-Type Header Field .............   12
+   5.2 Content-Type Defaults ...............................   14
+   6. Content-Transfer-Encoding Header Field ...............   14
+   6.1 Content-Transfer-Encoding Syntax ....................   14
+   6.2 Content-Transfer-Encodings Semantics ................   15
+   6.3 New Content-Transfer-Encodings ......................   16
+   6.4 Interpretation and Use ..............................   16
+   6.5 Translating Encodings ...............................   18
+   6.6 Canonical Encoding Model ............................   19
+   6.7 Quoted-Printable Content-Transfer-Encoding ..........   19
+   6.8 Base64 Content-Transfer-Encoding ....................   24
+   7. Content-ID Header Field ..............................   26
+   8. Content-Description Header Field .....................   27
+   9. Additional MIME Header Fields ........................   27
+   10. Summary .............................................   27
+   11. Security Considerations .............................   27
+   12. Authors' Addresses ..................................   28
+   A. Collected Grammar ....................................   29
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 2]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+1.  Introduction
+
+   Since its publication in 1982, RFC 822 has defined the standard
+   format of textual mail messages on the Internet.  Its success has
+   been such that the RFC 822 format has been adopted, wholly or
+   partially, well beyond the confines of the Internet and the Internet
+   SMTP transport defined by RFC 821.  As the format has seen wider use,
+   a number of limitations have proven increasingly restrictive for the
+   user community.
+
+   RFC 822 was intended to specify a format for text messages.  As such,
+   non-text messages, such as multimedia messages that might include
+   audio or images, are simply not mentioned.  Even in the case of text,
+   however, RFC 822 is inadequate for the needs of mail users whose
+   languages require the use of character sets richer than US-ASCII.
+   Since RFC 822 does not specify mechanisms for mail containing audio,
+   video, Asian language text, or even text in most European languages,
+   additional specifications are needed.
+
+   One of the notable limitations of RFC 821/822 based mail systems is
+   the fact that they limit the contents of electronic mail messages to
+   relatively short lines (e.g. 1000 characters or less [RFC-821]) of
+   7bit US-ASCII.  This forces users to convert any non-textual data
+   that they may wish to send into seven-bit bytes representable as
+   printable US-ASCII characters before invoking a local mail UA (User
+   Agent, a program with which human users send and receive mail).
+   Examples of such encodings currently used in the Internet include
+   pure hexadecimal, uuencode, the 3-in-4 base 64 scheme specified in
+   RFC 1421, the Andrew Toolkit Representation [ATK], and many others.
+
+   The limitations of RFC 822 mail become even more apparent as gateways
+   are designed to allow for the exchange of mail messages between RFC
+   822 hosts and X.400 hosts.  X.400 [X400] specifies mechanisms for the
+   inclusion of non-textual material within electronic mail messages.
+   The current standards for the mapping of X.400 messages to RFC 822
+   messages specify either that X.400 non-textual material must be
+   converted to (not encoded in) IA5Text format, or that they must be
+   discarded, notifying the RFC 822 user that discarding has occurred.
+   This is clearly undesirable, as information that a user may wish to
+   receive is lost.  Even though a user agent may not have the
+   capability of dealing with the non-textual material, the user might
+   have some mechanism external to the UA that can extract useful
+   information from the material.  Moreover, it does not allow for the
+   fact that the message may eventually be gatewayed back into an X.400
+   message handling system (i.e., the X.400 message is "tunneled"
+   through Internet mail), where the non-textual information would
+   definitely become useful again.
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 3]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   This document describes several mechanisms that combine to solve most
+   of these problems without introducing any serious incompatibilities
+   with the existing world of RFC 822 mail.  In particular, it
+   describes:
+
+    (1)   A MIME-Version header field, which uses a version
+          number to declare a message to be conformant with MIME
+          and allows mail processing agents to distinguish
+          between such messages and those generated by older or
+          non-conformant software, which are presumed to lack
+          such a field.
+
+    (2)   A Content-Type header field, generalized from RFC 1049,
+          which can be used to specify the media type and subtype
+          of data in the body of a message and to fully specify
+          the native representation (canonical form) of such
+          data.
+
+    (3)   A Content-Transfer-Encoding header field, which can be
+          used to specify both the encoding transformation that
+          was applied to the body and the domain of the result.
+          Encoding transformations other than the identity
+          transformation are usually applied to data in order to
+          allow it to pass through mail transport mechanisms
+          which may have data or character set limitations.
+
+    (4)   Two additional header fields that can be used to
+          further describe the data in a body, the Content-ID and
+          Content-Description header fields.
+
+   All of the header fields defined in this document are subject to the
+   general syntactic rules for header fields specified in RFC 822.  In
+   particular, all of these header fields except for Content-Disposition
+   can include RFC 822 comments, which have no semantic content and
+   should be ignored during MIME processing.
+
+   Finally, to specify and promote interoperability, RFC 2049 provides a
+   basic applicability statement for a subset of the above mechanisms
+   that defines a minimal level of "conformance" with this document.
+
+   HISTORICAL NOTE:  Several of the mechanisms described in this set of
+   documents may seem somewhat strange or even baroque at first reading.
+   It is important to note that compatibility with existing standards
+   AND robustness across existing practice were two of the highest
+   priorities of the working group that developed this set of documents.
+   In particular, compatibility was always favored over elegance.
+
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 4]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   Please refer to the current edition of the "Internet Official
+   Protocol Standards" for the standardization state and status of this
+   protocol.  RFC 822 and STD 3, RFC 1123 also provide essential
+   background for MIME since no conforming implementation of MIME can
+   violate them.  In addition, several other informational RFC documents
+   will be of interest to the MIME implementor, in particular RFC 1344,
+   RFC 1345, and RFC 1524.
+
+2.  Definitions, Conventions, and Generic BNF Grammar
+
+   Although the mechanisms specified in this set of documents are all
+   described in prose, most are also described formally in the augmented
+   BNF notation of RFC 822. Implementors will need to be familiar with
+   this notation in order to understand this set of documents, and are
+   referred to RFC 822 for a complete explanation of the augmented BNF
+   notation.
+
+   Some of the augmented BNF in this set of documents makes named
+   references to syntax rules defined in RFC 822.  A complete formal
+   grammar, then, is obtained by combining the collected grammar
+   appendices in each document in this set with the BNF of RFC 822 plus
+   the modifications to RFC 822 defined in RFC 1123 (which specifically
+   changes the syntax for `return', `date' and `mailbox').
+
+   All numeric and octet values are given in decimal notation in this
+   set of documents. All media type values, subtype values, and
+   parameter names as defined are case-insensitive.  However, parameter
+   values are case-sensitive unless otherwise specified for the specific
+   parameter.
+
+   FORMATTING NOTE:  Notes, such as this one, provide additional
+   nonessential information which may be skipped by the reader without
+   missing anything essential.  The primary purpose of these non-
+   essential notes is to convey information about the rationale of this
+   set of documents, or to place these documents in the proper
+   historical or evolutionary context.  Such information may in
+   particular be skipped by those who are focused entirely on building a
+   conformant implementation, but may be of use to those who wish to
+   understand why certain design choices were made.
+
+2.1.  CRLF
+
+   The term CRLF, in this set of documents, refers to the sequence of
+   octets corresponding to the two US-ASCII characters CR (decimal value
+   13) and LF (decimal value 10) which, taken together, in this order,
+   denote a line break in RFC 822 mail.
+
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 5]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+2.2.  Character Set
+
+   The term "character set" is used in MIME to refer to a method of
+   converting a sequence of octets into a sequence of characters.  Note
+   that unconditional and unambiguous conversion in the other direction
+   is not required, in that not all characters may be representable by a
+   given character set and a character set may provide more than one
+   sequence of octets to represent a particular sequence of characters.
+
+   This definition is intended to allow various kinds of character
+   encodings, from simple single-table mappings such as US-ASCII to
+   complex table switching methods such as those that use ISO 2022's
+   techniques, to be used as character sets.  However, the definition
+   associated with a MIME character set name must fully specify the
+   mapping to be performed.  In particular, use of external profiling
+   information to determine the exact mapping is not permitted.
+
+   NOTE: The term "character set" was originally used to describe such
+   straightforward schemes as US-ASCII and ISO-8859-1 which have a
+   simple one-to-one mapping from single octets to single characters.
+   Multi-octet coded character sets and switching techniques make the
+   situation more complex. For example, some communities use the term
+   "character encoding" for what MIME calls a "character set", while
+   using the phrase "coded character set" to denote an abstract mapping
+   from integers (not octets) to characters.
+
+2.3.  Message
+
+   The term "message", when not further qualified, means either a
+   (complete or "top-level") RFC 822 message being transferred on a
+   network, or a message encapsulated in a body of type "message/rfc822"
+   or "message/partial".
+
+2.4.  Entity
+
+   The term "entity" refers specifically to the MIME-defined header
+   fields and contents of either a message or one of the parts in the
+   body of a multipart entity.  The specification of such entities is
+   the essence of MIME.  Since the contents of an entity are often
+   called the "body", it makes sense to speak about the body of an
+   entity.  Any sort of field may be present in the header of an entity,
+   but only those fields whose names begin with "content-" actually have
+   any MIME-related meaning.  Note that this does NOT imply that they
+   have no meaning at all -- an entity that is also a message has non-
+   MIME header fields whose meanings are defined by RFC 822.
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 6]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+2.5.  Body Part
+
+   The term "body part" refers to an entity inside of a multipart
+   entity.
+
+2.6.  Body
+
+   The term "body", when not further qualified, means the body of an
+   entity, that is, the body of either a message or of a body part.
+
+   NOTE:  The previous four definitions are clearly circular.  This is
+   unavoidable, since the overall structure of a MIME message is indeed
+   recursive.
+
+2.7.  7bit Data
+
+   "7bit data" refers to data that is all represented as relatively
+   short lines with 998 octets or less between CRLF line separation
+   sequences [RFC-821].  No octets with decimal values greater than 127
+   are allowed and neither are NULs (octets with decimal value 0).  CR
+   (decimal value 13) and LF (decimal value 10) octets only occur as
+   part of CRLF line separation sequences.
+
+2.8.  8bit Data
+
+   "8bit data" refers to data that is all represented as relatively
+   short lines with 998 octets or less between CRLF line separation
+   sequences [RFC-821], but octets with decimal values greater than 127
+   may be used.  As with "7bit data" CR and LF octets only occur as part
+   of CRLF line separation sequences and no NULs are allowed.
+
+2.9.  Binary Data
+
+   "Binary data" refers to data where any sequence of octets whatsoever
+   is allowed.
+
+2.10.  Lines
+
+   "Lines" are defined as sequences of octets separated by CRLF
+   sequences.  This is consistent with both RFC 821 and RFC 822.
+   "Lines" only refers to a unit of data in a message, which may or may
+   not correspond to something that is actually displayed by a user
+   agent.
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 7]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+3.  MIME Header Fields
+
+   MIME defines a number of new RFC 822 header fields that are used to
+   describe the content of a MIME entity.  These header fields occur in
+   at least two contexts:
+
+    (1)   As part of a regular RFC 822 message header.
+
+    (2)   In a MIME body part header within a multipart
+          construct.
+
+   The formal definition of these header fields is as follows:
+
+     entity-headers := [ content CRLF ]
+                       [ encoding CRLF ]
+                       [ id CRLF ]
+                       [ description CRLF ]
+                       *( MIME-extension-field CRLF )
+
+     MIME-message-headers := entity-headers
+                             fields
+                             version CRLF
+                             ; The ordering of the header
+                             ; fields implied by this BNF
+                             ; definition should be ignored.
+
+     MIME-part-headers := entity-headers
+                          [ fields ]
+                          ; Any field not beginning with
+                          ; "content-" can have no defined
+                          ; meaning and may be ignored.
+                          ; The ordering of the header
+                          ; fields implied by this BNF
+                          ; definition should be ignored.
+
+   The syntax of the various specific MIME header fields will be
+   described in the following sections.
+
+4.  MIME-Version Header Field
+
+   Since RFC 822 was published in 1982, there has really been only one
+   format standard for Internet messages, and there has been little
+   perceived need to declare the format standard in use.  This document
+   is an independent specification that complements RFC 822.  Although
+   the extensions in this document have been defined in such a way as to
+   be compatible with RFC 822, there are still circumstances in which it
+   might be desirable for a mail-processing agent to know whether a
+   message was composed with the new standard in mind.
+
+
+
+Freed & Borenstein          Standards Track                     [Page 8]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   Therefore, this document defines a new header field, "MIME-Version",
+   which is to be used to declare the version of the Internet message
+   body format standard in use.
+
+   Messages composed in accordance with this document MUST include such
+   a header field, with the following verbatim text:
+
+     MIME-Version: 1.0
+
+   The presence of this header field is an assertion that the message
+   has been composed in compliance with this document.
+
+   Since it is possible that a future document might extend the message
+   format standard again, a formal BNF is given for the content of the
+   MIME-Version field:
+
+     version := "MIME-Version" ":" 1*DIGIT "." 1*DIGIT
+
+   Thus, future format specifiers, which might replace or extend "1.0",
+   are constrained to be two integer fields, separated by a period.  If
+   a message is received with a MIME-version value other than "1.0", it
+   cannot be assumed to conform with this document.
+
+   Note that the MIME-Version header field is required at the top level
+   of a message.  It is not required for each body part of a multipart
+   entity.  It is required for the embedded headers of a body of type
+   "message/rfc822" or "message/partial" if and only if the embedded
+   message is itself claimed to be MIME-conformant.
+
+   It is not possible to fully specify how a mail reader that conforms
+   with MIME as defined in this document should treat a message that
+   might arrive in the future with some value of MIME-Version other than
+   "1.0".
+
+   It is also worth noting that version control for specific media types
+   is not accomplished using the MIME-Version mechanism.  In particular,
+   some formats (such as application/postscript) have version numbering
+   conventions that are internal to the media format.  Where such
+   conventions exist, MIME does nothing to supersede them.  Where no
+   such conventions exist, a MIME media type might use a "version"
+   parameter in the content-type field if necessary.
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 9]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   NOTE TO IMPLEMENTORS:  When checking MIME-Version values any RFC 822
+   comment strings that are present must be ignored.  In particular, the
+   following four MIME-Version fields are equivalent:
+
+     MIME-Version: 1.0
+
+     MIME-Version: 1.0 (produced by MetaSend Vx.x)
+
+     MIME-Version: (produced by MetaSend Vx.x) 1.0
+
+     MIME-Version: 1.(produced by MetaSend Vx.x)0
+
+   In the absence of a MIME-Version field, a receiving mail user agent
+   (whether conforming to MIME requirements or not) may optionally
+   choose to interpret the body of the message according to local
+   conventions.  Many such conventions are currently in use and it
+   should be noted that in practice non-MIME messages can contain just
+   about anything.
+
+   It is impossible to be certain that a non-MIME mail message is
+   actually plain text in the US-ASCII character set since it might well
+   be a message that, using some set of nonstandard local conventions
+   that predate MIME, includes text in another character set or non-
+   textual data presented in a manner that cannot be automatically
+   recognized (e.g., a uuencoded compressed UNIX tar file).
+
+5.  Content-Type Header Field
+
+   The purpose of the Content-Type field is to describe the data
+   contained in the body fully enough that the receiving user agent can
+   pick an appropriate agent or mechanism to present the data to the
+   user, or otherwise deal with the data in an appropriate manner. The
+   value in this field is called a media type.
+
+   HISTORICAL NOTE:  The Content-Type header field was first defined in
+   RFC 1049.  RFC 1049 used a simpler and less powerful syntax, but one
+   that is largely compatible with the mechanism given here.
+
+   The Content-Type header field specifies the nature of the data in the
+   body of an entity by giving media type and subtype identifiers, and
+   by providing auxiliary information that may be required for certain
+   media types.  After the media type and subtype names, the remainder
+   of the header field is simply a set of parameters, specified in an
+   attribute=value notation.  The ordering of parameters is not
+   significant.
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 10]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   In general, the top-level media type is used to declare the general
+   type of data, while the subtype specifies a specific format for that
+   type of data.  Thus, a media type of "image/xyz" is enough to tell a
+   user agent that the data is an image, even if the user agent has no
+   knowledge of the specific image format "xyz".  Such information can
+   be used, for example, to decide whether or not to show a user the raw
+   data from an unrecognized subtype -- such an action might be
+   reasonable for unrecognized subtypes of text, but not for
+   unrecognized subtypes of image or audio.  For this reason, registered
+   subtypes of text, image, audio, and video should not contain embedded
+   information that is really of a different type.  Such compound
+   formats should be represented using the "multipart" or "application"
+   types.
+
+   Parameters are modifiers of the media subtype, and as such do not
+   fundamentally affect the nature of the content.  The set of
+   meaningful parameters depends on the media type and subtype.  Most
+   parameters are associated with a single specific subtype.  However, a
+   given top-level media type may define parameters which are applicable
+   to any subtype of that type.  Parameters may be required by their
+   defining content type or subtype or they may be optional. MIME
+   implementations must ignore any parameters whose names they do not
+   recognize.
+
+   For example, the "charset" parameter is applicable to any subtype of
+   "text", while the "boundary" parameter is required for any subtype of
+   the "multipart" media type.
+
+   There are NO globally-meaningful parameters that apply to all media
+   types.  Truly global mechanisms are best addressed, in the MIME
+   model, by the definition of additional Content-* header fields.
+
+   An initial set of seven top-level media types is defined in RFC 2046.
+   Five of these are discrete types whose content is essentially opaque
+   as far as MIME processing is concerned.  The remaining two are
+   composite types whose contents require additional handling by MIME
+   processors.
+
+   This set of top-level media types is intended to be substantially
+   complete.  It is expected that additions to the larger set of
+   supported types can generally be accomplished by the creation of new
+   subtypes of these initial types.  In the future, more top-level types
+   may be defined only by a standards-track extension to this standard.
+   If another top-level type is to be used for any reason, it must be
+   given a name starting with "X-" to indicate its non-standard status
+   and to avoid a potential conflict with a future official name.
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 11]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+5.1.  Syntax of the Content-Type Header Field
+
+   In the Augmented BNF notation of RFC 822, a Content-Type header field
+   value is defined as follows:
+
+     content := "Content-Type" ":" type "/" subtype
+                *(";" parameter)
+                ; Matching of media type and subtype
+                ; is ALWAYS case-insensitive.
+
+     type := discrete-type / composite-type
+
+     discrete-type := "text" / "image" / "audio" / "video" /
+                      "application" / extension-token
+
+     composite-type := "message" / "multipart" / extension-token
+
+     extension-token := ietf-token / x-token
+
+     ietf-token := <An extension token defined by a
+                    standards-track RFC and registered
+                    with IANA.>
+
+     x-token := <The two characters "X-" or "x-" followed, with
+                 no intervening white space, by any token>
+
+     subtype := extension-token / iana-token
+
+     iana-token := <A publicly-defined extension token. Tokens
+                    of this form must be registered with IANA
+                    as specified in RFC 2048.>
+
+     parameter := attribute "=" value
+
+     attribute := token
+                  ; Matching of attributes
+                  ; is ALWAYS case-insensitive.
+
+     value := token / quoted-string
+
+     token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
+                 or tspecials>
+
+     tspecials :=  "(" / ")" / "<" / ">" / "@" /
+                   "," / ";" / ":" / "\" / <"> /
+                   "/" / "[" / "]" / "?" / "="
+                   ; Must be in quoted-string,
+                   ; to use within parameter values
+
+
+
+Freed & Borenstein          Standards Track                    [Page 12]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   Note that the definition of "tspecials" is the same as the RFC 822
+   definition of "specials" with the addition of the three characters
+   "/", "?", and "=", and the removal of ".".
+
+   Note also that a subtype specification is MANDATORY -- it may not be
+   omitted from a Content-Type header field.  As such, there are no
+   default subtypes.
+
+   The type, subtype, and parameter names are not case sensitive.  For
+   example, TEXT, Text, and TeXt are all equivalent top-level media
+   types.  Parameter values are normally case sensitive, but sometimes
+   are interpreted in a case-insensitive fashion, depending on the
+   intended use.  (For example, multipart boundaries are case-sensitive,
+   but the "access-type" parameter for message/External-body is not
+   case-sensitive.)
+
+   Note that the value of a quoted string parameter does not include the
+   quotes.  That is, the quotation marks in a quoted-string are not a
+   part of the value of the parameter, but are merely used to delimit
+   that parameter value.  In addition, comments are allowed in
+   accordance with RFC 822 rules for structured header fields.  Thus the
+   following two forms
+
+     Content-type: text/plain; charset=us-ascii (Plain text)
+
+     Content-type: text/plain; charset="us-ascii"
+
+   are completely equivalent.
+
+   Beyond this syntax, the only syntactic constraint on the definition
+   of subtype names is the desire that their uses must not conflict.
+   That is, it would be undesirable to have two different communities
+   using "Content-Type: application/foobar" to mean two different
+   things.  The process of defining new media subtypes, then, is not
+   intended to be a mechanism for imposing restrictions, but simply a
+   mechanism for publicizing their definition and usage.  There are,
+   therefore, two acceptable mechanisms for defining new media subtypes:
+
+    (1)   Private values (starting with "X-") may be defined
+          bilaterally between two cooperating agents without
+          outside registration or standardization. Such values
+          cannot be registered or standardized.
+
+    (2)   New standard values should be registered with IANA as
+          described in RFC 2048.
+
+   The second document in this set, RFC 2046, defines the initial set of
+   media types for MIME.
+
+
+
+Freed & Borenstein          Standards Track                    [Page 13]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+5.2.  Content-Type Defaults
+
+   Default RFC 822 messages without a MIME Content-Type header are taken
+   by this protocol to be plain text in the US-ASCII character set,
+   which can be explicitly specified as:
+
+     Content-type: text/plain; charset=us-ascii
+
+   This default is assumed if no Content-Type header field is specified.
+   It is also recommended that this default be assumed when a
+   syntactically invalid Content-Type header field is encountered. In
+   the presence of a MIME-Version header field and the absence of any
+   Content-Type header field, a receiving User Agent can also assume
+   that plain US-ASCII text was the sender's intent.  Plain US-ASCII
+   text may still be assumed in the absence of a MIME-Version or the
+   presence of a syntactically invalid Content-Type header field, but
+   the sender's intent might have been otherwise.
+
+6.  Content-Transfer-Encoding Header Field
+
+   Many media types which could be usefully transported via email are
+   represented, in their "natural" format, as 8bit character or binary
+   data.  Such data cannot be transmitted over some transfer protocols.
+   For example, RFC 821 (SMTP) restricts mail messages to 7bit US-ASCII
+   data with lines no longer than 1000 characters including any trailing
+   CRLF line separator.
+
+   It is necessary, therefore, to define a standard mechanism for
+   encoding such data into a 7bit short line format.  Proper labelling
+   of unencoded material in less restrictive formats for direct use over
+   less restrictive transports is also desirable.  This document
+   specifies that such encodings will be indicated by a new "Content-
+   Transfer-Encoding" header field.  This field has not been defined by
+   any previous standard.
+
+6.1.  Content-Transfer-Encoding Syntax
+
+   The Content-Transfer-Encoding field's value is a single token
+   specifying the type of encoding, as enumerated below.  Formally:
+
+     encoding := "Content-Transfer-Encoding" ":" mechanism
+
+     mechanism := "7bit" / "8bit" / "binary" /
+                  "quoted-printable" / "base64" /
+                  ietf-token / x-token
+
+   These values are not case sensitive -- Base64 and BASE64 and bAsE64
+   are all equivalent.  An encoding type of 7BIT requires that the body
+
+
+
+Freed & Borenstein          Standards Track                    [Page 14]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   is already in a 7bit mail-ready representation.  This is the default
+   value -- that is, "Content-Transfer-Encoding: 7BIT" is assumed if the
+   Content-Transfer-Encoding header field is not present.
+
+6.2.  Content-Transfer-Encodings Semantics
+
+   This single Content-Transfer-Encoding token actually provides two
+   pieces of information.  It specifies what sort of encoding
+   transformation the body was subjected to and hence what decoding
+   operation must be used to restore it to its original form, and it
+   specifies what the domain of the result is.
+
+   The transformation part of any Content-Transfer-Encodings specifies,
+   either explicitly or implicitly, a single, well-defined decoding
+   algorithm, which for any sequence of encoded octets either transforms
+   it to the original sequence of octets which was encoded, or shows
+   that it is illegal as an encoded sequence.  Content-Transfer-
+   Encodings transformations never depend on any additional external
+   profile information for proper operation. Note that while decoders
+   must produce a single, well-defined output for a valid encoding, no
+   such restrictions exist for encoders: Encoding a given sequence of
+   octets to different, equivalent encoded sequences is perfectly legal.
+
+   Three transformations are currently defined: identity, the "quoted-
+   printable" encoding, and the "base64" encoding.  The domains are
+   "binary", "8bit" and "7bit".
+
+   The Content-Transfer-Encoding values "7bit", "8bit", and "binary" all
+   mean that the identity (i.e. NO) encoding transformation has been
+   performed.  As such, they serve simply as indicators of the domain of
+   the body data, and provide useful information about the sort of
+   encoding that might be needed for transmission in a given transport
+   system.  The terms "7bit data", "8bit data", and "binary data" are
+   all defined in Section 2.
+
+   The quoted-printable and base64 encodings transform their input from
+   an arbitrary domain into material in the "7bit" range, thus making it
+   safe to carry over restricted transports.  The specific definitions of
+   the transformations are given below.
+
+   The proper Content-Transfer-Encoding label must always be used.
+   Labelling unencoded data containing 8bit characters as "7bit" is not
+   allowed, nor is labelling unencoded non-line-oriented data as
+   anything other than "binary" allowed.
+
+   Unlike media subtypes, a proliferation of Content-Transfer-Encoding
+   values is both undesirable and unnecessary.  However, establishing
+   only a single transformation into the "7bit" domain does not seem
+
+
+
+Freed & Borenstein          Standards Track                    [Page 15]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   possible.  There is a tradeoff between the desire for a compact and
+   efficient encoding of largely-binary data and the desire for a
+   somewhat readable encoding of data that is mostly, but not entirely,
+   7bit.  For this reason, at least two encoding mechanisms are
+   necessary: a more or less readable encoding (quoted-printable) and a
+   "dense" or "uniform" encoding (base64).
+
+   Mail transport for unencoded 8bit data is defined in RFC 1652.  As of
+   the initial publication of this document, there are no standardized
+   Internet mail transports for which it is legitimate to include
+   unencoded binary data in mail bodies.  Thus there are no
+   circumstances in which the "binary" Content-Transfer-Encoding is
+   actually valid in Internet mail.  However, in the event that binary
+   mail transport becomes a reality in Internet mail, or when MIME is
+   used in conjunction with any other binary-capable mail transport
+   mechanism, binary bodies must be labelled as such using this
+   mechanism.
+
+   NOTE: The five values defined for the Content-Transfer-Encoding field
+   imply nothing about the media type other than the algorithm by which
+   it was encoded or the transport system requirements if unencoded.
+
+6.3.  New Content-Transfer-Encodings
+
+   Implementors may, if necessary, define private Content-Transfer-
+   Encoding values, but must use an x-token, which is a name prefixed by
+   "X-", to indicate its non-standard status, e.g., "Content-Transfer-
+   Encoding: x-my-new-encoding".  Additional standardized Content-
+   Transfer-Encoding values must be specified by a standards-track RFC.
+   The requirements such specifications must meet are given in RFC 2048.
+   As such, all content-transfer-encoding namespace except that
+   beginning with "X-" is explicitly reserved to the IETF for future
+   use.
+
+   Unlike media types and subtypes, the creation of new Content-
+   Transfer-Encoding values is STRONGLY discouraged, as it seems likely
+   to hinder interoperability with little potential benefit.
+
+6.4.  Interpretation and Use
+
+   If a Content-Transfer-Encoding header field appears as part of a
+   message header, it applies to the entire body of that message.  If a
+   Content-Transfer-Encoding header field appears as part of an entity's
+   headers, it applies only to the body of that entity.  If an entity is
+   of type "multipart" the Content-Transfer-Encoding is not permitted to
+   have any value other than "7bit", "8bit" or "binary".  Even more
+   severe restrictions apply to some subtypes of the "message" type.
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 16]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   It should be noted that most media types are defined in terms of
+   octets rather than bits, so that the mechanisms described here are
+   mechanisms for encoding arbitrary octet streams, not bit streams.  If
+   a bit stream is to be encoded via one of these mechanisms, it must
+   first be converted to an 8bit byte stream using the network standard
+   bit order ("big-endian"), in which the earlier bits in a stream
+   become the higher-order bits in an 8bit byte.  A bit stream not ending
+   at an 8bit boundary must be padded with zeroes. RFC 2046 provides a
+   mechanism for noting the addition of such padding in the case of the
+   application/octet-stream media type, which has a "padding" parameter.
+
+   The encoding mechanisms defined here explicitly encode all data in
+   US-ASCII.  Thus, for example, suppose an entity has header fields
+   such as:
+
+     Content-Type: text/plain; charset=ISO-8859-1
+     Content-transfer-encoding: base64
+
+   This must be interpreted to mean that the body is a base64 US-ASCII
+   encoding of data that was originally in ISO-8859-1, and will be in
+   that character set again after decoding.
+
+   Certain Content-Transfer-Encoding values may only be used on certain
+   media types.  In particular, it is EXPRESSLY FORBIDDEN to use any
+   encodings other than "7bit", "8bit", or "binary" with any composite
+   media type, i.e. one that recursively includes other Content-Type
+   fields.  Currently the only composite media types are "multipart" and
+   "message".  All encodings that are desired for bodies of type
+   multipart or message must be done at the innermost level, by encoding
+   the actual body that needs to be encoded.
+
+   It should also be noted that, by definition, if a composite entity
+   has a transfer-encoding value such as "7bit", but one of the enclosed
+   entities has a less restrictive value such as "8bit", then either the
+   outer "7bit" labelling is in error, because 8bit data are included,
+   or the inner "8bit" labelling placed an unnecessarily high demand on
+   the transport system because the actual included data were actually
+   7bit-safe.
+
+   NOTE ON ENCODING RESTRICTIONS:  Though the prohibition against using
+   content-transfer-encodings on composite body data may seem overly
+   restrictive, it is necessary to prevent nested encodings, in which
+   data are passed through an encoding algorithm multiple times, and
+   must be decoded multiple times in order to be properly viewed.
+   Nested encodings add considerable complexity to user agents:  Aside
+   from the obvious efficiency problems with such multiple encodings,
+   they can obscure the basic structure of a message.  In particular,
+   they can imply that several decoding operations are necessary simply
+
+
+
+Freed & Borenstein          Standards Track                    [Page 17]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   to find out what types of bodies a message contains.  Banning nested
+   encodings may complicate the job of certain mail gateways, but this
+   seems less of a problem than the effect of nested encodings on user
+   agents.
+
+   Any entity with an unrecognized Content-Transfer-Encoding must be
+   treated as if it has a Content-Type of "application/octet-stream",
+   regardless of what the Content-Type header field actually says.
+
+   NOTE ON THE RELATIONSHIP BETWEEN CONTENT-TYPE AND CONTENT-TRANSFER-
+   ENCODING: It may seem that the Content-Transfer-Encoding could be
+   inferred from the characteristics of the media that is to be encoded,
+   or, at the very least, that certain Content-Transfer-Encodings could
+   be mandated for use with specific media types.  There are several
+   reasons why this is not the case. First, given the varying types of
+   transports used for mail, some encodings may be appropriate for some
+   combinations of media types and transports but not for others.  (For
+   example, in an 8bit transport, no encoding would be required for text
+   in certain character sets, while such encodings are clearly required
+   for 7bit SMTP.)
+
+   Second, certain media types may require different types of transfer
+   encoding under different circumstances.  For example, many PostScript
+   bodies might consist entirely of short lines of 7bit data and hence
+   require no encoding at all.  Other PostScript bodies (especially
+   those using Level 2 PostScript's binary encoding mechanism) may only
+   be reasonably represented using a binary transport encoding.
+   Finally, since the Content-Type field is intended to be an open-ended
+   specification mechanism, strict specification of an association
+   between media types and encodings effectively couples the
+   specification of an application protocol with a specific lower-level
+   transport.  This is not desirable since the developers of a media
+   type should not have to be aware of all the transports in use and
+   what their limitations are.
+
+6.5.  Translating Encodings
+
+   The quoted-printable and base64 encodings are designed so that
+   conversion between them is possible.  The only issue that arises in
+   such a conversion is the handling of hard line breaks in quoted-
+   printable encoding output. When converting from quoted-printable to
+   base64 a hard line break in the quoted-printable form represents a
+   CRLF sequence in the canonical form of the data. It must therefore be
+   converted to a corresponding encoded CRLF in the base64 form of the
+   data.  Similarly, a CRLF sequence in the canonical form of the data
+   obtained after base64 decoding must be converted to a quoted-
+   printable hard line break, but ONLY when converting text data.
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 18]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+6.6.  Canonical Encoding Model
+
+   There was some confusion, in the previous versions of this RFC,
+   regarding the model for when email data was to be converted to
+   canonical form and encoded, and in particular how this process would
+   affect the treatment of CRLFs, given that the representation of
+   newlines varies greatly from system to system, and the relationship
+   between content-transfer-encodings and character sets.  A canonical
+   model for encoding is presented in RFC 2049 for this reason.
+
+6.7.  Quoted-Printable Content-Transfer-Encoding
+
+   The Quoted-Printable encoding is intended to represent data that
+   largely consists of octets that correspond to printable characters in
+   the US-ASCII character set.  It encodes the data in such a way that
+   the resulting octets are unlikely to be modified by mail transport.
+   If the data being encoded are mostly US-ASCII text, the encoded form
+   of the data remains largely recognizable by humans.  A body which is
+   entirely US-ASCII may also be encoded in Quoted-Printable to ensure
+   the integrity of the data should the message pass through a
+   character-translating, and/or line-wrapping gateway.
+
+   In this encoding, octets are to be represented as determined by the
+   following rules:
+
+    (1)   (General 8bit representation) Any octet, except a CR or
+          LF that is part of a CRLF line break of the canonical
+          (standard) form of the data being encoded, may be
+          represented by an "=" followed by a two digit
+          hexadecimal representation of the octet's value.  The
+          digits of the hexadecimal alphabet, for this purpose,
+          are "0123456789ABCDEF".  Uppercase letters must be
+          used; lowercase letters are not allowed.  Thus, for
+          example, the decimal value 12 (US-ASCII form feed) can
+          be represented by "=0C", and the decimal value 61 (US-
+          ASCII EQUAL SIGN) can be represented by "=3D".  This
+          rule must be followed except when the following rules
+          allow an alternative encoding.
+
+    (2)   (Literal representation) Octets with decimal values of
+          33 through 60 inclusive, and 62 through 126, inclusive,
+          MAY be represented as the US-ASCII characters which
+          correspond to those octets (EXCLAMATION POINT through
+          LESS THAN, and GREATER THAN through TILDE,
+          respectively).
+
+    (3)   (White Space) Octets with values of 9 and 32 MAY be
+          represented as US-ASCII TAB (HT) and SPACE characters,
+
+
+
+Freed & Borenstein          Standards Track                    [Page 19]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+          respectively, but MUST NOT be so represented at the end
+          of an encoded line.  Any TAB (HT) or SPACE characters
+          on an encoded line MUST thus be followed on that line
+          by a printable character.  In particular, an "=" at the
+          end of an encoded line, indicating a soft line break
+          (see rule #5) may follow one or more TAB (HT) or SPACE
+          characters.  It follows that an octet with decimal
+          value 9 or 32 appearing at the end of an encoded line
+          must be represented according to Rule #1.  This rule is
+          necessary because some MTAs (Message Transport Agents,
+          programs which transport messages from one user to
+          another, or perform a portion of such transfers) are
+          known to pad lines of text with SPACEs, and others are
+          known to remove "white space" characters from the end
+          of a line.  Therefore, when decoding a Quoted-Printable
+          body, any trailing white space on a line must be
+          deleted, as it will necessarily have been added by
+          intermediate transport agents.
+
+    (4)   (Line Breaks) A line break in a text body, represented
+          as a CRLF sequence in the text canonical form, must be
+          represented by a (RFC 822) line break, which is also a
+          CRLF sequence, in the Quoted-Printable encoding.  Since
+          the canonical representation of media types other than
+          text does not generally include the representation of
+          line breaks as CRLF sequences, no hard line breaks
+          (i.e. line breaks that are intended to be meaningful
+          and to be displayed to the user) can occur in the
+          quoted-printable encoding of such types.  Sequences
+          like "=0D", "=0A", "=0A=0D" and "=0D=0A" will routinely
+          appear in non-text data represented in quoted-
+          printable, of course.
+
+          Note that many implementations may elect to encode the
+          local representation of various content types directly
+          rather than converting to canonical form first,
+          encoding, and then converting back to local
+          representation.  In particular, this may apply to plain
+          text material on systems that use newline conventions
+          other than a CRLF terminator sequence.  Such an
+          implementation optimization is permissible, but only
+          when the combined canonicalization-encoding step is
+          equivalent to performing the three steps separately.
+
+    (5)   (Soft Line Breaks) The Quoted-Printable encoding
+          REQUIRES that encoded lines be no more than 76
+          characters long.  If longer lines are to be encoded
+          with the Quoted-Printable encoding, "soft" line breaks
+
+
+
+Freed & Borenstein          Standards Track                    [Page 20]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+          must be used.  An equal sign as the last character on an
+          encoded line indicates such a non-significant ("soft")
+          line break in the encoded text.
+
+   Thus if the "raw" form of the line is a single unencoded line that
+   says:
+
+     Now's the time for all folk to come to the aid of their country.
+
+   This can be represented, in the Quoted-Printable encoding, as:
+
+     Now's the time =
+     for all folk to come=
+      to the aid of their country.
+
+   This provides a mechanism with which long lines are encoded in such a
+   way as to be restored by the user agent.  The 76 character limit does
+   not count the trailing CRLF, but counts all other characters,
+   including any equal signs.
+
+   Since the hyphen character ("-") may be represented as itself in the
+   Quoted-Printable encoding, care must be taken, when encapsulating a
+   quoted-printable encoded body inside one or more multipart entities,
+   to ensure that the boundary delimiter does not appear anywhere in the
+   encoded body.  (A good strategy is to choose a boundary that includes
+   a character sequence such as "=_" which can never appear in a
+   quoted-printable body.  See the definition of multipart messages in
+   RFC 2046.)
+
+   NOTE: The quoted-printable encoding represents something of a
+   compromise between readability and reliability in transport.  Bodies
+   encoded with the quoted-printable encoding will work reliably over
+   most mail gateways, but may not work perfectly over a few gateways,
+   notably those involving translation into EBCDIC.  A higher level of
+   confidence is offered by the base64 Content-Transfer-Encoding.  A way
+   to get reasonably reliable transport through EBCDIC gateways is to
+   also quote the US-ASCII characters
+
+     !"#$@[\]^`{|}~
+
+   according to rule #1.
+
+   Because quoted-printable data is generally assumed to be line-
+   oriented, it is to be expected that the representation of the breaks
+   between the lines of quoted-printable data may be altered in
+   transport, in the same manner that plain text mail has always been
+   altered in Internet mail when passing between systems with differing
+   newline conventions.  If such alterations are likely to constitute a
+
+
+
+Freed & Borenstein          Standards Track                    [Page 21]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   corruption of the data, it is probably more sensible to use the
+   base64 encoding rather than the quoted-printable encoding.
+
+   NOTE: Several kinds of substrings cannot be generated according to
+   the encoding rules for the quoted-printable content-transfer-
+   encoding, and hence are formally illegal if they appear in the output
+   of a quoted-printable encoder. This note enumerates these cases and
+   suggests ways to handle such illegal substrings if any are
+   encountered in quoted-printable data that is to be decoded.
+
+    (1)   An "=" followed by two hexadecimal digits, one or both
+          of which are lowercase letters in "abcdef", is formally
+          illegal. A robust implementation might choose to
+          recognize them as the corresponding uppercase letters.
+
+    (2)   An "=" followed by a character that is neither a
+          hexadecimal digit (including "abcdef") nor the CR
+          character of a CRLF pair is illegal.  This case can be
+          the result of US-ASCII text having been included in a
+          quoted-printable part of a message without itself
+          having been subjected to quoted-printable encoding.  A
+          reasonable approach by a robust implementation might be
+          to include the "=" character and the following
+          character in the decoded data without any
+          transformation and, if possible, indicate to the user
+          that proper decoding was not possible at this point in
+          the data.
+
+    (3)   An "=" cannot be the ultimate or penultimate character
+          in an encoded object.  This could be handled as in case
+          (2) above.
+
+    (4)   Control characters other than TAB, or CR and LF as
+          parts of CRLF pairs, must not appear. The same is true
+          for octets with decimal values greater than 126.  If
+          found in incoming quoted-printable data by a decoder, a
+          robust implementation might exclude them from the
+          decoded data and warn the user that illegal characters
+          were discovered.
+
+    (5)   Encoded lines must not be longer than 76 characters,
+          not counting the trailing CRLF. If longer lines are
+          found in incoming, encoded data, a robust
+          implementation might nevertheless decode the lines, and
+          might report the erroneous encoding to the user.
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 22]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   WARNING TO IMPLEMENTORS:  If binary data is encoded in quoted-
+   printable, care must be taken to encode CR and LF characters as "=0D"
+   and "=0A", respectively.  In particular, a CRLF sequence in binary
+   data should be encoded as "=0D=0A".  Otherwise, if CRLF were
+   represented as a hard line break, it might be incorrectly decoded on
+   platforms with different line break conventions.
+
+   For formalists, the syntax of quoted-printable data is described by
+   the following grammar:
+
+     quoted-printable := qp-line *(CRLF qp-line)
+
+     qp-line := *(qp-segment transport-padding CRLF)
+                qp-part transport-padding
+
+     qp-part := qp-section
+                ; Maximum length of 76 characters
+
+     qp-segment := qp-section *(SPACE / TAB) "="
+                   ; Maximum length of 76 characters
+
+     qp-section := [*(ptext / SPACE / TAB) ptext]
+
+     ptext := hex-octet / safe-char
+
+     safe-char := <any octet with decimal value of 33 through
+                  60 inclusive, and 62 through 126>
+                  ; Characters not listed as "mail-safe" in
+                  ; RFC 2049 are also not recommended.
+
+     hex-octet := "=" 2(DIGIT / "A" / "B" / "C" / "D" / "E" / "F")
+                  ; Octet must be used for characters > 127, =,
+                  ; SPACEs or TABs at the ends of lines, and is
+                  ; recommended for any character not listed in
+                  ; RFC 2049 as "mail-safe".
+
+     transport-padding := *LWSP-char
+                          ; Composers MUST NOT generate
+                          ; non-zero length transport
+                          ; padding, but receivers MUST
+                          ; be able to handle padding
+                          ; added by message transports.
+
+   IMPORTANT:  The addition of LWSP between the elements shown in this
+   BNF is NOT allowed since this BNF does not specify a structured
+   header field.
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 23]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+6.8.  Base64 Content-Transfer-Encoding
+
+   The Base64 Content-Transfer-Encoding is designed to represent
+   arbitrary sequences of octets in a form that need not be humanly
+   readable.  The encoding and decoding algorithms are simple, but the
+   encoded data are consistently only about 33 percent larger than the
+   unencoded data.  This encoding is virtually identical to the one used
+   in Privacy Enhanced Mail (PEM) applications, as defined in RFC 1421.
+
+   A 65-character subset of US-ASCII is used, enabling 6 bits to be
+   represented per printable character. (The extra 65th character, "=",
+   is used to signify a special processing function.)
+
+   NOTE:  This subset has the important property that it is represented
+   identically in all versions of ISO 646, including US-ASCII, and all
+   characters in the subset are also represented identically in all
+   versions of EBCDIC. Other popular encodings, such as the encoding
+   used by the uuencode utility, Macintosh binhex 4.0 [RFC-1741], and
+   the base85 encoding specified as part of Level 2 PostScript, do not
+   share these properties, and thus do not fulfill the portability
+   requirements a binary transport encoding for mail must meet.
+
+   The encoding process represents 24-bit groups of input bits as output
+   strings of 4 encoded characters.  Proceeding from left to right, a
+   24-bit input group is formed by concatenating 3 8bit input groups.
+   These 24 bits are then treated as 4 concatenated 6-bit groups, each
+   of which is translated into a single digit in the base64 alphabet.
+   When encoding a bit stream via the base64 encoding, the bit stream
+   must be presumed to be ordered with the most-significant-bit first.
+   That is, the first bit in the stream will be the high-order bit in
+   the first 8bit byte, and the eighth bit will be the low-order bit in
+   the first 8bit byte, and so on.
+
+   Each 6-bit group is used as an index into an array of 64 printable
+   characters.  The character referenced by the index is placed in the
+   output string.  These characters, identified in Table 1, below, are
+   selected so as to be universally representable, and the set excludes
+   characters with particular significance to SMTP (e.g., ".", CR, LF)
+   and to the multipart boundary delimiters defined in RFC 2046 (e.g.,
+   "-").
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 24]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+                    Table 1: The Base64 Alphabet
+
+     Value Encoding  Value Encoding  Value Encoding  Value Encoding
+         0 A            17 R            34 i            51 z
+         1 B            18 S            35 j            52 0
+         2 C            19 T            36 k            53 1
+         3 D            20 U            37 l            54 2
+         4 E            21 V            38 m            55 3
+         5 F            22 W            39 n            56 4
+         6 G            23 X            40 o            57 5
+         7 H            24 Y            41 p            58 6
+         8 I            25 Z            42 q            59 7
+         9 J            26 a            43 r            60 8
+        10 K            27 b            44 s            61 9
+        11 L            28 c            45 t            62 +
+        12 M            29 d            46 u            63 /
+        13 N            30 e            47 v
+        14 O            31 f            48 w         (pad) =
+        15 P            32 g            49 x
+        16 Q            33 h            50 y
+
+   The encoded output stream must be represented in lines of no more
+   than 76 characters each.  All line breaks or other characters not
+   found in Table 1 must be ignored by decoding software.  In base64
+   data, characters other than those in Table 1, line breaks, and other
+   white space probably indicate a transmission error, about which a
+   warning message or even a message rejection might be appropriate
+   under some circumstances.
+
+   Special processing is performed if fewer than 24 bits are available
+   at the end of the data being encoded.  A full encoding quantum is
+   always completed at the end of a body.  When fewer than 24 input bits
+   are available in an input group, zero bits are added (on the right)
+   to form an integral number of 6-bit groups.  Padding at the end of
+   the data is performed using the "=" character.  Since all base64
+   input is an integral number of octets, only the following cases can
+   arise: (1) the final quantum of encoding input is an integral
+   multiple of 24 bits; here, the final unit of encoded output will be
+   an integral multiple of 4 characters with no "=" padding, (2) the
+   final quantum of encoding input is exactly 8 bits; here, the final
+   unit of encoded output will be two characters followed by two "="
+   padding characters, or (3) the final quantum of encoding input is
+   exactly 16 bits; here, the final unit of encoded output will be three
+   characters followed by one "=" padding character.
+
+   Because it is used only for padding at the end of the data, the
+   occurrence of any "=" characters may be taken as evidence that the
+   end of the data has been reached (without truncation in transit).  No
+
+
+
+Freed & Borenstein          Standards Track                    [Page 25]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+   such assurance is possible, however, when the number of octets
+   transmitted was a multiple of three and no "=" characters are
+   present.
+
+   Any characters outside of the base64 alphabet are to be ignored in
+   base64-encoded data.
+
+   Care must be taken to use the proper octets for line breaks if base64
+   encoding is applied directly to text material that has not been
+   converted to canonical form.  In particular, text line breaks must be
+   converted into CRLF sequences prior to base64 encoding.  The
+   important thing to note is that this may be done directly by the
+   encoder rather than in a prior canonicalization step in some
+   implementations.
+
+   NOTE: There is no need to worry about quoting potential boundary
+   delimiters within base64-encoded bodies within multipart entities
+   because no hyphen characters are used in the base64 encoding.
+
+7.  Content-ID Header Field
+
+   In constructing a high-level user agent, it may be desirable to allow
+   one body to make reference to another.  Accordingly, bodies may be
+   labelled using the "Content-ID" header field, which is syntactically
+   identical to the "Message-ID" header field:
+
+     id := "Content-ID" ":" msg-id
+
+   Like the Message-ID values, Content-ID values must be generated to be
+   world-unique.
+
+   The Content-ID value may be used for uniquely identifying MIME
+   entities in several contexts, particularly for caching data
+   referenced by the message/external-body mechanism.  Although the
+   Content-ID header is generally optional, its use is MANDATORY in
+   implementations which generate data of the optional MIME media type
+   "message/external-body".  That is, each message/external-body entity
+   must have a Content-ID field to permit caching of such data.
+
+   It is also worth noting that the Content-ID value has special
+   semantics in the case of the multipart/alternative media type.  This
+   is explained in the section of RFC 2046 dealing with
+   multipart/alternative.
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 26]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+8.  Content-Description Header Field
+
+   The ability to associate some descriptive information with a given
+   body is often desirable.  For example, it may be useful to mark an
+   "image" body as "a picture of the Space Shuttle Endeavour."  Such
+   text may be placed in the Content-Description header field.  This
+   header field is always optional.
+
+     description := "Content-Description" ":" *text
+
+   The description is presumed to be given in the US-ASCII character
+   set, although the mechanism specified in RFC 2047 may be used for
+   non-US-ASCII Content-Description values.
+
+9.  Additional MIME Header Fields
+
+   Future documents may elect to define additional MIME header fields
+   for various purposes.  Any new header field that further describes
+   the content of a message should begin with the string "Content-" to
+   allow such fields which appear in a message header to be
+   distinguished from ordinary RFC 822 message header fields.
+
+     MIME-extension-field := <Any RFC 822 header field which
+                              begins with the string
+                              "Content-">
+
+10.  Summary
+
+   Using the MIME-Version, Content-Type, and Content-Transfer-Encoding
+   header fields, it is possible to include, in a standardized way,
+   arbitrary types of data with RFC 822 conformant mail messages.  No
+   restrictions imposed by either RFC 821 or RFC 822 are violated, and
+   care has been taken to avoid problems caused by additional
+   restrictions imposed by the characteristics of some Internet mail
+   transport mechanisms (see RFC 2049).
+
+   The next document in this set, RFC 2046, specifies the initial set of
+   media types that can be labelled and transported using these headers.
+
+11.  Security Considerations
+
+   Security issues are discussed in the second document in this set, RFC
+   2046.
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 27]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+12.  Authors' Addresses
+
+   For more information, the authors of this document are best contacted
+   via Internet mail:
+
+   Ned Freed
+   Innosoft International, Inc.
+   1050 East Garvey Avenue South
+   West Covina, CA 91790
+   USA
+
+   Phone: +1 818 919 3600
+   Fax:   +1 818 919 3614
+   EMail: ned@innosoft.com
+
+
+   Nathaniel S. Borenstein
+   First Virtual Holdings
+   25 Washington Avenue
+   Morristown, NJ 07960
+   USA
+
+   Phone: +1 201 540 8967
+   Fax:   +1 201 993 3032
+   EMail: nsb@nsb.fv.com
+
+
+   MIME is a result of the work of the Internet Engineering Task Force
+   Working Group on RFC 822 Extensions.  The chairman of that group,
+   Greg Vaudreuil, may be reached at:
+
+   Gregory M. Vaudreuil
+   Octel Network Services
+   17080 Dallas Parkway
+   Dallas, TX 75248-1905
+   USA
+
+   EMail: Greg.Vaudreuil@Octel.Com
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 28]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+Appendix A -- Collected Grammar
+
+   This appendix contains the complete BNF grammar for all the syntax
+   specified by this document.
+
+   By itself, however, this grammar is incomplete.  It refers by name to
+   several syntax rules that are defined by RFC 822.  Rather than
+   reproduce those definitions here, and risk unintentional differences
+   between the two, this document simply refers the reader to RFC 822
+   for the remaining definitions. Wherever a term is undefined, it
+   refers to the RFC 822 definition.
+
+  attribute := token
+               ; Matching of attributes
+               ; is ALWAYS case-insensitive.
+
+  composite-type := "message" / "multipart" / extension-token
+
+  content := "Content-Type" ":" type "/" subtype
+             *(";" parameter)
+             ; Matching of media type and subtype
+             ; is ALWAYS case-insensitive.
+
+  description := "Content-Description" ":" *text
+
+  discrete-type := "text" / "image" / "audio" / "video" /
+                   "application" / extension-token
+
+  encoding := "Content-Transfer-Encoding" ":" mechanism
+
+  entity-headers := [ content CRLF ]
+                    [ encoding CRLF ]
+                    [ id CRLF ]
+                    [ description CRLF ]
+                    *( MIME-extension-field CRLF )
+
+  extension-token := ietf-token / x-token
+
+  hex-octet := "=" 2(DIGIT / "A" / "B" / "C" / "D" / "E" / "F")
+               ; Octet must be used for characters > 127, =,
+               ; SPACEs or TABs at the ends of lines, and is
+               ; recommended for any character not listed in
+               ; RFC 2049 as "mail-safe".
+
+  iana-token := <A publicly-defined extension token. Tokens
+                 of this form must be registered with IANA
+                 as specified in RFC 2048.>
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 29]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+  ietf-token := <An extension token defined by a
+                 standards-track RFC and registered
+                 with IANA.>
+
+  id := "Content-ID" ":" msg-id
+
+  mechanism := "7bit" / "8bit" / "binary" /
+               "quoted-printable" / "base64" /
+               ietf-token / x-token
+
+  MIME-extension-field := <Any RFC 822 header field which
+                           begins with the string
+                           "Content-">
+
+  MIME-message-headers := entity-headers
+                          fields
+                          version CRLF
+                          ; The ordering of the header
+                          ; fields implied by this BNF
+                          ; definition should be ignored.
+
+  MIME-part-headers := entity-headers
+                       [fields]
+                       ; Any field not beginning with
+                       ; "content-" can have no defined
+                       ; meaning and may be ignored.
+                       ; The ordering of the header
+                       ; fields implied by this BNF
+                       ; definition should be ignored.
+
+  parameter := attribute "=" value
+
+  ptext := hex-octet / safe-char
+
+  qp-line := *(qp-segment transport-padding CRLF)
+             qp-part transport-padding
+
+  qp-part := qp-section
+             ; Maximum length of 76 characters
+
+  qp-section := [*(ptext / SPACE / TAB) ptext]
+
+  qp-segment := qp-section *(SPACE / TAB) "="
+                ; Maximum length of 76 characters
+
+  quoted-printable := qp-line *(CRLF qp-line)
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 30]
+
+RFC 2045                Internet Message Bodies            November 1996
+
+
+  safe-char := <any octet with decimal value of 33 through
+               60 inclusive, and 62 through 126>
+               ; Characters not listed as "mail-safe" in
+               ; RFC 2049 are also not recommended.
+
+  subtype := extension-token / iana-token
+
+  token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
+              or tspecials>
+
+  transport-padding := *LWSP-char
+                       ; Composers MUST NOT generate
+                       ; non-zero length transport
+                       ; padding, but receivers MUST
+                       ; be able to handle padding
+                       ; added by message transports.
+
+  tspecials :=  "(" / ")" / "<" / ">" / "@" /
+                "," / ";" / ":" / "\" / <"> /
+                "/" / "[" / "]" / "?" / "="
+                ; Must be in quoted-string,
+                ; to use within parameter values
+
+  type := discrete-type / composite-type
+
+  value := token / quoted-string
+
+  version := "MIME-Version" ":" 1*DIGIT "." 1*DIGIT
+
+  x-token := <The two characters "X-" or "x-" followed, with
+              no intervening white space, by any token>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 31]
+
diff --git a/doc/mime-p2-rfc2046.txt b/doc/mime-p2-rfc2046.txt
@@ -0,0 +1,2467 @@
+
+
+
+
+
+
+Network Working Group                                          N. Freed
+Request for Comments: 2046                                     Innosoft
+Obsoletes: 1521, 1522, 1590                               N. Borenstein
+Category: Standards Track                                 First Virtual
+                                                          November 1996
+
+
+                 Multipurpose Internet Mail Extensions
+                            (MIME) Part Two:
+                              Media Types
+
+Status of this Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Abstract
+
+   STD 11, RFC 822 defines a message representation protocol specifying
+   considerable detail about US-ASCII message headers, but which leaves
+   the message content, or message body, as flat US-ASCII text.  This
+   set of documents, collectively called the Multipurpose Internet Mail
+   Extensions, or MIME, redefines the format of messages to allow for
+
+    (1)   textual message bodies in character sets other than
+          US-ASCII,
+
+    (2)   an extensible set of different formats for non-textual
+          message bodies,
+
+    (3)   multi-part message bodies, and
+
+    (4)   textual header information in character sets other than
+          US-ASCII.
+
+   These documents are based on earlier work documented in RFC 934, STD
+   11, and RFC 1049, but extend and revise them.  Because RFC 822 said
+   so little about message bodies, these documents are largely
+   orthogonal to (rather than a revision of) RFC 822.
+
+   The initial document in this set, RFC 2045, specifies the various
+   headers used to describe the structure of MIME messages. This second
+   document defines the general structure of the MIME media typing
+   system and defines an initial set of media types. The third document,
+   RFC 2047, describes extensions to RFC 822 to allow non-US-ASCII text
+
+
+
+Freed & Borenstein          Standards Track                     [Page 1]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   data in Internet mail header fields. The fourth document, RFC 2048,
+   specifies various IANA registration procedures for MIME-related
+   facilities.  The fifth and final document, RFC 2049, describes MIME
+   conformance criteria as well as providing some illustrative examples
+   of MIME message formats, acknowledgements, and the bibliography.
+
+   These documents are revisions of RFCs 1521 and 1522, which themselves
+   were revisions of RFCs 1341 and 1342.  An appendix in RFC 2049
+   describes differences and changes from previous versions.
+
+Table of Contents
+
+   1. Introduction .........................................    3
+   2. Definition of a Top-Level Media Type .................    4
+   3. Overview Of The Initial Top-Level Media Types ........    4
+   4. Discrete Media Type Values ...........................    6
+   4.1 Text Media Type .....................................    6
+   4.1.1 Representation of Line Breaks .....................    7
+   4.1.2 Charset Parameter .................................    7
+   4.1.3 Plain Subtype .....................................   11
+   4.1.4 Unrecognized Subtypes .............................   11
+   4.2 Image Media Type ....................................   11
+   4.3 Audio Media Type ....................................   11
+   4.4 Video Media Type ....................................   12
+   4.5 Application Media Type ..............................   12
+   4.5.1 Octet-Stream Subtype ..............................   13
+   4.5.2 PostScript Subtype ................................   14
+   4.5.3 Other Application Subtypes ........................   17
+   5. Composite Media Type Values ..........................   17
+   5.1 Multipart Media Type ................................   17
+   5.1.1 Common Syntax .....................................   19
+   5.1.2 Handling Nested Messages and Multiparts ...........   24
+   5.1.3 Mixed Subtype .....................................   24
+   5.1.4 Alternative Subtype ...............................   24
+   5.1.5 Digest Subtype ....................................   26
+   5.1.6 Parallel Subtype ..................................   27
+   5.1.7 Other Multipart Subtypes ..........................   28
+   5.2 Message Media Type ..................................   28
+   5.2.1 RFC822 Subtype ....................................   28
+   5.2.2 Partial Subtype ...................................   29
+   5.2.2.1 Message Fragmentation and Reassembly ............   30
+   5.2.2.2 Fragmentation and Reassembly Example ............   31
+   5.2.3 External-Body Subtype .............................   33
+   5.2.4 Other Message Subtypes ............................   40
+   6. Experimental Media Type Values .......................   40
+   7. Summary ..............................................   41
+   8. Security Considerations ..............................   41
+   9. Authors' Addresses ...................................   42
+
+
+
+Freed & Borenstein          Standards Track                     [Page 2]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   A. Collected Grammar ....................................   43
+
+1.  Introduction
+
+   The first document in this set, RFC 2045, defines a number of header
+   fields, including Content-Type. The Content-Type field is used to
+   specify the nature of the data in the body of a MIME entity, by
+   giving media type and subtype identifiers, and by providing auxiliary
+   information that may be required for certain media types.  After the
+   type and subtype names, the remainder of the header field is simply a
+   set of parameters, specified in an attribute/value notation.  The
+   ordering of parameters is not significant.
+
+   In general, the top-level media type is used to declare the general
+   type of data, while the subtype specifies a specific format for that
+   type of data.  Thus, a media type of "image/xyz" is enough to tell a
+   user agent that the data is an image, even if the user agent has no
+   knowledge of the specific image format "xyz".  Such information can
+   be used, for example, to decide whether or not to show a user the raw
+   data from an unrecognized subtype -- such an action might be
+   reasonable for unrecognized subtypes of "text", but not for
+   unrecognized subtypes of "image" or "audio".  For this reason,
+   registered subtypes of "text", "image", "audio", and "video" should
+   not contain embedded information that is really of a different type.
+   Such compound formats should be represented using the "multipart" or
+   "application" types.
+
+   Parameters are modifiers of the media subtype, and as such do not
+   fundamentally affect the nature of the content.  The set of
+   meaningful parameters depends on the media type and subtype.  Most
+   parameters are associated with a single specific subtype.  However, a
+   given top-level media type may define parameters which are applicable
+   to any subtype of that type.  Parameters may be required by their
+   defining media type or subtype or they may be optional.  MIME
+   implementations must also ignore any parameters whose names they do
+   not recognize.
+
+   MIME's Content-Type header field and media type mechanism has been
+   carefully designed to be extensible, and it is expected that the set
+   of media type/subtype pairs and their associated parameters will grow
+   significantly over time.  Several other MIME facilities, such as
+   transfer encodings and "message/external-body" access types, are
+   likely to have new values defined over time.  In order to ensure that
+   the set of such values is developed in an orderly, well-specified,
+   and public manner, MIME sets up a registration process which uses the
+   Internet Assigned Numbers Authority (IANA) as a central registry for
+   MIME's various areas of extensibility.  The registration process for
+   these areas is described in a companion document, RFC 2048.
+
+
+
+Freed & Borenstein          Standards Track                     [Page 3]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   The initial seven standard top-level media types are defined and
+   described in the remainder of this document.
+
+2.  Definition of a Top-Level Media Type
+
+   The definition of a top-level media type consists of:
+
+    (1)   a name and a description of the type, including
+          criteria for whether a particular type would qualify
+          under that type,
+
+    (2)   the names and definitions of parameters, if any, which
+          are defined for all subtypes of that type (including
+          whether such parameters are required or optional),
+
+    (3)   how a user agent and/or gateway should handle unknown
+          subtypes of this type,
+
+    (4)   general considerations on gatewaying entities of this
+          top-level type, if any, and
+
+    (5)   any restrictions on content-transfer-encodings for
+          entities of this top-level type.
+
+3.  Overview Of The Initial Top-Level Media Types
+
+   The five discrete top-level media types are:
+
+    (1)   text -- textual information.  The subtype "plain" in
+          particular indicates plain text containing no
+          formatting commands or directives of any sort. Plain
+          text is intended to be displayed "as-is". No special
+          software is required to get the full meaning of the
+          text, aside from support for the indicated character
+          set. Other subtypes are to be used for enriched text in
+          forms where application software may enhance the
+          appearance of the text, but such software must not be
+          required in order to get the general idea of the
+          content.  Possible subtypes of "text" thus include any
+          word processor format that can be read without
+          resorting to software that understands the format.  In
+          particular, formats that employ embedded binary
+          formatting information are not considered directly
+          readable. A very simple and portable subtype,
+          "richtext", was defined in RFC 1341, with a further
+          revision in RFC 1896 under the name "enriched".
+
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 4]
+
+RFC 2046                      Media Types                  November 1996
+
+
+    (2)   image -- image data.  "Image" requires a display device
+          (such as a graphical display, a graphics printer, or a
+          FAX machine) to view the information. An initial
+          subtype is defined for the widely-used image format
+          JPEG.
+
+    (3)   audio -- audio data.  "Audio" requires an audio output
+          device (such as a speaker or a telephone) to "display"
+          the contents.  An initial subtype "basic" is defined in
+          this document.
+
+    (4)   video -- video data.  "Video" requires the capability
+          to display moving images, typically including
+          specialized hardware and software.  An initial subtype
+          "mpeg" is defined in this document.
+
+    (5)   application -- some other kind of data, typically
+          either uninterpreted binary data or information to be
+          processed by an application.  The subtype "octet-
+          stream" is to be used in the case of uninterpreted
+          binary data, in which case the simplest recommended
+          action is to offer to write the information into a file
+          for the user.  The "PostScript" subtype is also defined
+          for the transport of PostScript material.  Other
+          expected uses for "application" include spreadsheets,
+          data for mail-based scheduling systems, and languages
+          for "active" (computational) messaging, and word
+          processing formats that are not directly readable.
+          Note that security considerations may exist for some
+          types of application data, most notably
+          "application/PostScript" and any form of active
+          messaging.  These issues are discussed later in this
+          document.
+
+   The two composite top-level media types are:
+
+    (1)   multipart -- data consisting of multiple entities of
+          independent data types.  Four subtypes are initially
+          defined, including the basic "mixed" subtype specifying
+          a generic mixed set of parts, "alternative" for
+          representing the same data in multiple formats,
+          "parallel" for parts intended to be viewed
+          simultaneously, and "digest" for multipart entities in
+          which each part has a default type of "message/rfc822".
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 5]
+
+RFC 2046                      Media Types                  November 1996
+
+
+    (2)   message -- an encapsulated message.  A body of media
+          type "message" is itself all or a portion of some kind
+          of message object.  Such objects may or may not in turn
+          contain other entities.  The "rfc822" subtype is used
+          when the encapsulated content is itself an RFC 822
+          message.  The "partial" subtype is defined for partial
+          RFC 822 messages, to permit the fragmented transmission
+          of bodies that are thought to be too large to be passed
+          through transport facilities in one piece.  Another
+          subtype, "external-body", is defined for specifying
+          large bodies by reference to an external data source.
+
+   It should be noted that the list of media type values given here may
+   be augmented in time, via the mechanisms described above, and that
+   the set of subtypes is expected to grow substantially.
+
+4.  Discrete Media Type Values
+
+   Five of the seven initial media type values refer to discrete bodies.
+   The content of these types must be handled by non-MIME mechanisms;
+   they are opaque to MIME processors.
+
+4.1.  Text Media Type
+
+   The "text" media type is intended for sending material which is
+   principally textual in form.  A "charset" parameter may be used to
+   indicate the character set of the body text for "text" subtypes,
+   notably including the subtype "text/plain", which is a generic
+   subtype for plain text.  Plain text does not provide for or allow
+   formatting commands, font attribute specifications, processing
+   instructions, interpretation directives, or content markup.  Plain
+   text is seen simply as a linear sequence of characters, possibly
+   interrupted by line breaks or page breaks.  Plain text may allow the
+   stacking of several characters in the same position in the text.
+   Plain text in scripts like Arabic and Hebrew may also include
+   facilities that allow the arbitrary mixing of text segments with
+   opposite writing directions.
+
+   Beyond plain text, there are many formats for representing what might
+   be known as "rich text".  An interesting characteristic of many such
+   representations is that they are to some extent readable even without
+   the software that interprets them.  It is useful, then, to
+   distinguish them, at the highest level, from such unreadable data as
+   images, audio, or text represented in an unreadable form. In the
+   absence of appropriate interpretation software, it is reasonable to
+   show subtypes of "text" to the user, while it is not reasonable to do
+   so with most nontextual data. Such formatted textual data should be
+   represented using subtypes of "text".
+
+
+
+Freed & Borenstein          Standards Track                     [Page 6]
+
+RFC 2046                      Media Types                  November 1996
+
+
+4.1.1.  Representation of Line Breaks
+
+   The canonical form of any MIME "text" subtype MUST always represent a
+   line break as a CRLF sequence.  Similarly, any occurrence of CRLF in
+   MIME "text" MUST represent a line break.  Use of CR and LF outside of
+   line break sequences is also forbidden.
+
+   This rule applies regardless of format or character set or sets
+   involved.
+
+   NOTE: The proper interpretation of line breaks when a body is
+   displayed depends on the media type. In particular, while it is
+   appropriate to treat a line break as a transition to a new line when
+   displaying a "text/plain" body, this treatment is actually incorrect
+   for other subtypes of "text" like "text/enriched" [RFC-1896].
+   Similarly, whether or not line breaks should be added during display
+   operations is also a function of the media type. It should not be
+   necessary to add any line breaks to display "text/plain" correctly,
+   whereas proper display of "text/enriched" requires the appropriate
+   addition of line breaks.
+
+   NOTE: Some protocols define a maximum line length.  E.g. SMTP [RFC-
+   821] allows a maximum of 998 octets before the next CRLF sequence.
+   To be transported by such protocols, data which includes too long
+   segments without CRLF sequences must be encoded with a suitable
+   content-transfer-encoding.
+
+4.1.2.  Charset Parameter
+
+   A critical parameter that may be specified in the Content-Type field
+   for "text/plain" data is the character set.  This is specified with a
+   "charset" parameter, as in:
+
+     Content-type: text/plain; charset=iso-8859-1
+
+   Unlike some other parameter values, the values of the charset
+   parameter are NOT case sensitive.  The default character set, which
+   must be assumed in the absence of a charset parameter, is US-ASCII.
+
+   The specification for any future subtypes of "text" must specify
+   whether or not they will also utilize a "charset" parameter, and may
+   possibly restrict its values as well.  For other subtypes of "text"
+   than "text/plain", the semantics of the "charset" parameter should be
+   defined to be identical to those specified here for "text/plain",
+   i.e., the body consists entirely of characters in the given charset.
+   In particular, definers of future "text" subtypes should pay close
+   attention to the implications of multioctet character sets for their
+   subtype definitions.
+
+
+
+Freed & Borenstein          Standards Track                     [Page 7]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   The charset parameter for subtypes of "text" gives a name of a
+   character set, as "character set" is defined in RFC 2045.  The rules
+   regarding line breaks detailed in the previous section must also be
+   observed -- a character set whose definition does not conform to
+   these rules cannot be used in a MIME "text" subtype.
+
+   An initial list of predefined character set names can be found at the
+   end of this section.  Additional character sets may be registered
+   with IANA.
+
+   Other media types than subtypes of "text" might choose to employ the
+   charset parameter as defined here, but with the CRLF/line break
+   restriction removed.  Therefore, all character sets that conform to
+   the general definition of "character set" in RFC 2045 can be
+   registered for MIME use.
+
+   Note that if the specified character set includes 8-bit characters
+   and such characters are used in the body, a Content-Transfer-Encoding
+   header field and a corresponding encoding on the data are required in
+   order to transmit the body via some mail transfer protocols, such as
+   SMTP [RFC-821].
+
+   The default character set, US-ASCII, has been the subject of some
+   confusion and ambiguity in the past.  Not only were there some
+   ambiguities in the definition, there have been wide variations in
+   practice.  In order to eliminate such ambiguity and variations in the
+   future, it is strongly recommended that new user agents explicitly
+   specify a character set as a media type parameter in the Content-Type
+   header field. "US-ASCII" does not indicate an arbitrary 7-bit
+   character set, but specifies that all octets in the body must be
+   interpreted as characters according to the US-ASCII character set.
+   National and application-oriented versions of ISO 646 [ISO-646] are
+   usually NOT identical to US-ASCII, and in that case their use in
+   Internet mail is explicitly discouraged.  The omission of the ISO 646
+   character set from this document is deliberate in this regard.  The
+   character set name of "US-ASCII" explicitly refers to the character
+   set defined in ANSI X3.4-1986 [US-ASCII].  The new international
+   reference version (IRV) of the 1991 edition of ISO 646 is identical
+   to US-ASCII.  The character set name "ASCII" is reserved and must not
+   be used for any purpose.
+
+   NOTE: RFC 821 explicitly specifies "ASCII", and references an earlier
+   version of the American Standard.  Insofar as one of the purposes of
+   specifying a media type and character set is to permit the receiver
+   to unambiguously determine how the sender intended the coded message
+   to be interpreted, assuming anything other than "strict ASCII" as the
+   default would risk unintentional and incompatible changes to the
+   semantics of messages now being transmitted.  This also implies that
+
+
+
+Freed & Borenstein          Standards Track                     [Page 8]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   messages containing characters coded according to other versions of
+   ISO 646 than US-ASCII and the 1991 IRV, or using code-switching
+   procedures (e.g., those of ISO 2022), as well as 8bit or multiple
+   octet character encodings MUST use an appropriate character set
+   specification to be consistent with MIME.
+
+   The complete US-ASCII character set is listed in ANSI X3.4-1986.
+   Note that the control characters including DEL (0-31, 127) have no
+   defined meaning apart from the combination CRLF (US-ASCII values
+   13 and 10) indicating a new line.  Two of the characters have de
+   facto meanings in wide use: FF (12) often means "start subsequent
+   text on the beginning of a new page"; and TAB or HT (9) often (though
+   not always) means "move the cursor to the next available column after
+   the current position where the column number is a multiple of 8
+   (counting the first column as column 0)."  Aside from these
+   conventions, any use of the control characters or DEL in a body must
+   either occur
+
+    (1)   because a subtype of text other than "plain"
+          specifically assigns some additional meaning, or
+
+    (2)   within the context of a private agreement between the
+          sender and recipient. Such private agreements are
+          discouraged and should be replaced by the other
+          capabilities of this document.
+
+   NOTE: An enormous proliferation of character sets exists beyond US-
+   ASCII.  A large number of partially or totally overlapping character
+   sets is NOT a good thing.  A SINGLE character set that can be used
+   universally for representing all of the world's languages in Internet
+   mail would be preferable.  Unfortunately, existing practice in
+   several communities seems to point to the continued use of multiple
+   character sets in the near future.  A small number of standard
+   character sets are, therefore, defined for Internet use in this
+   document.
+
+   The defined charset values are:
+
+    (1)   US-ASCII -- as defined in ANSI X3.4-1986 [US-ASCII].
+
+    (2)   ISO-8859-X -- where "X" is to be replaced, as
+          necessary, for the parts of ISO-8859 [ISO-8859].  Note
+          that the ISO 646 character sets have deliberately been
+          omitted in favor of their 8859 replacements, which are
+          the designated character sets for Internet mail.  As of
+          the publication of this document, the legitimate values
+          for "X" are the digits 1 through 10.
+
+
+
+
+Freed & Borenstein          Standards Track                     [Page 9]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   Characters in the range 128-159 have no assigned meaning in ISO-8859-
+   X.  Characters with values below 128 in ISO-8859-X have the same
+   assigned meaning as they do in US-ASCII.
+
+   Part 6 of ISO 8859 (Latin/Arabic alphabet) and part 8 (Latin/Hebrew
+   alphabet) include both characters for which the normal writing
+   direction is right to left and characters for which it is left to
+   right, but do not define a canonical ordering method for representing
+   bi-directional text.  The charset values "ISO-8859-6" and "ISO-8859-
+   8", however, specify that the visual method is used [RFC-1556].
+
+   All of these character sets are used as pure 7bit or 8bit sets
+   without any shift or escape functions.  The meaning of shift and
+   escape sequences in these character sets is not defined.
+
+   The character sets specified above are the ones that were relatively
+   uncontroversial during the drafting of MIME.  This document does not
+   endorse the use of any particular character set other than US-ASCII,
+   and recognizes that the future evolution of world character sets
+   remains unclear.
+
+   Note that the character set used, if anything other than US-ASCII,
+   must always be explicitly specified in the Content-Type field.
+
+   No character set name other than those defined above may be used in
+   Internet mail without the publication of a formal specification and
+   its registration with IANA, or by private agreement, in which case
+   the character set name must begin with "X-".
+
+   Implementors are discouraged from defining new character sets unless
+   absolutely necessary.
+
+   The "charset" parameter has been defined primarily for the purpose of
+   textual data, and is described in this section for that reason.
+   However, it is conceivable that non-textual data might also wish to
+   specify a charset value for some purpose, in which case the same
+   syntax and values should be used.
+
+   In general, composition software should always use the "lowest common
+   denominator" character set possible.  For example, if a body contains
+   only US-ASCII characters, it SHOULD be marked as being in the US-
+   ASCII character set, not ISO-8859-1, which, like all the ISO-8859
+   family of character sets, is a superset of US-ASCII.  More generally,
+   if a widely-used character set is a subset of another character set,
+   and a body contains only characters in the widely-used subset, it
+   should be labelled as being in that subset.  This will increase the
+   chances that the recipient will be able to view the resulting entity
+   correctly.
+
+
+
+Freed & Borenstein          Standards Track                    [Page 10]
+
+RFC 2046                      Media Types                  November 1996
+
+
+4.1.3.  Plain Subtype
+
+   The simplest and most important subtype of "text" is "plain".  This
+   indicates plain text that does not contain any formatting commands or
+   directives. Plain text is intended to be displayed "as-is", that is,
+   no interpretation of embedded formatting commands, font attribute
+   specifications, processing instructions, interpretation directives,
+   or content markup should be necessary for proper display.  The
+   default media type of "text/plain; charset=us-ascii" for Internet
+   mail describes existing Internet practice.  That is, it is the type
+   of body defined by RFC 822.
+
+   No other "text" subtype is defined by this document.
+
+4.1.4.  Unrecognized Subtypes
+
+   Unrecognized subtypes of "text" should be treated as subtype "plain"
+   as long as the MIME implementation knows how to handle the charset.
+   Unrecognized subtypes which also specify an unrecognized charset
+   should be treated as "application/octet-stream".
+
+4.2.  Image Media Type
+
+   A media type of "image" indicates that the body contains an image.
+   The subtype names the specific image format.  These names are not
+   case sensitive. An initial subtype is "jpeg" for the JPEG format
+   using JFIF encoding [JPEG].
+
+   The list of "image" subtypes given here is neither exclusive nor
+   exhaustive, and is expected to grow as more types are registered with
+   IANA, as described in RFC 2048.
+
+   Unrecognized subtypes of "image" should at a minimum be treated as
+   "application/octet-stream".  Implementations may optionally elect to
+   pass subtypes of "image" that they do not specifically recognize to a
+   secure and robust general-purpose image viewing application, if such
+   an application is available.
+
+   NOTE: Use of a general-purpose image viewing application this way
+   inherits the security problems of the most dangerous type supported
+   by the application.
+
+4.3.  Audio Media Type
+
+   A media type of "audio" indicates that the body contains audio data.
+   Although there is not yet a consensus on an "ideal" audio format for
+   use with computers, there is a pressing need for a format capable of
+   providing interoperable behavior.
+
+
+
+Freed & Borenstein          Standards Track                    [Page 11]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   The initial subtype of "basic" is specified to meet this requirement
+   by providing an absolutely minimal lowest common denominator audio
+   format.  It is expected that richer formats for higher quality and/or
+   lower bandwidth audio will be defined by a later document.
+
+   The content of the "audio/basic" subtype is single channel audio
+   encoded using 8bit ISDN mu-law [PCM] at a sample rate of 8000 Hz.
+
+   Unrecognized subtypes of "audio" should at a minimum be treated as
+   "application/octet-stream".  Implementations may optionally elect to
+   pass subtypes of "audio" that they do not specifically recognize to a
+   robust general-purpose audio playing application, if such an
+   application is available.
+
+4.4.  Video Media Type
+
+   A media type of "video" indicates that the body contains a time-
+   varying-picture image, possibly with color and coordinated sound.
+   The term 'video' is used in its most generic sense, rather than with
+   reference to any particular technology or format, and is not meant to
+   preclude subtypes such as animated drawings encoded compactly.  The
+   subtype "mpeg" refers to video coded according to the MPEG standard
+   [MPEG].
+
+   Note that although in general this document strongly discourages the
+   mixing of multiple media in a single body, it is recognized that many
+   so-called video formats include a representation for synchronized
+   audio, and this is explicitly permitted for subtypes of "video".
+
+   Unrecognized subtypes of "video" should at a minimum be treated as
+   "application/octet-stream".  Implementations may optionally elect to
+   pass subtypes of "video" that they do not specifically recognize to a
+   robust general-purpose video display application, if such an
+   application is available.
+
+4.5.  Application Media Type
+
+   The "application" media type is to be used for discrete data which do
+   not fit in any of the other categories, and particularly for data to
+   be processed by some type of application program.  This is
+   information which must be processed by an application before it is
+   viewable or usable by a user.  Expected uses for the "application"
+   media type include file transfer, spreadsheets, data for mail-based
+   scheduling systems, and languages for "active" (computational)
+   material.  (The latter, in particular, can pose security problems
+   which must be understood by implementors, and are considered in
+   detail in the discussion of the "application/PostScript" media type.)
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 12]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   For example, a meeting scheduler might define a standard
+   representation for information about proposed meeting dates.  An
+   intelligent user agent would use this information to conduct a dialog
+   with the user, and might then send additional material based on that
+   dialog.  More generally, there have been several "active" messaging
+   languages developed in which programs in a suitably specialized
+   language are transported to a remote location and automatically run
+   in the recipient's environment.
+
+   Such applications may be defined as subtypes of the "application"
+   media type. This document defines two subtypes:
+
+   octet-stream, and PostScript.
+
+   The subtype of "application" will often be either the name or include
+   part of the name of the application for which the data are intended.
+   This does not mean, however, that any application program name may be
+   used freely as a subtype of "application".
+
+4.5.1.  Octet-Stream Subtype
+
+   The "octet-stream" subtype is used to indicate that a body contains
+   arbitrary binary data.  The set of currently defined parameters is:
+
+    (1)   TYPE -- the general type or category of binary data.
+          This is intended as information for the human recipient
+          rather than for any automatic processing.
+
+    (2)   PADDING -- the number of bits of padding that were
+          appended to the bit-stream comprising the actual
+          contents to produce the enclosed 8bit byte-oriented
+          data.  This is useful for enclosing a bit-stream in a
+          body when the total number of bits is not a multiple of
+          8.
+
+   Both of these parameters are optional.
+
+   An additional parameter, "CONVERSIONS", was defined in RFC 1341 but
+   has since been removed.  RFC 1341 also defined the use of a "NAME"
+   parameter which gave a suggested file name to be used if the data
+   were to be written to a file.  This has been deprecated in
+   anticipation of a separate Content-Disposition header field, to be
+   defined in a subsequent RFC.
+
+   The recommended action for an implementation that receives an
+   "application/octet-stream" entity is to simply offer to put the data
+   in a file, with any Content-Transfer-Encoding undone, or perhaps to
+   use it as input to a user-specified process.
+
+
+
+Freed & Borenstein          Standards Track                    [Page 13]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   To reduce the danger of transmitting rogue programs, it is strongly
+   recommended that implementations NOT implement a path-search
+   mechanism whereby an arbitrary program named in the Content-Type
+   parameter (e.g., an "interpreter=" parameter) is found and executed
+   using the message body as input.
+
+4.5.2.  PostScript Subtype
+
+   A media type of "application/postscript" indicates a PostScript
+   program.  Currently two variants of the PostScript language are
+   allowed; the original level 1 variant is described in [POSTSCRIPT]
+   and the more recent level 2 variant is described in [POSTSCRIPT2].
+
+   PostScript is a registered trademark of Adobe Systems, Inc.  Use of
+   the MIME media type "application/postscript" implies recognition of
+   that trademark and all the rights it entails.
+
+   The PostScript language definition provides facilities for internal
+   labelling of the specific language features a given program uses.
+   This labelling, called the PostScript document structuring
+   conventions, or DSC, is very general and provides substantially more
+   information than just the language level.  The use of document
+   structuring conventions, while not required, is strongly recommended
+   as an aid to interoperability.  Documents which lack proper
+   structuring conventions cannot be tested to see whether or not they
+   will work in a given environment.  As such, some systems may assume
+   the worst and refuse to process unstructured documents.
+
+   The execution of general-purpose PostScript interpreters entails
+   serious security risks, and implementors are discouraged from simply
+   sending PostScript bodies to "off-the-shelf" interpreters.  While it
+   is usually safe to send PostScript to a printer, where the potential
+   for harm is greatly constrained by typical printer environments,
+   implementors should consider all of the following before they add
+   interactive display of PostScript bodies to their MIME readers.
+
+   The remainder of this section outlines some, though probably not all,
+   of the possible problems with the transport of PostScript entities.
+
+    (1)   Dangerous operations in the PostScript language
+          include, but may not be limited to, the PostScript
+          operators "deletefile", "renamefile", "filenameforall",
+          and "file".  "File" is only dangerous when applied to
+          something other than standard input or output.
+          Implementations may also define additional nonstandard
+          file operators; these may also pose a threat to
+          security. "Filenameforall", the wildcard file search
+          operator, may appear at first glance to be harmless.
+
+
+
+Freed & Borenstein          Standards Track                    [Page 14]
+
+RFC 2046                      Media Types                  November 1996
+
+
+          Note, however, that this operator has the potential to
+          reveal information about what files the recipient has
+          access to, and this information may itself be
+          sensitive.  Message senders should avoid the use of
+          potentially dangerous file operators, since these
+          operators are quite likely to be unavailable in secure
+          PostScript implementations.  Message receiving and
+          displaying software should either completely disable
+          all potentially dangerous file operators or take
+          special care not to delegate any special authority to
+          their operation.  These operators should be viewed as
+          being done by an outside agency when interpreting
+          PostScript documents.  Such disabling and/or checking
+          should be done completely outside of the reach of the
+          PostScript language itself; care should be taken to
+          ensure that no method exists for re-enabling full-
+          function versions of these operators.
+
+    (2)   The PostScript language provides facilities for exiting
+          the normal interpreter, or server, loop.  Changes made
+          in this "outer" environment are customarily retained
+          across documents, and may in some cases be retained
+          semipermanently in nonvolatile memory.  The operators
+          associated with exiting the interpreter loop have the
+          potential to interfere with subsequent document
+          processing.  As such, their unrestrained use
+          constitutes a threat of service denial.  PostScript
+          operators that exit the interpreter loop include, but
+          may not be limited to, the exitserver and startjob
+          operators.  Message sending software should not
+          generate PostScript that depends on exiting the
+          interpreter loop to operate, since the ability to exit
+          will probably be unavailable in secure PostScript
+          implementations.  Message receiving and displaying
+          software should completely disable the ability to make
+          retained changes to the PostScript environment by
+          eliminating or disabling the "startjob" and
+          "exitserver" operations.  If these operations cannot be
+          eliminated or completely disabled the password
+          associated with them should at least be set to a hard-
+          to-guess value.
+
+    (3)   PostScript provides operators for setting system-wide
+          and device-specific parameters.  These parameter
+          settings may be retained across jobs and may
+          potentially pose a threat to the correct operation of
+          the interpreter.  The PostScript operators that set
+          system and device parameters include, but may not be
+
+
+
+Freed & Borenstein          Standards Track                    [Page 15]
+
+RFC 2046                      Media Types                  November 1996
+
+
+          limited to, the "setsystemparams" and "setdevparams"
+          operators.  Message sending software should not
+          generate PostScript that depends on the setting of
+          system or device parameters to operate correctly.  The
+          ability to set these parameters will probably be
+          unavailable in secure PostScript implementations.
+          Message receiving and displaying software should
+          disable the ability to change system and device
+          parameters.  If these operators cannot be completely
+          disabled the password associated with them should at
+          least be set to a hard-to-guess value.
+
+    (4)   Some PostScript implementations provide nonstandard
+          facilities for the direct loading and execution of
+          machine code.  Such facilities are quite obviously open
+          to substantial abuse.  Message sending software should
+          not make use of such features.  Besides being totally
+          hardware-specific, they are also likely to be
+          unavailable in secure implementations of PostScript.
+          Message receiving and displaying software should not
+          allow such operators to be used if they exist.
+
+    (5)   PostScript is an extensible language, and many, if not
+          most, implementations of it provide a number of their
+          own extensions.  This document does not deal with such
+          extensions explicitly since they constitute an unknown
+          factor.  Message sending software should not make use
+          of nonstandard extensions; they are likely to be
+          missing from some implementations.  Message receiving
+          and displaying software should make sure that any
+          nonstandard PostScript operators are secure and don't
+          present any kind of threat.
+
+    (6)   It is possible to write PostScript that consumes huge
+          amounts of various system resources.  It is also
+          possible to write PostScript programs that loop
+          indefinitely.  Both types of programs have the
+          potential to cause damage if sent to unsuspecting
+          recipients.  Message-sending software should avoid the
+          construction and dissemination of such programs, which
+          is antisocial.  Message receiving and displaying
+          software should provide appropriate mechanisms to abort
+          processing after a reasonable amount of time has
+          elapsed. In addition, PostScript interpreters should be
+          limited to the consumption of only a reasonable amount
+          of any given system resource.
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 16]
+
+RFC 2046                      Media Types                  November 1996
+
+
+    (7)   It is possible to include raw binary information inside
+          PostScript in various forms.  This is not recommended
+          for use in Internet mail, both because it is not
+          supported by all PostScript interpreters and because it
+          significantly complicates the use of a MIME Content-
+          Transfer-Encoding.  (Without such binary, PostScript
+          may typically be viewed as line-oriented data.  The
+          treatment of CRLF sequences becomes extremely
+          problematic if binary and line-oriented data are mixed
+          in a single PostScript data stream.)
+
+    (8)   Finally, bugs may exist in some PostScript interpreters
+          which could possibly be exploited to gain unauthorized
+          access to a recipient's system.  Apart from noting this
+          possibility, there is no specific action to take to
+          prevent this, apart from the timely correction of such
+          bugs if any are found.
+
+4.5.3.  Other Application Subtypes
+
+   It is expected that many other subtypes of "application" will be
+   defined in the future.  MIME implementations must at a minimum treat
+   any unrecognized subtypes as being equivalent to "application/octet-
+   stream".
+
+5.  Composite Media Type Values
+
+   The remaining two of the seven initial Content-Type values refer to
+   composite entities.  Composite entities are handled using MIME
+   mechanisms -- a MIME processor typically handles the body directly.
+
+5.1.  Multipart Media Type
+
+   In the case of multipart entities, in which one or more different
+   sets of data are combined in a single body, a "multipart" media type
+   field must appear in the entity's header.  The body must then contain
+   one or more body parts, each preceded by a boundary delimiter line,
+   and the last one followed by a closing boundary delimiter line.
+   After its boundary delimiter line, each body part then consists of a
+   header area, a blank line, and a body area.  Thus a body part is
+   similar to an RFC 822 message in syntax, but different in meaning.
+
+   A body part is an entity and hence is NOT to be interpreted as
+   actually being an RFC 822 message.  To begin with, NO header fields
+   are actually required in body parts.  A body part that starts with a
+   blank line, therefore, is allowed and is a body part for which all
+   default values are to be assumed.  In such a case, the absence of a
+   Content-Type header usually indicates that the corresponding body has
+
+
+
+Freed & Borenstein          Standards Track                    [Page 17]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   a content-type of "text/plain; charset=US-ASCII".
+
+   The only header fields that have defined meaning for body parts are
+   those the names of which begin with "Content-".  All other header
+   fields may be ignored in body parts.  Although they should generally
+   be retained if at all possible, they may be discarded by gateways if
+   necessary.  Such other fields are permitted to appear in body parts
+   but must not be depended on.  "X-" fields may be created for
+   experimental or private purposes, with the recognition that the
+   information they contain may be lost at some gateways.
+
+   NOTE:  The distinction between an RFC 822 message and a body part is
+   subtle, but important.  A gateway between Internet and X.400 mail,
+   for example, must be able to tell the difference between a body part
+   that contains an image and a body part that contains an encapsulated
+   message, the body of which is a JPEG image.  In order to represent
+   the latter, the body part must have "Content-Type: message/rfc822",
+   and its body (after the blank line) must be the encapsulated message,
+   with its own "Content-Type: image/jpeg" header field.  The use of
+   similar syntax facilitates the conversion of messages to body parts,
+   and vice versa, but the distinction between the two must be
+   understood by implementors.  (For the special case in which parts
+   actually are messages, a "digest" subtype is also defined.)
+
+   As stated previously, each body part is preceded by a boundary
+   delimiter line that contains the boundary delimiter.  The boundary
+   delimiter MUST NOT appear inside any of the encapsulated parts, on a
+   line by itself or as the prefix of any line.  This implies that it is
+   crucial that the composing agent be able to choose and specify a
+   unique boundary parameter value that does not contain the boundary
+   parameter value of an enclosing multipart as a prefix.
+
+   All present and future subtypes of the "multipart" type must use an
+   identical syntax.  Subtypes may differ in their semantics, and may
+   impose additional restrictions on syntax, but must conform to the
+   required syntax for the "multipart" type.  This requirement ensures
+   that all conformant user agents will at least be able to recognize
+   and separate the parts of any multipart entity, even those of an
+   unrecognized subtype.
+
+   As stated in the definition of the Content-Transfer-Encoding field
+   [RFC 2045], no encoding other than "7bit", "8bit", or "binary" is
+   permitted for entities of type "multipart".  The "multipart" boundary
+   delimiters and header fields are always represented as 7bit US-ASCII
+   in any case (though the header fields may encode non-US-ASCII header
+   text as per RFC 2047) and data within the body parts can be encoded
+   on a part-by-part basis, with Content-Transfer-Encoding fields for
+   each appropriate body part.
+
+
+
+Freed & Borenstein          Standards Track                    [Page 18]
+
+RFC 2046                      Media Types                  November 1996
+
+
+5.1.1.  Common Syntax
+
+   This section defines a common syntax for subtypes of "multipart".
+   All subtypes of "multipart" must use this syntax.  A simple example
+   of a multipart message also appears in this section.  An example of a
+   more complex multipart message is given in RFC 2049.
+
+   The Content-Type field for multipart entities requires one parameter,
+   "boundary". The boundary delimiter line is then defined as a line
+   consisting entirely of two hyphen characters ("-", decimal value 45)
+   followed by the boundary parameter value from the Content-Type header
+   field, optional linear whitespace, and a terminating CRLF.
+
+   NOTE:  The hyphens are for rough compatibility with the earlier RFC
+   934 method of message encapsulation, and for ease of searching for
+   the boundaries in some implementations.  However, it should be noted
+   that multipart messages are NOT completely compatible with RFC 934
+   encapsulations; in particular, they do not obey RFC 934 quoting
+   conventions for embedded lines that begin with hyphens.  This
+   mechanism was chosen over the RFC 934 mechanism because the latter
+   causes lines to grow with each level of quoting.  The combination of
+   this growth with the fact that SMTP implementations sometimes wrap
+   long lines made the RFC 934 mechanism unsuitable for use in the event
+   that deeply-nested multipart structuring is ever desired.
+
+   WARNING TO IMPLEMENTORS:  The grammar for parameters on the Content-
+   type field is such that it is often necessary to enclose the boundary
+   parameter values in quotes on the Content-type line.  This is not
+   always necessary, but never hurts. Implementors should be sure to
+   study the grammar carefully in order to avoid producing invalid
+   Content-type fields.  Thus, a typical "multipart" Content-Type header
+   field might look like this:
+
+     Content-Type: multipart/mixed; boundary=gc0p4Jq0M2Yt08j34c0p
+
+   But the following is not valid:
+
+     Content-Type: multipart/mixed; boundary=gc0pJq0M:08jU534c0p
+
+   (because of the colon) and must instead be represented as
+
+     Content-Type: multipart/mixed; boundary="gc0pJq0M:08jU534c0p"
+
+   This Content-Type value indicates that the content consists of one or
+   more parts, each with a structure that is syntactically identical to
+   an RFC 822 message, except that the header area is allowed to be
+   completely empty, and that the parts are each preceded by the line
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 19]
+
+RFC 2046                      Media Types                  November 1996
+
+
+     --gc0pJq0M:08jU534c0p
+
+   The boundary delimiter MUST occur at the beginning of a line, i.e.,
+   following a CRLF, and the initial CRLF is considered to be attached
+   to the boundary delimiter line rather than part of the preceding
+   part.  The boundary may be followed by zero or more characters of
+   linear whitespace. It is then terminated by either another CRLF and
+   the header fields for the next part, or by two CRLFs, in which case
+   there are no header fields for the next part.  If no Content-Type
+   field is present it is assumed to be "message/rfc822" in a
+   "multipart/digest" and "text/plain" otherwise.
+
+   NOTE:  The CRLF preceding the boundary delimiter line is conceptually
+   attached to the boundary so that it is possible to have a part that
+   does not end with a CRLF (line break).  Body parts that must be
+   considered to end with line breaks, therefore, must have two CRLFs
+   preceding the boundary delimiter line, the first of which is part of
+   the preceding body part, and the second of which is part of the
+   encapsulation boundary.
+
+   Boundary delimiters must not appear within the encapsulated material,
+   and must be no longer than 70 characters, not counting the two
+   leading hyphens.
+
+   The boundary delimiter line following the last body part is a
+   distinguished delimiter that indicates that no further body parts
+   will follow.  Such a delimiter line is identical to the previous
+   delimiter lines, with the addition of two more hyphens after the
+   boundary parameter value.
+
+     --gc0pJq0M:08jU534c0p--
+
+   NOTE TO IMPLEMENTORS:  Boundary string comparisons must compare the
+   boundary value with the beginning of each candidate line.  An exact
+   match of the entire candidate line is not required; it is sufficient
+   that the boundary appear in its entirety following the CRLF.
+
+   There appears to be room for additional information prior to the
+   first boundary delimiter line and following the final boundary
+   delimiter line.  These areas should generally be left blank, and
+   implementations must ignore anything that appears before the first
+   boundary delimiter line or after the last one.
+
+   NOTE:  These "preamble" and "epilogue" areas are generally not used
+   because of the lack of proper typing of these parts and the lack of
+   clear semantics for handling these areas at gateways, particularly
+   X.400 gateways.  However, rather than leaving the preamble area
+   blank, many MIME implementations have found this to be a convenient
+
+
+
+Freed & Borenstein          Standards Track                    [Page 20]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   place to insert an explanatory note for recipients who read the
+   message with pre-MIME software, since such notes will be ignored by
+   MIME-compliant software.
+
+   NOTE:  Because boundary delimiters must not appear in the body parts
+   being encapsulated, a user agent must exercise care to choose a
+   unique boundary parameter value.  The boundary parameter value in the
+   example above could have been the result of an algorithm designed to
+   produce boundary delimiters with a very low probability of already
+   existing in the data to be encapsulated without having to prescan the
+   data.  Alternate algorithms might result in more "readable" boundary
+   delimiters for a recipient with an old user agent, but would require
+   more attention to the possibility that the boundary delimiter might
+   appear at the beginning of some line in the encapsulated part.  The
+   simplest boundary delimiter line possible is something like "---",
+   with a closing boundary delimiter line of "-----".
+
+   As a very simple example, the following multipart message has two
+   parts, both of them plain text, one of them explicitly typed and one
+   of them implicitly typed:
+
+     From: Nathaniel Borenstein <nsb@bellcore.com>
+     To: Ned Freed <ned@innosoft.com>
+     Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)
+     Subject: Sample message
+     MIME-Version: 1.0
+     Content-type: multipart/mixed; boundary="simple boundary"
+
+     This is the preamble.  It is to be ignored, though it
+     is a handy place for composition agents to include an
+     explanatory note to non-MIME conformant readers.
+
+     --simple boundary
+
+     This is implicitly typed plain US-ASCII text.
+     It does NOT end with a linebreak.
+     --simple boundary
+     Content-type: text/plain; charset=us-ascii
+
+     This is explicitly typed plain US-ASCII text.
+     It DOES end with a linebreak.
+
+     --simple boundary--
+
+     This is the epilogue.  It is also to be ignored.
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 21]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   The use of a media type of "multipart" in a body part within another
+   "multipart" entity is explicitly allowed.  In such cases, for obvious
+   reasons, care must be taken to ensure that each nested "multipart"
+   entity uses a different boundary delimiter.  See RFC 2049 for an
+   example of nested "multipart" entities.
+
+   The use of the "multipart" media type with only a single body part
+   may be useful in certain contexts, and is explicitly permitted.
+
+   NOTE: Experience has shown that a "multipart" media type with a
+   single body part is useful for sending non-text media types.  It has
+   the advantage of providing the preamble as a place to include
+   decoding instructions.  In addition, a number of SMTP gateways move
+   or remove the MIME headers, and a clever MIME decoder can take a good
+   guess at multipart boundaries even in the absence of the Content-Type
+   header and thereby successfully decode the message.
+
+   The only mandatory global parameter for the "multipart" media type is
+   the boundary parameter, which consists of 1 to 70 characters from a
+   set of characters known to be very robust through mail gateways, and
+   NOT ending with white space. (If a boundary delimiter line appears to
+   end with white space, the white space must be presumed to have been
+   added by a gateway, and must be deleted.)  It is formally specified
+   by the following BNF:
+
+     boundary := 0*69<bchars> bcharsnospace
+
+     bchars := bcharsnospace / " "
+
+     bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
+                      "+" / "_" / "," / "-" / "." /
+                      "/" / ":" / "=" / "?"
+
+   Overall, the body of a "multipart" entity may be specified as
+   follows:
+
+     dash-boundary := "--" boundary
+                      ; boundary taken from the value of
+                      ; boundary parameter of the
+                      ; Content-Type field.
+
+     multipart-body := [preamble CRLF]
+                       dash-boundary transport-padding CRLF
+                       body-part *encapsulation
+                       close-delimiter transport-padding
+                       [CRLF epilogue]
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 22]
+
+RFC 2046                      Media Types                  November 1996
+
+
+     transport-padding := *LWSP-char
+                          ; Composers MUST NOT generate
+                          ; non-zero length transport
+                          ; padding, but receivers MUST
+                          ; be able to handle padding
+                          ; added by message transports.
+
+     encapsulation := delimiter transport-padding
+                      CRLF body-part
+
+     delimiter := CRLF dash-boundary
+
+     close-delimiter := delimiter "--"
+
+     preamble := discard-text
+
+     epilogue := discard-text
+
+     discard-text := *(*text CRLF) *text
+                     ; May be ignored or discarded.
+
+     body-part := MIME-part-headers [CRLF *OCTET]
+                  ; Lines in a body-part must not start
+                  ; with the specified dash-boundary and
+                  ; the delimiter must not appear anywhere
+                  ; in the body part.  Note that the
+                  ; semantics of a body-part differ from
+                  ; the semantics of a message, as
+                  ; described in the text.
+
+     OCTET := <any 0-255 octet value>
+
+   IMPORTANT:  The free insertion of linear-white-space and RFC 822
+   comments between the elements shown in this BNF is NOT allowed since
+   this BNF does not specify a structured header field.
+
+   NOTE:  In certain transport enclaves, RFC 822 restrictions such as
+   the one that limits bodies to printable US-ASCII characters may not
+   be in force. (That is, transport domains may exist that resemble
+   standard Internet mail transport as specified in RFC 821 and assumed
+   by RFC 822, but without certain restrictions.) The relaxation of
+   these restrictions should be construed as locally extending the
+   definition of bodies, for example to include octets outside of the
+   US-ASCII range, as long as these extensions are supported by the
+   transport and adequately documented in the Content-Transfer-Encoding
+   header field.  However, in no event are headers (either message
+   headers or body part headers) allowed to contain anything other than
+   US-ASCII characters.
+
+
+
+Freed & Borenstein          Standards Track                    [Page 23]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   NOTE:  Conspicuously missing from the "multipart" type is a notion of
+   structured, related body parts. It is recommended that those wishing
+   to provide more structured or integrated multipart messaging
+   facilities should define subtypes of multipart that are syntactically
+   identical but define relationships between the various parts. For
+   example, subtypes of multipart could be defined that include a
+   distinguished part which in turn is used to specify the relationships
+   between the other parts, probably referring to them by their
+   Content-ID field.  Old implementations will not recognize the new
+   subtype if this approach is used, but will treat it as
+   multipart/mixed and will thus be able to show the user the parts that
+   are recognized.
+
+5.1.2.  Handling Nested Messages and Multiparts
+
+   The "message/rfc822" subtype defined in a subsequent section of this
+   document has no terminating condition other than running out of data.
+   Similarly, an improperly truncated "multipart" entity may not have
+   any terminating boundary marker, and can turn up operationally due to
+   mail system malfunctions.
+
+   It is essential that such entities be handled correctly when they are
+   themselves imbedded inside of another "multipart" structure.  MIME
+   implementations are therefore required to recognize outer level
+   boundary markers at ANY level of inner nesting.  It is not sufficient
+   to only check for the next expected marker or other terminating
+   condition.
+
+5.1.3.  Mixed Subtype
+
+   The "mixed" subtype of "multipart" is intended for use when the body
+   parts are independent and need to be bundled in a particular order.
+   Any "multipart" subtypes that an implementation does not recognize
+   must be treated as being of subtype "mixed".
+
+5.1.4.  Alternative Subtype
+
+   The "multipart/alternative" type is syntactically identical to
+   "multipart/mixed", but the semantics are different.  In particular,
+   each of the body parts is an "alternative" version of the same
+   information.
+
+   Systems should recognize that the content of the various parts are
+   interchangeable.  Systems should choose the "best" type based on the
+   local environment and preferences, in some cases even through user
+   interaction.  As with "multipart/mixed", the order of body parts is
+   significant.  In this case, the alternatives appear in an order of
+   increasing faithfulness to the original content.  In general, the
+
+
+
+Freed & Borenstein          Standards Track                    [Page 24]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   best choice is the LAST part of a type supported by the recipient
+   system's local environment.
+
+   "Multipart/alternative" may be used, for example, to send a message
+   in a fancy text format in such a way that it can easily be displayed
+   anywhere:
+
+     From: Nathaniel Borenstein <nsb@bellcore.com>
+     To: Ned Freed <ned@innosoft.com>
+     Date: Mon, 22 Mar 1993 09:41:09 -0800 (PST)
+     Subject: Formatted text mail
+     MIME-Version: 1.0
+     Content-Type: multipart/alternative; boundary=boundary42
+
+     --boundary42
+     Content-Type: text/plain; charset=us-ascii
+
+       ... plain text version of message goes here ...
+
+     --boundary42
+     Content-Type: text/enriched
+
+       ... RFC 1896 text/enriched version of same message
+           goes here ...
+
+     --boundary42
+     Content-Type: application/x-whatever
+
+       ... fanciest version of same message goes here ...
+
+     --boundary42--
+
+   In this example, users whose mail systems understood the
+   "application/x-whatever" format would see only the fancy version,
+   while other users would see only the enriched or plain text version,
+   depending on the capabilities of their system.
+
+   In general, user agents that compose "multipart/alternative" entities
+   must place the body parts in increasing order of preference, that is,
+   with the preferred format last.  For fancy text, the sending user
+   agent should put the plainest format first and the richest format
+   last.  Receiving user agents should pick and display the last format
+   they are capable of displaying.  In the case where one of the
+   alternatives is itself of type "multipart" and contains unrecognized
+   sub-parts, the user agent may choose either to show that alternative,
+   an earlier alternative, or both.
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 25]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   NOTE: From an implementor's perspective, it might seem more sensible
+   to reverse this ordering, and have the plainest alternative last.
+   However, placing the plainest alternative first is the friendliest
+   possible option when "multipart/alternative" entities are viewed
+   using a non-MIME-conformant viewer.  While this approach does impose
+   some burden on conformant MIME viewers, interoperability with older
+   mail readers was deemed to be more important in this case.
+
+   It may be the case that some user agents, if they can recognize more
+   than one of the formats, will prefer to offer the user the choice of
+   which format to view.  This makes sense, for example, if a message
+   includes both a nicely-formatted image version and an easily-edited
+   text version.  What is most critical, however, is that the user not
+   automatically be shown multiple versions of the same data.  Either
+   the user should be shown the last recognized version or should be
+   given the choice.
+
+   THE SEMANTICS OF CONTENT-ID IN MULTIPART/ALTERNATIVE:  Each part of a
+   "multipart/alternative" entity represents the same data, but the
+   mappings between the two are not necessarily without information
+   loss.  For example, information is lost when translating ODA to
+   PostScript or plain text.  It is recommended that each part should
+   have a different Content-ID value in the case where the information
+   content of the two parts is not identical.  And when the information
+   content is identical -- for example, where several parts of type
+   "message/external-body" specify alternate ways to access the
+   identical data -- the same Content-ID field value should be used, to
+   optimize any caching mechanisms that might be present on the
+   recipient's end.  However, the Content-ID values used by the parts
+   should NOT be the same Content-ID value that describes the
+   "multipart/alternative" as a whole, if there is any such Content-ID
+   field.  That is, one Content-ID value will refer to the
+   "multipart/alternative" entity, while one or more other Content-ID
+   values will refer to the parts inside it.
+
+5.1.5.  Digest Subtype
+
+   This document defines a "digest" subtype of the "multipart" Content-
+   Type.  This type is syntactically identical to "multipart/mixed", but
+   the semantics are different.  In particular, in a digest, the default
+   Content-Type value for a body part is changed from "text/plain" to
+   "message/rfc822".  This is done to allow a more readable digest
+   format that is largely compatible (except for the quoting convention)
+   with RFC 934.
+
+   Note: Though it is possible to specify a Content-Type value for a
+   body part in a digest which is other than "message/rfc822", such as a
+   "text/plain" part containing a description of the material in the
+
+
+
+Freed & Borenstein          Standards Track                    [Page 26]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   digest, actually doing so is undesirable. The "multipart/digest"
+   Content-Type is intended to be used to send collections of messages.
+   If a "text/plain" part is needed, it should be included as a separate
+   part of a "multipart/mixed" message.
+
+   A digest in this format might, then, look something like this:
+
+     From: Moderator-Address
+     To: Recipient-List
+     Date: Mon, 22 Mar 1994 13:34:51 +0000
+     Subject: Internet Digest, volume 42
+     MIME-Version: 1.0
+     Content-Type: multipart/mixed;
+                   boundary="---- main boundary ----"
+
+     ------ main boundary ----
+
+       ...Introductory text or table of contents...
+
+     ------ main boundary ----
+     Content-Type: multipart/digest;
+                   boundary="---- next message ----"
+
+     ------ next message ----
+
+     From: someone-else
+     Date: Fri, 26 Mar 1993 11:13:32 +0200
+     Subject: my opinion
+
+       ...body goes here ...
+
+     ------ next message ----
+
+     From: someone-else-again
+     Date: Fri, 26 Mar 1993 10:07:13 -0500
+     Subject: my different opinion
+
+       ... another body goes here ...
+
+     ------ next message ------
+
+     ------ main boundary ------
+
+5.1.6.  Parallel Subtype
+
+   This document defines a "parallel" subtype of the "multipart"
+   Content-Type.  This type is syntactically identical to
+   "multipart/mixed", but the semantics are different.  In particular,
+
+
+
+Freed & Borenstein          Standards Track                    [Page 27]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   in a parallel entity, the order of body parts is not significant.
+
+   A common presentation of this type is to display all of the parts
+   simultaneously on hardware and software that are capable of doing so.
+   However, composing agents should be aware that many mail readers will
+   lack this capability and will show the parts serially in any event.
+
+5.1.7.  Other Multipart Subtypes
+
+   Other "multipart" subtypes are expected in the future.  MIME
+   implementations must in general treat unrecognized subtypes of
+   "multipart" as being equivalent to "multipart/mixed".
+
+5.2.  Message Media Type
+
+   It is frequently desirable, in sending mail, to encapsulate another
+   mail message.  A special media type, "message", is defined to
+   facilitate this.  In particular, the "rfc822" subtype of "message" is
+   used to encapsulate RFC 822 messages.
+
+   NOTE:  It has been suggested that subtypes of "message" might be
+   defined for forwarded or rejected messages.  However, forwarded and
+   rejected messages can be handled as multipart messages in which the
+   first part contains any control or descriptive information, and a
+   second part, of type "message/rfc822", is the forwarded or rejected
+   message.  Composing rejection and forwarding messages in this manner
+   will preserve the type information on the original message and allow
+   it to be correctly presented to the recipient, and hence is strongly
+   encouraged.
+
+   Subtypes of "message" often impose restrictions on what encodings are
+   allowed.  These restrictions are described in conjunction with each
+   specific subtype.
+
+   Mail gateways, relays, and other mail handling agents are commonly
+   known to alter the top-level header of an RFC 822 message.  In
+   particular, they frequently add, remove, or reorder header fields.
+   These operations are explicitly forbidden for the encapsulated
+   headers embedded in the bodies of messages of type "message."
+
+5.2.1.  RFC822 Subtype
+
+   A media type of "message/rfc822" indicates that the body contains an
+   encapsulated message, with the syntax of an RFC 822 message.
+   However, unlike top-level RFC 822 messages, the restriction that each
+   "message/rfc822" body must include a "From", "Date", and at least one
+   destination header is removed and replaced with the requirement that
+   at least one of "From", "Subject", or "Date" must be present.
+
+
+
+Freed & Borenstein          Standards Track                    [Page 28]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   It should be noted that, despite the use of the numbers "822", a
+   "message/rfc822" entity isn't restricted to material in strict
+   conformance to RFC822, nor are the semantics of "message/rfc822"
+   objects restricted to the semantics defined in RFC822. More
+   specifically, a "message/rfc822" message could well be a News article
+   or a MIME message.
+
+   No encoding other than "7bit", "8bit", or "binary" is permitted for
+   the body of a "message/rfc822" entity.  The message header fields are
+   always US-ASCII in any case, and data within the body can still be
+   encoded, in which case the Content-Transfer-Encoding header field in
+   the encapsulated message will reflect this.  Non-US-ASCII text in the
+   headers of an encapsulated message can be specified using the
+   mechanisms described in RFC 2047.
+
+5.2.2.  Partial Subtype
+
+   The "partial" subtype is defined to allow large entities to be
+   delivered as several separate pieces of mail and automatically
+   reassembled by a receiving user agent.  (The concept is similar to IP
+   fragmentation and reassembly in the basic Internet Protocols.)  This
+   mechanism can be used when intermediate transport agents limit the
+   size of individual messages that can be sent.  The media type
+   "message/partial" thus indicates that the body contains a fragment of
+   a larger entity.
+
+   Because data of type "message" may never be encoded in base64 or
+   quoted-printable, a problem might arise if "message/partial" entities
+   are constructed in an environment that supports binary or 8bit
+   transport.  The problem is that the binary data would be split into
+   multiple "message/partial" messages, each of them requiring binary
+   transport.  If such messages were encountered at a gateway into a
+   7bit transport environment, there would be no way to properly encode
+   them for the 7bit world, aside from waiting for all of the fragments,
+   reassembling the inner message, and then encoding the reassembled
+   data in base64 or quoted-printable.  Since it is possible that
+   different fragments might go through different gateways, even this is
+   not an acceptable solution.  For this reason, it is specified that
+   entities of type "message/partial" must always have a content-
+   transfer-encoding of 7bit (the default).  In particular, even in
+   environments that support binary or 8bit transport, the use of a
+   content-transfer-encoding of "8bit" or "binary" is explicitly
+   prohibited for MIME entities of type "message/partial". This in turn
+   implies that the inner message must not use "8bit" or "binary"
+   encoding.
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 29]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   Because some message transfer agents may choose to automatically
+   fragment large messages, and because such agents may use very
+   different fragmentation thresholds, it is possible that the pieces of
+   a partial message, upon reassembly, may prove themselves to comprise
+   a partial message.  This is explicitly permitted.
+
+   Three parameters must be specified in the Content-Type field of type
+   "message/partial":  The first, "id", is a unique identifier, as close
+   to a world-unique identifier as possible, to be used to match the
+   fragments together. (In general, the identifier is essentially a
+   message-id; if placed in double quotes, it can be ANY message-id, in
+   accordance with the BNF for "parameter" given in RFC 2045.)  The
+   second, "number", an integer, is the fragment number, which indicates
+   where this fragment fits into the sequence of fragments.  The third,
+   "total", another integer, is the total number of fragments.  This
+   third subfield is required on the final fragment, and is optional
+   (though encouraged) on the earlier fragments.  Note also that these
+   parameters may be given in any order.
+
+   Thus, the second piece of a 3-piece message may have either of the
+   following header fields:
+
+     Content-Type: Message/Partial; number=2; total=3;
+                   id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
+
+     Content-Type: Message/Partial;
+                   id="oc=jpbe0M2Yt4s@thumper.bellcore.com";
+                   number=2
+
+   But the third piece MUST specify the total number of fragments:
+
+     Content-Type: Message/Partial; number=3; total=3;
+                   id="oc=jpbe0M2Yt4s@thumper.bellcore.com"
+
+   Note that fragment numbering begins with 1, not 0.
+
+   When the fragments of an entity broken up in this manner are put
+   together, the result is always a complete MIME entity, which may have
+   its own Content-Type header field, and thus may contain any other
+   data type.
+
+5.2.2.1.  Message Fragmentation and Reassembly
+
+   The semantics of a reassembled partial message must be those of the
+   "inner" message, rather than of a message containing the inner
+   message.  This makes it possible, for example, to send a large audio
+   message as several partial messages, and still have it appear to the
+   recipient as a simple audio message rather than as an encapsulated
+
+
+
+Freed & Borenstein          Standards Track                    [Page 30]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   message containing an audio message.  That is, the encapsulation of
+   the message is considered to be "transparent".
+
+   When generating and reassembling the pieces of a "message/partial"
+   message, the headers of the encapsulated message must be merged with
+   the headers of the enclosing entities.  In this process the following
+   rules must be observed:
+
+    (1)   Fragmentation agents must split messages at line
+          boundaries only. This restriction is imposed because
+          splits at points other than the ends of lines in turn
+          depend on message transports being able to preserve
+          the semantics of messages that don't end with a CRLF
+          sequence. Many transports are incapable of preserving
+          such semantics.
+
+    (2)   All of the header fields from the initial enclosing
+          message, except those that start with "Content-" and
+          the specific header fields "Subject", "Message-ID",
+          "Encrypted", and "MIME-Version", must be copied, in
+          order, to the new message.
+
+    (3)   The header fields in the enclosed message which start
+          with "Content-", plus the "Subject", "Message-ID",
+          "Encrypted", and "MIME-Version" fields, must be
+          appended, in order, to the header fields of the new
+          message.  Any header fields in the enclosed message
+          which do not start with "Content-" (except for the
+          "Subject", "Message-ID", "Encrypted", and "MIME-
+          Version" fields) will be ignored and dropped.
+
+    (4)   All of the header fields from the second and any
+          subsequent enclosing messages are discarded by the
+          reassembly process.
+
+5.2.2.2.  Fragmentation and Reassembly Example
+
+   If an audio message is broken into two pieces, the first piece might
+   look something like this:
+
+     X-Weird-Header-1: Foo
+     From: Bill@host.com
+     To: joe@otherhost.com
+     Date: Fri, 26 Mar 1993 12:59:38 -0500 (EST)
+     Subject: Audio mail (part 1 of 2)
+     Message-ID: <id1@host.com>
+     MIME-Version: 1.0
+     Content-type: message/partial; id="ABC@host.com";
+
+
+
+Freed & Borenstein          Standards Track                    [Page 31]
+
+RFC 2046                      Media Types                  November 1996
+
+
+                   number=1; total=2
+
+     X-Weird-Header-1: Bar
+     X-Weird-Header-2: Hello
+     Message-ID: <anotherid@foo.com>
+     Subject: Audio mail
+     MIME-Version: 1.0
+     Content-type: audio/basic
+     Content-transfer-encoding: base64
+
+       ... first half of encoded audio data goes here ...
+
+   and the second half might look something like this:
+
+     From: Bill@host.com
+     To: joe@otherhost.com
+     Date: Fri, 26 Mar 1993 12:59:38 -0500 (EST)
+     Subject: Audio mail (part 2 of 2)
+     MIME-Version: 1.0
+     Message-ID: <id2@host.com>
+     Content-type: message/partial;
+                   id="ABC@host.com"; number=2; total=2
+
+       ... second half of encoded audio data goes here ...
+
+   Then, when the fragmented message is reassembled, the resulting
+   message to be displayed to the user should look something like this:
+
+     X-Weird-Header-1: Foo
+     From: Bill@host.com
+     To: joe@otherhost.com
+     Date: Fri, 26 Mar 1993 12:59:38 -0500 (EST)
+     Subject: Audio mail
+     Message-ID: <anotherid@foo.com>
+     MIME-Version: 1.0
+     Content-type: audio/basic
+     Content-transfer-encoding: base64
+
+       ... first half of encoded audio data goes here ...
+       ... second half of encoded audio data goes here ...
+
+   The inclusion of a "References" field in the headers of the second
+   and subsequent pieces of a fragmented message that references the
+   Message-Id on the previous piece may be of benefit to mail readers
+   that understand and track references.  However, the generation of
+   such "References" fields is entirely optional.
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 32]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   Finally, it should be noted that the "Encrypted" header field has
+   been made obsolete by Privacy Enhanced Messaging (PEM) [RFC-1421,
+   RFC-1422, RFC-1423, RFC-1424], but the rules above are nevertheless
+   believed to describe the correct way to treat it if it is encountered
+   in the context of conversion to and from "message/partial" fragments.
+
+5.2.3.  External-Body Subtype
+
+   The external-body subtype indicates that the actual body data are not
+   included, but merely referenced.  In this case, the parameters
+   describe a mechanism for accessing the external data.
+
+   When a MIME entity is of type "message/external-body", it consists of
+   a header, two consecutive CRLFs, and the message header for the
+   encapsulated message.  If another pair of consecutive CRLFs appears,
+   this of course ends the message header for the encapsulated message.
+   However, since the encapsulated message's body is itself external, it
+   does NOT appear in the area that follows.  For example, consider the
+   following message:
+
+     Content-type: message/external-body;
+                   access-type=local-file;
+                   name="/u/nsb/Me.jpeg"
+
+     Content-type: image/jpeg
+     Content-ID: <id42@guppylake.bellcore.com>
+     Content-Transfer-Encoding: binary
+
+     THIS IS NOT REALLY THE BODY!
+
+   The area at the end, which might be called the "phantom body", is
+   ignored for most external-body messages.  However, it may be used to
+   contain auxiliary information for some such messages, as indeed it is
+   when the access-type is "mail-server".  The only access-type defined
+   in this document that uses the phantom body is "mail-server", but
+   other access-types may be defined in the future in other
+   specifications that use this area.
+
+   The encapsulated headers in ALL "message/external-body" entities MUST
+   include a Content-ID header field to give a unique identifier by
+   which to reference the data.  This identifier may be used for caching
+   mechanisms, and for recognizing the receipt of the data when the
+   access-type is "mail-server".
+
+   Note that, as specified here, the tokens that describe external-body
+   data, such as file names and mail server commands, are required to be
+   in the US-ASCII character set.
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 33]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   If this proves problematic in practice, a new mechanism may be
+   required as a future extension to MIME, either as newly defined
+   access-types for "message/external-body" or by some other mechanism.
+
+   As with "message/partial", MIME entities of type "message/external-
+   body" MUST have a content-transfer-encoding of 7bit (the default).
+   In particular, even in environments that support binary or 8bit
+   transport, the use of a content-transfer-encoding of "8bit" or
+   "binary" is explicitly prohibited for entities of type
+   "message/external-body".
+
+5.2.3.1.  General External-Body Parameters
+
+   The parameters that may be used with any "message/external-body"
+   are:
+
+    (1)   ACCESS-TYPE -- A word indicating the supported access
+          mechanism by which the file or data may be obtained.
+          This word is not case sensitive.  Values include, but
+          are not limited to, "FTP", "ANON-FTP", "TFTP", "LOCAL-
+          FILE", and "MAIL-SERVER".  Future values, except for
+          experimental values beginning with "X-", must be
+          registered with IANA, as described in RFC 2048.
+          This parameter is unconditionally mandatory and MUST be
+          present on EVERY "message/external-body".
+
+    (2)   EXPIRATION -- The date (in the RFC 822 "date-time"
+          syntax, as extended by RFC 1123 to permit 4 digits in
+          the year field) after which the existence of the
+          external data is not guaranteed.  This parameter may be
+          used with ANY access-type and is ALWAYS optional.
+
+    (3)   SIZE -- The size (in octets) of the data.  The intent
+          of this parameter is to help the recipient decide
+          whether or not to expend the necessary resources to
+          retrieve the external data.  Note that this describes
+          the size of the data in its canonical form, that is,
+          before any Content-Transfer-Encoding has been applied
+          or after the data have been decoded.  This parameter
+          may be used with ANY access-type and is ALWAYS
+          optional.
+
+    (4)   PERMISSION -- A case-insensitive field that indicates
+          whether or not it is expected that clients might also
+          attempt to overwrite the data.  By default, or if
+          permission is "read", the assumption is that they are
+          not, and that if the data is retrieved once, it is
+          never needed again.  If PERMISSION is "read-write",
+
+
+
+Freed & Borenstein          Standards Track                    [Page 34]
+
+RFC 2046                      Media Types                  November 1996
+
+
+          this assumption is invalid, and any local copy must be
+          considered no more than a cache.  "Read" and "Read-
+          write" are the only defined values of permission.  This
+          parameter may be used with ANY access-type and is
+          ALWAYS optional.
+
+   The precise semantics of the access-types defined here are described
+   in the sections that follow.
+
+5.2.3.2.  The 'ftp' and 'tftp' Access-Types
+
+   An access-type of FTP or TFTP indicates that the message body is
+   accessible as a file using the FTP [RFC-959] or TFTP [RFC-783]
+   protocols, respectively.  For these access-types, the following
+   additional parameters are mandatory:
+
+    (1)   NAME -- The name of the file that contains the actual
+          body data.
+
+    (2)   SITE -- A machine from which the file may be obtained,
+          using the given protocol.  This must be a fully
+          qualified domain name, not a nickname.
+
+    (3)   Before any data are retrieved, using FTP, the user will
+          generally need to be asked to provide a login id and a
+          password for the machine named by the site parameter.
+          For security reasons, such an id and password are not
+          specified as content-type parameters, but must be
+          obtained from the user.
+
+   In addition, the following parameters are optional:
+
+    (1)   DIRECTORY -- A directory from which the data named by
+          NAME should be retrieved.
+
+    (2)   MODE -- A case-insensitive string indicating the mode
+          to be used when retrieving the information.  The valid
+          values for access-type "TFTP" are "NETASCII", "OCTET",
+          and "MAIL", as specified by the TFTP protocol [RFC-
+          783].  The valid values for access-type "FTP" are
+          "ASCII", "EBCDIC", "IMAGE", and "LOCALn" where "n" is a
+          decimal integer, typically 8.  These correspond to the
+          representation types "A" "E" "I" and "L n" as specified
+          by the FTP protocol [RFC-959].  Note that "BINARY" and
+          "TENEX" are not valid values for MODE and that "OCTET"
+          or "IMAGE" or "LOCAL8" should be used instead.  If MODE
+          is not specified, the default value is "NETASCII" for
+          TFTP and "ASCII" otherwise.
+
+
+
+Freed & Borenstein          Standards Track                    [Page 35]
+
+RFC 2046                      Media Types                  November 1996
+
+
+5.2.3.3.  The 'anon-ftp' Access-Type
+
+   The "anon-ftp" access-type is identical to the "ftp" access type,
+   except that the user need not be asked to provide a name and password
+   for the specified site.  Instead, the ftp protocol will be used with
+   login "anonymous" and a password that corresponds to the user's mail
+   address.
+
+5.2.3.4.  The 'local-file' Access-Type
+
+   An access-type of "local-file" indicates that the actual body is
+   accessible as a file on the local machine.  Two additional parameters
+   are defined for this access type:
+
+    (1)   NAME -- The name of the file that contains the actual
+          body data.  This parameter is mandatory for the
+          "local-file" access-type.
+
+    (2)   SITE -- A domain specifier for a machine or set of
+          machines that are known to have access to the data
+          file.  This optional parameter is used to describe the
+          locality of reference for the data, that is, the site
+          or sites at which the file is expected to be visible.
+          Asterisks may be used for wildcard matching to a part
+          of a domain name, such as "*.bellcore.com", to indicate
+          a set of machines on which the data should be directly
+          visible, while a single asterisk may be used to
+          indicate a file that is expected to be universally
+          available, e.g., via a global file system.
+
+5.2.3.5.  The 'mail-server' Access-Type
+
+   The "mail-server" access-type indicates that the actual body is
+   available from a mail server.  Two additional parameters are defined
+   for this access-type:
+
+    (1)   SERVER -- The addr-spec of the mail server from which
+          the actual body data can be obtained.  This parameter
+          is mandatory for the "mail-server" access-type.
+
+    (2)   SUBJECT -- The subject that is to be used in the mail
+          that is sent to obtain the data.  Note that keying mail
+          servers on Subject lines is NOT recommended, but such
+          mail servers are known to exist.  This is an optional
+          parameter.
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 36]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   Because mail servers accept a variety of syntaxes, some of which are
+   multiline, the full command to be sent to a mail server is not
+   included as a parameter in the content-type header field.  Instead,
+   it is provided as the "phantom body" when the media type is
+   "message/external-body" and the access-type is mail-server.
+
+   Note that MIME does not define a mail server syntax.  Rather, it
+   allows the inclusion of arbitrary mail server commands in the phantom
+   body.  Implementations must include the phantom body in the body of
+   the message it sends to the mail server address to retrieve the
+   relevant data.
+
+   Unlike other access-types, mail-server access is asynchronous and
+   will happen at an unpredictable time in the future.  For this reason,
+   it is important that there be a mechanism by which the returned data
+   can be matched up with the original "message/external-body" entity.
+   MIME mail servers must use the same Content-ID field on the returned
+   message that was used in the original "message/external-body"
+   entities, to facilitate such matching.
+
+5.2.3.6.  External-Body Security Issues
+
+   "Message/external-body" entities give rise to two important security
+   issues:
+
+    (1)   Accessing data via a "message/external-body" reference
+          effectively results in the message recipient performing
+          an operation that was specified by the message
+          originator.  It is therefore possible for the message
+          originator to trick a recipient into doing something
+          they would not have done otherwise.  For example, an
+          originator could specify an action that attempts
+          retrieval of material that the recipient is not
+          authorized to obtain, causing the recipient to
+          unwittingly violate some security policy.  For this
+          reason, user agents capable of resolving external
+          references must always take steps to describe the
+          action they are to take to the recipient and ask for
+          explicit permission prior to performing it.
+
+          The 'mail-server' access-type is particularly
+          vulnerable, in that it causes the recipient to send a
+          new message whose contents are specified by the
+          original message's originator.  Given the potential for
+          abuse, any such request messages that are constructed
+          should contain a clear indication that they were
+          generated automatically (e.g. in a Comments: header
+          field) in an attempt to resolve a MIME
+
+
+
+Freed & Borenstein          Standards Track                    [Page 37]
+
+RFC 2046                      Media Types                  November 1996
+
+
+          "message/external-body" reference.
+
+    (2)   MIME will sometimes be used in environments that
+          provide some guarantee of message integrity and
+          authenticity.  If present, such guarantees may apply
+          only to the actual direct content of messages -- they
+          may or may not apply to data accessed through MIME's
+          "message/external-body" mechanism.  In particular, it
+          may be possible to subvert certain access mechanisms
+          even when the messaging system itself is secure.
+
+          It should be noted that this problem exists either with
+          or without the availability of MIME mechanisms.  A
+          casual reference to an FTP site containing a document
+          in the text of a secure message brings up similar
+          issues -- the only difference is that MIME provides for
+          automatic retrieval of such material, and users may
+          place unwarranted trust in such automatic retrieval
+          mechanisms.
+
+5.2.3.7.  Examples and Further Explanations
+
+   When the external-body mechanism is used in conjunction with the
+   "multipart/alternative" media type it extends the functionality of
+   "multipart/alternative" to include the case where the same entity is
+   provided in the same format but via different access mechanisms.
+   When this is done the originator of the message must order the parts
+   first in terms of preferred formats and then by preferred access
+   mechanisms.  The recipient's viewer should then evaluate the list
+   both in terms of format and access mechanisms.
+
+   With the emerging possibility of very wide-area file systems, it
+   becomes very hard to know in advance the set of machines where a file
+   will and will not be accessible directly from the file system.
+   Therefore it may make sense to provide both a file name, to be tried
+   directly, and the name of one or more sites from which the file is
+   known to be accessible.  An implementation can try to retrieve remote
+   files using FTP or any other protocol, using anonymous file retrieval
+   or prompting the user for the necessary name and password.  If an
+   external body is accessible via multiple mechanisms, the sender may
+   include multiple entities of type "message/external-body" within the
+   body parts of an enclosing "multipart/alternative" entity.
+
+   However, the external-body mechanism is not intended to be limited to
+   file retrieval, as shown by the mail-server access-type.  Beyond
+   this, one can imagine, for example, using a video server for external
+   references to video clips.
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 38]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   The embedded message header fields which appear in the body of the
+   "message/external-body" data must be used to declare the media type
+   of the external body if it is anything other than plain US-ASCII
+   text, since the external body does not have a header section to
+   declare its type.  Similarly, any Content-transfer-encoding other
+   than "7bit" must also be declared here.  Thus a complete
+   "message/external-body" message, referring to an object in PostScript
+   format, might look like this:
+
+     From: Whomever
+     To: Someone
+     Date: Whenever
+     Subject: whatever
+     MIME-Version: 1.0
+     Message-ID: <id1@host.com>
+     Content-Type: multipart/alternative; boundary=42
+     Content-ID: <id001@guppylake.bellcore.com>
+
+     --42
+     Content-Type: message/external-body; name="BodyFormats.ps";
+                   site="thumper.bellcore.com"; mode="image";
+                   access-type=ANON-FTP; directory="pub";
+                   expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)"
+
+     Content-type: application/postscript
+     Content-ID: <id42@guppylake.bellcore.com>
+
+     --42
+     Content-Type: message/external-body; access-type=local-file;
+                   name="/u/nsb/writing/rfcs/RFC-MIME.ps";
+                   site="thumper.bellcore.com";
+                   expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)"
+
+     Content-type: application/postscript
+     Content-ID: <id42@guppylake.bellcore.com>
+
+     --42
+     Content-Type: message/external-body;
+                   access-type=mail-server;
+                   server="listserv@bogus.bitnet";
+                   expiration="Fri, 14 Jun 1991 19:13:14 -0400 (EDT)"
+
+     Content-type: application/postscript
+     Content-ID: <id42@guppylake.bellcore.com>
+
+     get RFC-MIME.DOC
+
+     --42--
+
+
+
+Freed & Borenstein          Standards Track                    [Page 39]
+
+RFC 2046                      Media Types                  November 1996
+
+
+   Note that in the above examples, the default Content-transfer-
+   encoding of "7bit" is assumed for the external postscript data.
+
+   Like the "message/partial" type, the "message/external-body" media
+   type is intended to be transparent, that is, to convey the data type
+   in the external body rather than to convey a message with a body of
+   that type.  Thus the headers on the outer and inner parts must be
+   merged using the same rules as for "message/partial".  In particular,
+   this means that the Content-type and Subject fields are overridden,
+   but the From field is preserved.
+
+   Note that since the external bodies are not transported along with
+   the external body reference, they need not conform to transport
+   limitations that apply to the reference itself. In particular,
+   Internet mail transports may impose 7bit and line length limits, but
+   these do not automatically apply to binary external body references.
+   Thus a Content-Transfer-Encoding is not generally necessary, though
+   it is permitted.
+
+   Note that the body of a message of type "message/external-body" is
+   governed by the basic syntax for an RFC 822 message.  In particular,
+   anything before the first consecutive pair of CRLFs is header
+   information, while anything after it is body information, which is
+   ignored for most access-types.
+
+5.2.4.  Other Message Subtypes
+
+   MIME implementations must in general treat unrecognized subtypes of
+   "message" as being equivalent to "application/octet-stream".
+
+   Future subtypes of "message" intended for use with email should be
+   restricted to "7bit" encoding. A type other than "message" should be
+   used if restriction to "7bit" is not possible.
+
+6.  Experimental Media Type Values
+
+   A media type value beginning with the characters "X-" is a private
+   value, to be used by consenting systems by mutual agreement.  Any
+   format without a rigorous and public definition must be named with an
+   "X-" prefix, and publicly specified values shall never begin with
+   "X-".  (Older versions of the widely used Andrew system use the "X-
+   BE2" name, so new systems should probably choose a different name.)
+
+   In general, the use of "X-" top-level types is strongly discouraged.
+   Implementors should invent subtypes of the existing types whenever
+   possible. In many cases, a subtype of "application" will be more
+   appropriate than a new top-level type.
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 40]
+
+RFC 2046                      Media Types                  November 1996
+
+
+7.  Summary
+
+   The five discrete media types provide a standardized
+   mechanism for tagging entities as "audio", "image", or several other
+   kinds of data. The composite "multipart" and "message" media types
+   allow mixing and hierarchical structuring of entities of different
+   types in a single message. A distinguished parameter syntax allows
+   further specification of data format details, particularly the
+   specification of alternate character sets.  Additional optional
+   header fields provide mechanisms for certain extensions deemed
+   desirable by many implementors. Finally, a number of useful media
+   types are defined for general use by consenting user agents, notably
+   "message/partial" and "message/external-body".
+
+8.  Security Considerations
+
+   Security issues are discussed in the context of the
+   "application/postscript" type, the "message/external-body" type, and
+   in RFC 2048.  Implementors should pay special attention to the
+   security implications of any media types that can cause the remote
+   execution of any actions in the recipient's environment.  In such
+   cases, the discussion of the "application/postscript" type may serve
+   as a model for considering other media types with remote execution
+   capabilities.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 41]
+
+RFC 2046                      Media Types                  November 1996
+
+
+9.  Authors' Addresses
+
+   For more information, the authors of this document are best contacted
+   via Internet mail:
+
+   Ned Freed
+   Innosoft International, Inc.
+   1050 East Garvey Avenue South
+   West Covina, CA 91790
+   USA
+
+   Phone: +1 818 919 3600
+   Fax:   +1 818 919 3614
+   EMail: ned@innosoft.com
+
+
+   Nathaniel S. Borenstein
+   First Virtual Holdings
+   25 Washington Avenue
+   Morristown, NJ 07960
+   USA
+
+   Phone: +1 201 540 8967
+   Fax:   +1 201 993 3032
+   EMail: nsb@nsb.fv.com
+
+
+   MIME is a result of the work of the Internet Engineering Task Force
+   Working Group on RFC 822 Extensions.  The chairman of that group,
+   Greg Vaudreuil, may be reached at:
+
+   Gregory M. Vaudreuil
+   Octel Network Services
+   17080 Dallas Parkway
+   Dallas, TX 75248-1905
+   USA
+
+   EMail: Greg.Vaudreuil@Octel.Com
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 42]
+
+RFC 2046                      Media Types                  November 1996
+
+
+Appendix A -- Collected Grammar
+
+   This appendix contains the complete BNF grammar for all the syntax
+   specified by this document.
+
+   By itself, however, this grammar is incomplete.  It refers by name to
+   several syntax rules that are defined by RFC 822.  Rather than
+   reproduce those definitions here, and risk unintentional differences
+   between the two, this document simply refers the reader to RFC 822
+   for the remaining definitions. Wherever a term is undefined, it
+   refers to the RFC 822 definition.
+
+     boundary := 0*69<bchars> bcharsnospace
+
+     bchars := bcharsnospace / " "
+
+     bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
+                      "+" / "_" / "," / "-" / "." /
+                      "/" / ":" / "=" / "?"
+
+     body-part := <"message" as defined in RFC 822, with all
+                   header fields optional, not starting with the
+                   specified dash-boundary, and with the
+                   delimiter not occurring anywhere in the
+                   body part.  Note that the semantics of a
+                   part differ from the semantics of a message,
+                   as described in the text.>
+
+     close-delimiter := delimiter "--"
+
+     dash-boundary := "--" boundary
+                      ; boundary taken from the value of
+                      ; boundary parameter of the
+                      ; Content-Type field.
+
+     delimiter := CRLF dash-boundary
+
+     discard-text := *(*text CRLF)
+                     ; May be ignored or discarded.
+
+     encapsulation := delimiter transport-padding
+                      CRLF body-part
+
+     epilogue := discard-text
+
+     multipart-body := [preamble CRLF]
+                       dash-boundary transport-padding CRLF
+                       body-part *encapsulation
+
+
+
+Freed & Borenstein          Standards Track                    [Page 43]
+
+RFC 2046                      Media Types                  November 1996
+
+
+                       close-delimiter transport-padding
+                       [CRLF epilogue]
+
+     preamble := discard-text
+
+     transport-padding := *LWSP-char
+                          ; Composers MUST NOT generate
+                          ; non-zero length transport
+                          ; padding, but receivers MUST
+                          ; be able to handle padding
+                          ; added by message transports.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Freed & Borenstein          Standards Track                    [Page 44]
+
diff --git a/filter.c b/filter.c
@@ -19,10 +19,10 @@ int main(int argc, char **argv) {
 		for (i = filter; i < argc; i++)
 			if (!strcmp (argv[i], "-e"))
 				edit = i;
-	}
-	for (i = 0; i < argc; i++) {
-		strncpy (argv2[i], argv[i], 1023);
-		argv2[i][1023] = '\0';
+		for (i = 0; i < argc; i++) {
+			strncpy (argv2[i], argv[i], 1023);
+			argv2[i][1023] = '\0';
+		}
 	}
 	memset (b, '\0', 1024);
 	/* Headers */
@@ -33,7 +33,7 @@ int main(int argc, char **argv) {
 			if (!strncmp (b, argv[i], strlen(argv[i])) || argv[i][0] == ':') {
 				/* Edit/Remove Headers */
 				print = 1;
-				for (j = edit + 1; j < argc && argv[j]; j++)
+				for (j = edit + 1; !value && j < argc && argv[j]; j++)
 					if ((ptr = strchr (argv[j], ':')) &&
 						!strncmp (b, argv[j], ptr - argv[j] + 1)) {
 						if (ptr[1] != '\0' && argv2[j][0])
diff --git a/mbox.c b/mbox.c
@@ -8,7 +8,6 @@
 FILE *fd;
 static char word[1024];
 
-// XXX maybe so many [1024] stuff. can this cause truncated mails?
 static void mbox_ls() {
 	char b[1024], from[1024], subject[1024], date[1024], *ptr;
 	int m = 0, headers = 1;

	dmc dynamic mail client
	git clone git://git.suckless.org/dmc
	Log \| Files \| Refs \| README \| LICENSE

doc/imf-rfc5322.txt	\|	3195	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
doc/mbox-rfc4155.txt	\|	507	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
doc/mime-p1-rfc2045.txt	\|	1739	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
doc/mime-p2-rfc2046.txt	\|	2467	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
filter.c	\|	10	+++++-----
mbox.c	\|	1	-