Warning, /pim/trojita/src/Imap/Parser/rfc5322.rl is written in an unsupported language. File is not indexed.
0001 # Generated by abnfgen at Thu Nov 1 22:36:51 2012
0002 # Output file: rfc5322.rl
0003 # Sources:
0004 # core
0005 # rfc5322.abnf
0006 %%{
0007 # name of the Ragel state machine defined by this block
0008 machine rfc5322;
0009
0010 # Generated rules. The actions embedded further down are not defined in this block; define the required actions where this machine is used.
0011 ALPHA = [A-Za-z];  # ASCII letters
0012 BIT = [01];
0013 CHAR = 0x01..0x7f;  # any 7-bit character except NUL
0014 CR = 0x0d;
0015 LF = 0x0a;
0016 CRLF = "\r\n";
0017 CTL = 0x00..0x1f | 0x7f;  # control characters including DEL
0018 DIGIT = [0-9];
0019 DQUOTE = '"';
0020 HEXDIG = DIGIT | [A-Fa-f];  # hex digit, either letter case
0021 HTAB = 0x09;
0022 SP = 0x20;
0023 WSP = 0x20 | 0x09;  # space or horizontal tab
0024 LWSP = ( CRLF? WSP )*;  # whitespace runs, each WSP optionally preceded by a line break
0025 OCTET = 0x00..0xff;
0026 VCHAR = '!'..'~';  # visible (printing) ASCII
0027 obs_NO_WS_CTL = 0x01..0x08 | 0x0b | 0x0c | 0x0e..0x1f | 0x7f;  # control characters other than NUL, HTAB, LF and CR
0028 obs_qp = "\\" ( 0x00 | obs_NO_WS_CTL | LF | CR );  # obsolete quoted-pair: backslash + NUL, control character, LF or CR
0029
0030 # backslash + something, pushing into current string; push_current_backslashed is embedded with "$", so it runs on every character matched here, the backslash included
0031 quoted_pair = ( ( "\\" ( VCHAR | WSP ) ) | obs_qp ) $push_current_backslashed;
0032
0033 # Changed to allow even stray CR or LF line termination within the folding
0034 # whitespace. There are real-world instances where some servers send such
0035 # data, see e.g. the test_Imap_Parser_parse's testParseUntagged:aox-messageid-spacing.
0036 obs_FWS = ( CR? LF? WSP )+;  # every repetition ends in WSP; the CR and the LF before it are each optional
0037 FWS = ( ( WSP* CRLF )? WSP+ ) | obs_FWS;  # folding whitespace; never matches the empty string
0038 obs_ctext = obs_NO_WS_CTL;
0039 ctext = 0x21..0x27 | 0x2a..0x5b | 0x5d..0x7e | obs_ctext;  # printable ASCII except "(", ")" and "\", plus the obsolete control characters
0040 # FIXME: nested comments should be supported; the body below uses ctext | quoted_pair rather than ccontent, so an unescaped "(" inside a comment does not match
0041 comment = "(" ( FWS? (ctext | quoted_pair) )* FWS? ")";  # NOTE(review): quoted_pair carries push_current_backslashed, so escapes inside comments trigger that action too - confirm intended
0042 ccontent = ctext | quoted_pair | comment;  # not referenced by any other rule in this machine block
0043 CFWS = ( ( FWS? comment )+ FWS? ) | FWS;  # comments and/or folding whitespace; at least one character
0044 atext = ALPHA | DIGIT | "!" | "#" | "$" | "%" | "&" | "'" | "*" | "+" | "-" | "/" | "=" | "?" | "^" | "_" | "`" | "{" | "|" | "}" | "~";
0045
0046 # pushing chars: push_current_char is attached to atext+ only, not to the CFWS on either side
0047 atom = CFWS? atext+ $push_current_char CFWS?;
0048
0049 # pushing chars: push_current_char covers the whole dotted run, "." separators included
0050 dot_atom_text = (atext+ ( "." atext+ )*) $push_current_char;
0051
0052 # pushing chars (through dot_atom_text)
0053 dot_atom = CFWS? dot_atom_text CFWS?;
0054
0055 specials = "(" | ")" | "<" | ">" | "[" | "]" | ":" | ";" | "@" | "\\" | "," | "." | DQUOTE;  # not referenced by any other rule in this machine block
0056 obs_qtext = obs_NO_WS_CTL;
0057
0058 # pushing chars: printable ASCII except DQUOTE and "\", plus the obsolete control characters
0059 qtext = ("!" | 0x23..0x5b | 0x5d..0x7e | obs_qtext) $push_current_char;
0060
0061 # pushing chars (push_current_char via qtext, push_current_backslashed via quoted_pair)
0062 qcontent = qtext | quoted_pair;
0063
0064 # pushing chars: the DQUOTE delimiters carry no action; the part between them must contain at least one qcontent or FWS, so "" does not match
0065 quoted_string = CFWS? DQUOTE ( ( ( FWS? qcontent )+ FWS? ) | FWS ) DQUOTE CFWS?;
0066
0067 # pushing chars (through atom / quoted_string)
0068 word = atom | quoted_string;
0069 # pushing chars: a bare "." between words is pushed as well
0070 obs_phrase = word ( word | "." $push_current_char | CFWS )*;
0071 # pushing chars (through word)
0072 phrase = (word+ | obs_phrase);
0073
0074 obs_utext = "\0" | obs_NO_WS_CTL | VCHAR;
0075 obs_unstruct = ( ( CR* ( obs_utext | FWS )+ ) | LF+ )* CR*;  # tolerates stray CR and LF; also matches the empty string
0076 unstructured = ( ( FWS? VCHAR )* WSP* ) | obs_unstruct;  # free-form header text; no actions are embedded here
0077 day_name = "Mon"i | "Tue"i | "Wed"i | "Thu"i | "Fri"i | "Sat"i | "Sun"i;  # case-insensitive three-letter weekday
0078 obs_day_of_week = CFWS? day_name CFWS?;
0079 day_of_week = ( FWS? day_name ) | obs_day_of_week;
0080 obs_day = CFWS? DIGIT{1,2} CFWS?;
0081 day = ( FWS? DIGIT{1,2} FWS ) | obs_day;  # one or two digits; the value itself is not range-checked by the grammar
0082 month = "Jan"i | "Feb"i | "Mar"i | "Apr"i | "May"i | "Jun"i | "Jul"i | "Aug"i | "Sep"i | "Oct"i | "Nov"i | "Dec"i;  # case-insensitive three-letter month
0083 obs_year = CFWS? DIGIT{2,} CFWS?;  # obsolete form: two or more digits
0084 year = ( FWS DIGIT{4,} FWS ) | obs_year;  # strict form: four or more digits with FWS on both sides
0085 date = day month year;
0086 obs_hour = CFWS? DIGIT{2} CFWS?;
0087 hour = DIGIT{2} | obs_hour;  # exactly two digits; not range-checked
0088 obs_minute = CFWS? DIGIT{2} CFWS?;
0089 minute = DIGIT{2} | obs_minute;
0090 obs_second = CFWS? DIGIT{2} CFWS?;
0091 second = DIGIT{2} | obs_second;
0092 time_of_day = hour ":" minute ( ":" second )?;  # the seconds part is optional
0093 obs_zone = "UT"i | "GMT"i | "EST"i | "EDT"i | "CST"i | "CDT"i | "MST"i | "MDT"i | "PST"i | "PDT"i | 0x41..0x49 | 0x4b..0x5a | 0x61..0x69 | 0x6b..0x7a;  # named zones, or a single letter A-I / K-Z in either case (J excluded)
0094 zone = ( FWS ( "+" | "-" ) DIGIT{4} ) | obs_zone;  # numeric offset needs leading FWS; the obs_zone alternative has none of its own
0095 time = time_of_day zone;
0096 date_time = ( day_of_week "," )? date time CFWS?;  # optional weekday prefix; no actions are attached in the date/time rules themselves
0097 display_name = phrase;
0098
0099 # pushing chars: the "." separators are pushed in addition to what word pushes
0100 obs_local_part = word ( "." $push_current_char word )*;
0101
0102 # pushing chars (through the three alternatives)
0103 local_part = dot_atom | quoted_string | obs_local_part;
0104
0105 # pushing chars: control characters via push_current_char, escapes via quoted_pair's own action
0106 obs_dtext = (obs_NO_WS_CTL $push_current_char) | quoted_pair;
0107
0108 # pushing chars: printable ASCII except "[", "\" and "]", plus obs_dtext
0109 dtext = ((0x21..0x5a | 0x5e..0x7e) $push_current_char) | obs_dtext;
0110
0111 # pushing chars: the enclosing "[" and "]" are pushed too; FWS between dtext characters carries no push action
0112 domain_literal = CFWS? "[" $push_current_char ( FWS? dtext )* FWS? "]" $push_current_char CFWS?;
0113 # pushing chars (through atom). NOTE(review): the "." here has no push_current_char, unlike obs_local_part - confirm intended
0114 obs_domain = atom ( "." atom )*;
0115 # pushing chars (through the three alternatives)
0116 domain = dot_atom | domain_literal | obs_domain;
0117
0118 addr_spec = local_part "@" domain;  # the "@" carries no action here (msg_id pushes its own "@")
0119 obs_domain_list = ( CFWS | "," )* "@" domain ( "," CFWS? ( "@" domain )? )*;
0120 obs_route = obs_domain_list ":";  # obsolete source route, terminated by ":"
0121 obs_angle_addr = CFWS? "<" obs_route addr_spec ">" CFWS?;
0122 angle_addr = ( CFWS? "<" addr_spec ">" CFWS? ) | obs_angle_addr;
0123 name_addr = display_name? angle_addr;
0124 mailbox = name_addr | addr_spec;
0125 obs_mbox_list = ( CFWS? "," )* mailbox ( "," ( mailbox | CFWS )? )*;  # obsolete form: tolerates leading commas and empty list elements
0126 mailbox_list = ( mailbox ( "," mailbox )* ) | obs_mbox_list;
0127 obs_group_list = ( CFWS? "," )+ CFWS?;  # commas only, no mailboxes
0128 group_list = mailbox_list | CFWS | obs_group_list;
0129 group = display_name ":" group_list? ";" CFWS?;
0130 address = mailbox | group;
0131 obs_addr_list = ( CFWS? "," )* address ( "," ( address | CFWS )? )*;  # obsolete form: tolerates leading commas and empty list elements
0132 address_list = ( address ( "," address )* ) | obs_addr_list;
0133 path = angle_addr | ( CFWS? "<" CFWS? ">" CFWS? );  # second alternative is the empty path "<>"
0134 return = "Return-Path:"i path CRLF;  # header names in this group are matched case-insensitively
0135 received_token = word | angle_addr | addr_spec | domain;
0136 received = "Received:"i received_token* ";" date_time CRLF;  # tokens, then ";" and a mandatory date_time
0137 trace = return? received+;  # optional Return-Path followed by one or more Received lines
0138 ftext = 0x21..0x39 | 0x3b..0x7e;  # printable ASCII except ":"
0139 field_name = ftext+;
0140 optional_field = field_name ":" unstructured CRLF;  # generic header line: any field name with unstructured content
0141 resent_date = "Resent-Date:"i date_time CRLF;
0142 resent_from = "Resent-From:"i mailbox_list CRLF;
0143 resent_sender = "Resent-Sender:"i mailbox CRLF;
0144 resent_to = "Resent-To:"i address_list CRLF;
0145 resent_cc = "Resent-Cc:"i address_list CRLF;
0146 resent_bcc = "Resent-Bcc:"i ( address_list | CFWS )? CRLF;  # the value may be absent entirely
0147
0148 # pushing chars (through local_part)
0149 obs_id_left = local_part;
0150
0151 # pushing chars (through dot_atom_text / obs_id_left)
0152 id_left = dot_atom_text | obs_id_left;
0153
0154 # pushing chars: brackets and dtext content are pushed; unlike domain_literal, no FWS or CFWS is accepted
0155 no_fold_literal = ("[" $push_current_char) dtext* ("]" $push_current_char);
0156
0157 # pushing chars (through domain)
0158 obs_id_right = domain;
0159
0160 # pushing chars (through the three alternatives)
0161 id_right = dot_atom_text | no_fold_literal | obs_id_right;
0162
0163 # gets pushed into a list: "@" is pushed as a character, "<" and ">" are not; push_string_list is a leaving action on ">"
0164 msg_id = CFWS? "<" id_left "@" $push_current_char id_right ">" %push_string_list CFWS?;
0165
0166 resent_msg_id = "Resent-Message-ID:"i msg_id CRLF;  # uses msg_id (and thus its actions) but has no clear_list or got_* hook of its own
0167 orig_date = "Date:"i date_time CRLF;
0168 hdr_from = "From:"i mailbox_list CRLF;
0169 sender = "Sender:"i mailbox CRLF;
0170 reply_to = "Reply-To:"i address_list CRLF;
0171 hdr_to = "To:"i address_list CRLF;
0172 cc = "Cc:"i address_list CRLF;
0173 bcc = "Bcc:"i ( address_list | CFWS )? CRLF;  # the value may be absent entirely
0174 message_id = "Message-ID:"i >clear_list msg_id CRLF %got_message_id_header;  # clear_list on entering the field name; got_* is a leaving action on CRLF (action operators bind tighter than concatenation)
0175 in_reply_to = "In-Reply-To:"i >clear_list msg_id+ CRLF %got_in_reply_to_header;  # one or more msg_id; same hook layout as message_id
0176 references = "References:"i >clear_list msg_id+ (CRLF %got_references_header);  # explicit parentheses, same grouping as the two rules above
0177 subject = "Subject:"i unstructured CRLF;
0178 comments = "Comments:"i unstructured CRLF;
0179 keywords = "Keywords:"i phrase ( "," phrase )* CRLF;  # comma-separated phrases, at least one
0180
0181 # RFC2369 doesn't provide a proper grammar, this is my best guess
0182 list_url = CFWS? "<" >clear_str CFWS? /[^<>\r\n]/* $push_current_char CFWS? ">" %push_string_list CFWS?;  # one <...> item: clear_str on "<", push_current_char on the characters in between, push_string_list when ">" is left
0183 list_post_urls = "List-Post:"i >clear_list list_url ( "," list_url )* unstructured? (CRLF %got_list_post_header);  # one or more comma-separated URLs, optional trailing text; reported once CRLF is left
0184 list_post_no = "List-Post:"i CFWS? "NO"i unstructured? (CRLF %got_list_post_no);  # leaving action, as in list_post_urls: report only after the whole CRLF matched (an entering action would already run on a CR, including a stray CR absorbed by obs_unstruct)
0185 list_post = list_post_urls | list_post_no;  # either the URL form or the literal NO form
0186
0187 # Header block in strict syntax (fields) and in obsolete syntax with WSP allowed before ":" (obs_fields). Neither lists list_post; a List-Post line matches here only via optional_field / obs_optional.
0188 fields = ( ( trace optional_field* ) | ( resent_date | resent_from | resent_sender | resent_to | resent_cc | resent_bcc | resent_msg_id )+ )* ( orig_date | hdr_from | sender | reply_to | hdr_to | cc | bcc | message_id | in_reply_to | references | subject | comments | keywords | optional_field )*;
0189 obs_return = "Return-Path"i WSP* ":" path CRLF;
0190 obs_received = "Received"i WSP* ":" received_token* CRLF;  # unlike received, no ";" date_time part
0191 obs_orig_date = "Date"i WSP* ":" date_time CRLF;
0192 obs_from = "From"i WSP* ":" mailbox_list CRLF;
0193 obs_sender = "Sender"i WSP* ":" mailbox CRLF;
0194 obs_reply_to = "Reply-To"i WSP* ":" address_list CRLF;
0195 obs_to = "To"i WSP* ":" address_list CRLF;
0196 obs_cc = "Cc"i WSP* ":" address_list CRLF;
0197 obs_bcc = "Bcc"i WSP* ":" ( address_list | ( ( CFWS? "," )* CFWS? ) ) CRLF;  # second alternative: commas/CFWS only, possibly empty
0198 obs_message_id = "Message-ID"i WSP* ":" >clear_list msg_id (CRLF %got_message_id_header);  # clear_list is attached to the ":" here, not to the field name
0199
0200 # RFC5322 says that phrases shall be ignored in obs_in_reply_to and obs_references; clear_str runs on entering each msg_id (presumably discarding what a preceding phrase pushed - confirm against the action definitions)
0201 obs_in_reply_to = "In-Reply-To"i WSP* ":" >clear_list ( phrase | msg_id >clear_str )* (CRLF %got_in_reply_to_header);
0202 obs_references = "References"i WSP* ":" >clear_list ( phrase | msg_id >clear_str )* (CRLF %got_references_header);
0203
0204 obs_subject = "Subject"i WSP* ":" unstructured CRLF;
0205 obs_comments = "Comments"i WSP* ":" unstructured CRLF;
0206 obs_phrase_list = ( phrase | CFWS )? ( "," ( phrase | CFWS )? )*;  # tolerates empty list elements; matches the empty string
0207 obs_keywords = "Keywords"i WSP* ":" obs_phrase_list CRLF;
0208 obs_resent_date = "Resent-Date"i WSP* ":" date_time CRLF;
0209 obs_resent_from = "Resent-From"i WSP* ":" mailbox_list CRLF;
0210 obs_resent_send = "Resent-Sender"i WSP* ":" mailbox CRLF;
0211 obs_resent_rply = "Resent-Reply-To"i WSP* ":" address_list CRLF;  # has no strict-syntax counterpart among the rules of this machine
0212 obs_resent_to = "Resent-To"i WSP* ":" address_list CRLF;
0213 obs_resent_cc = "Resent-Cc"i WSP* ":" address_list CRLF;
0214 obs_resent_bcc = "Resent-Bcc"i WSP* ":" ( address_list | ( ( CFWS? "," )* CFWS? ) ) CRLF;  # second alternative: commas/CFWS only, possibly empty
0215 obs_resent_mid = "Resent-Message-ID"i WSP* ":" msg_id CRLF;
0216 obs_optional = field_name WSP* ":" unstructured CRLF;  # generic header line in obsolete syntax
0217 obs_fields = ( obs_return | obs_received | obs_orig_date | obs_from | obs_sender | obs_reply_to | obs_to | obs_cc | obs_bcc | obs_message_id | obs_in_reply_to | obs_references | obs_subject | obs_comments | obs_keywords | obs_resent_date | obs_resent_from | obs_resent_send | obs_resent_rply | obs_resent_to | obs_resent_cc | obs_resent_bcc | obs_resent_mid | obs_optional )*;
0218 text = 0x01..0x09 | "\v" | "\f" | 0x0e..0x7f;  # any 7-bit character except NUL, LF and CR
0219 obs_body = 0x00..0x7f*;  # obsolete form: 7-bit characters without line structure
0220 body = ( ( text{,998} CRLF )* text{,998} ) | obs_body;  # strict form: CRLF-separated lines of at most 998 text characters, the last one unterminated
0221 message = ( fields | obs_fields ) ( CRLF body )?;  # header block, then optionally an empty line (CRLF) and the body
0222 }%%