Warning, /pim/trojita/src/Imap/Parser/rfc5322.rl is written in an unsupported language. File is not indexed.

0001 # Generated by abnfgen at Thu Nov  1 22:36:51 2012
0002 # Output file: rfc5322.rl
0003 # Sources:
0004 #   core
0005 #   rfc5322.abnf
0006 %%{
0007     # name of the Ragel machine that the rules below belong to
0008     machine rfc5322;
0009 
0010     # generated rules; the actions embedded further down (push_current_char, clear_list, ...) are not defined in this block and have to be defined by whoever uses it
0011     ALPHA = 0x41..0x5a | 0x61..0x7a;
0012     BIT = "0" | "1";
0013     CHAR = 0x01..0x7f;
0014     CR = "\r";
0015     LF = "\n";
0016     CRLF = CR LF;
0017     CTL = 0x00..0x1f | 0x7f;
0018     DIGIT = 0x30..0x39;
0019     DQUOTE = "\"";
0020     HEXDIG = DIGIT | "A"i | "B"i | "C"i | "D"i | "E"i | "F"i;  # the i suffix makes each letter literal case-insensitive
0021     HTAB = "\t";
0022     SP = " ";
0023     WSP = SP | HTAB;
0024     LWSP = ( WSP | ( CRLF WSP ) )*;
0025     OCTET = 0x00..0xff;
0026     VCHAR = 0x21..0x7e;
0027     obs_NO_WS_CTL = 0x01..0x08 | "\v" | "\f" | 0x0e..0x1f | 0x7f;  # CTL minus NUL, HTAB, LF and CR
0028     obs_qp = "\\" ( "\0" | obs_NO_WS_CTL | LF | CR );  # backslash followed by NUL, an obs_NO_WS_CTL character, LF or CR
0029 
0030     # backslash + something; push_current_backslashed is an all-transition ($) action on the whole pair
0031     quoted_pair = ( ( "\\" ( VCHAR | WSP ) ) | obs_qp ) $push_current_backslashed;
0032 
0033     # Changed to allow even stray CR or LF line termination within the folding
0034     # whitespace. There are real-world instances where some servers send such
0035     # data, see e.g. the test_Imap_Parser_parse's testParseUntagged:aox-messageid-spacing.
0036     obs_FWS = ( CR? LF? WSP )+;  # each repetition: optional CR, optional LF, then exactly one WSP
0037     FWS = ( ( WSP* CRLF )? WSP+ ) | obs_FWS;
0038     obs_ctext = obs_NO_WS_CTL;
0039     ctext = 0x21..0x27 | 0x2a..0x5b | 0x5d..0x7e | obs_ctext;  # VCHAR range without the two parentheses and the backslash, plus obs_ctext
0040     # FIXME: nested comments should be supported; for now a comment body only takes ctext and quoted_pair
0041     comment = "(" ( FWS? (ctext | quoted_pair) )* FWS? ")";
0042     ccontent = ctext | quoted_pair | comment;  # not referenced by any other rule shown in this file
0043     CFWS = ( ( FWS? comment )+ FWS? ) | FWS;
0044     atext = ALPHA | DIGIT | "!" | "#" | "$" | "%" | "&" | "'" | "*" | "+" | "-" | "/" | "=" | "?" | "^" | "_" | "`" | "{" | "|" | "}" | "~";
0045 
0046     # push_current_char is attached to the atext+ run only, not to the CFWS around it
0047     atom = CFWS? atext+ $push_current_char CFWS?;
0048 
0049     # push_current_char is attached to the whole dotted text, so the "." separators are pushed too
0050     dot_atom_text = (atext+ ( "." atext+ )*) $push_current_char;
0051 
0052     # dot_atom_text (which does the pushing) with optional CFWS on both sides
0053     dot_atom = CFWS? dot_atom_text CFWS?;
0054 
0055     specials = "(" | ")" | "<" | ">" | "[" | "]" | ":" | ";" | "@" | "\\" | "," | "." | DQUOTE;
0056     obs_qtext = obs_NO_WS_CTL;
0057 
0058     # push_current_char runs on every qtext character
0059     qtext = ("!" | 0x23..0x5b | 0x5d..0x7e | obs_qtext) $push_current_char;
0060 
0061     # both alternatives push: qtext via push_current_char, quoted_pair via push_current_backslashed
0062     qcontent = qtext | quoted_pair;
0063 
0064     # the DQUOTE delimiters and the FWS between qcontent items carry no push action of their own
0065     quoted_string = CFWS? DQUOTE ( ( ( FWS? qcontent )+ FWS? ) | FWS ) DQUOTE CFWS?;
0066     
0067     # both alternatives push their characters (see atom and quoted_string)
0068     word = atom | quoted_string;
0069     # a "." between words is pushed via push_current_char
0070     obs_phrase = word ( word | "." $push_current_char | CFWS )*;
0071     # pushing is done by the word rules inside
0072     phrase = (word+ | obs_phrase);
0073     
0074     obs_utext = "\0" | obs_NO_WS_CTL | VCHAR;
0075     obs_unstruct = ( ( CR* ( obs_utext | FWS )+ ) | LF+ )* CR*;
0076     unstructured = ( ( FWS? VCHAR )* WSP* ) | obs_unstruct;
0077     day_name = "Mon"i | "Tue"i | "Wed"i | "Thu"i | "Fri"i | "Sat"i | "Sun"i;  # i suffix: matched case-insensitively
0078     obs_day_of_week = CFWS? day_name CFWS?;
0079     day_of_week = ( FWS? day_name ) | obs_day_of_week;
0080     obs_day = CFWS? DIGIT{1,2} CFWS?;
0081     day = ( FWS? DIGIT{1,2} FWS ) | obs_day;
0082     month = "Jan"i | "Feb"i | "Mar"i | "Apr"i | "May"i | "Jun"i | "Jul"i | "Aug"i | "Sep"i | "Oct"i | "Nov"i | "Dec"i;
0083     obs_year = CFWS? DIGIT{2,} CFWS?;  # obsolete form: two or more digits
0084     year = ( FWS DIGIT{4,} FWS ) | obs_year;  # four or more digits between FWS, or the obsolete form
0085     date = day month year;
0086     obs_hour = CFWS? DIGIT{2} CFWS?;
0087     hour = DIGIT{2} | obs_hour;
0088     obs_minute = CFWS? DIGIT{2} CFWS?;
0089     minute = DIGIT{2} | obs_minute;
0090     obs_second = CFWS? DIGIT{2} CFWS?;
0091     second = DIGIT{2} | obs_second;
0092     time_of_day = hour ":" minute ( ":" second )?;  # the seconds part is optional
0093     obs_zone = "UT"i | "GMT"i | "EST"i | "EDT"i | "CST"i | "CDT"i | "MST"i | "MDT"i | "PST"i | "PDT"i | 0x41..0x49 | 0x4b..0x5a | 0x61..0x69 | 0x6b..0x7a;  # named zones, or any single ASCII letter except J/j
0094     zone = ( FWS ( "+" | "-" ) DIGIT{4} ) | obs_zone;  # FWS, a sign and four digits, or an obsolete zone
0095     time = time_of_day zone;
0096     date_time = ( day_of_week "," )? date time CFWS?;  # the leading day-of-week and comma are optional
0097     display_name = phrase;
0098 
0099     # the "." separators are pushed via push_current_char; the words push their own characters
0100     obs_local_part = word ( "." $push_current_char word )*;
0101 
0102     # pushing is done inside each alternative
0103     local_part = dot_atom | quoted_string | obs_local_part;
0104 
0105     # obs_NO_WS_CTL characters are pushed via push_current_char, quoted pairs via quoted_pair's own action
0106     obs_dtext = (obs_NO_WS_CTL $push_current_char) | quoted_pair;
0107 
0108     # VCHAR range without the two square brackets and the backslash, pushed via push_current_char, or obs_dtext
0109     dtext = ((0x21..0x5a | 0x5e..0x7e) $push_current_char) | obs_dtext;
0110 
0111     # the enclosing "[" and "]" are pushed as well; the FWS between dtext items carries no push action
0112     domain_literal = CFWS? "[" $push_current_char ( FWS? dtext )* FWS? "]" $push_current_char CFWS?;
0113     # NOTE(review): unlike obs_local_part, the "." separators here carry no push_current_char -- confirm this is intended
0114     obs_domain = atom ( "." atom )*;
0115     # pushing is done inside each alternative
0116     domain = dot_atom | domain_literal | obs_domain;
0117 
0118     addr_spec = local_part "@" domain;
0119     obs_domain_list = ( CFWS | "," )* "@" domain ( "," CFWS? ( "@" domain )? )*;
0120     obs_route = obs_domain_list ":";
0121     obs_angle_addr = CFWS? "<" obs_route addr_spec ">" CFWS?;
0122     angle_addr = ( CFWS? "<" addr_spec ">" CFWS? ) | obs_angle_addr;  # plain form, or the obsolete form with a route before the addr_spec
0123     name_addr = display_name? angle_addr;
0124     mailbox = name_addr | addr_spec;
0125     obs_mbox_list = ( CFWS? "," )* mailbox ( "," ( mailbox | CFWS )? )*;  # tolerates leading commas and empty list elements
0126     mailbox_list = ( mailbox ( "," mailbox )* ) | obs_mbox_list;
0127     obs_group_list = ( CFWS? "," )+ CFWS?;
0128     group_list = mailbox_list | CFWS | obs_group_list;
0129     group = display_name ":" group_list? ";" CFWS?;  # the member list between ":" and ";" may be absent
0130     address = mailbox | group;
0131     obs_addr_list = ( CFWS? "," )* address ( "," ( address | CFWS )? )*;  # tolerates leading commas and empty list elements
0132     address_list = ( address ( "," address )* ) | obs_addr_list;
0133     path = angle_addr | ( CFWS? "<" CFWS? ">" CFWS? );  # an angle_addr, or an empty "<>" with optional CFWS
0134     return = "Return-Path:"i path CRLF;  # field names are matched case-insensitively (i suffix)
0135     received_token = word | angle_addr | addr_spec | domain;
0136     received = "Received:"i received_token* ";" date_time CRLF;
0137     trace = return? received+;  # optional Return-Path followed by at least one Received field
0138     ftext = 0x21..0x39 | 0x3b..0x7e;  # VCHAR range without ":"
0139     field_name = ftext+;
0140     optional_field = field_name ":" unstructured CRLF;
0141     resent_date = "Resent-Date:"i date_time CRLF;
0142     resent_from = "Resent-From:"i mailbox_list CRLF;
0143     resent_sender = "Resent-Sender:"i mailbox CRLF;
0144     resent_to = "Resent-To:"i address_list CRLF;
0145     resent_cc = "Resent-Cc:"i address_list CRLF;
0146     resent_bcc = "Resent-Bcc:"i ( address_list | CFWS )? CRLF;  # the value may be an address list, only CFWS, or absent
0147 
0148     # pushing is done by local_part
0149     obs_id_left = local_part;
0150 
0151     # pushing is done inside each alternative
0152     id_left = dot_atom_text | obs_id_left;
0153 
0154     # the "[" and "]" are pushed via push_current_char; dtext pushes its own characters
0155     no_fold_literal = ("[" $push_current_char) dtext* ("]" $push_current_char);
0156 
0157     # pushing is done by domain
0158     obs_id_right = domain;
0159 
0160     # pushing is done inside each alternative
0161     id_right = dot_atom_text | no_fold_literal | obs_id_right;
0162     
0163     # the "@" is pushed via push_current_char; push_string_list is a leaving action (%) on the closing ">", so the id gets pushed into a list
0164     msg_id = CFWS? "<" id_left "@" $push_current_char id_right ">" %push_string_list CFWS?;
0165 
0166     resent_msg_id = "Resent-Message-ID:"i msg_id CRLF;
0167     orig_date = "Date:"i date_time CRLF;
0168     hdr_from = "From:"i mailbox_list CRLF;
0169     sender = "Sender:"i mailbox CRLF;
0170     reply_to = "Reply-To:"i address_list CRLF;
0171     hdr_to = "To:"i address_list CRLF;
0172     cc = "Cc:"i address_list CRLF;
0173     bcc = "Bcc:"i ( address_list | CFWS )? CRLF;  # the value may be an address list, only CFWS, or absent
0174     message_id = "Message-ID:"i >clear_list msg_id CRLF %got_message_id_header;  # clear_list on entering the field name (>), got_message_id_header on leaving CRLF (%)
0175     in_reply_to = "In-Reply-To:"i >clear_list msg_id+ CRLF %got_in_reply_to_header;  # same action placement; one or more msg_id
0176     references = "References:"i >clear_list msg_id+ (CRLF %got_references_header);  # same action placement; one or more msg_id
0177     subject = "Subject:"i unstructured CRLF;
0178     comments = "Comments:"i unstructured CRLF;
0179     keywords = "Keywords:"i phrase ( "," phrase )* CRLF;
0180 
0181     # RFC2369 doesn't provide a proper grammar, this is my best guess
0182     list_url = CFWS? "<" >clear_str CFWS? /[^<>\r\n]/* $push_current_char CFWS? ">" %push_string_list CFWS?;  # clear_str on entering "<", push_current_char on the characters of the regex part, push_string_list on leaving ">"
0183     list_post_urls = "List-Post:"i >clear_list list_url ( "," list_url )* unstructured? (CRLF %got_list_post_header);  # one or more comma-separated URLs, then optional trailing text
0184     list_post_no = "List-Post:"i CFWS? "NO"i unstructured? (CRLF >got_list_post_no);  # NOTE(review): got_list_post_no is an entering action (>) on CRLF, whereas the other header rules use a leaving action (%) -- confirm this is intended
0185     list_post = list_post_urls | list_post_no;
0186 
0187 
0188     fields = ( ( trace optional_field* ) | ( resent_date | resent_from | resent_sender | resent_to | resent_cc | resent_bcc | resent_msg_id )+ )* ( orig_date | hdr_from | sender | reply_to | hdr_to | cc | bcc | message_id | in_reply_to | references | subject | comments | keywords | optional_field )*;  # trace/resent blocks first, then the remaining header fields; NOTE(review): list_post is not among these alternatives -- presumably it is referenced from outside this block, confirm
0189     obs_return = "Return-Path"i WSP* ":" path CRLF;  # the obs_ variants allow WSP between the field name and the ":"
0190     obs_received = "Received"i WSP* ":" received_token* CRLF;
0191     obs_orig_date = "Date"i WSP* ":" date_time CRLF;
0192     obs_from = "From"i WSP* ":" mailbox_list CRLF;
0193     obs_sender = "Sender"i WSP* ":" mailbox CRLF;
0194     obs_reply_to = "Reply-To"i WSP* ":" address_list CRLF;
0195     obs_to = "To"i WSP* ":" address_list CRLF;
0196     obs_cc = "Cc"i WSP* ":" address_list CRLF;
0197     obs_bcc = "Bcc"i WSP* ":" ( address_list | ( ( CFWS? "," )* CFWS? ) ) CRLF;
0198     obs_message_id = "Message-ID"i WSP* ":" >clear_list msg_id (CRLF %got_message_id_header);  # here clear_list is an entering action on the ":"
0199 
0200     # RFC5322 says that phrases shall be ignored in obs_in_reply_to and obs_references; clear_str runs on entering each msg_id, presumably to drop what a preceding phrase pushed
0201     obs_in_reply_to = "In-Reply-To"i WSP* ":" >clear_list ( phrase | msg_id >clear_str )* (CRLF %got_in_reply_to_header);
0202     obs_references = "References"i WSP* ":" >clear_list ( phrase | msg_id >clear_str )* (CRLF %got_references_header);
0203 
0204     obs_subject = "Subject"i WSP* ":" unstructured CRLF;
0205     obs_comments = "Comments"i WSP* ":" unstructured CRLF;
0206     obs_phrase_list = ( phrase | CFWS )? ( "," ( phrase | CFWS )? )*;  # comma-separated list in which elements may be empty
0207     obs_keywords = "Keywords"i WSP* ":" obs_phrase_list CRLF;
0208     obs_resent_date = "Resent-Date"i WSP* ":" date_time CRLF;
0209     obs_resent_from = "Resent-From"i WSP* ":" mailbox_list CRLF;
0210     obs_resent_send = "Resent-Sender"i WSP* ":" mailbox CRLF;
0211     obs_resent_rply = "Resent-Reply-To"i WSP* ":" address_list CRLF;
0212     obs_resent_to = "Resent-To"i WSP* ":" address_list CRLF;
0213     obs_resent_cc = "Resent-Cc"i WSP* ":" address_list CRLF;
0214     obs_resent_bcc = "Resent-Bcc"i WSP* ":" ( address_list | ( ( CFWS? "," )* CFWS? ) ) CRLF;
0215     obs_resent_mid = "Resent-Message-ID"i WSP* ":" msg_id CRLF;
0216     obs_optional = field_name WSP* ":" unstructured CRLF;
0217     obs_fields = ( obs_return | obs_received | obs_orig_date | obs_from | obs_sender | obs_reply_to | obs_to | obs_cc | obs_bcc | obs_message_id | obs_in_reply_to | obs_references | obs_subject | obs_comments | obs_keywords | obs_resent_date | obs_resent_from | obs_resent_send | obs_resent_rply | obs_resent_to | obs_resent_cc | obs_resent_bcc | obs_resent_mid | obs_optional )*;  # any number of obsolete-form fields, in any order
0218     text = 0x01..0x09 | "\v" | "\f" | 0x0e..0x7f;  # 0x01..0x7f without LF and CR
0219     obs_body = 0x00..0x7f*;  # any run of 7-bit characters, NUL included
0220     body = ( ( text{,998} CRLF )* text{,998} ) | obs_body;  # CRLF-separated lines of at most 998 text characters, or obs_body
0221     message = ( fields | obs_fields ) ( CRLF body )?;  # header section, optionally followed by an empty line and the body
0222 }%%