/* -*- c -*- elmo - ELectronic Mail Operator Copyright (C) 2002, 2003, 2004 rzyjontko This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ------------------------------------------------------------ */ %{ #define __USE_XOPEN #define _GNU_SOURCE /**************************************************************************** * IMPLEMENTATION HEADERS ****************************************************************************/ #include #include #include #include #include #include #include #ifdef HAVE_CONFIG_H # include #endif #ifdef HAVE_LOCALE_H # include #endif #include "xmalloc.h" #include "mlex.h" #include "mail.h" #include "mbox.h" #include "hash.h" #include "elmo.h" #include "address.h" #include "raddress.h" #include "error.h" #include "rstring.h" #include "mime.h" #include "rmime.h" #include "compose.h" #include "misc.h" /**************************************************************************** * IMPLEMENTATION PRIVATE DEFINITIONS / ENUMERATIONS / SIMPLE TYPEDEFS ****************************************************************************/ #define EXECUTE(fun) ((fun) ? fun (), 0 : 0) #define YY_DECL int mlex_scan YY_PROTO ((void)) #define YY_BREAK offset += yyleng; break; /* this means currently processed message, that is going to be added */ #define a_mail (mail + mail_index) #define ATEXT "([[:alpha:][:digit:]!#\\$%&\\\\\\'\\*\\+\\/\\=\\?\\^_\\`" \ "\\{\\}\\~\\|\\-])" #define DOT_ATOM_TEXT "(" ATEXT "+(\\." 
ATEXT "+)*)" #define DOT_ATOM "(" DOT_ATOM_TEXT ")" #define EMAIL_RE "(" DOT_ATOM "@" DOT_ATOM ")" #define FOR_EMAIL_RE "for <" EMAIL_RE ">;" #define WITH_SMTP_RE "with E?SMTP" #define MAX_BOUNDARY_LEN 80 /**************************************************************************** * IMPLEMENTATION PRIVATE CLASS PROTOTYPES / EXTERNAL CLASS REFERENCES ****************************************************************************/ /**************************************************************************** * IMPLEMENTATION PRIVATE STRUCTURES / UTILITY CLASSES ****************************************************************************/ struct bound { char boundary[MAX_BOUNDARY_LEN]; int boundary_len; mime_t *mime; }; /**************************************************************************** * IMPLEMENTATION REQUIRED EXTERNAL REFERENCES (AVOID) ****************************************************************************/ /**************************************************************************** * IMPLEMENTATION PRIVATE DATA ****************************************************************************/ /* offset means how many bytes we have already read mail[] is where we write info about letter currently being read the one pointed by newmail (see below) is for reading the second one is for writing mail_index may be 0 or 1, says which mail[] is for writing */ static unsigned offset = 0; static mail_t mail[2]; static int mail_index = 0; /* We use stack to push, and pop all necessary data each time we encounter multipart mime type. It has a constant size for the sake of simplicity. */ static struct bound stack[5]; static int top; /* A regular expression used to get the boundary from the header. */ static int re_compiled = 0; static regex_t bound_re; /** * This shows if we are going to get EXACTLY ONE mail. It is possible * that a mail in maildir has line beginning with "From_", just like * in mbox. We must reject the rule then. 
*/ static int only_one = 0; /* This is how I determine if custom headers should be saved in headers rstring. */ static int collect_custom_headers = 0; /** * This holds a pointer to function which should be called after \n\n, * which ends header. It may reset this variable, not to be called * once for each mime-header. */ static void (*after_header_action)(void) = NULL; /**************************************************************************** * INTERFACE DATA ****************************************************************************/ mail_t *newmail = mail + 0; /**************************************************************************** * IMPLEMENTATION PRIVATE FUNCTION PROTOTYPES ****************************************************************************/ static char *unfold_content (char *content); static void clear_mail (time_t tm); static void write_headers (void); static mime_t *parent_mime (void); static mime_t *top_mime (void); static mime_t *top_or_parent_mime (void); static char *top_boundary (void); static int top_boundary_len (void); static void pop (void); static void push (char *boundary, int len); static void replace_top_mime (mime_t *mime); /**************************************************************************** * INTERFACE FUNCTIONS ****************************************************************************/ %} %x HEADER FINISH DATE [a-zA-Z]{3}\ [a-zA-Z]{3}\ \ ?[0-9][0-9]? 
/* Named sub-patterns used by the rules below. */
TIME [0-9]{2}:[0-9]{2}:[0-9]{2}
YEAR [0-9]{4}
HEADER_CONTENT [^\n\r]*(\r?\n[ \t][^\n\r]+)*
TEXT_PLAIN_CHARSET [ \t]*text\/plain.*(;charset[ \t]*=[^\r\n;]+)?.*

%option noyywrap

%%

^From\ [^ \n\t]+\ +{DATE}\ {TIME}\ {YEAR} {
  /*
   * mbox separator line: "From <sender> <weekday> <month> <day> <time> <year>".
   *
   * Finishes the message collected so far (it becomes visible through
   * newmail), switches the write slot, and starts a fresh message whose
   * date is taken from this line.  Returns NEXT_MAIL on success and
   * BROKEN_MAIL when the timestamp cannot be parsed completely.
   *
   * When only_one is set the line is treated as ordinary text: only the
   * offset is advanced.
   */
  struct tm  tm;
  char      *begin;
  char      *tmp;
  mime_t    *mime;

  if (only_one){
    YY_BREAK;
  }

  newmail    = mail + mail_index;
  mail_index = 1 - mail_index;

  /* finish last mail */
  if (!newmail->reply_to)
    newmail->reply_to = newmail->from;
  mime = top_mime ();
  if (mime){
    mime->off_bound = offset;
    mime->off_end   = offset;
  }

  /* this rule ends with return, so it does not reach YY_BREAK */
  offset += yyleng;

  /* begin new mail: skip "From", the sender and any run of spaces, so
     that begin points at the last space in front of the weekday */
  begin = strchr (yytext, ' ');
  begin = strchr (begin + 1, ' ');
  while (*(begin + 1) == ' ')
    begin++;

  /* strptime is only guaranteed to set the fields named by the format;
     clear everything else so that mktime never reads indeterminate
     values, and let it determine daylight saving time by itself */
  memset (&tm, 0, sizeof (tm));
  tm.tm_isdst = -1;

  /* begin + 5 skips the space, the 3-letter weekday and one more space */
  tmp = strptime (begin + 5, "%b %e %T %Y", &tm);
  if (!tmp){
    error_ (0, "wrong time string");
    return BROKEN_MAIL;
  }
  if (*tmp){
    error_ (0, "not all parsed");
    return BROKEN_MAIL;
  }
  else {
    offset++;     /* to workaround \n that doesn't match */
    clear_mail (mktime (&tm));
    offset--;
  }

  BEGIN (HEADER);
  return NEXT_MAIL;
}
^User\-Agent:{HEADER_CONTENT} {
  /*
   * User-Agent header: remembers the sender's mail program in
   * a_mail->mua unless an earlier header already supplied one.
   */
  char *tmp = yytext + 11;

  /* <ctype.h> classifiers are only defined for unsigned char values (or
     EOF); a plain char holding an 8-bit byte may be negative */
  while (isspace ((unsigned char) *tmp))
    tmp++;

  if (a_mail->mua == NULL){
    a_mail->mua = unfold_content (tmp);
  }
}
^X\-Mailer:{HEADER_CONTENT} {
  /*
   * X-Mailer header: remembers the sender's mail program in
   * a_mail->mua unless an earlier header already supplied one.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp = yytext + (yyleng > 10 ? 10 : yyleng);

  /* <ctype.h> classifiers are only defined for unsigned char values (or
     EOF); a plain char holding an 8-bit byte may be negative */
  while (isspace ((unsigned char) *tmp))
    tmp++;

  if (a_mail->mua == NULL){
    a_mail->mua = unfold_content (tmp);
  }
}
^Content\-Type:{HEADER_CONTENT} {
  /*
   * Content-Type header: hands the header text to mime_set_from_header
   * for the current part and, when the type is multipart, pushes its
   * boundary string on the boundary stack.
   */
  int         index;
  int         ret;
  int         len = 0;
  regmatch_t  matches[4];
  mime_t     *mime = top_mime ();
  char       *str  = yytext + 12;

  /* The slot on top of the stack has no mime object right after a push
     (e.g. a second Content-Type in the same header block); guard it the
     same way the sibling header rules do. */
  if (mime)
    mime_set_from_header (mime, str);

  ret = regexec (& bound_re, str, 4, matches, 0);
  if (ret == 0){
    /* group 2 is the quoted form of the boundary, group 3 the bare one */
    index = (matches[2].rm_so != -1) ? 2 : 3;
    len   = matches[index].rm_eo - matches[index].rm_so;
    push (str + matches[index].rm_so, len);
  }
  else if (ret != REG_NOMATCH)
    error_regex (ret, & bound_re, "boundary regexp");
}
^Content\-Disposition:{HEADER_CONTENT} {
  /*
   * Content-Disposition header: passes everything after the colon to
   * mime_complete_file_name for the current part (or its parent when the
   * top slot is empty).
   */
  mime_t *part = top_or_parent_mime ();

  if (part != NULL){
    mime_complete_file_name (part, yytext + 20);
  }
}
^Content\-Transfer\-Encoding:{HEADER_CONTENT} {
  /*
   * Content-Transfer-Encoding header: maps the value to one of the MENC_*
   * constants by substring search.  The first letter of each keyword is
   * left out of the needles so that e.g. "Quoted" and "quoted" both match
   * "uoted".  Anything unrecognised yields MENC_NONE.
   */
  const char *value = yytext + 26;
  mime_t     *part  = top_mime ();

  if (part != NULL){
    if (strstr (value, "uoted") || strstr (value, "UOTED")){
      part->encoding = MENC_QP;
    }
    else if (strstr (value, "ase64") || strstr (value, "ASE64")){
      part->encoding = MENC_BASE64;
    }
    else if (strstr (value, "uencode") || strstr (value, "UENCODE")){
      part->encoding = MENC_UUENCODE;
    }
    else if (strstr (value, "7bit") || strstr (value, "7BIT")){
      part->encoding = MENC_7BIT;
    }
    else if (strstr (value, "8bit") || strstr (value, "8BIT")){
      part->encoding = MENC_8BIT;
    }
    else {
      part->encoding = MENC_NONE;
    }
  }
}
^Date:{HEADER_CONTENT} {
  /*
   * Date header: keeps the unfolded text in a_mail->date_str and, when it
   * can be parsed, stores the corresponding time in a_mail->date.  If
   * parsing fails the date set earlier (see clear_mail) is left alone
   * instead of being overwritten with leftovers of a previous message.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char      *tmp  = yytext + (yyleng > 6 ? 6 : yyleng);
  char      *date = unfold_content (tmp);
  struct tm  tm;

  a_mail->date_str = date;

  /* skip the optional weekday: parsing starts at the first digit */
  while (*date && ! isdigit ((unsigned char) *date))
    date++;

  /* start from a clean slate for every message; mktime modifies the
     structure it is given, so reusing one across calls leaks state */
  memset (&tm, 0, sizeof (tm));

#ifdef HAVE_LOCALE_H
  setlocale (LC_ALL, "C");
#endif

  /**
   * here is a hack that makes it possible to parse a date with
   * obsolete year notation (2 - digits), we have to try the obsolete
   * one first, and check if it was possible to get the date
   */
  tmp = strptime (date, "%d %b %y %H:%M:%S", &tm);
  if (tmp == NULL)
    tmp = strptime (date, "%d %b %Y %H:%M:%S", &tm);

#ifdef HAVE_LOCALE_H
  setlocale (LC_ALL, "");
#endif

  if (tmp != NULL)
    a_mail->date = mktime (&tm);
}
^Subject:{HEADER_CONTENT} {
  /*
   * Subject header: stores the unfolded value in a_mail->subject.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp = yytext + (yyleng > 9 ? 9 : yyleng);

  a_mail->subject = unfold_content (tmp);
}
^From:{HEADER_CONTENT} {
  /*
   * From header: parses the unfolded value with address_from_string and
   * stores the result in a_mail->from.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp  = yytext + (yyleng > 6 ? 6 : yyleng);
  char *from = unfold_content (tmp);

  a_mail->from = address_from_string (from);
  xfree (from);
}
^Sender:{HEADER_CONTENT} {
  /*
   * Sender header: used as the author address only when no From address
   * has been recorded so far.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp    = yytext + (yyleng > 8 ? 8 : yyleng);
  char *sender = unfold_content (tmp);

  if (a_mail->from == NULL)
    a_mail->from = address_from_string (sender);
  xfree (sender);
}
^To:{HEADER_CONTENT} {
  /*
   * To header: parses the unfolded value with raddress_get_from_header
   * and stores the result in a_mail->to.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp = yytext + (yyleng > 4 ? 4 : yyleng);
  char *to  = unfold_content (tmp);

  a_mail->to = raddress_get_from_header (to);
  xfree (to);
}
^CC:{HEADER_CONTENT} {
  /*
   * CC header: parses the unfolded value with raddress_get_from_header
   * and stores the result in a_mail->cc.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp = yytext + (yyleng > 4 ? 4 : yyleng);
  char *cc  = unfold_content (tmp);

  a_mail->cc = raddress_get_from_header (cc);
  xfree (cc);
}
^Bcc:{HEADER_CONTENT} {
  /*
   * Bcc header: parses everything after the colon with
   * raddress_get_from_header and stores the result in a_mail->bcc.
   */
  char *raw = unfold_content (yytext + 4);

  a_mail->bcc = raddress_get_from_header (raw);
  xfree (raw);
}
^Message\-ID:{HEADER_CONTENT} {
  /*
   * Message-ID header: stores the unfolded value in a_mail->msg_id.
   *
   * When no mail program is known yet and the id contains "Pine", the
   * prefix of the id starting at "Pine" and ending before the last '.'
   * found within its first 14 characters is copied into a_mail->mua.
   */
  char    saved;
  char   *pine;
  size_t  avail;

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char   *tmp = yytext + (yyleng > 12 ? 12 : yyleng);

  a_mail->msg_id = unfold_content (tmp);

  if (a_mail->mua == NULL){
    tmp = strstr (a_mail->msg_id, "Pine");
    if (tmp){
      /* start the backward search at most 13 characters in, but never
         beyond the end of the string */
      avail = strlen (tmp);
      pine  = tmp + (avail < 13 ? avail : 13);

      /* stop at the 'P' of "Pine": without this bound an id with no '.'
         in range would make the search run off the front of the buffer */
      while (pine > tmp && *pine != '.')
        pine--;

      if (pine > tmp){
        /* cut the string temporarily to duplicate just the prefix */
        saved       = *pine;
        *pine       = '\0';
        a_mail->mua = xstrdup (tmp);
        *pine       = saved;
      }
    }
  }
}
^In\-Reply\-To:{HEADER_CONTENT} {
  /*
   * In-Reply-To header: unless a_mail->in_reply_to is already set, stores
   * the unfolded value as a one-element rstring there.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char      *tmp = yytext + (yyleng > 13 ? 13 : yyleng);
  char      *irt;
  rstring_t *result;

  if (a_mail->in_reply_to == NULL){
    irt    = unfold_content (tmp);
    result = rstring_create_size (2);
    result->allocated_first = 1;
    rstring_add (result, irt);
    rstring_shrink (result);
    a_mail->in_reply_to = result;
  }
}
^References:{HEADER_CONTENT} {
  /*
   * References header: replaces any in_reply_to list collected so far
   * with the unfolded value split on runs of whitespace.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp = yytext + (yyleng > 12 ? 12 : yyleng);

  if (a_mail->in_reply_to){
    rstring_delete (a_mail->in_reply_to);
  }

  a_mail->in_reply_to = rstring_split_re (unfold_content (tmp),
                                          "[ \t\r\n]+");
  a_mail->in_reply_to->allocated_first = 1;
  rstring_shrink (a_mail->in_reply_to);
}
^Reply\-To:([^\n\r]:)*{HEADER_CONTENT} {
  /*
   * Reply-To header: parses the unfolded value with address_from_string
   * and stores the result in a_mail->reply_to.
   */

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp      = yytext + (yyleng > 10 ? 10 : yyleng);
  char *reply_to = unfold_content (tmp);

  a_mail->reply_to = address_from_string (reply_to);
  xfree (reply_to);
}
^Status:{HEADER_CONTENT} {
  /*
   * Status header: 'R' sets FLAG_READ and 'O' sets FLAG_OLD on the
   * current message.  The header is ignored while only_one is set.
   */
  char *i;

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp = yytext + (yyleng > 8 ? 8 : yyleng);

  if (! only_one){
    tmp = unfold_content (tmp);
    for (i = tmp; *i; i++){
      switch (*i){

        case 'R':
          a_mail->flags |= FLAG_READ;
          break;

        case 'O':
          a_mail->flags |= FLAG_OLD;
          break;

        default:
          break;
      }
    }
    xfree (tmp);
  }
}
^X\-Status:{HEADER_CONTENT} {
  /*
   * X-Status header: 'A' sets FLAG_ANSWERED on the current message; all
   * other characters are ignored.
   */
  char *i;

  /* HEADER_CONTENT may match nothing, in which case the match ends right
     after the colon; never step past the terminating NUL of yytext */
  char *tmp = yytext + (yyleng > 10 ? 10 : yyleng);

  tmp = unfold_content (tmp);
  for (i = tmp; *i; i++){
    switch (*i){

      case 'A':
        a_mail->flags |= FLAG_ANSWERED;
        break;

      default:
        break;
    }
  }
  xfree (tmp);
}
^Received:{HEADER_CONTENT} {
  /*
   * Received header: the first one that matches both WITH_SMTP_RE and
   * FOR_EMAIL_RE provides a_mail->recv_for, i.e. the address found
   * between "for <" and ">;".  Once recv_for is set, further Received
   * headers are skipped.
   */
  regmatch_t  matches[1];
  char       *recv_for;
  char       *email;
  int         size;

  if (a_mail->recv_for != NULL){
    YY_BREAK;
  }

  recv_for = unfold_content (yytext + 9);

  if (misc_regex (WITH_SMTP_RE, recv_for, matches)
      && misc_regex (FOR_EMAIL_RE, recv_for, matches)){
    /* the match carries 7 bytes of decoration ("for <" and ">;");
       one extra byte is reserved for the terminating NUL */
    size  = matches[0].rm_eo - matches[0].rm_so - 7 + 1;
    email = xmalloc (size);
    memcpy (email, recv_for + matches[0].rm_so + 5, size - 1);
    email[size - 1] = '\0';

    a_mail->recv_for = address_from_string (email);
    xfree (email);
  }

  xfree (recv_for);
}
^X\-Elmo\-SMTP:{HEADER_CONTENT} {
  /*
   * X-Elmo-SMTP header: stores the unfolded value in a_mail->smtp.
   */
  char *value = unfold_content (yytext + 12);

  a_mail->smtp = value;
}
\r?\n\r?\n {
  /*
   * Empty line: the header block is over.  Switches back to the INITIAL
   * state, runs the pending after-header hook (if one is installed) and
   * records where the body of the current part starts.
   */
  mime_t *part = top_or_parent_mime ();

  BEGIN (INITIAL);

  if (after_header_action){
    after_header_action ();
  }

  if (part != NULL){
    part->off_start = offset + yyleng;
  }
}
{HEADER_CONTENT} {
  /*
   * Any other header line.  When collect_custom_headers is set the
   * unfolded line is appended to a_mail->headers, which is created on
   * first use; otherwise the line is only counted in offset.
   */
  char *line;

  if (! collect_custom_headers){
    YY_BREAK;
  }

  line = unfold_content (yytext);

  if (! a_mail->headers){
    a_mail->headers = rstring_create ();
    a_mail->headers->allocated_all = 1;
  }

  rstring_add (a_mail->headers, line);
}
\r?\n    /* single line break: nothing to do beyond the offset update performed by YY_BREAK */
\r+

\-\-.+\r?\n {
  /*
   * Line starting with "--": a possible MIME boundary.
   *
   * Nothing happens unless the line contains the boundary string on top
   * of the boundary stack.  A boundary followed by "--" closes the
   * current multipart level (pop).  Any other occurrence ends the
   * current part, creates a new part below the parent and switches to
   * the HEADER state to read the new part's header.
   */
  int     len;
  char   *boundary;
  char   *seek;
  mime_t *mime;
  mime_t *parent;

  if (top == 0){
    YY_BREAK;
  }

  boundary = top_boundary ();
  len      = top_boundary_len ();

  if (boundary == NULL){
    YY_BREAK;
  }

  seek = strstr (yytext + 2, boundary);

  if (seek == NULL){
    YY_BREAK;
  }

  if (seek[len] == '-' && seek[len + 1] == '-'){
    /* closing boundary */
    mime = top_or_parent_mime ();
    if (mime){
      mime->off_bound = offset + yyleng;
      mime->off_end   = offset;
    }
    pop ();
    YY_BREAK;
  }

  /* A stack entry may carry a boundary without a mime object (push
     leaves the new top empty); without a parent there is nothing to
     attach a new part to, so the line is treated as plain body text
     instead of dereferencing NULL. */
  parent = parent_mime ();
  if (parent == NULL){
    YY_BREAK;
  }

  mime = top_or_parent_mime ();
  if (mime){
    mime->off_bound = offset;
    mime->off_end   = offset - 1;
  }

  if (parent->parts == NULL)
    parent->parts = rmime_create_size (4);

  mime             = mime_create ();
  mime->off_header = offset + yyleng;
  rmime_add (parent->parts, mime);
  replace_top_mime (mime);
  BEGIN (HEADER);
}

.+

\n+
<> {
  /* NOTE(review): the pattern "<>" looks like a damaged end-of-file rule
     (start condition and EOF marker missing) -- confirm against the
     pristine file.  Judging by the return value this is the end of input
     reached while still reading a header. */
  only_one = 0;
  offset   = 0;
  mail_destroy (a_mail, BOX_MAILDIR);
  BEGIN (INITIAL);
  return EOF_AT_HEADER;
}

.+

\n+

<> {
  /* NOTE(review): pattern looks damaged, see the first "<>" rule. */
  BEGIN (INITIAL);
  return END_OF_FILE;

  /**
   * this is to get rid of warnings (never executed)
   */
  yyunput (0, NULL);
  yy_flex_realloc (0, 0);
}

<> {
  /* NOTE(review): pattern looks damaged, see the first "<>" rule.
     Closes the message being collected, publishes it through newmail
     and enters the FINISH state. */
  mime_t *mime;

  mime = top_or_parent_mime ();

  if (mime == NULL)
    return END_OF_FILE;

  mime->off_bound = offset;
  mime->off_end   = offset;

  if (! a_mail->reply_to)
    a_mail->reply_to = a_mail->from;

  BEGIN (FINISH);
  newmail  = mail + mail_index;
  only_one = 0;
  offset   = 0;
  return NEXT_MAIL;
}

%%

/**
 * Compiles the regular expression that extracts the boundary parameter
 * of a multipart Content-Type.  Calls error_critical when compilation
 * fails.
 */
void
mlex_init (void)
{
  int ret;

  ret = regcomp (& bound_re,
                 "multipart.*boundary=(\"([^\"]+)\"|([^\"][^; \\-]*);?)",
                 REG_ICASE | REG_EXTENDED);

  if (ret)
    error_critical (1, 0, "internal error");

  re_compiled = 1;
}


/**
 * Releases the boundary regular expression if it has been compiled.
 * Safe to call more than once.
 */
void
mlex_free_resources (void)
{
  if (re_compiled)
    regfree (& bound_re);
  re_compiled = 0;
}


/**
 * Scans exactly one message from yyin, starting in the HEADER state.
 *
 * tm is stored as the initial date of the message (see clear_mail).
 * Returns whatever mlex_scan returned.
 */
int
mlex_scan_file (time_t tm)
{
  int             ret;
  YY_BUFFER_STATE buffer = yy_create_buffer (yyin, YY_BUF_SIZE);

  collect_custom_headers = 0;
  only_one               = 1;
  after_header_action    = NULL;

  clear_mail (tm);
  BEGIN (HEADER);

  yy_switch_to_buffer (buffer);
  ret = mlex_scan ();
  yy_delete_buffer (buffer);

  BEGIN (INITIAL);
  return ret;
}


/**
 * Scans exactly one message held in the NUL-terminated string buf.
 *
 * The scanner buffer that was current on entry is restored afterwards.
 * Returns NEXT_MAIL when mlex_scan returned EOF_AT_HEADER or NEXT_MAIL,
 * and BROKEN_MAIL otherwise.
 */
int
mlex_scan_buffer (char *buf)
{
  int             ret;
  YY_BUFFER_STATE state;
  YY_BUFFER_STATE old_state = YY_CURRENT_BUFFER;

  collect_custom_headers = 0;
  only_one               = 1;
  after_header_action    = NULL;

  clear_mail (0);
  BEGIN (HEADER);

  state = yy_scan_string (buf);
  ret   = mlex_scan ();
  yy_delete_buffer (state);
  yy_switch_to_buffer (old_state);

  if (ret == EOF_AT_HEADER || ret == NEXT_MAIL)
    return NEXT_MAIL;
  return BROKEN_MAIL;
}


/**
 * Scans one outgoing message from yyin.  Unrecognised headers are
 * collected, and write_headers runs once at the end of the header.
 * The current time is used as the initial date of the message.
 * Returns whatever mlex_scan returned.
 */
int
mlex_outmail_scan (void)
{
  int             ret;
  time_t          tm     = time (NULL);
  YY_BUFFER_STATE buffer = yy_create_buffer (yyin, YY_BUF_SIZE);

  only_one               = 1;
  collect_custom_headers = 1;
  after_header_action    = write_headers;

  clear_mail (tm);
  BEGIN (HEADER);

  yy_switch_to_buffer (buffer);
  ret = mlex_scan ();
  yy_delete_buffer (buffer);

  BEGIN (INITIAL);
  return ret;
}


/**
 * Begins scanning yyin in the INITIAL state with only_one cleared, after
 * zeroing both mail slots.  Returns the result of the first mlex_scan
 * call.  The buffer created here is not deleted by this function.
 */
int
mlex_mbox_scan_start (void)
{
  YY_BUFFER_STATE buffer = yy_create_buffer (yyin, YY_BUF_SIZE);

  collect_custom_headers = 0;
  only_one               = 0;
  after_header_action    = NULL;

  memset (mail, '\0', sizeof (mail));

  BEGIN (INITIAL);

  yy_switch_to_buffer (buffer);
  return mlex_scan ();
}


/****************************************************************************
 *    STACK OPERATIONS
 ****************************************************************************/

/* Empties the top slot and, unless the stack is already at its bottom,
   moves the top down by one level. */
static void
pop (void)
{
  stack[top].mime = NULL;

  if (top == 0){
    return;
  }

  top--;
}


/* Stores boundary (len bytes, truncated to fit) in the current top slot
   and opens a new, empty slot above it.

   The stack has a fixed size and the nesting depth comes from the
   message itself, so a push that would move top past the last slot is
   ignored; parts nested deeper than that are treated as plain content
   instead of corrupting memory behind the array. */
static void
push (char *boundary, int len)
{
  const int last_slot = (int) (sizeof (stack) / sizeof (stack[0])) - 1;

  if (top >= last_slot)
    return;

  if (len < 0)
    len = 0;
  if (len >= MAX_BOUNDARY_LEN)
    len = MAX_BOUNDARY_LEN - 1;

  memcpy (stack[top].boundary, boundary, len);
  stack[top].boundary[len] = '\0';
  stack[top].boundary_len  = len;
  top++;
  stack[top].mime          = NULL;
}


/* Sets the mime object of the top slot. */
static void
replace_top_mime (mime_t *mime)
{
  stack[top].mime = mime;
}


/* Returns the mime object one level below the top, or NULL when the
   stack is at its bottom. */
static mime_t *
parent_mime (void)
{
  if (top <= 0)
    return NULL;

  return stack[top - 1].mime;
}


/* Returns the mime object of the top slot (may be NULL). */
static mime_t *
top_mime (void)
{
  if (top < 0)
    return NULL;

  return stack[top].mime;
}


/* Returns the mime object of the top slot or, when that is NULL, the
   one a level below; NULL if neither exists. */
static mime_t *
top_or_parent_mime (void)
{
  mime_t *result = top_mime ();

  if (result)
    return result;

  if (top <= 0)
    return NULL;

  return stack[top - 1].mime;
}


/* Returns the boundary string in effect (stored one level below the
   top), or NULL when the stack is at its bottom. */
static char *
top_boundary (void)
{
  if (top <= 0)
    return NULL;

  return stack[top - 1].boundary;
}


/* Returns the length of the boundary string in effect, or -1 when the
   stack is at its bottom. */
static int
top_boundary_len (void)
{
  if (top <= 0)
    return -1;

  return stack[top - 1].boundary_len;
}


/****************************************************************************
 *    IMPLEMENTATION PRIVATE FUNCTIONS
 ****************************************************************************/

/*
  Copies header content into freshly allocated memory, dropping leading
  whitespace and every '\r', and passes the copy through
  mime_decode_header.  The returned string should be freed after use.

  NOTE(review): line feeds of folded headers are not removed here;
  presumably mime_decode_header deals with them -- confirm.
*/
static char *
unfold_content (char *content)
{
  char *result     = xmalloc (strlen (content) + 1);
  char *result_ptr = result;
  char *ptr;

  /* <ctype.h> classifiers are only defined for unsigned char values (or
     EOF); a plain char holding an 8-bit byte may be negative */
  while (*content && isspace ((unsigned char) *content))
    content++;

  for (ptr = content; *ptr; ptr++){
    switch (*ptr){

      case '\r':
        break;

      default:
        *result_ptr = *ptr;
        result_ptr++;
        break;
    }
  }
  *result_ptr = '\0';

  result = mime_decode_header (result, result_ptr - result, 1);

  return result;
}


/* Resets the mail slot currently used for writing: clears it, gives it
   a fresh mime description whose header starts at the current offset,
   sets its date to tm and resets the boundary stack so that its only
   entry is the top-level mime object of the message. */
static void
clear_mail (time_t tm)
{
  mail_clear (a_mail);

  a_mail->mime                   = mime_info_create ();
  a_mail->mime->mime->off_header = offset;
  a_mail->place.offset_header    = offset;
  a_mail->date                   = tm;

  top           = 0;
  stack[0].mime = a_mail->mime->mime;
}


/* After-header hook for outgoing mail: replaces the message id and the
   in_reply_to list with the values supplied by the compose module, keeps
   an existing date string or asks the compose module for one, and then
   uninstalls itself so that it runs only once per message. */
static void
write_headers (void)
{
  char      *msg_id      = compose_msg_id ();
  rstring_t *in_reply_to = compose_in_reply_to ();
  char      *date;

  if (a_mail->msg_id)
    xfree (a_mail->msg_id);
  if (a_mail->in_reply_to)
    rstring_delete (a_mail->in_reply_to);

  if (a_mail->date_str)
    date = a_mail->date_str;
  else
    date = compose_date ();

  a_mail->msg_id      = msg_id;
  a_mail->date_str    = date;
  a_mail->in_reply_to = in_reply_to;

  after_header_action = NULL;
}


/****************************************************************************
 *    INTERFACE CLASS BODIES
 ****************************************************************************/
/****************************************************************************
 *
 *    END MODULE mlex.l
 *
 ****************************************************************************/