/* -*- c -*-

   elmo - ELectronic Mail Operator

   Copyright (C) 2002, 2003, 2004 rzyjontko

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  

   ------------------------------------------------------------
*/

%{

#define __USE_XOPEN
#define _GNU_SOURCE

/****************************************************************************
 *    IMPLEMENTATION HEADERS
 ****************************************************************************/

#include <stdio.h>
#include <time.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <regex.h>

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#ifdef HAVE_LOCALE_H
# include <locale.h>
#endif

#include "xmalloc.h"  
#include "mlex.h"
#include "mail.h"
#include "mbox.h"
#include "hash.h"
#include "elmo.h"
#include "address.h"
#include "raddress.h"
#include "error.h"
#include "rstring.h"
#include "mime.h"
#include "rmime.h"
#include "compose.h"
#include "misc.h"

/****************************************************************************
 *    IMPLEMENTATION PRIVATE DEFINITIONS / ENUMERATIONS / SIMPLE TYPEDEFS
 ****************************************************************************/
  
/* Call FUN when it is a non-NULL function pointer; the expression always
   evaluates to 0. */
#define EXECUTE(fun) ((fun) ? fun (), 0 : 0)
/* Name and signature of the generated scanner function. */
#define YY_DECL  int mlex_scan YY_PROTO ((void))
/* Executed at the end of every action that does not return: account for
   the bytes of the matched token before leaving the action. */
#define YY_BREAK offset += yyleng; break;


/* The message currently being filled in by the scanner (the "write" slot
   of mail[]). */
#define a_mail (mail + mail_index)

/* POSIX extended regex fragments used to pull the "for <address>;" clause
   out of a Received header.  The names mirror the atext / dot-atom
   productions of the mail address grammar (presumably RFC 2822 -- TODO
   confirm). */
#define ATEXT "([[:alpha:][:digit:]!#\\$%&\\\\\\'\\*\\+\\/\\=\\?\\^_\\`" \
              "\\{\\}\\~\\|\\-])"
#define DOT_ATOM_TEXT "(" ATEXT "+(\\." ATEXT "+)*)"
#define DOT_ATOM "(" DOT_ATOM_TEXT ")"
#define EMAIL_RE "(" DOT_ATOM "@" DOT_ATOM ")"
#define FOR_EMAIL_RE "for <" EMAIL_RE ">;"
#define WITH_SMTP_RE "with E?SMTP"

/* Size of the buffer holding a multipart boundary, including the
   terminating NUL; longer boundaries are truncated by push (). */
#define MAX_BOUNDARY_LEN 80

/****************************************************************************
 *    IMPLEMENTATION PRIVATE CLASS PROTOTYPES / EXTERNAL CLASS REFERENCES
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE STRUCTURES / UTILITY CLASSES
 ****************************************************************************/

        /* One level of the multipart stack: the boundary string announced
           by a multipart Content-Type header and the MIME part tracked at
           that level. */
        struct bound {
                char    boundary[MAX_BOUNDARY_LEN];  /* NUL-terminated copy */
                int     boundary_len;                /* strlen (boundary) */
                mime_t *mime;                        /* may be NULL */
        };

/****************************************************************************
 *    IMPLEMENTATION REQUIRED EXTERNAL REFERENCES (AVOID)
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE DATA
 ****************************************************************************/

/*
  offset      number of input bytes consumed so far (advanced by YY_BREAK)
  mail[]      two message slots used alternately: the scanner fills in
              mail[mail_index] (see a_mail) while the previously completed
              message stays available to the caller through newmail
  mail_index  0 or 1, selects the slot that is currently written
*/
        static unsigned offset     = 0;
        static mail_t   mail[2];
        static int      mail_index = 0;


        /*
          Stack of multipart nesting levels.  A level is pushed when a
          multipart Content-Type header is seen and popped at its closing
          boundary.  It has a constant size (5 entries) for the sake of
          simplicity; top is the index of the current level.
        */
        static struct bound stack[5];
        static int          top;

        /* Compiled regular expression used to get the boundary from the
           Content-Type header; re_compiled is non-zero while bound_re
           holds a compiled pattern (see mlex_init). */
        static int     re_compiled = 0;
        static regex_t bound_re;
        
        /**
         * Non-zero when the input holds EXACTLY ONE mail.  A mail stored
         * in a maildir may contain a line beginning with "From_", just
         * like an mbox separator, so the mbox separator rule must be
         * ignored in that case.
         */
        static int      only_one = 0;


        /* Non-zero when headers not handled by a dedicated rule should be
           saved in the mail's headers rstring. */
        static int collect_custom_headers = 0;


        /**
         * Function called after the empty line that ends a header block,
         * or NULL.  The callee may reset this variable so that it is not
         * called again for each MIME part header.
         */
        static void (*after_header_action)(void) = NULL;


/****************************************************************************
 *    INTERFACE DATA
 ****************************************************************************/

        /* The most recently completed message; re-pointed by the scanner
           each time it returns NEXT_MAIL. */
        mail_t *newmail = mail + 0;

/****************************************************************************
 *    IMPLEMENTATION PRIVATE FUNCTION PROTOTYPES
 ****************************************************************************/

        /* Header value helpers and per-message setup. */
        static char *unfold_content (char *content);
        static void  clear_mail (time_t tm);
        static void  write_headers (void);

        /* Accessors and mutators of the multipart stack (stack[] / top). */
        static mime_t *parent_mime (void);
        static mime_t *top_mime (void);
        static mime_t *top_or_parent_mime (void);
        static char   *top_boundary (void);
        static int     top_boundary_len (void);
        static void    pop (void);
        static void    push (char *boundary, int len);
        static void    replace_top_mime (mime_t *mime);

/****************************************************************************
 *    INTERFACE FUNCTIONS
 ****************************************************************************/

%}

/* Exclusive start conditions.  HEADER is active while a header block is
   scanned.  FINISH is entered by the unconditional EOF rule after it has
   reported the last mail, so that the next call reports END_OF_FILE. */
%x HEADER FINISH

/* Pieces of the timestamp found on an mbox separator line, e.g.
   "Mon Jan  5 12:34:56 2004". */
DATE [a-zA-Z]{3}\ [a-zA-Z]{3}\ \ ?[0-9][0-9]?
TIME [0-9]{2}:[0-9]{2}:[0-9]{2}
YEAR [0-9]{4}
/* The rest of a header line followed by any number of continuation lines
   (lines that start with a space or a tab). */
HEADER_CONTENT [^\n\r]*(\r?\n[ \t][^\n\r]+)*
/* Not referenced by any rule in this file. */
TEXT_PLAIN_CHARSET [ \t]*text\/plain.*(;charset[ \t]*=[^\r\n;]+)?.*

/* The scanner does not call yywrap () at end of input. */
%option noyywrap

%%

^From\ [^ \n\t]+\ +{DATE}\ {TIME}\ {YEAR} {
        /*
          mbox separator line ("From sender Www Mmm dd hh:mm:ss yyyy").
          Finishes the message scanned so far, publishes it through
          newmail, prepares the other mail[] slot for the message that
          starts here and switches to the HEADER state.

          Returns NEXT_MAIL, or BROKEN_MAIL when the timestamp cannot be
          parsed.  When only_one is set the line is ordinary body text
          and is skipped.
        */
        struct tm  tm;
        char      *begin;
        char      *tmp;
        mime_t    *mime;
  
        if (only_one){
                YY_BREAK;
        }
  
        newmail    = mail + mail_index;
        mail_index = 1 - mail_index;
  
        /*
          finish last mail
        */
        if (!newmail->reply_to)
                newmail->reply_to = newmail->from;

        mime = top_mime ();
        if (mime){
                mime->off_bound = offset;
                mime->off_end   = offset;
        }

        /* this rule ends with return, so it does not reach YY_BREAK */
        offset += yyleng;

        /*
          begin new mail: skip "From", the sender and any run of spaces
          so that begin points at the last space before the week day
        */
        begin = strchr (yytext, ' ');
        begin = strchr (begin + 1, ' ');
        while (*(begin + 1) == ' ')
                begin++;

        /*
          strptime only fills in the fields named by the format, so start
          from a fully defined struct and let mktime work out whether
          daylight saving time is in effect.
        */
        memset (&tm, 0, sizeof (tm));
        tm.tm_isdst = -1;

        /* begin + 5 skips the space and the "Www " week day */
        tmp   = strptime (begin + 5, "%b %e %T %Y", &tm);

        if (!tmp){
                error_ (0, "wrong time string");
                return BROKEN_MAIL;
        }
        if (*tmp){
                error_ (0, "not all parsed");
                return BROKEN_MAIL;
        }
        else {
                offset++;       /* to workaround \n that doesn't match */
                clear_mail (mktime (&tm));
                offset--;
        }
  
        BEGIN (HEADER);
        return NEXT_MAIL;
}



<HEADER>^User\-Agent:{HEADER_CONTENT} {
        /*
          Remember the sender's mail program unless an earlier header has
          already provided it.  The stored string is allocated by
          unfold_content.
        */
        char *tmp = yytext + 11;

        /* <ctype.h> functions must not be given a negative char value */
        while (isspace ((unsigned char) *tmp))
                tmp++;
  
        if (a_mail->mua == NULL){
                a_mail->mua = unfold_content (tmp);
        }
}


<HEADER>^X\-Mailer:{HEADER_CONTENT} {
        /*
          Remember the sender's mail program unless an earlier header has
          already provided it.

          Skip exactly the field name: with an empty value yytext ends
          right after the colon, so skipping one more character would
          read past the terminating NUL of the token.  Leading white
          space is removed below.
        */
        char *tmp = yytext + sizeof ("X-Mailer:") - 1;

        /* <ctype.h> functions must not be given a negative char value */
        while (isspace ((unsigned char) *tmp))
                tmp++;
  
        if (a_mail->mua == NULL){
                a_mail->mua = unfold_content (tmp);
        }
}


<HEADER>^Content\-Type:{HEADER_CONTENT} {
        /*
          Record the content type of the current MIME part and, for a
          multipart type, push its boundary on the multipart stack.

          str starts at the colon that follows the field name.  In the
          boundary regexp group 2 is a quoted boundary and group 3 an
          unquoted one.
        */
        int         index;
        int         ret;
        int         len;
        regmatch_t  matches[4];
        mime_t     *mime = top_mime ();
        char       *str  = yytext + 12;

        /*
          There is no current part once a multipart Content-Type has been
          pushed for this header block.  Ignore a further Content-Type
          header instead of handing NULL to mime_set_from_header and
          pushing a level that has no parent part.
        */
        if (mime == NULL){
                YY_BREAK;
        }

        mime_set_from_header (mime, str);

        ret = regexec (& bound_re, str, 4, matches, 0);
        
        if (ret == 0){
                index = (matches[2].rm_so != -1) ? 2 : 3;
                len   = matches[index].rm_eo - matches[index].rm_so;
                push (str + matches[index].rm_so, len);
        }
        else if (ret != REG_NOMATCH)
                error_regex (ret, & bound_re, "boundary regexp");
}


<HEADER>^Content\-Disposition:{HEADER_CONTENT} {
        /* Hand the header value (everything after the colon) to the
           current part, or to its parent when there is no current one. */
        char   *value = yytext + 20;
        mime_t *part  = top_or_parent_mime ();

        if (part != NULL){
                mime_complete_file_name (part, value);
        }
}


<HEADER>^Content\-Transfer\-Encoding:{HEADER_CONTENT} {
        /*
          Classify the transfer encoding of the current MIME part by
          looking for a characteristic substring (all lower or all upper
          case) in the header value.  Anything unrecognised is MENC_NONE.
        */
        char   *value = yytext + 26;
        mime_t *part  = top_mime ();

        if (part == NULL){
                YY_BREAK;
        }

        if (strstr (value, "uoted") || strstr (value, "UOTED")){
                part->encoding = MENC_QP;
        }
        else if (strstr (value, "ase64") || strstr (value, "ASE64")){
                part->encoding = MENC_BASE64;
        }
        else if (strstr (value, "uencode") || strstr (value, "UENCODE")){
                part->encoding = MENC_UUENCODE;
        }
        else if (strstr (value, "7bit") || strstr (value, "7BIT")){
                part->encoding = MENC_7BIT;
        }
        else if (strstr (value, "8bit") || strstr (value, "8BIT")){
                part->encoding = MENC_8BIT;
        }
        else {
                part->encoding = MENC_NONE;
        }
}


<HEADER>^Date:{HEADER_CONTENT} {
        /*
          Store the Date header text in date_str and, when it can be
          parsed, its value in date.  When parsing fails date keeps the
          value given to clear_mail.

          Only the field name is skipped: with an empty value yytext ends
          right after the colon, and unfold_content removes leading white
          space anyway.
        */
        char      *tmp  = yytext + sizeof ("Date:") - 1;
        char      *date = unfold_content (tmp);
        struct tm  tm;

        a_mail->date_str = date;
  
        /* skip the optional week day; the cast keeps isdigit defined for
           8-bit characters */
        while (*date && ! isdigit ((unsigned char) *date))
                date++;

#ifdef HAVE_LOCALE_H
        setlocale (LC_ALL, "C");
#endif

        /**
         * here is a hack that makes it possible to parse a date with
         * obsolete year notation (2 - digits),  we have to try the obsolete
         * one first, and check if it was possible to get the date
         *
         * tm is reset before each attempt: strptime only writes the
         * fields named in the format, and tm_isdst = -1 lets mktime
         * decide about daylight saving time instead of reusing whatever
         * a previous message left behind.
         */
        memset (&tm, 0, sizeof (tm));
        tm.tm_isdst = -1;
        tmp = strptime (date, "%d %b %y %H:%M:%S", &tm);
        if (tmp == NULL){
                memset (&tm, 0, sizeof (tm));
                tm.tm_isdst = -1;
                tmp = strptime (date, "%d %b %Y %H:%M:%S", &tm);
        }

#ifdef HAVE_LOCALE_H
        setlocale (LC_ALL, "");
#endif

        if (tmp != NULL)
                a_mail->date = mktime (&tm);
}


<HEADER>^Subject:{HEADER_CONTENT} {
        /*
          Store the unfolded subject.  Only the field name is skipped:
          with an empty value yytext ends right after the colon, so
          skipping one more character would read past the token's NUL.
          unfold_content removes the leading white space.
        */
        char *tmp = yytext + sizeof ("Subject:") - 1;
        a_mail->subject = unfold_content (tmp);
}


<HEADER>^From:{HEADER_CONTENT} {
        /*
          Parse the author's address.  Only the field name is skipped:
          with an empty value yytext ends right after the colon, so
          skipping one more character would read past the token's NUL.
          unfold_content removes the leading white space.
        */
        char *tmp  = yytext + sizeof ("From:") - 1;
        char *from = unfold_content (tmp);

        a_mail->from = address_from_string (from);
        xfree (from);
}


<HEADER>^Sender:{HEADER_CONTENT} {
        /*
          Use Sender as the author only when no From header has been seen
          so far.  Only the field name is skipped: with an empty value
          yytext ends right after the colon, so skipping one more
          character would read past the token's NUL.  unfold_content
          removes the leading white space.
        */
        char *tmp    = yytext + sizeof ("Sender:") - 1;
        char *sender = unfold_content (tmp);
  
        if (a_mail->from == NULL)
                a_mail->from = address_from_string (sender);
        xfree (sender);
}


<HEADER>^To:{HEADER_CONTENT} {
        /*
          Parse the list of primary recipients.  Only the field name is
          skipped: with an empty value yytext ends right after the colon,
          so skipping one more character would read past the token's NUL.
          unfold_content removes the leading white space.
        */
        char *tmp = yytext + sizeof ("To:") - 1;
        char *to  = unfold_content (tmp);
  
        a_mail->to = raddress_get_from_header (to);
        xfree (to);
}


<HEADER>^CC:{HEADER_CONTENT} {
        /*
          Parse the list of carbon-copy recipients.  Only the field name
          is skipped: with an empty value yytext ends right after the
          colon, so skipping one more character would read past the
          token's NUL.  unfold_content removes the leading white space.
        */
        char *tmp = yytext + sizeof ("CC:") - 1;
        char *cc  = unfold_content (tmp);

        a_mail->cc = raddress_get_from_header (cc);
        xfree (cc);
}


<HEADER>^Bcc:{HEADER_CONTENT} {
        /* Parse the list of blind-carbon-copy recipients from the text
           that follows the field name. */
        char *value = unfold_content (yytext + 4);

        a_mail->bcc = raddress_get_from_header (value);
        xfree (value);
}


<HEADER>^Message\-ID:{HEADER_CONTENT} {
        /*
          Store the message id.  When no mail program is known yet and
          the id contains "Pine", derive the program name from the id:
          the text from "Pine" up to the last dot found within its first
          14 characters (e.g. "Pine.LNX.4.44").

          Only the field name is skipped: with an empty value yytext ends
          right after the colon, so skipping one more character would
          read past the token's NUL.  unfold_content removes the leading
          white space.
        */
        char  c;
        char *pine;
        char *tmp  = yytext + sizeof ("Message-ID:") - 1;

        a_mail->msg_id = unfold_content (tmp);
        if (a_mail->mua == NULL && a_mail->msg_id != NULL){
                tmp = strstr (a_mail->msg_id, "Pine");
                if (tmp){
                        /* never look beyond the end of the string ... */
                        size_t limit = strlen (tmp);

                        if (limit > 13)
                                limit = 13;

                        /* ... nor walk back past the "Pine" match */
                        pine = tmp + limit;
                        while (pine > tmp && *pine != '.')
                                pine--;

                        if (pine > tmp){
                                c           = *pine;
                                *pine       = '\0';
                                a_mail->mua = xstrdup (tmp);
                                *pine       = c;
                        }
                }
        }
}


<HEADER>^In\-Reply\-To:{HEADER_CONTENT} {
        /*
          Use the In-Reply-To value as a one-element reference list, but
          only when no reference list has been stored yet.

          Only the field name is skipped: with an empty value yytext ends
          right after the colon, so skipping one more character would
          read past the token's NUL.  unfold_content removes the leading
          white space.
        */
        char      *tmp = yytext + sizeof ("In-Reply-To:") - 1;
        char      *irt;
        rstring_t *result;

        if (a_mail->in_reply_to == NULL){
                irt = unfold_content (tmp);
    
                result = rstring_create_size (2);
                /* presumably tells rstring that it owns the first
                   element -- confirm against rstring.h */
                result->allocated_first = 1;
                rstring_add (result, irt);
                rstring_shrink (result);
                a_mail->in_reply_to = result;
        }
}


<HEADER>^References:{HEADER_CONTENT} {
        /*
          Replace any reference list stored so far with the message ids
          listed in the References header, split on white space.

          Only the field name is skipped: with an empty value yytext ends
          right after the colon, so skipping one more character would
          read past the token's NUL.  unfold_content removes the leading
          white space.
        */
        char *tmp = yytext + sizeof ("References:") - 1;

        if (a_mail->in_reply_to){
                rstring_delete (a_mail->in_reply_to);
        }
  
        a_mail->in_reply_to = rstring_split_re (unfold_content (tmp), "[ \t\r\n]+");
        a_mail->in_reply_to->allocated_first = 1;
        rstring_shrink (a_mail->in_reply_to);
}


<HEADER>^Reply\-To:([^\n\r]:)*{HEADER_CONTENT} {
        /*
          Parse the address replies should go to.  Only the field name is
          skipped: with an empty value yytext ends right after the colon,
          so skipping one more character would read past the token's NUL.
          unfold_content removes the leading white space.
        */
        char *tmp      = yytext + sizeof ("Reply-To:") - 1;
        char *reply_to = unfold_content (tmp);


        a_mail->reply_to = address_from_string (reply_to);
        xfree (reply_to);
}


<HEADER>^Status:{HEADER_CONTENT} {
        /*
          Translate the letters of the Status header into flags: 'R' sets
          FLAG_READ and 'O' sets FLAG_OLD.  The header is ignored when
          only_one is set.

          Only the field name is skipped: with an empty value yytext ends
          right after the colon, so skipping one more character would
          read past the token's NUL.  unfold_content removes the leading
          white space.
        */
        char *i;
        char *tmp = yytext + sizeof ("Status:") - 1;

        if (! only_one){
                tmp       = unfold_content (tmp);

                for (i = tmp; *i; i++){
                        switch (*i){
                                case 'R':
                                        a_mail->flags |= FLAG_READ;
                                        break;
                                case 'O':
                                        a_mail->flags |= FLAG_OLD;
                                        break;
                                default:
                                        break;
                        }
                }

                xfree (tmp);
        }
}


<HEADER>^X\-Status:{HEADER_CONTENT} {
        /*
          Translate the letters of the X-Status header into flags: 'A'
          sets FLAG_ANSWERED, every other letter is ignored.

          Only the field name is skipped: with an empty value yytext ends
          right after the colon, so skipping one more character would
          read past the token's NUL.  unfold_content removes the leading
          white space.
        */
        char *i;
        char *tmp = yytext + sizeof ("X-Status:") - 1;
        tmp       = unfold_content (tmp);

        for (i = tmp; *i; i++){
                switch (*i){
                        case 'A':
                                a_mail->flags |= FLAG_ANSWERED;
                                break;
                        default:
                                break;
                }
        }

        xfree (tmp);
}


<HEADER>^Received:{HEADER_CONTENT} {
        /*
          Extract the envelope recipient from the first Received header
          that mentions an (E)SMTP transfer and carries a
          "for <address>;" clause.  Later Received headers are ignored
          once recv_for is set.
        */
        int         addr_len;
        char       *addr;
        char       *value;
        regmatch_t  found[1];

        if (a_mail->recv_for != NULL){
                YY_BREAK;
        }

        value = unfold_content (yytext + 9);

        if (misc_regex (WITH_SMTP_RE, value, found)
            && misc_regex (FOR_EMAIL_RE, value, found)){
                /* the match is "for <" + address + ">;": drop the five
                   leading and the two trailing characters */
                addr_len = found[0].rm_eo - found[0].rm_so - 7;
                addr     = xmalloc (addr_len + 1);
                memcpy (addr, value + found[0].rm_so + 5, addr_len);
                addr[addr_len] = '\0';
                a_mail->recv_for = address_from_string (addr);
                xfree (addr);
        }

        xfree (value);
}


<HEADER>^X\-Elmo\-SMTP:{HEADER_CONTENT} {
        /* Keep the unfolded value of elmo's private X-Elmo-SMTP header. */
        char *value = yytext + 12;

        a_mail->smtp = unfold_content (value);
}


<HEADER>\r?\n\r?\n {
        /*
          Empty line: the header block is over.  Return to the body
          state, run the optional after-header hook and note where the
          body of the current part begins (just behind the empty line).
        */
        mime_t *part = top_or_parent_mime ();

        BEGIN (INITIAL);

        if (after_header_action != NULL){
                after_header_action ();
        }

        if (part != NULL){
                part->off_start = offset + yyleng;
        }
}


<HEADER>{HEADER_CONTENT} {
        /*
          Any header without a dedicated rule.  It is appended, name
          included, to the mail's headers list when custom headers are
          being collected and skipped otherwise.
        */
        if (collect_custom_headers){
                char *line = unfold_content (yytext);

                if (a_mail->headers == NULL){
                        a_mail->headers = rstring_create ();
                        a_mail->headers->allocated_all = 1;
                }

                rstring_add (a_mail->headers, line);
        }
}


<HEADER>\r?\n     /* line break between two headers: nothing to do */


<HEADER>\r+       /* stray carriage returns: nothing to do */


\-\-.+\r?\n {
        /*
          A body line starting with "--".  It is treated as a MIME
          delimiter only when it contains the boundary of the innermost
          multipart level.

          boundary followed by "--"  closes the level: the offsets of the
                                     current part are recorded and the
                                     level is popped
          otherwise                  starts a new part: the previous part
                                     is closed, a new mime object is added
                                     to the parent's part list and its
                                     header is scanned next
        */
        int     len;
        char   *boundary;
        char   *seek;
        mime_t *mime;
        mime_t *parent;

        if (top == 0){
                YY_BREAK;
        }

        boundary = top_boundary ();
        len      = top_boundary_len ();
        
        if (boundary == NULL){
                YY_BREAK;
        }
        
        seek = strstr (yytext + 2, boundary);
        if (seek == NULL){
                YY_BREAK;
        }

        /* may be NULL when the level was pushed without a parent part */
        mime = top_or_parent_mime ();

        if (seek[len] == '-' && seek[len + 1] == '-'){
                if (mime){
                        mime->off_bound = offset + yyleng;
                        mime->off_end   = offset;
                }
                pop ();
                YY_BREAK;
        }

        if (mime){
                mime->off_bound = offset;
                mime->off_end   = offset - 1;
        }

        /* without a parent there is nothing to attach the new part to */
        parent = parent_mime ();
        if (parent == NULL){
                YY_BREAK;
        }

        if (parent->parts == NULL)
                parent->parts = rmime_create_size (4);

        mime             = mime_create ();
        mime->off_header = offset + yyleng;
        rmime_add (parent->parts, mime);
        replace_top_mime (mime);

        BEGIN (HEADER);
}


.+        /* ordinary body text: only counted (see YY_BREAK) */


\n+       /* line breaks in the body: only counted (see YY_BREAK) */


<HEADER><<EOF>> {
        /*
          The input ended inside a header block.  Throw away the message
          under construction, reset the per-scan state and report
          EOF_AT_HEADER.
        */
        mail_destroy (a_mail, BOX_MAILDIR);

        offset   = 0;
        only_one = 0;

        BEGIN (INITIAL);
        return EOF_AT_HEADER;
}


<FINISH>.+        /* anything left in the FINISH state is discarded */


<FINISH>\n+       /* anything left in the FINISH state is discarded */


<FINISH><<EOF>> {
        /*
          End of input after the last mail has already been reported by
          the unconditional EOF rule: go back to the initial state and
          report END_OF_FILE.
        */

        BEGIN (INITIAL);
        return END_OF_FILE;

        /**
         * this is to get rid of warnings
         *
         * The two calls below are never executed; they only reference
         * the generated helpers so that the compiler does not report
         * them as unused.
         */
        yyunput (0, NULL);
        yy_flex_realloc (0, 0);
}


<<EOF>> {
        /*
          End of input while scanning a body.  When there is a MIME part
          to close, the last message is completed and reported with
          NEXT_MAIL; the FINISH state makes the following call report
          END_OF_FILE.  Without such a part END_OF_FILE is returned
          directly.
        */
        mime_t *last = top_or_parent_mime ();

        if (last != NULL){
                last->off_end   = offset;
                last->off_bound = offset;

                if (a_mail->reply_to == NULL){
                        a_mail->reply_to = a_mail->from;
                }

                newmail  = mail + mail_index;
                offset   = 0;
                only_one = 0;

                BEGIN (FINISH);
                return NEXT_MAIL;
        }

        return END_OF_FILE;
}



%%


/*
  Compile the regular expression that extracts the boundary parameter
  from a multipart Content-Type header.  Must be called before any
  scanning function.  Calling it again while the expression is compiled
  is a no-op, so the compiled pattern is never overwritten (and leaked).

  In the pattern, group 2 is a quoted boundary and group 3 an unquoted
  one.  NOTE(review): the unquoted form stops at the first '-', which
  looks too strict for boundaries that contain hyphens -- confirm.
*/
void
mlex_init (void)
{
        int ret;

        if (re_compiled)
                return;

        ret = regcomp (& bound_re,
                       "multipart.*boundary=(\"([^\"]+)\"|([^\"][^; \\-]*);?)",
                       REG_ICASE | REG_EXTENDED);

        if (ret)
                error_critical (1, 0, "internal error");
        re_compiled = 1;
}



/* Release the compiled boundary expression, if there is one. */
void
mlex_free_resources (void)
{
        if (! re_compiled)
                return;

        regfree (& bound_re);
        re_compiled = 0;
}



/*
  Scan yyin as a file that holds exactly one message, starting in the
  HEADER state.  TM becomes the initial date of the message.  Returns
  whatever the scanner returns.
*/
int
mlex_scan_file (time_t tm)
{
        YY_BUFFER_STATE file_buffer;
        int             result;

        file_buffer = yy_create_buffer (yyin, YY_BUF_SIZE);

        after_header_action    = NULL;
        only_one               = 1;
        collect_custom_headers = 0;

        clear_mail (tm);
        BEGIN (HEADER);
        yy_switch_to_buffer (file_buffer);

        result = mlex_scan ();

        yy_delete_buffer (file_buffer);
        BEGIN (INITIAL);

        return result;
}


/*
  Scan the NUL-terminated string BUF as exactly one message, starting in
  the HEADER state.  The buffer that was current on entry is made current
  again afterwards.  Returns NEXT_MAIL when the scanner reported
  EOF_AT_HEADER or NEXT_MAIL, and BROKEN_MAIL otherwise.

  NOTE(review): unlike the other entry points, the start condition is not
  reset to INITIAL on exit -- confirm that callers do not depend on it.
*/
int
mlex_scan_buffer (char *buf)
{
        YY_BUFFER_STATE previous = YY_CURRENT_BUFFER;
        YY_BUFFER_STATE string_buffer;
        int             result;

        after_header_action    = NULL;
        only_one               = 1;
        collect_custom_headers = 0;

        clear_mail (0);
        BEGIN (HEADER);

        string_buffer = yy_scan_string (buf);
        result        = mlex_scan ();
        yy_delete_buffer (string_buffer);
        yy_switch_to_buffer (previous);

        return (result == EOF_AT_HEADER || result == NEXT_MAIL)
                ? NEXT_MAIL
                : BROKEN_MAIL;
}



/*
  Scan yyin as exactly one outgoing message, starting in the HEADER
  state.  Custom headers are collected, write_headers runs once the
  header block ends, and the current time is the initial date.  Returns
  whatever the scanner returns.
*/
int
mlex_outmail_scan (void)
{
        YY_BUFFER_STATE file_buffer;
        time_t          now = time (NULL);
        int             result;

        file_buffer = yy_create_buffer (yyin, YY_BUF_SIZE);

        after_header_action    = write_headers;
        collect_custom_headers = 1;
        only_one               = 1;

        clear_mail (now);
        BEGIN (HEADER);
        yy_switch_to_buffer (file_buffer);

        result = mlex_scan ();

        yy_delete_buffer (file_buffer);
        BEGIN (INITIAL);

        return result;
}



int
mlex_mbox_scan_start (void)
{
        YY_BUFFER_STATE buffer = yy_create_buffer (yyin, YY_BUF_SIZE);

        collect_custom_headers = 0;
        only_one               = 0;
        after_header_action    = NULL;

        memset (mail, '\0', sizeof (mail));

        BEGIN (INITIAL);
        yy_switch_to_buffer (buffer);
        return mlex_scan ();
}


/****************************************************************************
 *    STACK OPERATIONS
 ****************************************************************************/

/* Forget the part tracked at the current level and step back to the
   enclosing level, unless the outermost level is already current. */
static void
pop (void)
{
        stack[top].mime = NULL;

        if (top != 0)
                top--;
}



/*
  Open a new multipart level.  The first LEN bytes of BOUNDARY (truncated
  to MAX_BOUNDARY_LEN - 1) are stored with the current level, then the
  stack grows by one level that has no part yet.

  When the stack is full the request is ignored: deeper nesting is not
  tracked instead of writing behind the end of stack[].
*/
static void
push (char *boundary, int len)
{
        const int capacity = (int) (sizeof (stack) / sizeof (stack[0]));

        /* the new level lives at stack[top + 1] */
        if (top < 0 || top + 1 >= capacity)
                return;

        if (len < 0)
                len = 0;
        if (len >= MAX_BOUNDARY_LEN)
                len = MAX_BOUNDARY_LEN - 1;

        memcpy (stack[top].boundary, boundary, len);
        stack[top].boundary[len] = '\0';
        stack[top].boundary_len  = len;
        
        top++;
        stack[top].mime = NULL;
}



/* Make MIME the part tracked at the current stack level. */
static void
replace_top_mime (mime_t *mime)
{
        struct bound *level = stack + top;

        level->mime = mime;
}



/* Part tracked one level below the current one, or NULL when the
   current level is the outermost. */
static mime_t *
parent_mime (void)
{
        return (top > 0) ? stack[top - 1].mime : NULL;
}



/* Part tracked at the current stack level (may be NULL), or NULL when
   top is negative. */
static mime_t *
top_mime (void)
{
        return (top < 0) ? NULL : stack[top].mime;
}



/* Part tracked at the current level or, when there is none, the part of
   the level below.  NULL when neither exists. */
static mime_t *
top_or_parent_mime (void)
{
        mime_t *current = top_mime ();

        if (current == NULL && top > 0)
                current = stack[top - 1].mime;

        return current;
}



/* Boundary string of the innermost open multipart level, or NULL when
   no level is open. */
static char *
top_boundary (void)
{
        return (top > 0) ? stack[top - 1].boundary : NULL;
}



/* Length of the innermost open boundary, or -1 when no level is open. */
static int
top_boundary_len (void)
{
        return (top > 0) ? stack[top - 1].boundary_len : -1;
}


/****************************************************************************
 *    IMPLEMENTATION PRIVATE FUNCTIONS
 ****************************************************************************/


/*
  Copy a header value: leading white space is skipped and every carriage
  return is dropped (line feeds and the white space of continuation lines
  are kept), then the copy is passed through mime_decode_header.

  Returns the string handed back by mime_decode_header; it is allocated
  and should be freed after use.
*/
static char *
unfold_content (char *content)
{
        /* large enough even before the leading white space is skipped */
        char *result     = xmalloc (strlen (content) + 1);
        char *result_ptr = result;
        char *ptr;

        /* <ctype.h> functions must not be given a negative char value */
        while (*content && isspace ((unsigned char) *content))
                content++;
        
        for (ptr = content; *ptr; ptr++){
                if (*ptr == '\r')
                        continue;

                *result_ptr = *ptr;
                result_ptr++;
        }
        *result_ptr = '\0';
        result = mime_decode_header (result, result_ptr - result, 1);
        return result;
}




static void
clear_mail (time_t tm)
{
        mail_clear (a_mail);

        a_mail->mime                   = mime_info_create ();
        a_mail->mime->mime->off_header = offset;
        a_mail->place.offset_header    = offset;
        a_mail->date                   = tm;

        top           = 0;
        stack[0].mime = a_mail->mime->mime;
}



/*
  After-header hook for outgoing mail: replace the message id and the
  reference list with the values supplied by the compose module, make
  sure there is a date string, and unregister the hook so that it runs
  only once per message.
*/
static void
write_headers (void)
{
        /* obtained before the old values are released */
        char      *new_id   = compose_msg_id ();
        rstring_t *new_refs = compose_in_reply_to ();
        char      *date_text;

        if (a_mail->msg_id != NULL){
                xfree (a_mail->msg_id);
        }
        if (a_mail->in_reply_to != NULL){
                rstring_delete (a_mail->in_reply_to);
        }

        /* a Date header found in the message wins over a generated one */
        date_text = a_mail->date_str ? a_mail->date_str : compose_date ();

        a_mail->msg_id      = new_id;
        a_mail->date_str    = date_text;
        a_mail->in_reply_to = new_refs;

        after_header_action = NULL;
}



/****************************************************************************
 *    INTERFACE CLASS BODIES
 ****************************************************************************/
/****************************************************************************
 *
 *    END MODULE mlex.l
 *
 ****************************************************************************/


/* syntax highlighted by Code2HTML, v. 0.9.1 */