/* -*- c -*-
elmo - ELectronic Mail Operator
Copyright (C) 2002, 2003, 2004 rzyjontko
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
------------------------------------------------------------
*/
%{
#define __USE_XOPEN
#define _GNU_SOURCE
/****************************************************************************
* IMPLEMENTATION HEADERS
****************************************************************************/
#include <stdio.h>
#include <time.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <regex.h>
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#ifdef HAVE_LOCALE_H
# include <locale.h>
#endif
#include "xmalloc.h"
#include "mlex.h"
#include "mail.h"
#include "mbox.h"
#include "hash.h"
#include "elmo.h"
#include "address.h"
#include "raddress.h"
#include "error.h"
#include "rstring.h"
#include "mime.h"
#include "rmime.h"
#include "compose.h"
#include "misc.h"
/****************************************************************************
* IMPLEMENTATION PRIVATE DEFINITIONS / ENUMERATIONS / SIMPLE TYPEDEFS
****************************************************************************/
/* Call fun when it is not NULL; the whole expression always yields 0. */
#define EXECUTE(fun) ((fun) ? fun (), 0 : 0)
/* Name and prototype of the scanner function generated by flex. */
#define YY_DECL int mlex_scan YY_PROTO ((void))
/* Every action that ends with YY_BREAK (explicitly, or by falling off its
   end) adds the length of the matched text to offset.  Actions that leave
   with return have to update offset themselves. */
#define YY_BREAK offset += yyleng; break;
/* this means currently processed message, that is going to be added */
#define a_mail (mail + mail_index)
/* Building blocks of the POSIX extended regular expressions that are
   matched against the unfolded Received: header. */
#define ATEXT "([[:alpha:][:digit:]!#\\$%&\\\\\\'\\*\\+\\/\\=\\?\\^_\\`" \
"\\{\\}\\~\\|\\-])"
#define DOT_ATOM_TEXT "(" ATEXT "+(\\." ATEXT "+)*)"
#define DOT_ATOM "(" DOT_ATOM_TEXT ")"
#define EMAIL_RE "(" DOT_ATOM "@" DOT_ATOM ")"
/* "for <" is 5 characters and ">;" is 2; the Received: rule relies on
   these 7 characters when it cuts the address out of the match. */
#define FOR_EMAIL_RE "for <" EMAIL_RE ">;"
#define WITH_SMTP_RE "with E?SMTP"
/* Size of the boundary buffer in struct bound, terminating NUL included. */
#define MAX_BOUNDARY_LEN 80
/****************************************************************************
* IMPLEMENTATION PRIVATE CLASS PROTOTYPES / EXTERNAL CLASS REFERENCES
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE STRUCTURES / UTILITY CLASSES
****************************************************************************/
/*
 * One entry of the multipart nesting stack.
 */
struct bound {
/* boundary string stored by push (), always NUL-terminated and cut to
   MAX_BOUNDARY_LEN - 1 characters */
char boundary[MAX_BOUNDARY_LEN];
/* number of characters stored in boundary, without the NUL */
int boundary_len;
/* mime part attached to this stack level, or NULL when none is set */
mime_t *mime;
};
/****************************************************************************
* IMPLEMENTATION REQUIRED EXTERNAL REFERENCES (AVOID)
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE DATA
****************************************************************************/
/*
offset means how many bytes we have already read; YY_BREAK adds yyleng
to it and the EOF rules reset it to 0.
mail[] holds two slots: a_mail (mail + mail_index) is the one being
filled in by the header rules, while newmail (see below) is pointed at
a slot when NEXT_MAIL is returned, so that the caller can read it.
mail_index may be 0 or 1, says which mail[] is for writing
*/
static unsigned offset = 0;
static mail_t mail[2];
static int mail_index = 0;
/*
We use stack to push, and pop all necessary data each time we
encounter multipart mime type. It has a constant size for the
sake of simplicity.
stack[top].mime is the part currently being described; push () stores a
boundary in stack[top] and then increments top, so top_boundary ()
reads stack[top - 1].
*/
static struct bound stack[5];
static int top;
/* A regular expression used to get the boundary from the header.
   It is compiled by mlex_init () and released by mlex_free_resources ();
   re_compiled remembers whether there is anything to release. */
static int re_compiled = 0;
static regex_t bound_re;
/**
* This shows if we are going to get EXACTLY ONE mail. It is possible
* that a mail in maildir has line beginning with "From_", just like
* in mbox. We must reject the rule then.
*/
static int only_one = 0;
/* This is how I determine if custom headers should be saved
in headers rstring.  Only mlex_outmail_scan () turns it on. */
static int collect_custom_headers = 0;
/**
* This holds a pointer to function which should be called after \n\n,
* which ends header. It may reset this variable, not to be called
* once for each mime-header.
*/
static void (*after_header_action)(void) = NULL;
/****************************************************************************
* INTERFACE DATA
****************************************************************************/
/* The mail most recently handed to the caller with NEXT_MAIL. */
mail_t *newmail = mail + 0;
/****************************************************************************
* IMPLEMENTATION PRIVATE FUNCTION PROTOTYPES
****************************************************************************/
/* header helpers */
static char *unfold_content (char *content);
static void clear_mail (time_t tm);
static void write_headers (void);
/* accessors of the multipart nesting stack */
static mime_t *parent_mime (void);
static mime_t *top_mime (void);
static mime_t *top_or_parent_mime (void);
static char *top_boundary (void);
static int top_boundary_len (void);
/* modifiers of the multipart nesting stack */
static void pop (void);
static void push (char *boundary, int len);
static void replace_top_mime (mime_t *mime);
/****************************************************************************
* INTERFACE FUNCTIONS
****************************************************************************/
%}
/* HEADER is active while a header block is read, FINISH after the last
   mail has been reported.  Both are exclusive start conditions. */
%x HEADER FINISH
/* Pieces of the mbox "From " separator line. */
DATE [a-zA-Z]{3}\ [a-zA-Z]{3}\ \ ?[0-9][0-9]?
TIME [0-9]{2}:[0-9]{2}:[0-9]{2}
YEAR [0-9]{4}
/* Rest of a header line together with its folded continuation lines. */
HEADER_CONTENT [^\n\r]*(\r?\n[ \t][^\n\r]+)*
/* NOTE(review): TEXT_PLAIN_CHARSET is not referenced by any rule visible
   in this file. */
TEXT_PLAIN_CHARSET [ \t]*text\/plain.*(;charset[ \t]*=[^\r\n;]+)?.*
%option noyywrap
%%
^From\ [^ \n\t]+\ +{DATE}\ {TIME}\ {YEAR} {
  /*
   * mbox separator line ("From sender Www Mmm dd hh:mm:ss yyyy").
   * It closes the mail collected so far, makes it available through
   * newmail and starts a fresh one in the other mail[] slot.
   * Returns NEXT_MAIL, or BROKEN_MAIL when the date cannot be parsed.
   * When exactly one mail is scanned (only_one) the line is skipped like
   * ordinary body text.
   */
  struct tm tm;
  char *begin;
  char *tmp;
  mime_t *mime;

  if (only_one){
    YY_BREAK;
  }

  newmail = mail + mail_index;
  mail_index = 1 - mail_index;

  /*
    finish last mail
  */
  if (!newmail->reply_to)
    newmail->reply_to = newmail->from;
  mime = top_mime ();
  if (mime){
    mime->off_bound = offset;
    mime->off_end = offset;
  }

  /* this rule ends with return, so it does not reach YY_BREAK */
  offset += yyleng;

  /*
    begin new mail
  */
  /* skip "From", the sender and any extra blanks; begin ends up on the
     last blank in front of the weekday name */
  begin = strchr (yytext, ' ');
  begin = strchr (begin + 1, ' ');
  while (*(begin + 1) == ' ')
    begin++;

  /* strptime stores only the fields named in the format, so the
     structure must be initialised before mktime reads it; tm_isdst = -1
     lets mktime decide about daylight saving time */
  memset (&tm, 0, sizeof (tm));
  tm.tm_isdst = -1;

  /* begin + 5 steps over the blank, the weekday name and one more blank */
  tmp = strptime (begin + 5, "%b %e %T %Y", &tm);
  if (!tmp){
    error_ (0, "wrong time string");
    return BROKEN_MAIL;
  }
  if (*tmp){
    error_ (0, "not all parsed");
    return BROKEN_MAIL;
  }

  offset++; /* to workaround \n that doesn't match */
  clear_mail (mktime (&tm));
  offset--;

  BEGIN (HEADER);
  return NEXT_MAIL;
}
<HEADER>^User\-Agent:{HEADER_CONTENT} {
  /* "User-Agent:" is 11 characters long, the value follows it. */
  char *tmp = yytext + 11;

  /* <ctype.h> functions require a value representable as unsigned char;
     header text may contain bytes >= 0x80, which are negative where
     plain char is signed */
  while (isspace ((unsigned char) *tmp))
    tmp++;
  /* the mail client is recorded only once per mail */
  if (a_mail->mua == NULL){
    a_mail->mua = unfold_content (tmp);
  }
}
<HEADER>^X\-Mailer:{HEADER_CONTENT} {
  /* "X-Mailer:" is 9 characters long.  Starting exactly behind the colon
     keeps the pointer inside the match even when the header is empty
     and does not lose a character when no blank follows the colon. */
  char *tmp = yytext + 9;

  /* <ctype.h> functions require a value representable as unsigned char */
  while (isspace ((unsigned char) *tmp))
    tmp++;
  /* the mail client is recorded only once per mail */
  if (a_mail->mua == NULL){
    a_mail->mua = unfold_content (tmp);
  }
}
<HEADER>^Content\-Type:{HEADER_CONTENT} {
  /*
   * Stores the content type in the current mime part and, for multipart
   * types, pushes the boundary on the nesting stack.
   */
  int index;
  int ret;
  int len;
  regmatch_t matches[4];
  mime_t *mime = top_mime ();
  /* "Content-Type:" is 13 characters long, so str starts at the colon.
     NOTE(review): kept as it was, what mime_set_from_header expects is
     not visible in this file. */
  char *str = yytext + 12;

  /* There is no current part once a boundary has been pushed for this
     header block.  A repeated multipart Content-Type must be ignored
     then: otherwise a NULL part is handed on and a second push leaves a
     stack entry without mime, which the boundary rule dereferences. */
  if (mime == NULL){
    YY_BREAK;
  }

  mime_set_from_header (mime, str);

  ret = regexec (& bound_re, str, 4, matches, 0);
  if (ret == 0){
    /* group 2 is a quoted boundary, group 3 an unquoted one */
    index = (matches[2].rm_so != -1) ? 2 : 3;
    len = matches[index].rm_eo - matches[index].rm_so;
    push (str + matches[index].rm_so, len);
  }
  else if (ret != REG_NOMATCH)
    error_regex (ret, & bound_re, "boundary regexp");
}
<HEADER>^Content\-Disposition:{HEADER_CONTENT} {
  /* Hands everything behind "Content-Disposition:" (20 characters) to
     mime_complete_file_name for the current part, or for its parent
     when there is no current part. */
  char *value = yytext + 20;
  mime_t *part = top_or_parent_mime ();

  if (part != NULL){
    mime_complete_file_name (part, value);
  }
}
<HEADER>^Content\-Transfer\-Encoding:{HEADER_CONTENT} {
  /*
   * Recognises the transfer encoding of the current mime part.  The
   * first letter of a name is left out of the needles so that both
   * capitalisations of it match; the first table entry found in the
   * value wins, anything unknown is MENC_NONE.
   */
  static const struct {
    const char *lower;
    const char *upper;
    int encoding;
  } known[] = {
    { "uoted",   "UOTED",   MENC_QP },
    { "ase64",   "ASE64",   MENC_BASE64 },
    { "uencode", "UENCODE", MENC_UUENCODE },
    { "7bit",    "7BIT",    MENC_7BIT },
    { "8bit",    "8BIT",    MENC_8BIT },
  };
  /* "Content-Transfer-Encoding:" is 26 characters long */
  const char *value = yytext + 26;
  mime_t *part = top_mime ();
  unsigned i;

  if (part != NULL){
    part->encoding = MENC_NONE;
    for (i = 0; i < sizeof (known) / sizeof (known[0]); i++){
      if (strstr (value, known[i].lower) || strstr (value, known[i].upper)){
        part->encoding = known[i].encoding;
        break;
      }
    }
  }
}
<HEADER>^Date:{HEADER_CONTENT} {
  /*
   * Keeps the textual date in date_str and, when it can be parsed, the
   * corresponding time in date.  If parsing fails, date keeps the value
   * it already had instead of being replaced by a meaningless one.
   */
  /* "Date:" is 5 characters long.  unfold_content skips leading white
     space itself, so starting right behind the colon is safe for an
     empty header and for a value that directly follows the colon. */
  char *date = unfold_content (yytext + 5);
  char *parsed;
  struct tm tm;

  a_mail->date_str = date;

  /* skip the optional weekday name */
  while (*date && ! isdigit ((unsigned char) *date))
    date++;

#ifdef HAVE_LOCALE_H
  setlocale (LC_ALL, "C");
#endif

  /**
   * here is a hack that makes it possible to parse a date with
   * obsolete year notation (2 - digits), we have to try the obsolete
   * one first, and check if it was possible to get the date
   *
   * strptime stores only the fields it parses, so the structure is
   * initialised before each attempt; tm_isdst = -1 lets mktime decide
   * about daylight saving time.
   */
  memset (&tm, 0, sizeof (tm));
  tm.tm_isdst = -1;
  parsed = strptime (date, "%d %b %y %H:%M:%S", &tm);
  if (parsed == NULL){
    memset (&tm, 0, sizeof (tm));
    tm.tm_isdst = -1;
    parsed = strptime (date, "%d %b %Y %H:%M:%S", &tm);
  }

#ifdef HAVE_LOCALE_H
  setlocale (LC_ALL, "");
#endif

  if (parsed != NULL)
    a_mail->date = mktime (&tm);
}
<HEADER>^Subject:{HEADER_CONTENT} {
  /* "Subject:" is 8 characters long.  unfold_content skips leading white
     space itself, so starting right behind the colon never steps past
     the end of an empty header and never drops the first character of a
     value that directly follows the colon. */
  a_mail->subject = unfold_content (yytext + 8);
}
<HEADER>^From:{HEADER_CONTENT} {
  /* "From:" is 5 characters long.  unfold_content skips leading white
     space itself, so starting right behind the colon never steps past
     the end of an empty header and never drops the first character of a
     value that directly follows the colon. */
  char *from = unfold_content (yytext + 5);

  a_mail->from = address_from_string (from);
  xfree (from);
}
<HEADER>^Sender:{HEADER_CONTENT} {
  /* "Sender:" is 7 characters long.  unfold_content skips leading white
     space itself, so starting right behind the colon never steps past
     the end of an empty header and never drops the first character of a
     value that directly follows the colon. */
  char *sender = unfold_content (yytext + 7);

  /* Sender is used only when no author address has been stored yet */
  if (a_mail->from == NULL)
    a_mail->from = address_from_string (sender);
  xfree (sender);
}
<HEADER>^To:{HEADER_CONTENT} {
  /* "To:" is 3 characters long.  unfold_content skips leading white
     space itself, so starting right behind the colon never steps past
     the end of an empty header and never drops the first character of a
     value that directly follows the colon. */
  char *to = unfold_content (yytext + 3);

  a_mail->to = raddress_get_from_header (to);
  xfree (to);
}
<HEADER>^CC:{HEADER_CONTENT} {
  /* "CC:" is 3 characters long.  unfold_content skips leading white
     space itself, so starting right behind the colon never steps past
     the end of an empty header and never drops the first character of a
     value that directly follows the colon. */
  char *cc = unfold_content (yytext + 3);

  a_mail->cc = raddress_get_from_header (cc);
  xfree (cc);
}
<HEADER>^Bcc:{HEADER_CONTENT} {
  /* Parses the blind-copy recipients; "Bcc:" is 4 characters long. */
  char *recipients = unfold_content (yytext + 4);

  a_mail->bcc = raddress_get_from_header (recipients);
  xfree (recipients);
}
<HEADER>^Message\-ID:{HEADER_CONTENT} {
  /*
   * Stores the message id.  When no mail client is known yet and the id
   * contains "Pine", the text from "Pine" up to the last dot found within
   * its first 14 characters is used as the client name.
   */
  char saved;
  char *pine;
  char *cut;
  size_t room;

  /* "Message-ID:" is 11 characters long.  unfold_content skips leading
     white space itself, so starting right behind the colon never steps
     past the end of an empty header. */
  a_mail->msg_id = unfold_content (yytext + 11);

  if (a_mail->mua == NULL && a_mail->msg_id != NULL){
    pine = strstr (a_mail->msg_id, "Pine");
    if (pine){
      /* start at most 13 characters behind "Pine", but never behind the
         terminating NUL, and never walk back past "Pine" itself */
      room = strlen (pine);
      cut = pine + (room < 13 ? room : 13);
      while (cut > pine && *cut != '.')
        cut--;
      /* cut > pine means that the loop stopped on a dot */
      if (cut > pine){
        saved = *cut;
        *cut = '\0';
        a_mail->mua = xstrdup (pine);
        *cut = saved;
      }
    }
  }
}
<HEADER>^In\-Reply\-To:{HEADER_CONTENT} {
  /* Stores the header as a one-element list, unless in_reply_to has
     already been filled in. */
  char *irt;
  rstring_t *result;

  if (a_mail->in_reply_to == NULL){
    /* "In-Reply-To:" is 12 characters long.  unfold_content skips
       leading white space itself, so starting right behind the colon
       never steps past the end of an empty header and never drops the
       first character of a value that directly follows the colon. */
    irt = unfold_content (yytext + 12);
    result = rstring_create_size (2);
    result->allocated_first = 1;
    rstring_add (result, irt);
    rstring_shrink (result);
    a_mail->in_reply_to = result;
  }
}
<HEADER>^References:{HEADER_CONTENT} {
  /* Replaces whatever is stored in in_reply_to with the list of ids
     found in this header, split on white space. */
  /* "References:" is 11 characters long.  unfold_content skips leading
     white space itself, so starting right behind the colon never steps
     past the end of an empty header and never drops the first character
     of a value that directly follows the colon. */
  char *tmp = yytext + 11;

  if (a_mail->in_reply_to){
    rstring_delete (a_mail->in_reply_to);
  }
  a_mail->in_reply_to = rstring_split_re (unfold_content (tmp), "[ \t\r\n]+");
  a_mail->in_reply_to->allocated_first = 1;
  rstring_shrink (a_mail->in_reply_to);
}
<HEADER>^Reply\-To:([^\n\r]:)*{HEADER_CONTENT} {
  /* "Reply-To:" is 9 characters long.  unfold_content skips leading
     white space itself, so starting right behind the colon never steps
     past the end of an empty header and never drops the first character
     of a value that directly follows the colon. */
  char *reply_to = unfold_content (yytext + 9);

  a_mail->reply_to = address_from_string (reply_to);
  xfree (reply_to);
}
<HEADER>^Status:{HEADER_CONTENT} {
  /* Translates the mbox status letters into mail flags.  The header is
     ignored when exactly one mail is scanned (only_one). */
  char *i;
  char *tmp;

  if (! only_one){
    /* "Status:" is 7 characters long; starting right behind the colon
       keeps the pointer inside the match even for an empty header */
    tmp = unfold_content (yytext + 7);
    for (i = tmp; *i; i++){
      switch (*i){

        case 'R':
          a_mail->flags |= FLAG_READ;
          break;

        case 'O':
          a_mail->flags |= FLAG_OLD;
          break;

        default:
          break;
      }
    }
    xfree (tmp);
  }
}
<HEADER>^X\-Status:{HEADER_CONTENT} {
  /* Sets FLAG_ANSWERED when the header value contains the letter 'A'. */
  char *i;
  char *tmp;

  /* "X-Status:" is 9 characters long; starting right behind the colon
     keeps the pointer inside the match even for an empty header */
  tmp = unfold_content (yytext + 9);
  for (i = tmp; *i; i++){
    switch (*i){

      case 'A':
        a_mail->flags |= FLAG_ANSWERED;
        break;

      default:
        break;
    }
  }
  xfree (tmp);
}
<HEADER>^Received:{HEADER_CONTENT} {
  /*
   * Takes the envelope recipient from the first Received: header that
   * matches both WITH_SMTP_RE and FOR_EMAIL_RE.  Later Received: headers
   * are skipped once an address has been stored.
   */
  regmatch_t found[1];
  char *line;
  char *address;
  int address_len;

  if (a_mail->recv_for != NULL){
    YY_BREAK;
  }

  /* "Received:" is 9 characters long */
  line = unfold_content (yytext + 9);

  if (misc_regex (WITH_SMTP_RE, line, found)){
    if (misc_regex (FOR_EMAIL_RE, line, found)){
      /* the match is "for <" address ">;", i.e. 7 characters more than
         the address itself, which starts 5 characters into the match */
      address_len = found[0].rm_eo - found[0].rm_so - 7;
      address = xmalloc (address_len + 1);
      memcpy (address, line + found[0].rm_so + 5, address_len);
      address[address_len] = '\0';
      a_mail->recv_for = address_from_string (address);
      xfree (address);
    }
  }
  xfree (line);
}
<HEADER>^X\-Elmo\-SMTP:{HEADER_CONTENT} {
  /* Keeps the value of the private X-Elmo-SMTP header;
     "X-Elmo-SMTP:" is 12 characters long. */
  char *value = yytext + 12;

  a_mail->smtp = unfold_content (value);
}
<HEADER>\r?\n\r?\n {
  /*
   * Empty line: the header block is over.  The scanner goes back to the
   * INITIAL state, the pending after-header hook (if any) is run and the
   * start of the body is recorded in the current mime part, or in its
   * parent when there is no current part.
   */
  mime_t *part = top_or_parent_mime ();
  unsigned body_start = offset + yyleng;

  BEGIN (INITIAL);
  EXECUTE (after_header_action);

  if (part != NULL)
    part->off_start = body_start;
}
<HEADER>{HEADER_CONTENT} {
  /* Any header without a rule of its own.  It is appended to
     a_mail->headers, but only while collect_custom_headers is set. */
  if (collect_custom_headers){
    char *line = unfold_content (yytext);

    if (a_mail->headers == NULL){
      a_mail->headers = rstring_create ();
      a_mail->headers->allocated_all = 1;
    }
    rstring_add (a_mail->headers, line);
  }
}
<HEADER>\r?\n	/* single line end inside a header block: only offset is advanced (YY_BREAK) */
<HEADER>\r+	/* stray carriage returns: only offset is advanced (YY_BREAK) */
\-\-.+\r?\n {
  /*
   * A line starting with "--" in a mail body.  When it contains the
   * boundary of the innermost multipart level it either closes that
   * level (boundary followed by "--") or starts the header of a new
   * part.  Any other such line is plain body text.
   */
  int len;
  char *boundary;
  char *seek;
  mime_t *mime;
  mime_t *parent;

  if (top == 0){
    YY_BREAK;
  }

  boundary = top_boundary ();
  len = top_boundary_len ();

  if (boundary == NULL){
    YY_BREAK;
  }

  seek = strstr (yytext + 2, boundary);

  if (seek == NULL){
    YY_BREAK;
  }

  /* a stack level may carry no mime part; never dereference NULL */
  mime = top_or_parent_mime ();
  if (mime == NULL){
    YY_BREAK;
  }

  /* closing delimiter: the part ends here and the level is left */
  if (seek[len] == '-' && seek[len + 1] == '-'){
    mime->off_bound = offset + yyleng;
    mime->off_end = offset;
    pop ();
    YY_BREAK;
  }

  parent = parent_mime ();
  if (parent == NULL){
    YY_BREAK;
  }

  /* opening delimiter: finish the previous part ... */
  mime->off_bound = offset;
  mime->off_end = offset - 1;

  /* ... and attach a new one, whose header starts behind this line */
  if (parent->parts == NULL)
    parent->parts = rmime_create_size (4);

  mime = mime_create ();
  mime->off_header = offset + yyleng;
  rmime_add (parent->parts, mime);
  replace_top_mime (mime);
  BEGIN (HEADER);
}
.+	/* body text: nothing to store, only offset is advanced (YY_BREAK) */
\n+	/* line ends in the body: only offset is advanced (YY_BREAK) */
<HEADER><<EOF>> {
  /* The input ended inside a header block: the unfinished mail is
     destroyed, the scanner state is reset and EOF_AT_HEADER is
     reported to the caller. */
  mail_destroy (a_mail, BOX_MAILDIR);

  offset = 0;
  only_one = 0;
  BEGIN (INITIAL);

  return EOF_AT_HEADER;
}
<FINISH>.+	/* FINISH state: remaining text is skipped, only offset is advanced */
<FINISH>\n+	/* FINISH state: remaining line ends are skipped as well */
<FINISH><<EOF>> {
/* End of input reached in the FINISH state, i.e. after the last mail
   has already been reported with NEXT_MAIL: go back to INITIAL and
   tell the caller that there is nothing more to read. */
BEGIN (INITIAL);
return END_OF_FILE;
/**
* this is to get rid of warnings
*
* The two calls below are never executed, because they follow the
* return statement; they only reference functions of the generated
* scanner.
*/
yyunput (0, NULL);
yy_flex_realloc (0, 0);
}
<<EOF>> {
  /*
   * End of input in the INITIAL state.  Without any mime part there is
   * nothing to report and END_OF_FILE is returned.  Otherwise the mail
   * being collected is finished, published through newmail and reported
   * with NEXT_MAIL; the FINISH state takes care of the next call.
   */
  mime_t *last = top_or_parent_mime ();

  if (last == NULL)
    return END_OF_FILE;

  /* the last part ends where the input ends */
  last->off_end = offset;
  last->off_bound = offset;

  if (a_mail->reply_to == NULL)
    a_mail->reply_to = a_mail->from;

  newmail = mail + mail_index;
  BEGIN (FINISH);

  offset = 0;
  only_one = 0;
  return NEXT_MAIL;
}
%%
/*
 * Compiles the regular expression that finds the boundary parameter of a
 * multipart Content-Type header.  Group 2 of the expression is a quoted
 * boundary, group 3 an unquoted one.  A compilation failure is reported
 * through error_critical.
 */
void
mlex_init (void)
{
  static const char boundary_pattern[] =
    "multipart.*boundary=(\"([^\"]+)\"|([^\"][^; \\-]*);?)";

  if (regcomp (& bound_re, boundary_pattern, REG_ICASE | REG_EXTENDED) != 0)
    error_critical (1, 0, "internal error");

  re_compiled = 1;
}
/*
 * Releases the boundary regular expression, if it has been compiled.
 * Calling the function more than once is harmless.
 */
void
mlex_free_resources (void)
{
  if (re_compiled){
    regfree (& bound_re);
    re_compiled = 0;
  }
}
/*
 * Scans exactly one mail from yyin, starting in the HEADER state.
 * Custom headers are not collected and no after-header hook is set.
 *
 * tm - value stored as the date of the mail before scanning starts
 *
 * Returns whatever mlex_scan returns.
 */
int
mlex_scan_file (time_t tm)
{
  YY_BUFFER_STATE input;
  int result;

  only_one = 1;
  collect_custom_headers = 0;
  after_header_action = NULL;
  clear_mail (tm);

  input = yy_create_buffer (yyin, YY_BUF_SIZE);
  yy_switch_to_buffer (input);
  BEGIN (HEADER);

  result = mlex_scan ();

  yy_delete_buffer (input);
  BEGIN (INITIAL);
  return result;
}
/*
 * Scans exactly one mail from the NUL-terminated string buf.  The
 * buffer that was current before the call is made current again
 * afterwards.
 *
 * Returns NEXT_MAIL when the scanner reported NEXT_MAIL or
 * EOF_AT_HEADER, and BROKEN_MAIL for every other result.
 */
int
mlex_scan_buffer (char *buf)
{
  /* must be read before yy_scan_string creates the temporary buffer */
  YY_BUFFER_STATE previous = YY_CURRENT_BUFFER;
  YY_BUFFER_STATE scratch;
  int result;

  only_one = 1;
  collect_custom_headers = 0;
  after_header_action = NULL;
  clear_mail (0);
  BEGIN (HEADER);

  scratch = yy_scan_string (buf);
  result = mlex_scan ();
  yy_delete_buffer (scratch);
  yy_switch_to_buffer (previous);

  return (result == EOF_AT_HEADER || result == NEXT_MAIL)
    ? NEXT_MAIL
    : BROKEN_MAIL;
}
/*
 * Scans exactly one outgoing mail from yyin.  Headers without a rule of
 * their own are collected, and write_headers is run once at the end of
 * the header block.  The current time is stored as the date of the mail
 * before scanning starts.
 *
 * Returns whatever mlex_scan returns.
 */
int
mlex_outmail_scan (void)
{
  time_t now = time (NULL);
  YY_BUFFER_STATE input = yy_create_buffer (yyin, YY_BUF_SIZE);
  int result;

  collect_custom_headers = 1;
  only_one = 1;
  after_header_action = write_headers;
  clear_mail (now);

  yy_switch_to_buffer (input);
  BEGIN (HEADER);
  result = mlex_scan ();

  yy_delete_buffer (input);
  BEGIN (INITIAL);
  return result;
}
/*
 * Starts scanning an mbox file from yyin: both mail slots are zeroed,
 * the "From " separator rule is enabled (only_one is cleared) and the
 * scanner is run from the INITIAL state on a new buffer.
 *
 * Returns whatever the first call of mlex_scan returns.
 */
int
mlex_mbox_scan_start (void)
{
  YY_BUFFER_STATE input = yy_create_buffer (yyin, YY_BUF_SIZE);

  only_one = 0;
  collect_custom_headers = 0;
  after_header_action = NULL;
  memset (mail, '\0', sizeof (mail));

  yy_switch_to_buffer (input);
  BEGIN (INITIAL);
  return mlex_scan ();
}
/****************************************************************************
* STACK OPERATIONS
****************************************************************************/
/*
 * Leaves the innermost multipart level: the mime part of the current
 * level is forgotten and top is decremented, unless it is already 0.
 */
static void
pop (void)
{
  stack[top].mime = NULL;

  if (top != 0)
    top--;
}
/*
 * Enters a new multipart level.
 *
 * boundary - start of the boundary text (need not be NUL-terminated)
 * len      - number of characters of the boundary; longer boundaries are
 *            cut to MAX_BOUNDARY_LEN - 1 characters
 *
 * The boundary is stored in the current stack entry, then top is
 * incremented and the new entry starts without a mime part.  The stack
 * has a constant size, so a request that would not leave room for the
 * new entry is ignored; boundaries of such deeply nested parts are then
 * treated as ordinary body text instead of overflowing the array.
 */
static void
push (char *boundary, int len)
{
  const int depth = (int) (sizeof (stack) / sizeof (stack[0]));

  /* stack[top + 1] is written below and must be a valid element */
  if (top < 0 || top + 1 >= depth)
    return;

  if (len < 0)
    len = 0;
  if (len >= MAX_BOUNDARY_LEN)
    len = MAX_BOUNDARY_LEN - 1;

  memcpy (stack[top].boundary, boundary, len);
  stack[top].boundary[len] = '\0';
  stack[top].boundary_len = len;

  top++;
  stack[top].mime = NULL;
}
/*
 * Makes mime the part of the current stack level.
 */
static void
replace_top_mime (mime_t *mime)
{
  struct bound *level = stack + top;

  level->mime = mime;
}
/*
 * Returns the mime part of the level below the current one, or NULL
 * when the current level is the lowest one.
 */
static mime_t *
parent_mime (void)
{
  return (top > 0) ? stack[top - 1].mime : NULL;
}
/*
 * Returns the mime part of the current stack level; this is NULL when
 * the level has no part attached, or when top is negative.
 */
static mime_t *
top_mime (void)
{
  return (top >= 0) ? stack[top].mime : NULL;
}
/*
 * Returns the mime part of the current stack level and falls back to
 * the part of the level below when the current level has none.  The
 * result is NULL when neither exists.
 */
static mime_t *
top_or_parent_mime (void)
{
  mime_t *current = top_mime ();

  if (current == NULL)
    current = parent_mime ();
  return current;
}
/*
 * Returns the boundary pushed most recently, or NULL when the stack
 * holds no boundary.
 */
static char *
top_boundary (void)
{
  return (top > 0) ? stack[top - 1].boundary : NULL;
}
/*
 * Returns the length of the boundary pushed most recently, or -1 when
 * the stack holds no boundary.
 */
static int
top_boundary_len (void)
{
  return (top > 0) ? stack[top - 1].boundary_len : -1;
}
/****************************************************************************
* IMPLEMENTATION PRIVATE FUNCTIONS
****************************************************************************/
/*
  This function prepares the content of a header for storing: leading
  white space is skipped, every carriage return is removed and the rest
  is passed to mime_decode_header, whose result is returned.  Line feeds
  and the white space of folded lines are copied unchanged by this
  function itself.

  content - NUL-terminated text following the header name

  The working copy is allocated with xmalloc; the callers in this file
  release the returned string with xfree when they do not keep it.
  NOTE(review): whether mime_decode_header returns the buffer it was
  given or a new one is not visible here - confirm before changing the
  ownership rules.
*/
static char *
unfold_content (char *content)
{
  char *result = xmalloc (strlen (content) + 1);
  char *result_ptr = result;
  char *ptr;

  /* <ctype.h> functions require a value representable as unsigned char;
     headers may contain bytes >= 0x80, which are negative where plain
     char is signed */
  while (*content && isspace ((unsigned char) *content))
    content++;

  for (ptr = content; *ptr; ptr++){
    if (*ptr != '\r')
      *result_ptr++ = *ptr;
  }
  *result_ptr = '\0';

  return mime_decode_header (result, result_ptr - result, 1);
}
/*
 * Prepares the mail slot that is currently written to (a_mail) for a new
 * message: the slot is cleared, gets fresh mime information whose header
 * starts at the current offset, and tm becomes its date.  The multipart
 * stack is reset so that its lowest level refers to the new mail.
 */
static void
clear_mail (time_t tm)
{
  mail_clear (a_mail);

  a_mail->date = tm;
  a_mail->place.offset_header = offset;
  a_mail->mime = mime_info_create ();
  a_mail->mime->mime->off_header = offset;

  stack[0].mime = a_mail->mime->mime;
  top = 0;
}
/*
 * After-header hook installed by mlex_outmail_scan.  The message id and
 * the in_reply_to list read from the input are replaced by the values
 * supplied by the compose module; a date string is taken from
 * compose_date only when the input had no Date: header.  The hook
 * removes itself, so it runs once per scan.
 */
static void
write_headers (void)
{
  char *fresh_id = compose_msg_id ();
  rstring_t *fresh_irt = compose_in_reply_to ();

  if (a_mail->msg_id != NULL)
    xfree (a_mail->msg_id);
  a_mail->msg_id = fresh_id;

  if (a_mail->in_reply_to != NULL)
    rstring_delete (a_mail->in_reply_to);
  a_mail->in_reply_to = fresh_irt;

  if (a_mail->date_str == NULL)
    a_mail->date_str = compose_date ();

  after_header_action = NULL;
}
/****************************************************************************
* INTERFACE CLASS BODIES
****************************************************************************/
/****************************************************************************
*
* END MODULE mlex.l
*
****************************************************************************/
/* syntax highlighted by Code2HTML, v. 0.9.1 */