/* * Copyright (C) 2004 Laird Breyer * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Author: Laird Breyer */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include "dbacl.h" #include "hmine.h" #include "util.h" extern options_t u_options; extern long system_pagesize; /* #define DEBUG_TOKEN(s,d) print_token_delim(stdout, (s), (d)) */ #define DEBUG_TOKEN(s,d) /*********************************************************** * PARSING FUNCTIONS * * * * The RFC 821,822,2821,2822 standards each define grammars* * which are subtly incompatible, most likely due to typos.* * For us, this poses a problem. The easiest solution is * * to define four different parsing functions, and try them* * all out. * *********************************************************** * The skip functions operate as follows: if line is NULL, * * the function returns NULL. Otherwise, line is returned * * as a pointer to the first character after the skipped * * pattern. If the patterns couldn't be traversed * * successfully, the function returns NULL. 
* ***********************************************************/ void print_token_delim(FILE *out, char *sym, token_delim_t *d) { char *p; if( sym && d) { fprintf(out, "%s[", sym); for(p = d->begin; p < d->end; p++) { fprintf(out, "%c", *p); } fprintf(out, "]\n"); } } void print_state(FILE *out, hline_t *t) { fprintf(stdout, "%s%s%s%s%s%s%s%s\n", (t->state & (1<state & (1<state & (1<state & (1<state & (1<state & (1<state & (1<state & (1<hdata.textbuf_len = system_pagesize; head->hdata.textbuf = (char *)malloc(head->hdata.textbuf_len); head->hdata.textbuf_end = head->hdata.textbuf; head->hdata.curline = 0; head->hstack.max = 100; head->hstack.top = 0; head->hstack.hlines = (hline_t *)calloc(head->hstack.max, sizeof(hline_t)); } void free_head_filter(HEADER_State *head) { if( head->hdata.textbuf ) { free(head->hdata.textbuf); head->hdata.textbuf = NULL; head->hdata.textbuf_end = NULL; head->hdata.curline = 0; } if( head->hstack.hlines ) { free(head->hstack.hlines); head->hstack.top = 0; } } char *head_append_hline(HEADER_State *head, const char *line) { int l, n; char *tmp; if( head->hdata.textbuf && line ) { l = head->hdata.textbuf_end - head->hdata.textbuf; n = strlen(line); while( (l+n) >= head->hdata.textbuf_len ) { tmp = (char *)realloc(head->hdata.textbuf, 2 * head->hdata.textbuf_len); if( !tmp ) { errormsg(E_ERROR, "not enough memory for input line (%d bytes)\n", head->hdata.textbuf_len); return NULL; } head->hdata.textbuf = tmp; head->hdata.textbuf_end = tmp + l; head->hdata.textbuf_len *= 2; } if( !isblank(*line) ) { /* separate logical header lines by NUL chars */ *head->hdata.textbuf_end++ = '\0'; head->hdata.curline = head->hdata.textbuf_end - head->hdata.textbuf; } strcpy(head->hdata.textbuf_end, line); head->hdata.textbuf_end += n; return head->hdata.textbuf + head->hdata.curline; } return NULL; } /*********************************************************** * CONVERSION FUNCTIONS * ***********************************************************/ int 
adjust_tz(struct tm *tim, char *p) { int s, a, i; typedef struct { char *nam; int offs; } zon_t; static zon_t zones[] = { /* note: zone offsets are already added to printed date */ {"UT", 0}, {"GMT", 0}, {"EST", -5}, {"EDT", -4}, {"CST", -6}, {"CDT", -5}, {"MST", -7}, {"MDT", -6}, {"PST", -8}, {"PDT", -7}, {"A", -1}, {"B", -2}, {"C", -3}, {"D", -4}, {"E", -5}, {"F", -6}, {"G", -7}, {"H", -8}, {"I", -9}, {"K", -10}, {"L", -11}, {"M", -12}, {"N", +1}, {"O", +2}, {"P", +3}, {"Q", +4}, {"R", +5}, {"S", +6}, {"T", +7}, {"U", +8}, {"V", +9}, {"W", +10}, {"X", +11}, {"Y", +12}, {"Z", 0} }; while( *p && !isalpha((int)*p) && (*p != '+') && (*p != '-') ) { p++; } switch(*p) { case '+': case '-': if( *p == '+' ) { s = -1; } else { s = +1; } while( *p && !isdigit((int)*p) ) { p++; } a = atoi(p); tim->tm_hour += s * (int)(a / 100); tim->tm_min += s * (int)(a % 100); break; default: a = -25; for(i = 0; i < sizeof(zones)/sizeof(zon_t); i++) { if( strncasecmp(p, zones[i].nam, strlen(zones[i].nam)) == 0 ) { a = -zones[i].offs; break; } } if( a == -25 ) { return -1; } else { tim->tm_hour += a; } } return 0; } /* not sure that we can use strptime(), so we do this manually :-( */ time_t cvt_date(token_delim_t *tok) { struct tm tim; time_t retval; char *p, *q; int i; static char *month_name[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; static char *day_name[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; p = tok->begin; if( !p ) { return -1; } while( *p && !isdigit((int)*p) ) { p++; } tim.tm_mday = atoi(p); if( (tim.tm_mday < 1) || (tim.tm_mday > 31) ) { return -1; } while( *p && !isalpha((int)*p) ) { p++; } tim.tm_mon = -1; for(i = 0; i < sizeof(month_name)/sizeof(char*); i++) { if( strncasecmp(p, month_name[i], 3) == 0 ) { tim.tm_mon = i; break; } } if( (tim.tm_mon < 0) || (tim.tm_mon > 11) ) { return -1; } while( *p && !isdigit((int)*p) ) { p++; } tim.tm_year = atoi(p) - 1900; if( (tim.tm_year < 0) || (tim.tm_year > 132) ) { 
return -1; } while( *p && !isspace((int)*p) ) { p++; } while( *p && !isdigit((int)*p) ) { p++; } tim.tm_hour = atoi(p); if( (tim.tm_hour < 0) || (tim.tm_hour > 23) ) { return -1; } while( *p && (*p != ':') ) { p++; } while( *p && !isdigit((int)*p) ) { p++; } tim.tm_min = atoi(p); if( (tim.tm_min < 0) || (tim.tm_min > 59) ) { return -1; } for(q = p; *q && (*q != ':'); q++); if( *q == ':' ) { for(p = q; *p && !isdigit((int)*p); p++); tim.tm_sec = atoi(p); if( (tim.tm_sec < 0) || (tim.tm_sec > 59) ) { return -1; } } else { tim.tm_sec = 0; } while( *p && isdigit((int)*p) ) { p++; } tim.tm_isdst = 0; /* verify weekday if present - this must be done before the time zone * adjustment! */ for(q = tok->begin; *q && !isalnum((int)*q); q++); /* used later */ if( isalpha((int)*q) ) { for(i = 0; i < sizeof(day_name)/sizeof(char*); i++) { if( strncasecmp(q, day_name[i], 3) == 0 ) { if( tim.tm_wday != i ) { retval = -1; } break; } } } /* time zone parsing is messy */ if( adjust_tz(&tim, p) > -1 ) { /* mktime assumes UTC */ retval = mktime(&tim); } else { retval = -1; } return retval; } /*********************************************************** * HEADER LINE FUNCTIONS * ***********************************************************/ hline_t *mkh_ignore(HEADER_State *head, hline_t *t, char *line) { memset(t, 0, sizeof(hline_t)); t->tag = hltIGN; t->data.ign.line = line; t->toffset = head->hdata.curline; if( (u_options & (1<tag = hltRET; t->toffset = head->hdata.curline; if( parse_822_return(line, &t->data.ret.rfc822) ) { t->state |= (1<data.ret.rfc821) ) { t->state |= (1<data.ret.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.ret.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.ret.rfc2822, RFC_2822_LAX) ) { t->state |= (1<data.ret.rfc2821, RFC_2821_STRICT) ) { t->state |= (1<tag = hltRCV; t->toffset = head->hdata.curline; if( parse_822_received(line, &t->data.rcv.rfc822) ) { t->state |= (1<data.rcv.rfc821) ) { t->state |= (1<data.rcv.rfc2821, RFC_2821_STRICT) ) { t->state |= 
(1<data.rcv.rfc2821, RFC_2821_LAX) ) { t->state |= (1<data.rcv.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.rcv.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rcv.rfc2822, RFC_2822_LAX) ) { t->state |= (1<state & ((1<data.rcv.numsec = cvt_date(&t->data.rcv.rfc2821.datetime_); } else if( t->state & ((1<data.rcv.numsec = cvt_date(&t->data.rcv.rfc2822.datetime_); } else if( t->state & (1<data.rcv.numsec = cvt_date(&t->data.rcv.rfc822.datetime_); } else { t->data.rcv.numsec = -1; }
 /* the time stamp of a Received line is stored in data.rcv, so that is
  * the member to test; this used to look at data.dat.numsec */
 if( t->data.rcv.numsec == (time_t)(-1) ) { t->state |= (1<data.rcv.numsec); print_state(stdout, t); } return t; }
 hline_t *mkh_reply_to(HEADER_State *head, hline_t *t, char *line) { memset(t, 0, sizeof(hline_t)); t->tag = hltRPT; t->toffset = head->hdata.curline;
 if( parse_822_reply_to(line, &t->data.rpt.rfc822) ) { t->state |= (1<data.rpt.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.rpt.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rpt.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltFRM; t->toffset = head->hdata.curline;
 if( parse_822_from(line, &t->data.frm.rfc822) ) { t->state |= (1<data.frm.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.frm.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.frm.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltSND; t->toffset = head->hdata.curline;
 if( parse_822_sender(line, &t->data.snd.rfc822) ) { t->state |= (1<data.snd.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.snd.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.snd.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltRRT; t->toffset = head->hdata.curline;
 if( parse_822_resent_reply_to(line, &t->data.rrt.rfc822) ) { t->state |= (1<data.rrt.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.rrt.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rrt.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltRFR; t->toffset = head->hdata.curline;
 /* NOTE(review): the other resent-* constructors call a
  * parse_822_resent_* parser, this one calls parse_822_from() - confirm
  * that this is intended */
 if( parse_822_from(line, &t->data.rfr.rfc822) ) { t->state |= (1<data.rfr.rfc2822, RFC_2822_STRICT) ) { t->state |=
(1<data.rfr.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rfr.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltRSN; t->toffset = head->hdata.curline; if( parse_822_resent_sender(line, &t->data.rsn.rfc822) ) { t->state |= (1<data.rsn.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.rsn.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rsn.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltDAT; t->toffset = head->hdata.curline; if( parse_822_date(line, &t->data.dat.rfc822) ) { t->state |= (1<data.dat.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.dat.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.dat.rfc2822, RFC_2822_LAX) ) { t->state |= (1<state & ((1<data.dat.numsec = cvt_date(&t->data.dat.rfc2822.datetime_); } else if( t->state & (1<data.dat.numsec = cvt_date(&t->data.dat.rfc822.datetime_); } else { t->data.dat.numsec = -1; } if( t->data.dat.numsec == (time_t)(-1) ) { t->state |= (1<data.dat.numsec); print_state(stdout, t); } return t; } hline_t *mkh_resent_date(HEADER_State *head, hline_t *t, char *line) { memset(t, 0, sizeof(hline_t)); t->tag = hltRDT; t->toffset = head->hdata.curline; if( parse_822_resent_date(line, &t->data.rdt.rfc822) ) { t->state |= (1<data.rdt.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.rdt.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rdt.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltTO; t->toffset = head->hdata.curline; if( parse_822_to(line, &t->data.to.rfc822) ) { t->state |= (1<data.to.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.to.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.to.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltRTO; t->toffset = head->hdata.curline; if( parse_822_resent_to(line, &t->data.rto.rfc822) ) { t->state |= (1<data.rto.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.rto.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rto.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltCC; t->toffset = head->hdata.curline; if( parse_822_cc(line, &t->data.cc.rfc822) ) { 
t->state |= (1<data.cc.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.cc.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.cc.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltRCC; t->toffset = head->hdata.curline; if( parse_822_resent_cc(line, &t->data.rcc.rfc822) ) { t->state |= (1<data.rcc.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.rcc.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rcc.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltBCC; t->toffset = head->hdata.curline; if( parse_822_bcc(line, &t->data.bcc.rfc822) ) { t->state |= (1<data.bcc.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.bcc.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.bcc.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltRBC; t->toffset = head->hdata.curline; if( parse_822_resent_bcc(line, &t->data.rbc.rfc822) ) { t->state |= (1<data.rbc.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.rbc.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rbc.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltMID; t->toffset = head->hdata.curline; if( parse_822_message_id(line, &t->data.mid.rfc822) ) { t->state |= (1<data.mid.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.mid.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.mid.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltRID; t->toffset = head->hdata.curline; if( parse_822_resent_message_id(line, &t->data.rid.rfc822) ) { t->state |= (1<data.rid.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.rid.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.rid.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltIRT; t->toffset = head->hdata.curline; if( parse_822_in_reply_to(line, &t->data.irt.rfc822) ) { t->state |= (1<data.irt.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.irt.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.irt.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltREF; t->toffset = head->hdata.curline; if( parse_822_references(line, &t->data.ref.rfc822) ) { t->state |= (1<data.ref.rfc2822, 
RFC_2822_STRICT) ) { t->state |= (1<data.ref.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.ref.rfc2822, RFC_2822_LAX) ) { t->state |= (1<tag = hltSBJ; t->toffset = head->hdata.curline;
 if( parse_822_subject(line, &t->data.sbj.rfc822) ) { t->state |= (1<data.sbj.rfc2822, RFC_2822_STRICT) ) { t->state |= (1<data.sbj.rfc2822, RFC_2822_OBSOLETE) ) { t->state |= (1<data.sbj.rfc2822, RFC_2822_LAX) ) { t->state |= (1<hstack.top >= head->hstack.max ) {
    /* realloc() takes a size in bytes: without the sizeof factor the
     * stack was resized to 2 * max bytes, far too small for 2 * max
     * entries, and later pushes wrote past the allocation */
    tmp = (hline_t *)realloc(head->hstack.hlines,
                             2 * head->hstack.max * sizeof(hline_t));
    if( !tmp ) {
      errormsg(E_ERROR, "could not grow header stack (%d)\n",
               head->hstack.max * 2);
      return NULL;
    }
    head->hstack.hlines = tmp;
    head->hstack.max *= 2;
  }
  tmp = make_new_header(head, head->hstack.hlines + head->hstack.top, line);
  if( tmp ) {
    head->hstack.top++;
  }
  return tmp;
}

/***********************************************************
 * MORE UTILITY FUNCTIONS                                  *
 ***********************************************************/

/*
 * Copies the token [tokb, toke) into buf as one unfolded line: leading
 * whitespace is skipped, each line break together with the whitespace
 * that follows it becomes a single space, and NUL characters are
 * dropped. Copying stops at the first delim character, at toke, or as
 * soon as buflen - 1 characters have been stored, whichever comes
 * first. buf is always NUL terminated.
 *
 * Returns the position where scanning stopped (just past delim if one
 * was found), or NULL if a pointer argument is NULL or buflen < 1.
 */
char *unfold_token(char *buf, int buflen,
                   char *tokb, char *toke, char delim) {
  char c;

  if( !buf || !tokb || !toke || (buflen < 1) ) {
    return NULL;
  }

  buflen--; /* need space for the NUL */

  /* skip leading whitespace */
  while( (tokb < toke) && isspace((unsigned char)*tokb) ) {
    tokb++;
  }

  while( (tokb < toke) && (buflen > 0) ) {
    c = *tokb;
    if( (c == '\n') || (c == '\r') ) {
      /* folded line: swallow the line break and the indentation of the
       * continuation. tokb ends up on the first character that must be
       * kept, so it is not advanced again (doing so used to lose that
       * character). */
      while( (tokb < toke) && isspace((unsigned char)*tokb) ) {
        tokb++;
      }
      *buf++ = ' ';
      buflen--;
    } else if( c == '\0' ) {
      tokb++; /* ignore */
    } else if( c != delim ) {
      *buf++ = c;
      buflen--;
      tokb++;
    } else {
      *buf = '\0';
      return tokb + 1;
    }
  }
  *buf = '\0';
  return tokb;
}

/*
 * Assumes that buf contains a mailbox token or similar and uses
 * heuristics to check for an email address. Makes sure such an email
 * is bracketed <>. Returns true if email appears to exist.
*/ bool_t bracketize_mailbox(char *buf, int buflen) { /* a group ends in ; othewise it's a mailbox */ char *p; int n; if( !strrchr(buf, ';') ) { p = strrchr(buf, '<'); if( !p || (p >= strrchr(buf, '>')) ) { n = strlen(buf); if( (n + 2) < buflen ) { memmove(buf + 1, buf, n); buf[0] = '<'; buf[n+1] = '>'; buf[n+2] = '\0'; } } return 1; } return 0; } /*********************************************************** * TESTING * ***********************************************************/ #if defined TEST_PARSER #define CHECK_EQ(x,y) if( (x) != (y) ) { \ fprintf(stdout, "%s != %s\n", #x, #y); \ return 1; \ } #define CHECK_NEQ(x,y) if( (x) == (y) ) { \ fprintf(stdout, "%s == %s\n", #x, #y); \ return 1; \ } #if defined HAVE_UNISTD_H #include #endif extern HEADER_State head; extern char *textbuf; extern charbuf_len_t textbuf_len; /* call this as: cat sample.headers.2822.good | pcheck 2822 good */ int main(int argc, char **argv) { options_t mask = 0; hline_count_t i = 0; char *pptextbuf = NULL; int extra_lines = 0; hline_t *h = NULL; #if defined(HAVE_GETPAGESIZE) system_pagesize = getpagesize(); #endif if( system_pagesize == -1 ) { system_pagesize = BUFSIZ; } init_buffers(); init_head_filter(&head); set_iobuf_mode(stdin); /* u_options |= (1<