/*
 * Copyright (C) 2004 Laird Breyer
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Author: Laird Breyer
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

/*
 * NOTE(review): the three directives below carry no header name in this
 * copy of the file. The code in this file calls isdigit, isspace, isblank,
 * isalnum, strlen, strncasecmp and memset, so the missing names are
 * presumably standard headers such as ctype.h and string.h - TODO confirm
 * against the upstream source.
 */
#include
#include
#include

#include "dbacl.h"
#include "hmine.h"
#include "util.h"

/***********************************************************
 * PARSING FUNCTIONS RFC 2822/2821                         *
 * The skip functions operate as follows: if line is NULL, *
 * the function returns NULL. Otherwise, line is returned  *
 * as a pointer to the first character after the skipped   *
 * pattern. If the patterns couldn't be traversed          *
 * successfully, the function returns NULL.                *
 ***********************************************************/

/*
 * NOTE(review): several stretches of this copy are damaged. Wherever an
 * expression starts with (1 followed by a shift operator, the text up to
 * some later point is missing, so a number of functions are truncated or
 * fused together. Those stretches are kept exactly as found and are flagged
 * individually below; they must be restored from the upstream source
 * before this file can be built.
 */

/***********************************************************
 * GENERAL LEXICAL TOKENS                                  *
 ***********************************************************/

/*
 * Skips a delimited, possibly nested region such as a parenthesised
 * comment, a bracketed literal or a quoted string.
 *
 * line    - text to scan; must begin with the opening delimiter
 * opening - delimiter that increases the nesting count
 * closing - delimiter that decreases the nesting count
 * quote   - escape character; the character after it is stepped over
 *           without being examined
 *
 * Returns a pointer just past the closing delimiter that brings the
 * nesting count back to zero. Returns NULL if line is NULL, if it does
 * not start with opening, or if the string ends first.
 *
 * NOTE(review): when the quote character is the last character before the
 * terminator, the two increments step past the terminator and the loop
 * condition then reads beyond the end of the string.
 */
static __inline__ char *skip_recursive(char *line, char opening, char closing, char quote) {
  int c = 1;
  if( !line || !(*line == opening) ) { return NULL; }
  line++;
  while( *line ) {
    /* when closing == opening, we never increment c */
    if( *line == closing ) { c--; }
    else if( *line == opening ) { c++; }
    else if( *line == quote ) { line++; }
    line++;
    if( c <= 0 ) { break; }
  }
  return (c == 0) ? line : NULL;
}

/*
 * Skips exactly one occurrence of the character what.
 * Returns the position after it, or NULL if line is NULL or the
 * current character differs.
 */
static __inline__ char *skip_single_char(char *line, char what) {
  if( !line || (*line != what) ) { return NULL; }
  return ++line;
}

/*
 * Skips exactly two decimal digits; NULL if line is NULL or either
 * character is not a digit. The tests short-circuit, so line[1] is only
 * read after line[0] was found to be a digit.
 *
 * NOTE(review): here and in the other ctype calls of this file a plain
 * char is cast to int; for negative char values that argument is outside
 * the range the ctype functions accept.
 */
static __inline__ char *skip_twodigit(char *line) {
  if( !line || !isdigit((int)line[0]) || !isdigit((int)line[1]) ) { return NULL; }
  line += 2;
  return line;
}

/*
 * Skips one decimal digit and, if present, a second one.
 * Returns NULL if line is NULL or does not start with a digit.
 */
static __inline__ char *skip_onetwodigit(char *line) {
  if( !line || !isdigit((int)line[0]) ) { return NULL; }
  line++;
  if( isdigit((int)line[0]) ) { line++; }
  return line;
}

/*
 * Skips exactly four decimal digits; NULL if line is NULL or any of the
 * four characters is not a digit.
 */
static __inline__ char *skip_fourdigit(char *line) {
  if( !line || !isdigit((int)line[0]) || !isdigit((int)line[1]) || !isdigit((int)line[2]) || !isdigit((int)line[3]) ) { return NULL; }
  line += 4;
  return line;
}

/*
 * Skips the string what if line starts with it, compared without regard
 * to case (strncasecmp). Returns the position after the match or NULL.
 * what is passed to strlen unchecked, so it must not be NULL.
 */
static __inline__ char *skip_single_string(char *line, char *what) {
  if( !line || (strncasecmp(line, what, strlen(what)) != 0) ) { return NULL; }
  return line + strlen(what);
}

/*
 * Tries each of the n strings in list in order and skips the first one
 * that line starts with (case-insensitive). Returns the position after
 * that match, or NULL if line is NULL or nothing matches. Because the
 * first hit wins, an entry that is a prefix of a later entry hides it.
 */
static __inline__ char *skip_string_list(char *line, char *list[], int n) {
  int i;
  if( !line ) { return NULL; }
  for(i = 0; i < n; i++) {
    if( strncasecmp(line, list[i], strlen(list[i])) == 0 ) {
      return line + strlen(list[i]);
    }
  }
  return NULL;
}

/***********************************************************
 * RFC 2822 LEXICAL TOKENS                                 *
 ***********************************************************/

/* fws parsing is slightly wrong: we don't check for CRLF as a unit,
 * instead we accept isspace(). It's possible the OS has mangled the actual CRLF.
 *
 * Skips folding white space: any run of non-blank whitespace, then one
 * mandatory blank, then any further blanks. Returns NULL if line is NULL
 * or a character that is not whitespace (including the terminator) is
 * met before the first blank.
 */
static __inline__ char *skip_2822_fws(char *line) {
  if( !line ) { return NULL; }
  while( !isblank((int)*line) ) {
    if( !isspace((int)*line) ) { return NULL; }
    line++;
  }
  line++;
  while( isblank((int)*line) ) { line++; }
  return line;
}

/* the grammar sometimes allows constructs such as token1 token2, where
 * token1 = t1 [CFWS] and token2 = CFWS t2. By the time token1 was parsed,
 * token2 cannot be obtained anymore! We could change the grammar, or we could
 * inspect token pairs critically to prevent this special case. The easiest solution
 * is to allow FWS "undo" and use it just before CFWS. Use this carefully only if you
 * know you can't fall off the beginning of the string.
 *
 * Moves line backwards while the preceding character is whitespace and
 * returns the new position (NULL stays NULL). It reads line[-1] without
 * any lower bound, hence the warning above.
 */
static __inline__ char *unskip_2822_fws(char *line) {
  while( line && isspace((int)line[-1]) ) { line--; }
  return line;
}

/*
 * Skips comments and folding white space.
 *
 * min - number of leading items (each one either a run of folding white
 *       space or a parenthesised comment) that must be present
 *
 * After the mandatory items, further items are consumed while they match.
 * Returns NULL if line is NULL or a mandatory item is absent. In the
 * optional part a failing white space run simply ends the scan, whereas a
 * comment that does not close makes the result NULL.
 *
 * NOTE(review): with min greater than 1, a mandatory item that fails
 * leaves line NULL and the next iteration dereferences it; the calls
 * visible in this file pass 0 or 1.
 */
static __inline__ char *skip_2822_cfws(char *line, int min) {
  char *tmp;
  if( !line ) { return NULL; }
  while(min-- > 0) {
    if( isspace((int)*line ) ) {
      line = skip_2822_fws(line);
    } else if( *line == '(' ) {
      line = skip_recursive(line, '(', ')', '\\');
    } else {
      return NULL;
    }
  }
  /* below space is optional */
  while( line ) {
    if( isspace((int)*line ) ) {
      tmp = skip_2822_fws(line);
      if( tmp ) { line = tmp; } else { break; }
    } else if( *line == '(' ) {
      line = skip_recursive(line, '(', ')', '\\');
    } else {
      break;
    }
  }
  return line;
}

/*
 * Skips an obsolete style comma separated list in which elements may be
 * empty. skip is the element parser. The first element is optional, one
 * comma (with optional comments or white space around it) is mandatory,
 * and after that element and comma pairs are consumed as long as a comma
 * is found. Returns NULL when the mandatory comma is absent.
 *
 * BOT, EOT and DT are macros defined outside the visible source;
 * presumably they record the start and end of the token in tok and emit
 * a debug trace with the given tag - TODO confirm in hmine.h.
 */
static __inline__ char *skip_2822_obs_list(char *line, token_delim_t *tok, char *(*skip)(char *,token_delim_t *, options_t opt), options_t opt) {
  char *tmp;
  BOT;
  tmp = (*skip)(line, NULL, opt);
  if( tmp ) { line = tmp; }
  line = skip_2822_cfws(line, 0);
  line = skip_single_char(line, ',');
  line = skip_2822_cfws(line, 0);
  do {
    tmp = (*skip)(line, NULL, opt);
    if( tmp ) { line = tmp; }
    /* only commit the separator when the whole cfws, comma, cfws run matched */
    tmp = skip_2822_cfws(line, 0);
    tmp = skip_single_char(tmp, ',');
    tmp = skip_2822_cfws(tmp, 0);
    if( tmp ) { line = tmp; }
  } while( tmp );
  EOT;
  DT("2822_obs_list");
  return line;
}

/*
 * Skips one or more elements, each parsed by skip, separated by the
 * single character delim. The element parser is always called with a
 * NULL token pointer. Returns the position after the last element. The
 * result is whatever skip returns for the last element tried, so a
 * delimiter that is not followed by a valid element yields NULL (given
 * that skip follows the convention stated at the top of the file).
 * This function has external linkage, unlike most helpers here.
 */
char *skip_2822_seq(char *line, token_delim_t *tok, char *(*skip)(char *,token_delim_t *, options_t), char delim, options_t opt) {
  char *tmp;
  BOT;
  line = (*skip)(line, NULL, opt);
  tmp = skip_single_char(line, delim);
  while( tmp ) {
    line = (*skip)(tmp, NULL, opt);
    tmp = skip_single_char(line, delim);
  }
  EOT;
  DT("2822_seq");
  return line;
}

/*
 * Lookup table for atom characters, indexed by character code. An entry
 * equal to its own index marks a character accepted by skip_2822_1atext;
 * every other entry is 0. Entry 0 is -1 so that the string terminator
 * never compares equal to itself. Only the first 128 entries are listed;
 * the rest of the 256 are zero-initialised by the language.
 */
static char rfc2822_atom[256] = {
  -1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, '!', 0, '#', '$', '%', '&','\'',
  0, 0, '*', '+', 0, '-', 0, '/',
  '0', '1', '2', '3', '4', '5', '6', '7',
  '8', '9', 0, 0, 0, '=', 0, '?',
  0, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
  'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
  'X', 'Y', 'Z', 0, 0, 0, '^', '_',
  '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  'x', 'y', 'z', '{', '|', '}', '~', 0,
};

/*
 * Skips a run of one or more atom characters as defined by rfc2822_atom.
 * Returns NULL if line is NULL or the first character is not accepted.
 *
 * NOTE(review): the index is formed by converting a plain char to
 * unsigned int. Where char is signed, a negative value converts to a very
 * large index, far outside the 256 entry table.
 */
static __inline__ char *skip_2822_1atext(char *line, token_delim_t *tok, options_t opt) {
  if( !line || (rfc2822_atom[(unsigned int)*line] != *line) ) { return NULL; }
  BOT;
  while( rfc2822_atom[(unsigned int)*line] == *line ) { line++; }
  EOT;
  DT("2822_atext");
  return line;
}

/*
 * Skips optional comments or white space, a double-quoted string with
 * backslash escapes, and optional trailing comments or white space.
 * The token markers enclose the quoted string only.
 */
static __inline__ char *skip_2822_quoted_string(char *line, token_delim_t *tok, options_t opt) {
  line = skip_2822_cfws(line, 0);
  BOT;
  line = skip_recursive(line, '"', '"', '\\');
  EOT;
  line = skip_2822_cfws(line, 0);
  DT("2822_quoted_string");
  return line;
}

/*
 * Skips an atom: a run of atom characters with optional comments or
 * white space on either side. The token markers enclose the run only.
 */
static __inline__ char *skip_2822_atom(char *line, token_delim_t *tok, options_t opt) {
  line = skip_2822_cfws(line, 0);
  BOT;
  line = skip_2822_1atext(line, NULL, opt);
  EOT;
  line = skip_2822_cfws(line, 0);
  DT("2822_atom");
  return line;
}

/*
 * Skips a word: a quoted string if one matches at line, otherwise an
 * atom tried from the same starting position.
 */
static __inline__ char *skip_2822_word(char *line, token_delim_t *tok, options_t opt) {
  char *tmp = line;
  if( !line ) { return NULL; }
  BOT;
  line = skip_2822_quoted_string(line, NULL, opt);
  if( !line ) { line = skip_2822_atom(tmp, NULL, opt); }
  EOT;
  DT("2822_word");
  return line;
}

/*
 * Skips runs of atom characters separated by single dots, with no
 * white space allowed in between (delegates to skip_2822_seq).
 */
static __inline__ char *skip_2822_dot_atom_text(char *line, token_delim_t *tok, options_t opt) {
  line = skip_2822_seq(line, tok, skip_2822_1atext, '.', opt);
  DT("2822_dot_atom_text");
  return line;
}

/*
 * Skips a dot-atom: dot separated atom text with optional comments or
 * white space before and after it.
 */
static __inline__ char *skip_2822_dot_atom(char *line, token_delim_t *tok, options_t opt) {
  line = skip_2822_cfws(line, 0);
  BOT;
  line = skip_2822_dot_atom_text(line, tok, opt);
  EOT;
  line = skip_2822_cfws(line, 0);
  DT("2822_dot_atom");
  return line;
}

/*
 * Skips a domain literal: a square-bracketed region with backslash
 * escapes, with optional comments or white space on either side.
 * Returns NULL if no opening bracket follows the leading white space.
 */
static __inline__ char *skip_2822_domain_literal(char *line, token_delim_t *tok, options_t opt) {
  line = skip_2822_cfws(line, 0);
  if( !line || (*line != '[') ) { return NULL; }
  BOT;
  line = skip_recursive(line, '[', ']', '\\');
  EOT;
  line = skip_2822_cfws(line, 0);
  DT("2822_domain_literal");
  return line;
}

/*
 * Skips an obsolete form domain: atoms separated by dots, where each
 * atom may carry surrounding comments or white space.
 */
static __inline__ char *skip_2822_obs_domain(char *line, token_delim_t *tok, options_t opt) {
  line = skip_2822_seq(line, tok, skip_2822_atom, '.', opt);
  DT("2822_obs_domain");
  return line;
}

/*
 * Skips a domain: a domain literal when line starts with an opening
 * square bracket, otherwise an obsolete or regular dotted form chosen
 * according to opt.
 *
 * NOTE(review): this copy is damaged inside this function. The option
 * test is cut off after the start of its shift expression, and the text
 * that follows on the same line is the tail of later functions (their
 * trace tags read 2822_obs_angle_addr and 2822_msg_id) plus a fragment
 * of skip_2822_obs_path. Everything in between is missing. The damaged
 * line is kept exactly as found.
 */
static __inline__ char *skip_2822_domain(char *line, token_delim_t *tok, options_t opt) {
  if( !line ) { return NULL; }
  BOT;
  if( *line == '[' ) {
    line = skip_2822_domain_literal(line, NULL, opt);
  } else {
    /* we try obs form before normal form, as both could work and normal form would be smaller */
    if( opt & (1<'); EOT; line = skip_2822_cfws(line, 0); DT("2822_obs_angle_addr"); return line; } static __inline__ char *skip_2822_obs_path(char *line, token_delim_t *tok, options_t opt) { opt |= (1<'); if( (opt & (1<'); if( (opt & (1<'); EOT; line = skip_2822_cfws(line, 0); DT("2822_msg_id"); return line; }

/*
 * Skips one or more message ids. The first skip_2822_msg_id call is
 * mandatory (its result is kept even when NULL); further ids are consumed
 * while they match. The head of skip_2822_msg_id is not present in this
 * copy.
 */
static __inline__ char *skip_2822_refs(char *line, token_delim_t *tok, options_t opt) {
  char *tmp;
  BOT;
  line = skip_2822_msg_id(line, NULL, opt);
  tmp = skip_2822_msg_id(line, NULL, opt);
  while( tmp ) {
    line = tmp;
    tmp = skip_2822_msg_id(line, NULL, opt);
  }
  EOT;
  DT("2822_refs");
  return line;
}

/*
 * Skips unstructured text: everything up to the string terminator.
 * Returns NULL only if line is NULL.
 */
static __inline__ char *skip_2822_text(char *line, token_delim_t *tok) {
  if( !line ) { return NULL; }
  BOT;
  while( *line ) { line++; }
  EOT;
  DT("2822_text");
  return line;
}

/*
 * Skips the header field name given in name (case-insensitive).
 *
 * NOTE(review): this copy is damaged here. The rest of this function and
 * the head of the next one are missing, so its tail is fused with the
 * tail of what the callers below name skip_2822_end_of_line. That tail
 * skips trailing whitespace and returns line only if the string ends
 * there, NULL otherwise. Kept exactly as found.
 */
static __inline__ char *skip_2822_field_name(char *line, char *name, options_t opt) {
  line = skip_single_string(line, name);
  if( (opt & (1< 2) ) { return NULL; }
  while( isspace((int)*line) ) { line++; }
  return (*line) ? NULL : line;
}

/***********************************************************
 * FULL RFC 822 HEADER LINES                               *
 * Each parser below zeroes the result structure p with    *
 * memset (so p must not be NULL), then chains the skip    *
 * functions and returns what skip_2822_end_of_line gives. *
 * Several skip functions used here are not present in     *
 * this copy of the file.                                  *
 ***********************************************************/

/* Parses a Return-Path header line; the path token goes to p->path_. */
char *parse_2822_return(char *line, parse_2822_pth_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_pth_t));
  line = skip_2822_field_name(line, "Return-Path", opt);
  line = skip_2822_path(line, &p->path_, opt);
  return skip_2822_end_of_line(line);
}

/*
 * Parses a Received header line: a name value list (p->naval_), a
 * semicolon and a date-time (p->datetime_). tmp remembers the position
 * after the field name.
 *
 * NOTE(review): the option dependent statement after the date-time is
 * damaged in this copy (cut off after the start of its shift expression)
 * and is kept exactly as found; presumably it retried from tmp in a
 * tolerant mode - TODO confirm against the upstream source.
 */
char *parse_2822_received(char *line, parse_2822_rcv_t *p, options_t opt) {
  char *tmp;
  memset(p, 0, sizeof(parse_2822_rcv_t));
  line = skip_2822_field_name(line, "Received", opt);
  tmp = line;
  line = skip_2822_name_val_list(line, &p->naval_, opt);
  line = skip_single_char(line, ';');
  line = skip_2822_date_time(line, &p->datetime_, opt);
  if( (opt & (1<naval_, opt); }
  return skip_2822_end_of_line(line);
}

/* Parses a From header line; the mailbox list goes to p->mailboxl_. */
char *parse_2822_from(char *line, parse_2822_mls_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_mls_t));
  line = skip_2822_field_name(line, "From", opt);
  line = skip_2822_mailbox_list(line, &p->mailboxl_, opt);
  return skip_2822_end_of_line(line);
}

/* Parses a Resent-From line: strips the Resent- prefix, then parses as From. */
char *parse_2822_resent_from(char *line, parse_2822_mls_t *p, options_t opt) {
  line = skip_single_string(line, "Resent-");
  return parse_2822_from(line, p, opt);
}

/* Parses a Sender header line; the mailbox goes to p->mailbox_. */
char *parse_2822_sender(char *line, parse_2822_mbx_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_mbx_t));
  line = skip_2822_field_name(line, "Sender", opt);
  line = skip_2822_mailbox(line, &p->mailbox_, opt);
  return skip_2822_end_of_line(line);
}

/* Parses a Resent-Sender line: strips the Resent- prefix, then parses as Sender. */
char *parse_2822_resent_sender(char *line, parse_2822_mbx_t *p, options_t opt) {
  line = skip_single_string(line, "Resent-");
  return parse_2822_sender(line, p, opt);
}

/* Parses a Reply-To header line; the address list goes to p->addressl_. */
char *parse_2822_reply_to(char *line, parse_2822_als_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_als_t));
  line = skip_2822_field_name(line, "Reply-To", opt);
  line = skip_2822_address_list(line, &p->addressl_, opt);
  return skip_2822_end_of_line(line);
}

/* Parses a Resent-Reply-To line: strips the Resent- prefix, then parses as Reply-To. */
char *parse_2822_resent_reply_to(char *line, parse_2822_als_t *p, options_t opt) {
  line = skip_single_string(line, "Resent-");
  return parse_2822_reply_to(line, p, opt);
}

/* Parses a To header line; the address list goes to p->addressl_. */
char *parse_2822_to(char *line, parse_2822_als_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_als_t));
  line = skip_2822_field_name(line, "To", opt);
  line = skip_2822_address_list(line, &p->addressl_, opt);
  return skip_2822_end_of_line(line);
}

/* Parses a Resent-To line: strips the Resent- prefix, then parses as To. */
char *parse_2822_resent_to(char *line, parse_2822_als_t *p, options_t opt) {
  line = skip_single_string(line, "Resent-");
  return parse_2822_to(line, p, opt);
}

/* Parses a Cc header line; the address list goes to p->addressl_. */
char *parse_2822_cc(char *line, parse_2822_als_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_als_t));
  line = skip_2822_field_name(line, "Cc", opt);
  line = skip_2822_address_list(line, &p->addressl_, opt);
  return skip_2822_end_of_line(line);
}

/* Parses a Resent-Cc line: strips the Resent- prefix, then parses as Cc. */
char *parse_2822_resent_cc(char *line, parse_2822_als_t *p, options_t opt) {
  line = skip_single_string(line, "Resent-");
  return parse_2822_cc(line, p, opt);
}

/*
 * Parses a Bcc header line. Unlike To and Cc the address list is
 * optional: when it does not match, only comments or white space are
 * accepted after the field name.
 */
char *parse_2822_bcc(char *line, parse_2822_als_t *p, options_t opt) {
  char *tmp;
  memset(p, 0, sizeof(parse_2822_als_t));
  line = skip_2822_field_name(line, "Bcc", opt);
  tmp = skip_2822_address_list(line, &p->addressl_, opt);
  if( tmp ) { line = tmp; } else { line = skip_2822_cfws(line, 0); }
  return skip_2822_end_of_line(line);
}

/* Parses a Resent-Bcc line: strips the Resent- prefix, then parses as Bcc. */
char *parse_2822_resent_bcc(char *line, parse_2822_als_t *p, options_t opt) {
  line = skip_single_string(line, "Resent-");
  return parse_2822_bcc(line, p, opt);
}

/* Parses a Message-ID header line; the id token goes to p->msg_id_. */
char *parse_2822_message_id(char *line, parse_2822_mid_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_mid_t));
  line = skip_2822_field_name(line, "Message-ID", opt);
  line = skip_2822_msg_id(line, &p->msg_id_, opt);
  return skip_2822_end_of_line(line);
}

/* Parses a Resent-Message-ID line: strips the Resent- prefix, then parses as Message-ID. */
char *parse_2822_resent_message_id(char *line, parse_2822_mid_t *p, options_t opt) {
  line = skip_single_string(line, "Resent-");
  return parse_2822_message_id(line, p, opt);
}

/* Parses a Date header line; the date-time token goes to p->datetime_. */
char *parse_2822_date(char *line, parse_2822_dat_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_dat_t));
  line = skip_2822_field_name(line, "Date", opt);
  line = skip_2822_date_time(line, &p->datetime_, opt);
  return skip_2822_end_of_line(line);
}

/* Parses a Resent-Date line: strips the Resent- prefix, then parses as Date. */
char *parse_2822_resent_date(char *line, parse_2822_dat_t *p, options_t opt) {
  line = skip_single_string(line, "Resent-");
  return parse_2822_date(line, p, opt);
}

/* Parses an In-Reply-To header line; the id list goes to p->refs_. */
char *parse_2822_in_reply_to(char *line, parse_2822_ref_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_ref_t));
  line = skip_2822_field_name(line, "In-Reply-To", opt);
  line = skip_2822_refs(line, &p->refs_, opt);
  return skip_2822_end_of_line(line);
}

/* Parses a References header line; the id list goes to p->refs_. */
char *parse_2822_references(char *line, parse_2822_ref_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_ref_t));
  line = skip_2822_field_name(line, "References", opt);
  line = skip_2822_refs(line, &p->refs_, opt);
  return skip_2822_end_of_line(line);
}

/* Parses a Subject header line; the free text goes to p->text_. */
char *parse_2822_subject(char *line, parse_2822_txt_t *p, options_t opt) {
  memset(p, 0, sizeof(parse_2822_txt_t));
  line = skip_2822_field_name(line, "Subject", opt);
  line = skip_2822_text(line, &p->text_);
  return skip_2822_end_of_line(line);
}

/***********************************************************
 * RFC 2821 LEXICAL TOKENS                                 *
 ***********************************************************/

/*
 * Skips one domain label: an alphanumeric character followed by any
 * number of alphanumerics or hyphens, not ending in a hyphen.
 * Returns NULL if line is NULL, the first character is not alphanumeric,
 * or the label ends with a hyphen. The end of token marker runs before
 * the trailing hyphen check.
 */
static __inline__ char *skip_2821_sub_domain(char *line, token_delim_t *tok, options_t opt) {
  if( !line || !isalnum((int)*line) ) { return NULL; }
  BOT;
  line++;
  while( isalnum((int)*line) || (*line == '-') ) { line++; }
  EOT;
  if( line[-1] == '-' ) { return NULL; }
  DT("2821_sub_domain");
  return line;
}

/* this is not correct, but we won't quibble */
/* Skips an address literal: any square-bracketed region, contents unchecked. */
static __inline__ char *skip_2821_address_literal(char *line) {
  return skip_recursive(line, '[', ']', '\\');
}

/*
 * Skips a domain: an address literal when line starts with an opening
 * square bracket, otherwise a first label followed by a dot and a dot
 * separated sequence of further labels.
 *
 * NOTE(review): this copy is damaged from the option test in this
 * function up to the end of the file; the text is cut off after the start
 * of each shift expression and resumes somewhere later. The two physical
 * lines that follow the intact head of this function are kept exactly as
 * found. Still recognisable in them are:
 *  - the tail of a function whose trace tag reads 2821_path;
 *  - skip_2821_reverse_path, which forwards to skip_2821_path;
 *  - skip_2821_tcp_info, which accepts either an address literal alone or
 *    a domain, folding white space and an address literal;
 *  - most of skip_2821_extended_domain, which accepts an address literal
 *    or a domain, optionally followed by white space and a parenthesised
 *    tcp info part;
 *  - parts of parse_2821_time_stamp_line, which matches the text
 *    Received: and then from, by and via clauses, and whose surviving
 *    fragments refer to p->with_, p->smid_, p->for_ and p->datetime_.
 *    In those fragments the result of unskip_2822_fws is assigned to tmp
 *    and overwritten by the next statement without being used, which
 *    looks unintended - TODO confirm against the upstream source.
 */
static __inline__ char *skip_2821_domain(char *line, token_delim_t *tok, options_t opt) {
  char *tmp;
  if( !line ) { return NULL; }
  BOT;
  if( *line == '[' ) {
    line = skip_2821_address_literal(line);
  } else {
    line = skip_2821_sub_domain(line, NULL, opt);
    tmp = skip_single_char(line, '.');
    tmp = skip_2822_seq(tmp, NULL, skip_2821_sub_domain, '.', opt);
    if( tmp ) { line = tmp; }
else if( !(opt & (1<'); EOT; DT("2821_path"); return line; } static __inline__ char *skip_2821_reverse_path(char *line, token_delim_t *tok, options_t opt) { return skip_2821_path(line, tok, opt); } static __inline__ char *skip_2821_tcp_info(char *line, options_t opt) { if( !line ) { return NULL; } if( *line == '[' ) { return skip_2821_address_literal(line); } line = skip_2821_domain(line, NULL, opt); line = skip_2822_fws(line); return skip_2821_address_literal(line); } static __inline__ char *skip_2821_extended_domain(char *line, token_delim_t *tok, options_t opt) { char *tmp; if( !line ) { return NULL; } BOT; if( *line == '[' ) { line = skip_2821_address_literal(line); line = skip_2822_fws(line); line = skip_single_char(line, '('); line = skip_2821_tcp_info(line, opt); line = skip_single_char(line, ')'); } else { line = skip_2821_domain(line, NULL, opt); tmp = skip_2822_fws(line); tmp = skip_single_char(tmp, '('); tmp = skip_2821_tcp_info(tmp, opt); tmp = skip_single_char(tmp, ')'); if( tmp ) { line = tmp; } else if( opt & (1<path_, opt); return skip_2822_end_of_line(line); } char *parse_2821_time_stamp_line(char *line, parse_2821_rcv_t *p, options_t opt) { char *tmp; memset(p, 0, sizeof(parse_2821_rcv_t)); line = skip_single_string(line, "Received:"); line = skip_2822_fws(line); line = skip_2821_from_domain(line, &p->from_, opt); line = skip_2821_by_domain(line, &p->by_, opt); tmp = skip_single_string(line, "via"); if( tmp ) { line = skip_2822_fws(tmp); line = skip_2821_link(line, &p->via_, opt); tmp = unskip_2822_fws(line); /* link eats trailing space */ tmp = skip_2822_cfws(line, 1); if( tmp ) { line = tmp; } else if( !(opt & (1<with_, opt); tmp = unskip_2822_fws(line); /* protocol eats trailing space */ tmp = skip_2822_cfws(line, 1); if( tmp ) { line = tmp; } else if( !(opt & (1<smid_, opt); tmp = unskip_2822_fws(line); /* id eats trailing space */ tmp = skip_2822_cfws(line, 1); if( tmp ) { line = tmp; } else if( !(opt & (1<for_, opt); tmp = 
unskip_2822_fws(line); /* previous eats trailing space */ tmp = skip_2822_cfws(line, 1); if( tmp ) { line = tmp; } else if( !(opt & (1<datetime_, opt); return skip_2822_end_of_line(line); }