/* elmo - ELectronic Mail Operator Copyright (C) 2002, 2003, 2004 rzyjontko This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ---------------------------------------------------------------------- for operating on email addresses */ /**************************************************************************** * IMPLEMENTATION HEADERS ****************************************************************************/ #include #include #include #include #include #include "address.h" #include "ask.h" #include "xmalloc.h" #include "memblock.h" #include "memchunk.h" #include "error.h" #include "misc.h" #include "hash.h" /**************************************************************************** * IMPLEMENTATION PRIVATE DEFINITIONS / ENUMERATIONS / SIMPLE TYPEDEFS ****************************************************************************/ #define INITIAL_BLOCK_SIZE 2048 #define INITIAL_HASH_SIZE 10 /**************************************************************************** * REGULAR EXPRESSIONS ****************************************************************************/ #define WS "([[:space:]]?)" #define ATEXT "([[:alpha:][:digit:]!#\\$%&\\\\\\'\\*\\+\\/\\=\\?\\^_\\`" \ "\\{\\}\\~\\|\\-])" #define ATOM "(" WS ATEXT "+" WS ")" #define DOT_ATOM_TEXT "(" ATEXT "+(\\." ATEXT "+)*)" #define DOT_ATOM "(" DOT_ATOM_TEXT ")" #define QUOTED_STRING "\"(([^\"])|(\\\\\"))*\"" #define WORD "(" ATOM "|" QUOTED_STRING ")" #define PHRASE "(" WORD "+)" #define PHRASE_RE "^" PHRASE #define ATOM_RE "^" ATOM "$" #define EMAIL_RE "(" DOT_ATOM "@" DOT_ATOM ")" #define COMMENT_RE "[(][^()]*[)]" #define FEMALE_RE "^[^[:space:]]*[aA](([^[:alpha:]])|($))" #define LAST_RE "[[:space:]][^[:space:]]+$" /**************************************************************************** * IMPLEMENTATION PRIVATE CLASS PROTOTYPES / EXTERNAL CLASS REFERENCES ****************************************************************************/ /**************************************************************************** * IMPLEMENTATION PRIVATE STRUCTURES / UTILITY CLASSES ****************************************************************************/ /**************************************************************************** * IMPLEMENTATION REQUIRED EXTERNAL REFERENCES (AVOID) ****************************************************************************/ /**************************************************************************** * IMPLEMENTATION PRIVATE DATA ****************************************************************************/ /* This function is used to iterate through all addresses in address table. */ static void (*iterator_fun)(address_t *addr) = NULL; /* This table stores all addresses from all the boxes. Idea of this table has been changing many times. Discussion: The addresses are stored as pairs: (key, value) where key is addresse's full text (like "Ala "). Using only email address as a key is a bad idea, as people use many different names for their email addresses. Value is an address_t structure. Main purpose, why this table exists is that some addresses are very frequent (user's email address will be the most frequent probably). There is no need to duplicate this data if we can store just many pointers to each address. It also increase performance when caching mailboxes. I only store the table, and then index for each address. I also used to think, that it would be nice to have separate tables for each box. There were problems with addressbook, but I worked them around. Unfortunately headers fetched from pop3 server may be parsed while user switches to different box. Let's make it clear. There is only _one_ addr_table, that stores all the addresses used through the program execution. It is never made empty. */ static htable_t *addr_table = NULL; /* This block is used to minimize memory allocations. */ static memblock_t *data_block = NULL; /**************************************************************************** * INTERFACE DATA ****************************************************************************/ /**************************************************************************** * IMPLEMENTATION PRIVATE FUNCTION PROTOTYPES ****************************************************************************/ /**************************************************************************** * IMPLEMENTATION PRIVATE FUNCTIONS ****************************************************************************/ /** * copy given string to memory block, and remove leading and trailing * white spaces */ static char * clean_copy (memblock_t **block, const char *str) { char *last_space = NULL; char *end; char *result; while (isspace (*str)) str++; result = memblock_strdup (block, str); for (end = result; *end; end++){ if (isspace (*end) && last_space == NULL) last_space = end; else last_space = NULL; } if (last_space) *last_space = '\0'; return result; } /** * ALGORITHM: * 1. find email (according to RFC2822 syntax) in the addr->full string * 2. if it is surrounded with angle brackets (<>), then set kind to * email_in_angles * 3. if the type of the addr is still unknown try to find a comment * that can be used as a display name (this is an obsolete form * described in RFC822); when found, set kind to name_as_comment * 4. if the email was found (and nothing else), set kind to pure_email * 5. set kind to strange */ static void determine_kind (address_t *addr, memblock_t **data_block) { int len; int off; regmatch_t matches[1]; if (addr->flags.bits.kind != KIND_UNKNOWN) return; if (misc_regex (EMAIL_RE, addr->full, matches) == 0){ addr->email = NULL; addr->flags.bits.kind = KIND_NO_EMAIL; } else { len = matches[0].rm_eo - matches[0].rm_so; off = matches[0].rm_so; addr->email = memblock_malloc (data_block, len + 1); memcpy (addr->email, addr->full + off, len); addr->email[len] = '\0'; if (off > 0 && addr->full[off - 1] == '<' && addr->full[off + len] == '>') addr->flags.bits.kind = KIND_EMAIL_IN_ANGLES; else if (off > 0) addr->flags.bits.kind = KIND_STRANGE; if (off == 0 && addr->full[len] == '\0') addr->flags.bits.kind = KIND_PURE_EMAIL; } if (addr->flags.bits.kind != KIND_UNKNOWN) return; if (misc_regex (COMMENT_RE, addr->full, matches)){ addr->flags.bits.kind = KIND_NAME_AS_COMMENT; addr->first = addr->full + matches[0].rm_so; } else addr->flags.bits.kind = KIND_STRANGE; } static void strip_escapes (char *str) { int len; if (str == NULL) return; len = strlen (str); while (*str){ if (*str == '\\'){ memmove (str, str + 1, len); len--; } str++; len--; } } static void strip_name (address_t *addr) { char *seek; char *last; if (addr->name == NULL) return; while (isspace (*addr->name) || *addr->name == '(') addr->name++; last = seek = addr->name; while (*seek){ if (! isspace (*seek) && *seek != ')') last = seek; seek++; } seek = last + 1; *seek = '\0'; } static void strip_quotes (char *str) { char *src = str; char *dest = str; int off = 0; int len = 0; int slen; regmatch_t matches[1]; if (str == NULL) return; while (misc_regex (QUOTED_STRING, src, matches)){ off = matches[0].rm_so; len = matches[0].rm_eo - matches[0].rm_so; memmove (dest, src, off); memmove (dest + off, src + off + 1, len - 2); dest += off + len - 2; src = src + matches[0].rm_eo; } if (len){ slen = strlen (src); if (slen > 0) memmove (dest, src + 1, slen - 1); dest[slen] = '\0'; } } static int is_atom (const char *str) { regmatch_t matches[1]; if (str == NULL) return 1; return misc_regex (ATOM_RE, str, matches); } static void determine_name (address_t *addr, memblock_t **data_block) { int len; int off; char *re = NULL; regmatch_t matches[1]; if (addr->name != NULL) return; switch (addr->flags.bits.kind){ case KIND_UNKNOWN: case KIND_PURE_EMAIL: addr->name = NULL; return; case KIND_NO_EMAIL: addr->name = addr->full; return; case KIND_EMAIL_IN_ANGLES: case KIND_STRANGE: re = PHRASE_RE; break; case KIND_NAME_AS_COMMENT: re = COMMENT_RE; break; } if (misc_regex (re, addr->full, matches)){ off = matches[0].rm_so; len = matches[0].rm_eo - matches[0].rm_so; addr->name = memblock_malloc (data_block, len + 1); memcpy (addr->name, addr->full + off, len); addr->name[len] = '\0'; } else addr->name = NULL; strip_name (addr); strip_quotes (addr->name); strip_escapes (addr->name); if (is_atom (addr->name)) addr->flags.bits.atomic_name = YES; } static void determine_sex (address_t *addr) { char *str = NULL; regmatch_t matches[1]; if (addr->flags.bits.sex != SEX_UNKNOWN) return; if (addr->first) str = addr->first; else if (addr->name) str = addr->name; else if (addr->full) str = addr->full; else str = addr->email; if (str == NULL) return; if (misc_regex (FEMALE_RE, str, matches)){ addr->flags.bits.sex = SEX_FEMALE; } else { addr->flags.bits.sex = SEX_MALE; } } static void find_last (address_t *addr) { regmatch_t matches[1]; if (addr->name == NULL) return; if (misc_regex (LAST_RE, addr->name, matches)){ addr->last = addr->name + matches[0].rm_so + 1; } } static void make_initials (address_t *addr, memblock_t **data_block) { int len; int first = 1; char *seek; char *initials; char *iseek; if (addr->name == NULL) return; len = strlen (addr->name); if (len < 1) return; iseek = initials = xmalloc (len + 1); for (seek = addr->name; *seek; seek++){ if (isalpha (*seek) && first){ *iseek = *seek; first = 0; iseek++; } else if (isspace (*seek)){ first = 1; } } *iseek = '\0'; addr->initials = memblock_strdup (data_block, initials); xfree (initials); } static void make_full (address_t *addr, memblock_t **data_block) { int ret; int len; int n_len; int e_len; int f_len; char *f_ptr; char *seek; if (addr->name && addr->email){ ret = is_atom (addr->name); if (ret){ addr->flags.bits.atomic_name = YES; addr->full = memblock_sprintf (data_block, "%s <%s>", addr->name, addr->email); return; } addr->flags.bits.atomic_name = NO; n_len = strlen (addr->name); e_len = strlen (addr->email); /* " name " < email > \0 */ f_len = 1 + 2 * n_len + 1 + 1 + e_len + 1 + 1; f_ptr = addr->full = memblock_malloc (data_block, f_len); *f_ptr = '"'; f_ptr++; for (seek = addr->name; *seek; seek++){ if (*seek == '"'){ *f_ptr = '\\'; f_ptr++; addr->flags.bits.quotes_in_name = YES; } else if (*seek == '\\'){ *f_ptr = '\\'; f_ptr++; } *f_ptr = *seek; f_ptr++; } len = f_ptr - addr->full; len += sprintf (f_ptr, "\" <%s>", addr->email); len += 1; memblock_shrink_last (*data_block, len); } else if (addr->name){ addr->full = addr->name; addr->flags.bits.kind = KIND_NO_EMAIL; } else { addr->full = addr->email; addr->flags.bits.kind = KIND_PURE_EMAIL; } } static void complete_info (address_t *addr, memblock_t **data_block) { if (addr->full == NULL) make_full (addr, data_block); if (addr->flags.bits.kind == KIND_UNKNOWN) determine_kind (addr, data_block); if (addr->name == NULL) determine_name (addr, data_block); if (addr->flags.bits.sex == SEX_UNKNOWN) determine_sex (addr); if (addr->last == NULL) find_last (addr); if (addr->initials == NULL) make_initials (addr, data_block); } static address_t * insert (address_t *addr) { entry_t *entry; entry = htable_insert (addr_table, addr->full, NULL); if (entry->content != NULL) return entry->content; entry->content = addr; return NULL; } static void iterator (entry_t *entry) { address_t *addr = entry->content; iterator_fun (addr); } static void reset_index (entry_t *entry) { address_t *addr = entry->content; addr->index = -1; } /**************************************************************************** * INTERFACE FUNCTIONS ****************************************************************************/ void address_init (void) { addr_table = htable_create (INITIAL_HASH_SIZE); data_block = memblock_create (INITIAL_BLOCK_SIZE); } void address_free_resources (void) { if (addr_table) htable_destroy (addr_table, NULL); addr_table = NULL; if (data_block) memblock_destroy (data_block); data_block = NULL; } /**************************************************************************** * OBJECT OPERATIONS ****************************************************************************/ address_t * address_empty (void) { address_t *addr; addr = memblock_malloc (& data_block, sizeof (address_t)); addr->full = NULL; addr->email = NULL; addr->name = NULL; addr->first = NULL; addr->last = NULL; addr->initials = NULL; addr->groups = NULL; addr->flags.value = 0; return addr; } address_t * address_from_string (const char *str) { address_t *addr; address_t *inserted; memblock_set_mark (data_block); addr = address_empty (); addr->full = clean_copy (& data_block, str); inserted = insert (addr); if (inserted){ memblock_free_marked (& data_block); return inserted; } complete_info (addr, & data_block); return addr; } address_t * address_complete (address_t *addr) { address_t *inserted; complete_info (addr, & data_block); inserted = insert (addr); if (inserted){ return inserted; } return addr; } address_t * address_find (const char *str) { entry_t *entry; entry = htable_lookup (addr_table, str); return (address_t *) entry->content; } address_t * address_read (memchunk_t *chunk) { address_t *addr; char *full; full = memchunk_strget (chunk); if (full == NULL) return NULL; addr = address_from_string (full); xfree (full); return addr; } void address_dump (address_t *addr, memchunk_t *chunk) { memchunk_strdump (chunk, addr->full); } /**************************************************************************** * SERVICE FUNCTIONS ****************************************************************************/ char * address_name (address_t *addr) { if (addr == NULL) return NULL; if (addr->name) return addr->name; return addr->full; } char * address_wrote_format (address_t *addr) { if (addr == NULL) return NULL; switch (addr->flags.bits.sex){ case SEX_UNKNOWN: return NULL; case SEX_MALE: if (addr->flags.bits.official) return ask_for_default ("o_male_wrote", NULL); else return ask_for_default ("male_wrote", NULL); case SEX_FEMALE: if (addr->flags.bits.official) return ask_for_default ("o_female_wrote", NULL); else return ask_for_default ("female_wrote", NULL); default: return NULL; } } int address_cmp (const address_t *a, const address_t *b) { char *as = NULL; char *bs = NULL; if (a->last) as = a->last; else if (a->name) as = a->name; else as = a->full; if (b->last) bs = b->last; else if (b->name) bs = b->name; else bs = b->full; if (as == NULL || bs == NULL) return 0; return strcoll (as, bs); } void address_for_all (void (*fun)(address_t *)) { iterator_fun = fun; htable_iterator (addr_table, iterator); iterator_fun = NULL; } void address_reset_indexes (void) { htable_iterator (addr_table, reset_index); } /**************************************************************************** * INTERFACE CLASS BODIES ****************************************************************************/ /**************************************************************************** * * END MODULE address.c * ****************************************************************************/