/*
elmo - ELectronic Mail Operator
Copyright (C) 2002, 2003, 2004 rzyjontko
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
----------------------------------------------------------------------
Routines for operating on email addresses.
*/
/****************************************************************************
* IMPLEMENTATION HEADERS
****************************************************************************/
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
#include <ctype.h>
#include "address.h"
#include "ask.h"
#include "xmalloc.h"
#include "memblock.h"
#include "memchunk.h"
#include "error.h"
#include "misc.h"
#include "hash.h"
/****************************************************************************
* IMPLEMENTATION PRIVATE DEFINITIONS / ENUMERATIONS / SIMPLE TYPEDEFS
****************************************************************************/
/* Initial size handed to memblock_create() for the shared data block. */
#define INITIAL_BLOCK_SIZE 2048
/* Initial size handed to htable_create() for the address table. */
#define INITIAL_HASH_SIZE 10
/****************************************************************************
* REGULAR EXPRESSIONS
****************************************************************************/
/* Building blocks named after the RFC 2822 grammar tokens.  The patterns
   use unescaped grouping, "+" and "|", i.e. POSIX extended syntax
   (presumably misc_regex() compiles them with REG_EXTENDED -- confirm). */

/* At most one white-space character. */
#define WS "([[:space:]]?)"
/* A single character allowed inside an atom: a letter, a digit or one of
   the punctuation characters listed in the bracket expression. */
#define ATEXT "([[:alpha:][:digit:]!#\\$%&\\\\\\'\\*\\+\\/\\=\\?\\^_\\`" \
"\\{\\}\\~\\|\\-])"
/* One or more ATEXT characters, with at most one white-space character
   allowed on each side. */
#define ATOM "(" WS ATEXT "+" WS ")"
/* Runs of ATEXT characters separated by single dots. */
#define DOT_ATOM_TEXT "(" ATEXT "+(\\." ATEXT "+)*)"
#define DOT_ATOM "(" DOT_ATOM_TEXT ")"
/* Text enclosed in double quotes; the body is any sequence of non-quote
   characters or backslash-quote pairs. */
#define QUOTED_STRING "\"(([^\"])|(\\\\\"))*\""
/* Either an atom or a quoted string. */
#define WORD "(" ATOM "|" QUOTED_STRING ")"
/* One or more words. */
#define PHRASE "(" WORD "+)"
/* A phrase anchored at the beginning of the string (used to extract the
   display name). */
#define PHRASE_RE "^" PHRASE
/* The whole string is exactly one atom. */
#define ATOM_RE "^" ATOM "$"
/* dot-atom "@" dot-atom, not anchored: finds the email anywhere in the
   string. */
#define EMAIL_RE "(" DOT_ATOM "@" DOT_ATOM ")"
/* A parenthesised comment that contains no further parentheses. */
#define COMMENT_RE "[(][^()]*[)]"
/* Heuristic used by determine_sex(): within the leading run of
   non-white-space characters there is an 'a' or 'A' that is followed by a
   non-letter or by the end of the string. */
#define FEMALE_RE "^[^[:space:]]*[aA](([^[:alpha:]])|($))"
/* A white-space character followed by the final run of non-white-space
   characters, i.e. the last word (used by find_last()). */
#define LAST_RE "[[:space:]][^[:space:]]+$"
/****************************************************************************
* IMPLEMENTATION PRIVATE CLASS PROTOTYPES / EXTERNAL CLASS REFERENCES
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE STRUCTURES / UTILITY CLASSES
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION REQUIRED EXTERNAL REFERENCES (AVOID)
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE DATA
****************************************************************************/
/* Callback applied to every address while address_for_all() walks the
   address table.  It is set only for the duration of that walk and is
   NULL otherwise. */
static void (*iterator_fun)(address_t *addr) = NULL;
/* This table stores all addresses from all the boxes.

   The idea behind this table has changed many times.  Discussion:

   The addresses are stored as pairs:
       (key, value)
   where the key is the address's full text (like "Ala <ala@kot.org>").
   Using only the email address as a key is a bad idea, as people use
   many different names for their email addresses.  The value is an
   address_t structure.

   The main reason this table exists is that some addresses are very
   frequent (the user's own email address will probably be the most
   frequent one).  There is no need to duplicate this data if we can
   simply store many pointers to each address.

   It also increases performance when caching mailboxes: only the table
   is stored, plus an index for each address.

   I also used to think that it would be nice to have separate tables
   for each box.  There were problems with the addressbook, but I worked
   around them.  Unfortunately, headers fetched from a pop3 server may be
   parsed while the user switches to a different box.

   Let's make it clear.  There is only _one_ addr_table, which stores
   all the addresses used throughout the program's execution.  It is
   never emptied. */
static htable_t *addr_table = NULL;
/* Memory block from which address structures and their strings are
   carved, so that individual allocations are kept to a minimum.  Created
   in address_init() and released in address_free_resources(). */
static memblock_t *data_block = NULL;
/****************************************************************************
* INTERFACE DATA
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE FUNCTION PROTOTYPES
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE FUNCTIONS
****************************************************************************/
/**
 * Copy STR into the memory block with leading and trailing white space
 * removed.
 *
 * block  memory block the copy is allocated from (via memblock_strdup)
 * str    NUL-terminated source string; must not be NULL
 *
 * Returns the trimmed copy.
 *
 * The start of the current run of trailing white space is remembered and
 * forgotten again as soon as a non-space character is seen, so the whole
 * run is cut off no matter how long it is.  (Previously the marker was
 * also dropped on every second consecutive space, so an even number of
 * trailing blanks was not trimmed at all.)
 */
static char *
clean_copy (memblock_t **block, const char *str)
{
  char *trail = NULL;
  char *end;
  char *result;

  /* <ctype.h> classifiers require a value representable as unsigned char */
  while (isspace ((unsigned char) *str))
    str++;

  result = memblock_strdup (block, str);

  for (end = result; *end; end++){
    if (! isspace ((unsigned char) *end))
      trail = NULL;
    else if (trail == NULL)
      trail = end;
  }

  if (trail)
    *trail = '\0';
  return result;
}
/**
 * Classify addr->full and extract the email address from it.
 *
 * Does nothing when the kind is already known.  Otherwise:
 *   - no email found in addr->full: email is set to NULL and the kind
 *     becomes KIND_NO_EMAIL;
 *   - email found: it is copied into memory taken from data_block, then
 *       * enclosed in '<' and '>'          -> KIND_EMAIL_IN_ANGLES
 *       * preceded by anything else        -> KIND_STRANGE
 *       * it makes up the whole string     -> KIND_PURE_EMAIL
 *   - email at the very beginning followed by more text: a parenthesised
 *     comment is searched for (the obsolete RFC822 way of giving a
 *     display name); when found the kind is KIND_NAME_AS_COMMENT and
 *     addr->first points at the start of that comment inside addr->full,
 *     otherwise the kind is KIND_STRANGE.
 */
static void
determine_kind (address_t *addr, memblock_t **data_block)
{
  regmatch_t found[1];
  int start;
  int size;

  if (addr->flags.bits.kind != KIND_UNKNOWN)
    return;

  if (misc_regex (EMAIL_RE, addr->full, found)){
    start = found[0].rm_so;
    size = found[0].rm_eo - found[0].rm_so;

    addr->email = memblock_malloc (data_block, size + 1);
    memcpy (addr->email, addr->full + start, size);
    addr->email[size] = '\0';

    if (start > 0){
      if (addr->full[start - 1] == '<' && addr->full[start + size] == '>')
        addr->flags.bits.kind = KIND_EMAIL_IN_ANGLES;
      else
        addr->flags.bits.kind = KIND_STRANGE;
    }
    else if (start == 0 && addr->full[size] == '\0'){
      addr->flags.bits.kind = KIND_PURE_EMAIL;
    }
  }
  else {
    addr->email = NULL;
    addr->flags.bits.kind = KIND_NO_EMAIL;
  }

  if (addr->flags.bits.kind != KIND_UNKNOWN)
    return;

  /* email sits at the start and is followed by something else */
  if (! misc_regex (COMMENT_RE, addr->full, found)){
    addr->flags.bits.kind = KIND_STRANGE;
    return;
  }

  addr->flags.bits.kind = KIND_NAME_AS_COMMENT;
  addr->first = addr->full + found[0].rm_so;
}
/**
 * Remove backslash escapes from STR in place.
 *
 * Every backslash is dropped and the character that follows it is kept
 * literally (so an escaped backslash yields one backslash).  A backslash
 * that ends the string is simply dropped.  A NULL pointer is ignored.
 *
 * The string is compacted in a single pass with a read and a write
 * cursor instead of shifting the whole tail once per backslash, and no
 * length is kept in an int.
 */
static void
strip_escapes (char *str)
{
  char *src;
  char *dst;

  if (str == NULL)
    return;

  src = dst = str;
  while (*src){
    if (*src == '\\'){
      src++;
      if (*src == '\0')
        break;                  /* dangling backslash at the end */
    }
    *dst++ = *src++;
  }
  *dst = '\0';
}
/**
 * Trim addr->name in place: leading white space and '(' characters are
 * skipped by advancing the pointer, trailing white space and ')'
 * characters are cut off by writing a terminator.
 *
 * Nothing is done when addr->name is NULL.
 *
 * The terminator goes right after the last character that is kept.  When
 * no character is kept at all (e.g. the name was "()" or empty) the
 * result is the empty string; the terminator is never written past the
 * end of the original string.
 */
static void
strip_name (address_t *addr)
{
  char *seek;
  char *end;

  if (addr->name == NULL)
    return;

  /* <ctype.h> classifiers require a value representable as unsigned char */
  while (isspace ((unsigned char) *addr->name) || *addr->name == '(')
    addr->name++;

  end = addr->name;
  for (seek = addr->name; *seek; seek++){
    if (! isspace ((unsigned char) *seek) && *seek != ')')
      end = seek + 1;
  }
  *end = '\0';
}
/**
 * Remove the enclosing double quotes of every quoted string found in STR,
 * compacting the text in place.  Text outside quoted strings is kept
 * unchanged.  A NULL pointer is ignored; a string without any quoted
 * string is left untouched.
 *
 * For each match the text before the opening quote and the text between
 * the quotes are moved down to the write position.  After the last match
 * the remaining tail is moved down together with its terminator.
 * (Previously the tail lost its first character and the terminator was
 * written one position too far, so `"John" Doe` became `JohnDoeD`.)
 */
static void
strip_quotes (char *str)
{
  char *src = str;
  char *dest = str;
  int off;
  int len;
  int found = 0;
  regmatch_t matches[1];

  if (str == NULL)
    return;

  while (misc_regex (QUOTED_STRING, src, matches)){
    off = matches[0].rm_so;
    len = matches[0].rm_eo - matches[0].rm_so;

    /* text in front of the opening quote */
    memmove (dest, src, off);
    /* quoted text without the two quote characters */
    memmove (dest + off, src + off + 1, len - 2);

    dest += off + len - 2;
    src += matches[0].rm_eo;
    found = 1;
  }

  /* whatever follows the last closing quote, terminator included */
  if (found)
    memmove (dest, src, strlen (src) + 1);
}
/**
 * Tell whether STR consists of exactly one atom (ATOM_RE).
 *
 * Returns 1 for a NULL pointer, otherwise the result of misc_regex().
 */
static int
is_atom (const char *str)
{
  regmatch_t found[1];

  return (str == NULL) ? 1 : misc_regex (ATOM_RE, str, found);
}
/**
 * Derive addr->name (the display name) from addr->full, depending on the
 * kind of the address.  Does nothing when a name is already present.
 *
 *   KIND_UNKNOWN, KIND_PURE_EMAIL          name stays NULL
 *   KIND_NO_EMAIL                          name is addr->full itself
 *   KIND_EMAIL_IN_ANGLES, KIND_STRANGE     leading phrase of addr->full
 *   KIND_NAME_AS_COMMENT                   parenthesised comment
 *
 * In the last two cases the matched text is copied into memory taken
 * from data_block, then stripped of surrounding blanks/parentheses,
 * quotes and escapes, and the atomic_name flag is set when is_atom()
 * accepts the result.
 */
static void
determine_name (address_t *addr, memblock_t **data_block)
{
  regmatch_t found[1];
  char *pattern = NULL;
  int start;
  int size;

  if (addr->name != NULL)
    return;

  if (addr->flags.bits.kind == KIND_UNKNOWN
      || addr->flags.bits.kind == KIND_PURE_EMAIL){
    addr->name = NULL;
    return;
  }

  if (addr->flags.bits.kind == KIND_NO_EMAIL){
    addr->name = addr->full;
    return;
  }

  if (addr->flags.bits.kind == KIND_EMAIL_IN_ANGLES
      || addr->flags.bits.kind == KIND_STRANGE)
    pattern = PHRASE_RE;
  else if (addr->flags.bits.kind == KIND_NAME_AS_COMMENT)
    pattern = COMMENT_RE;

  if (! misc_regex (pattern, addr->full, found)){
    addr->name = NULL;
  }
  else {
    start = found[0].rm_so;
    size = found[0].rm_eo - found[0].rm_so;

    addr->name = memblock_malloc (data_block, size + 1);
    memcpy (addr->name, addr->full + start, size);
    addr->name[size] = '\0';
  }

  strip_name (addr);
  strip_quotes (addr->name);
  strip_escapes (addr->name);

  if (is_atom (addr->name))
    addr->flags.bits.atomic_name = YES;
}
/**
 * Guess the sex flag of the address when it is still SEX_UNKNOWN.
 *
 * The first available string among addr->first, addr->name, addr->full
 * and addr->email is tested against FEMALE_RE: a match gives SEX_FEMALE,
 * anything else SEX_MALE.  When none of the strings is available the
 * flag is left untouched.
 */
static void
determine_sex (address_t *addr)
{
  regmatch_t found[1];
  char *text;

  if (addr->flags.bits.sex != SEX_UNKNOWN)
    return;

  text = addr->first;
  if (text == NULL)
    text = addr->name;
  if (text == NULL)
    text = addr->full;
  if (text == NULL)
    text = addr->email;
  if (text == NULL)
    return;

  if (misc_regex (FEMALE_RE, text, found))
    addr->flags.bits.sex = SEX_FEMALE;
  else
    addr->flags.bits.sex = SEX_MALE;
}
/**
 * Point addr->last at the last white-space separated word of addr->name.
 *
 * addr->last is left as it was when there is no name or when the name
 * contains no white space followed by a final word.
 */
static void
find_last (address_t *addr)
{
  regmatch_t found[1];

  if (addr->name == NULL)
    return;

  if (! misc_regex (LAST_RE, addr->name, found))
    return;

  /* the match begins at the separating white space; skip it */
  addr->last = addr->name + found[0].rm_so + 1;
}
/**
 * Build addr->initials from addr->name: the first alphabetic character
 * found after the beginning of the name and after each white-space
 * character is collected.
 *
 * Nothing is done when the name is NULL or empty.  The initials are
 * assembled in a temporary buffer and then duplicated into data_block.
 *
 * Characters are converted to unsigned char before being handed to
 * isalpha()/isspace(); passing a negative plain char (e.g. an 8-bit
 * letter in a name) to these functions is undefined.
 */
static void
make_initials (address_t *addr, memblock_t **data_block)
{
  int len;
  int first = 1;
  unsigned char c;
  char *seek;
  char *initials;
  char *iseek;

  if (addr->name == NULL)
    return;

  len = strlen (addr->name);
  if (len < 1)
    return;

  /* there can never be more initials than characters in the name */
  iseek = initials = xmalloc (len + 1);

  for (seek = addr->name; *seek; seek++){
    c = (unsigned char) *seek;
    if (isalpha (c) && first){
      *iseek = *seek;
      first = 0;
      iseek++;
    }
    else if (isspace (c)){
      first = 1;
    }
  }
  *iseek = '\0';

  addr->initials = memblock_strdup (data_block, initials);
  xfree (initials);
}
/**
 * Compose addr->full from addr->name and addr->email.
 *
 *   name and email, name is an atom     ->  name <email>
 *   name and email, name is not an atom ->  "name" <email>, with every
 *                                           '"' and '\' in the name
 *                                           preceded by a backslash
 *   only a name                         ->  full is the name itself,
 *                                           kind becomes KIND_NO_EMAIL
 *   otherwise                           ->  full is the email (possibly
 *                                           NULL), kind becomes
 *                                           KIND_PURE_EMAIL
 *
 * The atomic_name and quotes_in_name flags are updated on the way.
 *
 * For the quoted form the worst-case size is reserved first and the
 * allocation is shrunk to the real size afterwards.  The worst case has
 * to account for the space between the closing quote and '<' as well;
 * without it a name made only of characters that need escaping
 * overflowed the buffer by one byte.
 */
static void
make_full (address_t *addr, memblock_t **data_block)
{
  int len;
  int n_len;
  int e_len;
  int f_len;
  char *f_ptr;
  char *seek;

  if (addr->name && addr->email){
    if (is_atom (addr->name)){
      addr->flags.bits.atomic_name = YES;
      addr->full = memblock_sprintf (data_block, "%s <%s>", addr->name,
                                     addr->email);
      return;
    }

    addr->flags.bits.atomic_name = NO;
    n_len = strlen (addr->name);
    e_len = strlen (addr->email);

    /*      "   escaped name  "  ' '  <     email   >  \0 */
    f_len = 1 + 2 * n_len + 1 + 1 + 1 + e_len + 1 + 1;
    f_ptr = addr->full = memblock_malloc (data_block, f_len);

    *f_ptr = '"';
    f_ptr++;
    for (seek = addr->name; *seek; seek++){
      if (*seek == '"'){
        *f_ptr = '\\';
        f_ptr++;
        addr->flags.bits.quotes_in_name = YES;
      }
      else if (*seek == '\\'){
        *f_ptr = '\\';
        f_ptr++;
      }
      *f_ptr = *seek;
      f_ptr++;
    }

    /* bytes really used, terminator included */
    len = f_ptr - addr->full;
    len += sprintf (f_ptr, "\" <%s>", addr->email);
    len += 1;
    memblock_shrink_last (*data_block, len);
  }
  else if (addr->name){
    addr->full = addr->name;
    addr->flags.bits.kind = KIND_NO_EMAIL;
  }
  else {
    addr->full = addr->email;
    addr->flags.bits.kind = KIND_PURE_EMAIL;
  }
}
/**
 * Fill in every field of ADDR that is still missing, in dependency
 * order: full text, kind (and email), display name, sex, last name and
 * initials.  Fields that already carry a value are left alone.
 */
static void
complete_info (address_t *addr, memblock_t **data_block)
{
  if (addr->full == NULL){
    make_full (addr, data_block);
  }

  if (addr->flags.bits.kind == KIND_UNKNOWN){
    determine_kind (addr, data_block);
  }

  if (addr->name == NULL){
    determine_name (addr, data_block);
  }

  if (addr->flags.bits.sex == SEX_UNKNOWN){
    determine_sex (addr);
  }

  if (addr->last == NULL){
    find_last (addr);
  }

  if (addr->initials == NULL){
    make_initials (addr, data_block);
  }
}
/**
 * Register ADDR in the address table under its full text.
 *
 * Returns NULL when ADDR has been stored, or the address that already
 * occupies the entry for that key (in which case ADDR is not stored).
 */
static address_t *
insert (address_t *addr)
{
  entry_t *slot = htable_insert (addr_table, addr->full, NULL);

  if (slot->content == NULL){
    slot->content = addr;
    return NULL;
  }
  return slot->content;
}
/**
 * Adapter handed to htable_iterator() by address_for_all(): unwraps the
 * address stored in ENTRY and passes it to the callback currently held
 * in iterator_fun.
 */
static void
iterator (entry_t *entry)
{
address_t *addr = entry->content;
iterator_fun (addr);
}
/**
 * htable_iterator() callback used by address_reset_indexes(): sets the
 * index of the address stored in ENTRY to -1.
 */
static void
reset_index (entry_t *entry)
{
address_t *addr = entry->content;
addr->index = -1;
}
/****************************************************************************
* INTERFACE FUNCTIONS
****************************************************************************/
/**
 * Create the address table and the memory block that backs all address
 * data.  The previous values of both pointers are overwritten without
 * being released.
 */
void
address_init (void)
{
addr_table = htable_create (INITIAL_HASH_SIZE);
data_block = memblock_create (INITIAL_BLOCK_SIZE);
}
/**
 * Release the address table and the backing memory block, if they exist,
 * and reset both pointers to NULL.  The table is destroyed with a NULL
 * second argument to htable_destroy().
 */
void
address_free_resources (void)
{
  if (addr_table != NULL){
    htable_destroy (addr_table, NULL);
    addr_table = NULL;
  }

  if (data_block != NULL){
    memblock_destroy (data_block);
    data_block = NULL;
  }
}
/****************************************************************************
* OBJECT OPERATIONS
****************************************************************************/
address_t *
address_empty (void)
{
address_t *addr;
addr = memblock_malloc (& data_block, sizeof (address_t));
addr->full = NULL;
addr->email = NULL;
addr->name = NULL;
addr->first = NULL;
addr->last = NULL;
addr->initials = NULL;
addr->groups = NULL;
addr->flags.value = 0;
return addr;
}
/**
 * Return the address whose full text is STR (with surrounding white
 * space removed), creating it when the address table does not hold it
 * yet.
 *
 * A candidate address is allocated after marking the data block.  When
 * an equal address is already registered, everything allocated since the
 * mark is given back and the registered address is returned; otherwise
 * the candidate's remaining fields are filled in and it is returned.
 */
address_t *
address_from_string (const char *str)
{
  address_t *fresh;
  address_t *known;

  memblock_set_mark (data_block);

  fresh = address_empty ();
  fresh->full = clean_copy (& data_block, str);

  known = insert (fresh);
  if (known == NULL){
    complete_info (fresh, & data_block);
    return fresh;
  }

  memblock_free_marked (& data_block);
  return known;
}
/**
 * Fill in the missing fields of ADDR and register it in the address
 * table.
 *
 * Returns the address already registered under the same full text when
 * there is one, otherwise ADDR itself.
 */
address_t *
address_complete (address_t *addr)
{
  address_t *known;

  complete_info (addr, & data_block);
  known = insert (addr);

  return known ? known : addr;
}
/**
 * Look up the address registered under the full text STR.
 *
 * Returns the address, or NULL when the lookup yields no entry.
 *
 * NOTE(review): htable_lookup() is assumed to return NULL for a key that
 * is not in the table -- confirm against hash.h.  The result used to be
 * dereferenced unconditionally.
 */
address_t *
address_find (const char *str)
{
  entry_t *entry;

  entry = htable_lookup (addr_table, str);
  if (entry == NULL)
    return NULL;
  return (address_t *) entry->content;
}
/**
 * Read one string from CHUNK and turn it into an address with
 * address_from_string().
 *
 * Returns NULL when the chunk yields no string.  The string obtained
 * from the chunk is released with xfree() before returning.
 */
address_t *
address_read (memchunk_t *chunk)
{
  address_t *result;
  char *text = memchunk_strget (chunk);

  if (text == NULL)
    return NULL;

  result = address_from_string (text);
  xfree (text);
  return result;
}
/**
 * Write the full text of ADDR to CHUNK with memchunk_strdump().  Only
 * addr->full is stored; address_read() rebuilds the other fields from
 * it.  ADDR must not be NULL.
 */
void
address_dump (address_t *addr, memchunk_t *chunk)
{
memchunk_strdump (chunk, addr->full);
}
/****************************************************************************
* SERVICE FUNCTIONS
****************************************************************************/
/**
 * Return the text to display for ADDR: its name when it has one, its
 * full text otherwise.  Returns NULL for a NULL address.
 */
char *
address_name (address_t *addr)
{
  if (addr == NULL)
    return NULL;

  return addr->name ? addr->name : addr->full;
}
/**
 * Return the "... wrote" format configured for ADDR.
 *
 * The configuration key depends on the sex flag and on the official
 * flag: "male_wrote" / "female_wrote", prefixed with "o_" for official
 * addresses.  The value is obtained with ask_for_default (key, NULL).
 *
 * Returns NULL for a NULL address and for any sex other than SEX_MALE
 * and SEX_FEMALE.
 */
char *
address_wrote_format (address_t *addr)
{
  char *key;

  if (addr == NULL)
    return NULL;

  if (addr->flags.bits.sex == SEX_MALE){
    key = addr->flags.bits.official ? "o_male_wrote" : "male_wrote";
  }
  else if (addr->flags.bits.sex == SEX_FEMALE){
    key = addr->flags.bits.official ? "o_female_wrote" : "female_wrote";
  }
  else {
    return NULL;
  }

  return ask_for_default (key, NULL);
}
/**
 * Compare two addresses for sorting.
 *
 * Each address is represented by its last name, or by its name when it
 * has no last name, or by its full text when it has neither.  The two
 * strings are compared with strcoll().
 *
 * Returns the strcoll() result, or 0 when either address has no string
 * to compare.  Neither A nor B may be NULL.
 */
int
address_cmp (const address_t *a, const address_t *b)
{
  const char *left;
  const char *right;

  left = a->last ? a->last : (a->name ? a->name : a->full);
  right = b->last ? b->last : (b->name ? b->name : b->full);

  if (left == NULL || right == NULL)
    return 0;

  return strcoll (left, right);
}
/**
 * Call FUN for every address in the address table.
 *
 * FUN is parked in the file-level iterator_fun for the duration of the
 * walk and the variable is cleared afterwards, so a nested call made
 * from inside FUN would leave the outer walk without a callback.
 */
void
address_for_all (void (*fun)(address_t *))
{
iterator_fun = fun;
htable_iterator (addr_table, iterator);
iterator_fun = NULL;
}
/**
 * Set the index of every address in the address table to -1.
 */
void
address_reset_indexes (void)
{
htable_iterator (addr_table, reset_index);
}
/****************************************************************************
* INTERFACE CLASS BODIES
****************************************************************************/
/****************************************************************************
*
* END MODULE address.c
*
****************************************************************************/
/* syntax highlighted by Code2HTML, v. 0.9.1 */