/* 
   elmo - ELectronic Mail Operator

   Copyright (C) 2002, 2003, 2004 rzyjontko

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  

   ----------------------------------------------------------------------

   for operating on email addresses
   
*/
/****************************************************************************
 *    IMPLEMENTATION HEADERS
 ****************************************************************************/

#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
#include <ctype.h>

#include "address.h"
#include "ask.h"
#include "xmalloc.h"
#include "memblock.h"
#include "memchunk.h"
#include "error.h"
#include "misc.h"
#include "hash.h"

/****************************************************************************
 *    IMPLEMENTATION PRIVATE DEFINITIONS / ENUMERATIONS / SIMPLE TYPEDEFS
 ****************************************************************************/

/* Initial size handed to memblock_create () when data_block is set up
   in address_init. */
#define INITIAL_BLOCK_SIZE 2048
/* Initial size handed to htable_create () when addr_table is set up
   in address_init. */
#define INITIAL_HASH_SIZE  10

/****************************************************************************
 *    REGULAR EXPRESSIONS
 ****************************************************************************/

#define WS "([[:space:]]?)"

#define ATEXT "([[:alpha:][:digit:]!#\\$%&\\\\\\'\\*\\+\\/\\=\\?\\^_\\`" \
              "\\{\\}\\~\\|\\-])"
#define ATOM "(" WS ATEXT "+" WS ")"
#define DOT_ATOM_TEXT "(" ATEXT "+(\\." ATEXT "+)*)"
#define DOT_ATOM "(" DOT_ATOM_TEXT ")"

#define QUOTED_STRING "\"(([^\"])|(\\\\\"))*\""
#define WORD  "(" ATOM "|" QUOTED_STRING ")"
#define PHRASE "(" WORD "+)"


#define PHRASE_RE "^" PHRASE
#define ATOM_RE "^" ATOM "$"
#define EMAIL_RE "(" DOT_ATOM "@" DOT_ATOM ")"
#define COMMENT_RE "[(][^()]*[)]"

#define FEMALE_RE "^[^[:space:]]*[aA](([^[:alpha:]])|($))"
#define LAST_RE "[[:space:]][^[:space:]]+$"

/****************************************************************************
 *    IMPLEMENTATION PRIVATE CLASS PROTOTYPES / EXTERNAL CLASS REFERENCES
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE STRUCTURES / UTILITY CLASSES
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION REQUIRED EXTERNAL REFERENCES (AVOID)
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE DATA
 ****************************************************************************/

/* Callback used while iterating through all the addresses in the
   address table.  It is set by address_for_all for the duration of the
   iteration and is NULL otherwise. */
static void (*iterator_fun)(address_t *addr) = NULL;

/* This table stores all addresses from all the boxes.
   The idea behind this table has changed many times.  Discussion:

   The addresses are stored as pairs:
      (key, value)
   where key is the address's full text (like "Ala <ala@kot.org>").
   Using only the email address as a key is a bad idea, as people use
   many different names for their email addresses.  Value is an
   address_t structure.

   The main reason why this table exists is that some addresses are
   very frequent (the user's email address will probably be the most
   frequent one).  There is no need to duplicate this data if we can
   store just many pointers to each address.
   It also increases performance when caching mailboxes.  I only store
   the table, and then an index for each address.

   I also used to think that it would be nice to have separate tables
   for each box.  There were problems with the addressbook, but I
   worked around them.  Unfortunately headers fetched from a pop3
   server may be parsed while the user switches to a different box.

   Let's make it clear.  There is only _one_ addr_table, which stores
   all the addresses used throughout the program execution.  It is
   never emptied. */
static htable_t *addr_table = NULL;

/* This block is used to minimize memory allocations.  All address_t
   structures and their strings are allocated from it. */
static memblock_t *data_block = NULL;

/****************************************************************************
 *    INTERFACE DATA
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE FUNCTION PROTOTYPES
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE FUNCTIONS
 ****************************************************************************/


/**
 * Copy STR into the memory block, dropping leading and trailing white
 * space.
 *
 * The copy is made with memblock_strdup () starting at the first
 * non-space character of STR.  Trailing white space is then cut off in
 * place by writing a terminator over the first character of the final
 * run of white space.
 *
 * Returns the trimmed copy.
 */
static char *
clean_copy (memblock_t **block, const char *str)
{
        char *result;
        char *seek;
        char *trail = NULL;

        /* <ctype.h> functions require a value representable as
           unsigned char, plain char may be negative. */
        while (isspace ((unsigned char) *str))
                str++;

        result = memblock_strdup (block, str);

        /* TRAIL marks the beginning of the current run of white space,
           and is NULL while we are outside of such a run.  The mark has
           to survive the whole run: resetting it on every second space
           would leave "abc  " untrimmed. */
        for (seek = result; *seek; seek++){
                if (! isspace ((unsigned char) *seek))
                        trail = NULL;
                else if (trail == NULL)
                        trail = seek;
        }

        if (trail)
                *trail = '\0';

        return result;
}


/**
 * Classify the address on the basis of addr->full and extract
 * addr->email.  Does nothing if the kind is already known.
 *
 * ALGORITHM:
 *   1. look for an email (EMAIL_RE) in the addr->full string; without
 *      one the kind is KIND_NO_EMAIL
 *   2. copy the email into the memory block
 *   3. email preceded by other text: KIND_EMAIL_IN_ANGLES if it is
 *      surrounded with angle brackets (<>), KIND_STRANGE otherwise
 *   4. email being the whole string: KIND_PURE_EMAIL
 *   5. email at the very beginning, followed by something else: try to
 *      find a comment that can be used as a display name (this is an
 *      obsolete form described in RFC822); when found, set kind to
 *      KIND_NAME_AS_COMMENT and let addr->first point at the comment
 *   6. otherwise set kind to KIND_STRANGE
 */
static void
determine_kind (address_t *addr, memblock_t **data_block)
{
        regmatch_t  found[1];
        const char *text;
        int         start;
        int         size;

        if (addr->flags.bits.kind != KIND_UNKNOWN)
                return;

        text = addr->full;

        if (misc_regex (EMAIL_RE, addr->full, found)){
                start = found[0].rm_so;
                size  = found[0].rm_eo - found[0].rm_so;

                /* private, terminated copy of the matched email */
                addr->email = memblock_malloc (data_block, size + 1);
                memcpy (addr->email, text + start, size);
                addr->email[size] = '\0';

                if (start > 0){
                        if (text[start - 1] == '<'
                            && text[start + size] == '>')
                                addr->flags.bits.kind = KIND_EMAIL_IN_ANGLES;
                        else
                                addr->flags.bits.kind = KIND_STRANGE;
                }
                else if (start == 0 && text[size] == '\0'){
                        addr->flags.bits.kind = KIND_PURE_EMAIL;
                }
        }
        else {
                addr->email           = NULL;
                addr->flags.bits.kind = KIND_NO_EMAIL;
        }

        if (addr->flags.bits.kind != KIND_UNKNOWN)
                return;

        /* The email starts the string and is followed by something
           else.  Maybe a comment plays the role of the name. */
        if (! misc_regex (COMMENT_RE, addr->full, found)){
                addr->flags.bits.kind = KIND_STRANGE;
                return;
        }

        addr->flags.bits.kind = KIND_NAME_AS_COMMENT;
        addr->first           = addr->full + found[0].rm_so;
}



/**
 * Remove backslash escapes from STR in place.
 *
 * Every backslash is dropped and the character that follows it is kept
 * verbatim (so an escaped backslash yields a single backslash).  A
 * backslash that ends the string is simply dropped.  NULL is accepted
 * and ignored.
 */
static void
strip_escapes (char *str)
{
        char *from;
        char *to;

        if (str == NULL)
                return;

        from = to = str;

        while (*from){
                if (*from == '\\'){
                        /* skip the backslash itself... */
                        from++;
                        if (*from == '\0')
                                break;
                }
                /* ...and copy whatever stands at the current position */
                *to = *from;
                to++;
                from++;
        }
        *to = '\0';
}


/**
 * Trim addr->name in place.
 *
 * Leading white space and opening parentheses are skipped by advancing
 * addr->name; trailing white space and closing parentheses are cut off
 * by writing a terminator right after the last character that is
 * neither of them.  Does nothing when addr->name is NULL.
 */
static void
strip_name (address_t *addr)
{
        char *seek;
        char *end;

        if (addr->name == NULL)
                return;

        /* <ctype.h> functions require a value representable as
           unsigned char, plain char may be negative. */
        while (isspace ((unsigned char) *addr->name) || *addr->name == '(')
                addr->name++;

        /* END is one past the last character worth keeping.  It starts
           at the beginning of the name, so that a name with nothing to
           keep (like "()") becomes empty, and so that the terminator is
           never written behind the end of the string. */
        end = addr->name;
        for (seek = addr->name; *seek; seek++){
                if (! isspace ((unsigned char) *seek) && *seek != ')')
                        end = seek + 1;
        }
        *end = '\0';
}



/**
 * Remove the double quotes surrounding every quoted string
 * (QUOTED_STRING) found in STR.  The text is compacted in place: the
 * text outside of quoted strings and the contents of the quoted
 * strings are preserved, only the delimiting quotes disappear.
 * NULL is accepted and ignored.
 */
static void
strip_quotes (char *str)
{
        char *src  = str;       /* not yet examined part of the input */
        char *dest = str;       /* end of the already compacted output */
        int   off;
        int   len;
        regmatch_t matches[1];

        if (str == NULL)
                return;

        while (misc_regex (QUOTED_STRING, src, matches)){
                off = matches[0].rm_so;
                len = matches[0].rm_eo - matches[0].rm_so;

                /* the text in front of the quoted string... */
                memmove (dest, src, off);
                /* ...and its contents without the two quotes */
                memmove (dest + off, src + off + 1, len - 2);
                dest += off + len - 2;
                src  += matches[0].rm_eo;
        }

        /* Something has been removed, so the remaining text (together
           with its terminator) has to be moved down as a whole. */
        if (dest != src)
                memmove (dest, src, strlen (src) + 1);
}



/**
 * Tell whether STR matches ATOM_RE.  A NULL string is reported as an
 * atom (1); otherwise the result of misc_regex () is returned.
 */
static int
is_atom (const char *str)
{
        regmatch_t unused[1];

        return (str == NULL) ? 1 : misc_regex (ATOM_RE, str, unused);
}


/**
 * Find the display name of the address, unless it is already set.
 *
 * The regular expression that is used depends on the kind of the
 * address.  Addresses of unknown kind and pure emails get no name; for
 * addresses without an email the name is the full text itself (the
 * same pointer, not a copy).  In the remaining cases the matched text
 * is copied to the memory block and then stripped of surrounding
 * spaces / parentheses, of quotes and of backslash escapes.  Finally
 * the atomic_name flag is set when is_atom () accepts the name (which
 * it also does for a NULL name).
 */
static void
determine_name (address_t *addr, memblock_t **data_block)
{
        regmatch_t  found[1];
        char       *pattern = NULL;
        int         start;
        int         size;

        if (addr->name != NULL)
                return;

        switch (addr->flags.bits.kind){

                case KIND_UNKNOWN:
                case KIND_PURE_EMAIL:
                        addr->name = NULL;
                        return;

                case KIND_NO_EMAIL:
                        addr->name = addr->full;
                        return;

                case KIND_EMAIL_IN_ANGLES:
                case KIND_STRANGE:
                        pattern = PHRASE_RE;
                        break;

                case KIND_NAME_AS_COMMENT:
                        pattern = COMMENT_RE;
                        break;
        }

        if (! misc_regex (pattern, addr->full, found)){
                addr->name = NULL;
        }
        else {
                start      = found[0].rm_so;
                size       = found[0].rm_eo - found[0].rm_so;
                addr->name = memblock_malloc (data_block, size + 1);
                memcpy (addr->name, addr->full + start, size);
                addr->name[size] = '\0';
        }

        /* each of these accepts a missing name */
        strip_name (addr);
        strip_quotes (addr->name);
        strip_escapes (addr->name);

        if (is_atom (addr->name))
                addr->flags.bits.atomic_name = YES;
}



/**
 * Guess the sex of the address owner, unless it is already known.
 *
 * The first available of: first, name, full, email is matched against
 * FEMALE_RE.  A match means SEX_FEMALE, no match means SEX_MALE.  When
 * none of the strings is available the sex is left untouched.
 */
static void
determine_sex (address_t *addr)
{
        regmatch_t  unused[1];
        char       *subject;

        if (addr->flags.bits.sex != SEX_UNKNOWN)
                return;

        subject = addr->first;
        if (subject == NULL)
                subject = addr->name;
        if (subject == NULL)
                subject = addr->full;
        if (subject == NULL)
                subject = addr->email;
        if (subject == NULL)
                return;

        if (! misc_regex (FEMALE_RE, subject, unused))
                addr->flags.bits.sex = SEX_MALE;
        else
                addr->flags.bits.sex = SEX_FEMALE;
}



/**
 * Let addr->last point at the last word of addr->name, i.e. at the
 * character following the white space matched by LAST_RE.  Nothing is
 * changed when there is no name or when the expression doesn't match.
 */
static void
find_last (address_t *addr)
{
        regmatch_t found[1];

        if (addr->name != NULL
            && misc_regex (LAST_RE, addr->name, found)){
                /* + 1 skips the white space that starts the match */
                addr->last = addr->name + found[0].rm_so + 1;
        }
}



/**
 * Build addr->initials from addr->name.
 *
 * The initials consist of the first alphabetic character of every
 * white-space-delimited word of the name.  They are collected in a
 * temporary buffer and then copied to the memory block.  Nothing is
 * done when the name is missing or empty.
 */
static void
make_initials (address_t *addr, memblock_t **data_block)
{
        int   len;
        int   first = 1;
        char *seek;
        char *initials;
        char *iseek;

        if (addr->name == NULL)
                return;

        len = strlen (addr->name);

        if (len < 1)
                return;

        /* there can't be more initials than characters in the name */
        iseek = initials = xmalloc (len + 1);

        for (seek = addr->name; *seek; seek++){
                /* <ctype.h> functions require a value representable as
                   unsigned char; names with 8-bit characters would pass
                   negative values otherwise. */
                if (isalpha ((unsigned char) *seek) && first){
                        *iseek = *seek;
                        first  = 0;
                        iseek++;
                }
                else if (isspace ((unsigned char) *seek)){
                        first  = 1;
                }
        }
        *iseek = '\0';
        addr->initials = memblock_strdup (data_block, initials);
        xfree (initials);
}


/**
 * Build addr->full from addr->name and addr->email.
 *
 * With both of them available the result is
 *     name <email>        when the name is an atom,
 *     "name" <email>      otherwise; double quotes and backslashes
 *                         found in the name are preceded with
 *                         a backslash.
 * The atomic_name flag (and quotes_in_name, when a double quote had to
 * be escaped) is set accordingly.
 *
 * With only one of them available no new string is created: full
 * becomes an alias of name (kind is set to KIND_NO_EMAIL), or of email
 * (kind is set to KIND_PURE_EMAIL; this also covers the case when both
 * are missing and full stays NULL).
 */
static void
make_full (address_t *addr, memblock_t **data_block)
{
        int   len;
        int   n_len;
        int   e_len;
        int   f_len;
        char *f_ptr;
        char *seek;

        if (addr->name && addr->email){
                if (is_atom (addr->name)){
                        addr->flags.bits.atomic_name = YES;
                        addr->full = memblock_sprintf (data_block, "%s <%s>", addr->name,
                                                       addr->email);
                        return;
                }

                addr->flags.bits.atomic_name = NO;

                n_len = strlen (addr->name);
                e_len = strlen (addr->email);
                /* In the worst case every character of the name needs
                   a backslash.  The space between the closing quote and
                   the opening angle has to be counted as well.
                        "    name       "  ' '  <   email   >  \0 */
                f_len = 1 + 2 * n_len + 1 + 1 + 1 + e_len + 1 + 1;

                f_ptr  = addr->full = memblock_malloc (data_block, f_len);
                *f_ptr = '"';
                f_ptr++;
                for (seek = addr->name; *seek; seek++){
                        if (*seek == '"'){
                                *f_ptr = '\\';
                                f_ptr++;
                                addr->flags.bits.quotes_in_name = YES;
                        }
                        else if (*seek == '\\'){
                                *f_ptr = '\\';
                                f_ptr++;
                        }
                        *f_ptr = *seek;
                        f_ptr++;
                }

                /* LEN is the number of bytes actually used, including
                   the terminator; it is handed to memblock_shrink_last
                   (presumably to give back the unused tail -- verify
                   against memblock.h). */
                len  = f_ptr - addr->full;
                len += sprintf (f_ptr, "\" <%s>", addr->email);
                len += 1;

                memblock_shrink_last (*data_block, len);
        }
        else if (addr->name){
                addr->full            = addr->name;
                addr->flags.bits.kind = KIND_NO_EMAIL;
        }
        else {
                addr->full            = addr->email;
                addr->flags.bits.kind = KIND_PURE_EMAIL;
        }
}



/**
 * Fill in every field of the address that is still missing.
 *
 * The order of the steps matters: the kind is determined from the full
 * text, the name depends on the kind, and the sex, the last name and
 * the initials are derived from the name.
 */
static void
complete_info (address_t *addr, memblock_t **data_block)
{
        if (! addr->full){
                make_full (addr, data_block);
        }

        if (addr->flags.bits.kind == KIND_UNKNOWN){
                determine_kind (addr, data_block);
        }

        if (! addr->name){
                determine_name (addr, data_block);
        }

        if (addr->flags.bits.sex == SEX_UNKNOWN){
                determine_sex (addr);
        }

        if (! addr->last){
                find_last (addr);
        }

        if (! addr->initials){
                make_initials (addr, data_block);
        }
}


/**
 * Put the address into addr_table under its full text.
 *
 * Returns NULL when the address has been stored, or the address that
 * already occupies the entry (ADDR is not stored then).
 */
static address_t *
insert (address_t *addr)
{
        entry_t *slot = htable_insert (addr_table, addr->full, NULL);

        if (slot->content == NULL){
                slot->content = addr;
                return NULL;
        }

        return slot->content;
}


/**
 * Adapter handed to htable_iterator (): passes the address stored in
 * the entry to the function installed in iterator_fun.
 */
static void
iterator (entry_t *entry)
{
        iterator_fun (entry->content);
}


/**
 * Handed to htable_iterator (): sets the index of the address stored
 * in the entry to -1.
 */
static void
reset_index (entry_t *entry)
{
        address_t *stored;

        stored        = entry->content;
        stored->index = -1;
}


/****************************************************************************
 *    INTERFACE FUNCTIONS
 ****************************************************************************/


/**
 * Create the address table (addr_table) and the memory block
 * (data_block) that the rest of this module works on.
 */
void
address_init (void)
{
        addr_table = htable_create (INITIAL_HASH_SIZE);
        data_block = memblock_create (INITIAL_BLOCK_SIZE);
}



/**
 * Destroy the address table and the memory block, if they exist, and
 * reset both pointers to NULL.  Calling it again is harmless.
 */
void
address_free_resources (void)
{
        if (addr_table != NULL){
                htable_destroy (addr_table, NULL);
                addr_table = NULL;
        }

        if (data_block != NULL){
                memblock_destroy (data_block);
                data_block = NULL;
        }
}


/****************************************************************************
 *    OBJECT OPERATIONS
 ****************************************************************************/

/**
 * Allocate a new address from data_block.  All of its strings and its
 * groups are set to NULL, all of its flags are cleared.  The address
 * is not put into the address table.
 */
address_t *
address_empty (void)
{
        address_t *fresh = memblock_malloc (& data_block, sizeof (address_t));

        fresh->flags.value = 0;

        fresh->full     = NULL;
        fresh->email    = NULL;
        fresh->name     = NULL;
        fresh->first    = NULL;
        fresh->last     = NULL;
        fresh->initials = NULL;
        fresh->groups   = NULL;

        return fresh;
}


/**
 * Return the address described by STR.
 *
 * A new address is created from a trimmed copy of STR and inserted
 * into the address table.  If the table already holds an address with
 * the same full text, everything allocated here is released (with the
 * help of the mark set on data_block) and the stored address is
 * returned.  Otherwise the remaining fields of the new address are
 * filled in and the new address is returned.
 */
address_t *
address_from_string (const char *str)
{
        address_t *fresh;
        address_t *known;

        memblock_set_mark (data_block);

        fresh       = address_empty ();
        fresh->full = clean_copy (& data_block, str);

        known = insert (fresh);
        if (known == NULL){
                complete_info (fresh, & data_block);
                return fresh;
        }

        /* already there, forget about our copy */
        memblock_free_marked (& data_block);
        return known;
}


/**
 * Fill in the missing fields of ADDR and insert it into the address
 * table.  Returns the address that is stored in the table under the
 * same full text: ADDR itself, or the one that was there before.
 */
address_t *
address_complete (address_t *addr)
{
        address_t *known;

        complete_info (addr, & data_block);
        known = insert (addr);

        return known ? known : addr;
}


/**
 * Look an address up in the address table by its full text.
 *
 * Returns the stored address, or NULL when the table has no entry for
 * STR.
 */
address_t *
address_find (const char *str)
{
        entry_t *entry;

        entry = htable_lookup (addr_table, str);

        /* NOTE(review): htable_lookup presumably returns NULL for
           a missing key -- confirm against hash.h.  Don't dereference
           the result in that case. */
        if (entry == NULL)
                return NULL;

        return (address_t *) entry->content;
}



/**
 * Read a string from the chunk and turn it into an address with
 * address_from_string ().  The string obtained from the chunk is
 * released here.  Returns NULL when no string could be obtained.
 */
address_t *
address_read (memchunk_t *chunk)
{
        address_t *result = NULL;
        char      *text   = memchunk_strget (chunk);

        if (text != NULL){
                result = address_from_string (text);
                xfree (text);
        }

        return result;
}



/**
 * Write the address to the chunk.  Only the full text is stored;
 * address_read rebuilds the address from it.
 */
void
address_dump (address_t *addr, memchunk_t *chunk)
{
        memchunk_strdump (chunk, addr->full);
}


/****************************************************************************
 *    SERVICE FUNCTIONS
 ****************************************************************************/


/**
 * Return the text to be displayed for the address: its name, or its
 * full text when there is no name.  Returns NULL for a NULL address.
 */
char *
address_name (address_t *addr)
{
        if (addr == NULL)
                return NULL;

        return addr->name ? addr->name : addr->full;
}


/**
 * Return the result of ask_for_default () for the "wrote" setting that
 * suits the address: the key depends on the sex and on the official
 * flag.  Returns NULL for a NULL address and for an unknown sex.
 */
char *
address_wrote_format (address_t *addr)
{
        char *key;

        if (addr == NULL)
                return NULL;

        switch (addr->flags.bits.sex){

                case SEX_MALE:
                        key = addr->flags.bits.official
                                ? "o_male_wrote" : "male_wrote";
                        break;

                case SEX_FEMALE:
                        key = addr->flags.bits.official
                                ? "o_female_wrote" : "female_wrote";
                        break;

                case SEX_UNKNOWN:
                default:
                        return NULL;
        }

        return ask_for_default (key, NULL);
}


/**
 * Compare two addresses with strcoll ().
 *
 * Each address is represented by the first available of: last, name,
 * full.  When either of the addresses has none of them, they are
 * reported as equal (0).
 */
int
address_cmp (const address_t *a, const address_t *b)
{
        const char *as;
        const char *bs;

        as = a->last ? a->last : (a->name ? a->name : a->full);
        bs = b->last ? b->last : (b->name ? b->name : b->full);

        if (as != NULL && bs != NULL)
                return strcoll (as, bs);

        return 0;
}



/**
 * Call FUN for every address stored in the address table.
 *
 * FUN is kept in the file-scope iterator_fun variable while the table
 * is being traversed, and the variable is reset to NULL afterwards.
 */
void
address_for_all (void (*fun)(address_t *))
{
        iterator_fun = fun;
        htable_iterator (addr_table, iterator);
        iterator_fun = NULL;
}



/**
 * Set the index of every address stored in the address table to -1.
 */
void
address_reset_indexes (void)
{
        htable_iterator (addr_table, reset_index);
}

/****************************************************************************
 *    INTERFACE CLASS BODIES
 ****************************************************************************/
/****************************************************************************
 *
 *    END MODULE address.c
 *
 ****************************************************************************/


/* syntax highlighted by Code2HTML, v. 0.9.1 */