/* 
   elmo - ELectronic Mail Operator

   Copyright (C) 2003 rzyjontko, University of Wroclaw

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  

   ----------------------------------------------------------------------

   This file is part of the implementation of a Bayesian Mail Filter, which
   is described in Paul Graham's articles at
   http://www.paulgraham.com/spam.html
   and
   http://www.paulgraham.com/better.html

   This module was written for the Artificial Intelligence course at the
   University of Wroclaw, Institute of Computer Science.

*/
/****************************************************************************
 *    IMPLEMENTATION HEADERS
 ****************************************************************************/

#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>

#include "token.h"
#include "error.h"

/****************************************************************************
 *    IMPLEMENTATION PRIVATE DEFINITIONS / ENUMERATIONS / SIMPLE TYPEDEFS
 ****************************************************************************/

#define STREAM_COUNT 2

/****************************************************************************
 *    IMPLEMENTATION PRIVATE CLASS PROTOTYPES / EXTERNAL CLASS REFERENCES
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE STRUCTURES / UTILITY CLASSES
 ****************************************************************************/

/*
 * State of one token stream: a caller-supplied text buffer that is scanned
 * with a compiled regular expression, one match (token) per call to
 * token_read_next.
 */
struct tstream {
        /* internal values */
        int         unused;       /* non-zero: slot is free; 0: slot is open */
        regex_t     compiled;     /* pattern compiled by token_open */
        regmatch_t  matches[1];   /* result of the most recent regexec; offsets
                                     are relative to the search start */
        char       *position;     /* where the next search starts in buffer */
        char       *last_token;   /* start of the most recently returned token,
                                     NULL until the first one is read */

        /* user specified */
        char       *buffer;       /* text to tokenize, as given to token_open */
        int         max_len;      /* if > 0, longer matches are skipped */
        int         may_modify;   /* non-zero: token_read_next writes '\0' into
                                     buffer right after each returned token */
};

/****************************************************************************
 *    IMPLEMENTATION REQUIRED EXTERNAL REFERENCES (AVOID)
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE DATA
 ****************************************************************************/

/*
 * Fixed pool of token streams; a token descriptor is an index into it.
 * Static storage is zero-initialised, so `unused` starts as 0 and every
 * slot looks busy until token_init has been called.
 */
static struct tstream streams[STREAM_COUNT];

/****************************************************************************
 *    INTERFACE DATA
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE FUNCTION PROTOTYPES
 ****************************************************************************/
/****************************************************************************
 *    IMPLEMENTATION PRIVATE FUNCTIONS
 ****************************************************************************/
/****************************************************************************
 *    INTERFACE FUNCTIONS
 ****************************************************************************/


void
token_init (void)
{
        int i;

        for (i = 0; i < STREAM_COUNT; i++)
                streams[i].unused = 1;
}



int
token_open (char *buffer, int max_len, const char *regex, int may_modify)
{
        int ret;
        int i;
  
        if (buffer == NULL)
                return -1;
  
        for (i = 0; i < STREAM_COUNT; i++){
                if (streams[i].unused)
                        break;
        }
        if (i == STREAM_COUNT)
                return -1;
  
        streams[i].unused     = 0;
        streams[i].buffer     = buffer;
        streams[i].max_len    = max_len;
        streams[i].may_modify = may_modify;
        streams[i].position   = buffer;
        streams[i].last_token = NULL;
  
        ret = regcomp (&streams[i].compiled, regex,
                       REG_ICASE | REG_EXTENDED | REG_NEWLINE);
        if (ret){
                error_regex (ret, &streams[i].compiled, regex);
                regfree (&streams[i].compiled);
                return -1;
        }
        return i;
}



void
token_close (int td)
{
        if (streams[td].unused)
                return;

        streams[td].unused = 1;
        regfree (&streams[td].compiled);
}



char *
token_read_next (int td)
{
        int   ret;
        int   len;
        char *string;
        struct tstream *stream = streams + td;

        if (stream->unused)
                return NULL;
  
        string = stream->position;

        if (*string == '\0')
                return NULL;
        
        do {
                stream->position = string;
                ret = regexec (&stream->compiled, stream->position, 1,
                               stream->matches, 0);
                if (ret){
                        if (ret != REG_NOMATCH){
                                error_regex (ret, &stream->compiled, NULL);
                        }
                        return NULL;
                }
                len     = stream->matches[0].rm_eo - stream->matches[0].rm_so;
                string += stream->matches[0].rm_eo + 1;
        }
        while (stream->max_len > 0 && len > stream->max_len);
  
        stream->last_token = stream->position + stream->matches[0].rm_so;
        if (stream->may_modify){
                *(stream->position + stream->matches[0].rm_eo) = '\0';
        }

        stream->position += stream->matches[0].rm_eo;
        if (*stream->position)
                stream->position++;

        return stream->last_token;
}


/****************************************************************************
 *    INTERFACE CLASS BODIES
 ****************************************************************************/
/****************************************************************************
 *
 *    END MODULE token.c
 *
 ****************************************************************************/


/* syntax highlighted by Code2HTML, v. 0.9.1 */