/*
elmo - ELectronic Mail Operator
Copyright (C) 2003 rzyjontko, University of Wroclaw
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
----------------------------------------------------------------------
This file is part of the implementation of the Bayesian Mail Filter, which is
covered by Paul Graham's articles at
http://www.paulgraham.com/spam.html
and
http://www.paulgraham.com/better.html
This module was written for the course on Artificial Intelligence at
University of Wroclaw, Institute of Computer Science.
*/
/****************************************************************************
* IMPLEMENTATION HEADERS
****************************************************************************/
#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>
#include "token.h"
#include "error.h"
/****************************************************************************
* IMPLEMENTATION PRIVATE DEFINITIONS / ENUMERATIONS / SIMPLE TYPEDEFS
****************************************************************************/
/* Number of slots in the stream table, i.e. how many token streams can be
   open at the same time. */
#define STREAM_COUNT 2
/****************************************************************************
* IMPLEMENTATION PRIVATE CLASS PROTOTYPES / EXTERNAL CLASS REFERENCES
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE STRUCTURES / UTILITY CLASSES
****************************************************************************/
/* State of a single token stream: a text buffer that is scanned, match by
   match, with a compiled regular expression. */
struct tstream {
/* internal values */
/* non-zero while the slot is free, zero while a stream is open in it */
int unused;
/* pattern compiled by token_open, released by token_close */
regex_t compiled;
/* result of the most recent regexec call; only the whole match is kept */
regmatch_t matches[1];
/* place in the buffer where the next search starts */
char *position;
/* start of the token returned most recently, NULL before the first read */
char *last_token;
/* user specified */
/* text to be split into tokens, exactly as handed to token_open */
char *buffer;
/* when positive, matches longer than this many characters are skipped */
int max_len;
/* non-zero allows token_read_next to write a '\0' right after each token */
int may_modify;
};
/****************************************************************************
* IMPLEMENTATION REQUIRED EXTERNAL REFERENCES (AVOID)
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE DATA
****************************************************************************/
/* Table of token streams; the descriptor returned by token_open is an index
   into this table.  Static storage is zero-initialised, which leaves every
   slot with unused == 0 (i.e. looking busy) until token_init has been
   called. */
static struct tstream streams[STREAM_COUNT];
/****************************************************************************
* INTERFACE DATA
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE FUNCTION PROTOTYPES
****************************************************************************/
/****************************************************************************
* IMPLEMENTATION PRIVATE FUNCTIONS
****************************************************************************/
/****************************************************************************
* INTERFACE FUNCTIONS
****************************************************************************/
void
token_init (void)
{
int i;
for (i = 0; i < STREAM_COUNT; i++)
streams[i].unused = 1;
}
int
token_open (char *buffer, int max_len, const char *regex, int may_modify)
{
int ret;
int i;
if (buffer == NULL)
return -1;
for (i = 0; i < STREAM_COUNT; i++){
if (streams[i].unused)
break;
}
if (i == STREAM_COUNT)
return -1;
streams[i].unused = 0;
streams[i].buffer = buffer;
streams[i].max_len = max_len;
streams[i].may_modify = may_modify;
streams[i].position = buffer;
streams[i].last_token = NULL;
ret = regcomp (&streams[i].compiled, regex,
REG_ICASE | REG_EXTENDED | REG_NEWLINE);
if (ret){
error_regex (ret, &streams[i].compiled, regex);
regfree (&streams[i].compiled);
return -1;
}
return i;
}
void
token_close (int td)
{
if (streams[td].unused)
return;
streams[td].unused = 1;
regfree (&streams[td].compiled);
}
char *
token_read_next (int td)
{
int ret;
int len;
char *string;
struct tstream *stream = streams + td;
if (stream->unused)
return NULL;
string = stream->position;
if (*string == '\0')
return NULL;
do {
stream->position = string;
ret = regexec (&stream->compiled, stream->position, 1,
stream->matches, 0);
if (ret){
if (ret != REG_NOMATCH){
error_regex (ret, &stream->compiled, NULL);
}
return NULL;
}
len = stream->matches[0].rm_eo - stream->matches[0].rm_so;
string += stream->matches[0].rm_eo + 1;
}
while (stream->max_len > 0 && len > stream->max_len);
stream->last_token = stream->position + stream->matches[0].rm_so;
if (stream->may_modify){
*(stream->position + stream->matches[0].rm_eo) = '\0';
}
stream->position += stream->matches[0].rm_eo;
if (*stream->position)
stream->position++;
return stream->last_token;
}
/****************************************************************************
* INTERFACE CLASS BODIES
****************************************************************************/
/****************************************************************************
*
* END MODULE token.c
*
****************************************************************************/
/* syntax highlighted by Code2HTML, v. 0.9.1 */