/*
 * filter.c
 * 
 * $Id: filter.c,v 1.5 2002/11/21 06:59:46 conrads Exp $
 */

#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <regex.h>

#include "msuck.h"
#include "nntp.h"

/* path of the filters file opened by read_filters() */
extern char *filterspath;
/*
 * connection to the local server; filter_group() opens it and
 * article_exists() talks over its in/out streams
 */
extern CONNECTION local;

/* list of active groups */
extern ACTIVE_INFO active[];

/*
 * killlog: log stream for rejected articles and filter file errors
 * xoverfile: overview data currently being read by getxover()
 */
extern FILE *killlog, *xoverfile;
/* path of the overview file, built in filter_group() */
extern char xoverpath[];

/* number of entries of filter[] in use, as returned by read_filters() */
static int numfilters;
/* number of entries of active[] searched by bad_xref() */
extern int local_active_groups;

/* output of filter_group(): one "artnum message-id" line per passed article */
FILE *resultsfile;
/* path of resultsfile: xoverpath with ".filtered" appended */
char resultspath[PATH_MAX + 1];

/* filter table filled in by read_filters() */
FILTER filter[MAXREGEXES];

/*
 * match array and match count handed to every regexec() call; nmatches is
 * never assigned in this file, so it keeps its initial value of 0
 */
static regmatch_t pmatches[MAXREGEXES];
static int nmatches;

/*
 * One overview (xover) record, split into its tab-separated fields in the
 * order in which getxover() reads them from the overview file
 */
typedef struct overview
{
	char artnum[MAXART + 1];	/* first field: article number */
	char subject[256];	/* matched by "Subject" filters */
	char from[256];		/* matched by "From" filters */
	char date[128];		/* matched by "Date" filters */
	char message_id[256];	/* sent to the local server by article_exists() */
	char references[2048];	/* matched by "References" filters */
	char bytes[16];		/* kept as text; matched by "Bytes" filters */
	char lines[16];		/* kept as text; matched by "Lines" filters */
	char xref[1024];	/* last field; parsed by bad_xref() */
} OVERVIEW;

/* the record most recently read by getxover() */
static OVERVIEW xover;

/*
 * Compile one regex taken from the filters file
 * 
 * On failure the regerror() text is written to killlog together with the
 * offending pattern and its line number.
 * 
 * returns 0 on success, -1 on failure
 */
static int
compile_filter_regex(regex_t * re, const char *pattern, int lineno)
{
	char errbuf[MAXERR + 1];
	int err;

	if ((err = regcomp(re, pattern, REGEXFLAGS)) == 0)
		return 0;

	regerror(err, re, errbuf, sizeof errbuf);
	fprintf(killlog,
		"read_filters(): bad regex in filters file, line %d: %s: %s\n",
		lineno, pattern, errbuf);
	return -1;
}

/*
 * Read in and compile the filters file
 * 
 * Each line holds four tab-separated fields: action, group pattern, field
 * pattern and data pattern.  The three patterns are compiled with regcomp().
 * At most MAXREGEXES lines are read.
 * 
 * On any error every regex compiled so far is released again, so the caller
 * has nothing to clean up.
 * 
 * returns -1 on error, 0 if no filters file or empty file, else number of
 * filters
 */
int
read_filters(void)
{
	FILE *filters;
	int i, n;
	int compiled = 0;	/* regexes compiled so far for entry i */

	if ((filters = fopen(filterspath, "r")) == NULL)
	{
		perror(filterspath);
		return 0;
	}

	for (i = 0; (!feof(filters)) && (i < MAXREGEXES); ++i)
	{
		compiled = 0;

		/*
		 * NOTE(review): the %[ conversions carry no field width, so an
		 * over-long field overruns its destination in the FILTER
		 * entry.  The buffer sizes are declared outside this file;
		 * add explicit widths once they are confirmed.
		 */
		n = fscanf(filters, "%[^\t]\t%[^\t]\t%[^\t]\t%[^\t\n]\n",
			   filter[i].action,
			   filter[i].group.pattern,
			   filter[i].field.pattern,
			   filter[i].data.pattern);
		if (n == EOF)
		{
			if (ferror(filters))
			{
				perror(filterspath);
				goto fail;
			}
			/* nothing (more) to read, e.g. an empty file */
			break;
		}
		if (n != 4)
		{
			fprintf(killlog,
				"read_filters(): format error in filters file, line %d: %s %s %s %s\n",
				i + 1,
				filter[i].action,
				filter[i].group.pattern,
				filter[i].field.pattern,
				filter[i].data.pattern);
			goto fail;
		}

		if (compile_filter_regex((regex_t *) & filter[i].group.re,
					 filter[i].group.pattern, i + 1) != 0)
			goto fail;
		compiled = 1;

		if (compile_filter_regex((regex_t *) & filter[i].field.re,
					 filter[i].field.pattern, i + 1) != 0)
			goto fail;
		compiled = 2;

		if (compile_filter_regex((regex_t *) & filter[i].data.re,
					 filter[i].data.pattern, i + 1) != 0)
			goto fail;
	}

	fclose(filters);
	return i;

fail:
	/* release the partly compiled entry i ... */
	if (compiled > 1)
		regfree((regex_t *) & filter[i].field.re);
	if (compiled > 0)
		regfree((regex_t *) & filter[i].group.re);

	/* ... and every complete entry before it */
	while (i-- > 0)
	{
		regfree((regex_t *) & filter[i].group.re);
		regfree((regex_t *) & filter[i].field.re);
		regfree((regex_t *) & filter[i].data.re);
	}

	fclose(filters);
	return -1;
}

/*
 * remove all filters from memory
 * 
 * Frees the three compiled regexes of each of the first numfilters entries
 * of filter[], then resets numfilters so that a repeated call does not free
 * the same regexes a second time.
 */
void
remove_filters(void)
{
	int i;

	for (i = 0; i < numfilters; ++i)
	{
		regfree(&filter[i].group.re);
		regfree(&filter[i].field.re);
		regfree(&filter[i].data.re);
	}

	/* nothing is compiled any more */
	numfilters = 0;
}

/*
 * Read a single xover record from overview file and break it down into its
 * components
 * 
 * returns 1 on success, or 0
 */
static int
getxover(void)
{
	char *line, *ptr;
	char buf[MAXXOVER + 1];

	if (!feof(xoverfile))
	{
		if (fgets(buf, MAXXOVER, xoverfile) == NULL)
		{
			return 0;
		}

		line = buf;

		ptr = xover.artnum;

		while (*line != '\t')
			*ptr++ = *line++;
		*ptr = '\0';

		ptr = xover.subject;

		while (*++line != '\t')
			*ptr++ = *line;
		*ptr = '\0';

		ptr = xover.from;

		while (*++line != '\t')
			*ptr++ = *line;
		*ptr = '\0';

		ptr = xover.date;

		while (*++line != '\t')
			*ptr++ = *line;
		*ptr = '\0';

		ptr = xover.message_id;

		while (*++line != '\t')
			*ptr++ = *line;
		*ptr = '\0';

		ptr = xover.references;

		while (*++line != '\t')
			*ptr++ = *line;
		*ptr = '\0';

		ptr = xover.bytes;

		while (*++line != '\t')
			*ptr++ = *line;
		*ptr = '\0';

		ptr = xover.lines;

		while (*++line != '\t')
			*ptr++ = *line;
		*ptr = '\0';

		ptr = xover.xref;

		while (*++line != '\r')
			*ptr++ = *line;
		*ptr = '\0';

		return 1;
	}
	return 0;
}

/*
 * Run the group regex of filter i against a group name
 * 
 * returns 1 if regexec() reports a match, else 0
 */
static int
match_group(const char *group, int i)
{
	regex_t *group_re = (regex_t *) & filter[i].group.re;
	int status = regexec(group_re, group, nmatches, pmatches, 0);

	return (status == 0);
}

/*
 * Run the data regex of filter i against str
 * 
 * returns str itself if regexec() reports a match, else NULL
 */
static char *
match_field(char *str, int i)
{
	regex_t *data_re = (regex_t *) & filter[i].data.re;

	return (regexec(data_re, str, nmatches, pmatches, 0) == 0) ? str : NULL;
}

/*
 * Apply filter i to the current overview record
 * 
 * The filter only applies when its group regex matches the group name.  The
 * filter's field pattern is then compared literally (strcmp) with the known
 * header names to pick the overview field that the data regex is run
 * against.
 * 
 * returns the matching field text, or NULL if the group does not match, the
 * field name is unknown or the data regex does not match
 */
static char *
match_filter(const char *group, int i)
{
	static const struct
	{
		const char *name;
		char *text;
	} headers[] =
	{
		{"Subject", xover.subject},
		{"From", xover.from},
		{"Date", xover.date},
		{"Bytes", xover.bytes},
		{"Lines", xover.lines},
		{"Message-ID", xover.message_id},
		{"Xref", xover.xref},
		{"References", xover.references}
	};
	size_t k;

	if (!match_group(group, i))
		return NULL;

	for (k = 0; k < sizeof headers / sizeof headers[0]; ++k)
	{
		if (strcmp(filter[i].field.pattern, headers[k].name) == 0)
			return (match_field(headers[k].text, i));
	}
	return NULL;
}

/*
 * see if article already exists on local server
 * 
 * Sends "check <mess_id>" over the local connection and compares the first
 * three characters of the reply with NNTP_ERR_GOTID.  A hit is logged to
 * killlog together with group and artnum, which are used for logging only.
 * 
 * return 1 on article exists, 0 if it does not, or -1 when the command
 * could not be written or no reply could be read
 */
int
article_exists(const char *group, const char *artnum, const char *mess_id)
{
	char linebuf[MAXLINE + 1];
	int result;

	/*
	 * flush explicitly: the reply is awaited right away, so the command
	 * must not be left sitting in the stdio buffer
	 */
	if ((fprintf(local.out, "check %s\r\n", mess_id) < 0) ||
	    (fflush(local.out) == EOF))
	{
		perror("article_exists(): write error on local socket");
		return -1;
	}

	if (fgets(linebuf, MAXLINE, local.in) == NULL)
	{
		perror("article_exists(): read error on local socket");
		return -1;
	}

	/* check response code */
	if ((result = (strncmp(linebuf, NNTP_ERR_GOTID, 3) == 0)))
		fprintf(killlog, "Article exists: %s:%s %s\n", group, artnum, mess_id);

	return result;
}

/*
 * Look up a group name in the local active list
 * 
 * name need not be NUL-terminated; only its first len characters are
 * compared, and an entry matches only if it is exactly len characters long.
 * 
 * returns 1 if found, else 0
 */
static int
xref_group_is_active(const char *name, size_t len)
{
	int j;

	for (j = 0; j < local_active_groups; ++j)
	{
		if ((strlen(active[j]) == len) && (strncmp(active[j], name, len) == 0))
			return 1;
	}
	return 0;
}

/*
 * Check the groups named in the Xref field of the current overview record
 * against the local active groups
 * 
 * The field is expected to look like "Xref: host group:num group:num ...".
 * The group names are checked in place, so neither their number nor their
 * length is limited.  The first group that is not locally active is logged
 * to killlog.
 * 
 * A field without the "Xref:" and host parts names no groups, so there is
 * nothing to reject and 0 is returned.
 * 
 * returns 1 if some cross-posted group is not active locally, else 0
 */
static int
bad_xref(void)
{
	const char *ptr, *name;
	size_t len;

	/* skip the Xref: part */
	if ((ptr = strchr(xover.xref, ' ')) == NULL)
		return 0;

	/* skip the hostname */
	if ((ptr = strchr(ptr + 1, ' ')) == NULL)
		return 0;
	++ptr;

	/* now pointing to first xref, loop until end */
	while (*ptr != '\0')
	{
		if (*ptr == ' ')
		{
			++ptr;
			continue;
		}

		/* the group name ends at the : in front of the number */
		name = ptr;
		len = strcspn(name, ": ");

		/* disregard the : and number */
		ptr = name + len;
		ptr += strcspn(ptr, " ");

		if (!xref_group_is_active(name, len))
		{
			fprintf(killlog, "Bad Xref: %.*s\n", (int) len, name);
			return 1;
		}
	}
	return 0;
}

/*
 * Decide whether the current overview record should be fetched
 * 
 * The article is rejected when the local server already has it, when
 * bad_xref() flags its Xref field, or when the first filter that matches it
 * has action "D".  A first matching filter with any other action keeps the
 * article.
 * 
 * returns 0 if the article passed, 1 if it was rejected, or -1 on error
 */
static int
filter_article(const char *group)
{
	int idx;
	int exists;
	char *matched;

	exists = article_exists(group, xover.artnum, xover.message_id);
	if (exists < 0)		/* error occurred */
		return -1;
	if (exists == 1)
		return 1;

	if (bad_xref())
		return 1;

	/* the first filter that matches decides */
	for (idx = 0; idx < numfilters; ++idx)
	{
		matched = match_filter(group, idx);
		if (matched == NULL)
			continue;

		if (strcmp(filter[idx].action, "D") != 0)
			return 0;	/* keep */

		fprintf(killlog, "%s: %s MATCHES %s\n",
			filter[idx].field.pattern, matched, filter[idx].data.pattern);
		return 1;
	}
	return 0;
}

/*
 * filter articles in group
 * 
 * Reads the overview file of the group, runs every record through
 * filter_article() and writes "artnum message-id" for each article that
 * passed to the results file (the overview path with ".filtered"
 * appended).  Once the records have been processed the overview file is
 * unlinked.
 * 
 * Both files are closed on every path, including the early error returns.
 * 
 * returns number of articles which passed, or -1 on error
 */
int
filter_group(const char *group)
{
	int n;
	int passed = 0;

	/* open xover data obtained from remote server */
	snprintf(xoverpath, PATH_MAX, XOVER_PATH_TEMPLATE, group);

	if ((xoverfile = fopen(xoverpath, "r")) == NULL)
	{
		perror(xoverpath);
		return -1;
	}

	/* open file for filter results */
	snprintf(resultspath, PATH_MAX, "%s.filtered", xoverpath);

	if ((resultsfile = fopen(resultspath, "w")) == NULL)
	{
		perror(resultspath);
		fclose(xoverfile);
		return -1;
	}

	/* compile the filters file */
	if ((numfilters = read_filters()) == -1)
	{
		perror("error reading filters file, exiting");
		passed = -1;
		goto close_files;
	}

	if (connect_to_server("localhost", &local) <= 0)
	{
		perror("filter_group(): connection to local server failed");
		passed = -1;
		goto free_filters;
	}

	while (getxover())
	{
		if ((n = filter_article(group)) == 0)	/* article passed */
		{
			fprintf(resultsfile, "%s %s\n", xover.artnum, xover.message_id);
			++passed;
		}
		else if (n < 0)	/* an error occurred */
		{
			/* report the error itself, not a reduced count */
			passed = -1;
			break;
		}
	}

	quit_server(local);
	unlink(xoverpath);

free_filters:
	remove_filters();

close_files:
	fclose(xoverfile);

	/* fclose() flushes; a failure here means results were lost */
	if ((fclose(resultsfile) != 0) && (passed >= 0))
	{
		perror(resultspath);
		passed = -1;
	}
	return passed;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */