/*
 * Copyright (c) 1998,1999,2000  Kazushi (Jam) Marukawa
 * All rights of my changes are reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice in the documentation and/or other materials provided with
 *    the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* $Orig-Id: util.c,v 1.22 1997/07/23 18:35:18 agulbra Exp $ */
/* $Orig-Id: fetch.c,v 1.37 1997/07/20 00:33:38 agulbra Exp $ */
/*

Written by Arnt Gulbrandsen <agulbra@troll.no> and copyright 1995
Troll Tech AS, Postboks 6133 Etterstad, 0602 Oslo, Norway, fax +47
22646949.

Use, modification and distribution is allowed without limitation,
warranty, or liability of any kind. */

/*
This code is derived solely from leafnode+, but it uses the same structure
as Cornelius's leafnode in order to prepare for merging with Cornelius's code.
*/

#ifdef SOCKS
#include <socks.h>
#endif

#include <sys/types.h>
#ifdef BSD
#include <sys/errno.h>
#endif
#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <pwd.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>

#include "leafnode.h"

/*
 * Build the placeholder ("pseudo") article for the newsgroup grp.
 *
 * The article is written into an anonymous temporary file obtained from
 * tmpfile() and the stream is rewound before it is returned, so the
 * caller can read the article from its beginning.  The host part of the
 * Path:, From: and Message-ID: headers is fqdn, or "nowhere" when fqdn
 * is not set.
 *
 * Returns the open stream, or NULL if the temporary file could not be
 * created.
 */
FILE* buildpseudoart(const char* grp)
{
    const char* name = (fqdn != NULL) ? fqdn : "nowhere";
    FILE* f = tmpfile();

    if (!f)
	return NULL;

    /* headers */
    fprintf(f, "Path: %s\n", name);
    fprintf(f, "Newsgroups: %s\n", grp);
    fprintf(f, "From: Leafnode+ <nobody@%s>\n", name);
    fprintf(f, "Subject: Leafnode+ placeholder for group %s\n", grp);
    fprintf(f, "Date: %s\n", rfctime());
    fprintf(f, "Message-ID: <leafnode:placeholder:%s@%s>\n", grp, name);
    fputs("\n", f);

    /* body */
    fputs("This server is running leafnode+, which is a dynamic NNTP proxy\n", f);
    fputs("This means that it does not retrieve newsgroups unless someone is\n", f);
    fputs("actively reading them.\n", f);
    fputs("\n", f);
    fputs("If you do an operation on a group - such as reading an article,\n", f);
    fputs("looking at the group table of contents or similar, then leafnode+\n", f);
    fputs("will go and fetch articles from that group when it next updates.\n", f);
    fputs("\n", f);
    fputs("If you see articles in groups you do not read, that is almost\n", f);
    fputs("always because of cross-posting.  These articles do not occupy any\n", f);
    fputs("more space - they are hard-linked into each newsgroup directory\n", f);
    fputs("\n", f);

    /*
     * The first line of this paragraph used to lack its trailing newline,
     * which glued it to the next one ("retrieve thisgroup on the ...").
     */
    fputs("Since you have read this dummy article, leafnode+ will retrieve this\n", f);
    fputs("group on the next update.  Please look in this group a little later\n", f);
    fputs("and you should see some articles.\n", f);
    fputs("\n", f);
    fputs("If you have any queries about this, please talk to your newsmaster.\n", f);
    fputs("\n", f);
    fputs("Leafnode+ can be found at\n", f);
    fputs("\thttp://www.pobox.com/~jam/leafnode+/\n", f);
    fputs("\n", f);
    fputs("\n", f);

    rewind(f);
    return f;
}

/*
 * Read an article header from fp and return it as a newly allocated,
 * zero-initialised struct header_info (release it with
 * free_header_info()).
 *
 * Lines are read with getaline() until end of input, an empty line or a
 * line consisting of a single ".".  For every header line:
 *   - the line is appended, newline-terminated, to p->all, except for
 *     the Xref: header and its continuation lines;
 *   - if it is the first occurrence of one of the recognised headers
 *     (Path, Message-ID, From, Newsgroups, Subject, Date, References,
 *     Lines, Bytes, Xref, Supersedes), its value is stored, after
 *     stripspace(), in the matching field.  Continuation lines (lines
 *     starting with whitespace) are appended to the field of the header
 *     they continue.
 *
 * p->n_lines and p->n_bytes receive the number of lines read and their
 * total size, counting two extra bytes per line (presumably the CRLF
 * line ending - confirm against getaline()).  The terminating empty
 * line, when present, is included in both counts.
 */
struct header_info* parse_header(FILE* fp)
{
    struct header_info* p =
	(struct header_info*)critmalloc(sizeof(struct header_info),
					"parsing header");
    char* l;
    char** h = 0;	/* field of the header currently being collected */
    int alllen = 0;	/* length of p->all so far, without the final NUL */
    int len = 0;
    int lines = 0;
    int bytes = 0;

    memset(p, 0, sizeof(struct header_info));

    while ((l = getaline(fp)) != NULL && *l && strcmp(l, ".") != 0) {
	/*
	 * Header bytes are untrusted and may be >= 0x80; <ctype.h>
	 * functions require a value representable as unsigned char.
	 */
	int continuation = isspace((unsigned char)*l);

	len = strlen(l);
	/* count lines and bytes */
	lines++;
	bytes += len + 2;
	/* put all headers except Xref: into p->all */
	if ((continuation && h != &p->xref) ||
	    (!continuation && strncasecmp(l, "Xref: ", 6) != 0)) {
	    p->all = critrealloc(p->all, alllen + len + 2,
				 "Fetching article header");
	    strcpy(&p->all[alllen], l);
	    p->all[alllen + len] = '\n';
	    p->all[alllen + len + 1] = '\0';
	    alllen += len + 1;
	}
	/* put each specified header into p->each */
	if (continuation) {
	    /* continuation line: append it to the header it continues */
	    if (h) {
		stripspace(l);
		len = strlen(l);
		if (*l) {
		    int hlen = *h ? strlen(*h) : 0;
		    *h = critrealloc(*h, hlen + len + 1,
				     "Fetching article header");
		    strcpy(&(*h)[hlen], l);
		}
	    }
	} else {
	    char* ll = l;	/* start of the header value */
	    if (strncasecmp(l, "Path: ", 6) == 0) {
		h = &p->path;
		ll += 6;
	    } else if (strncasecmp(l, "Message-ID: ", 12) == 0) {
		h = &p->msgid;
		ll += 12;
	    } else if (strncasecmp(l, "From: ", 6) == 0) {
		h = &p->from;
		ll += 6;
	    } else if (strncasecmp(l, "Newsgroups: ", 12) == 0) {
		h = &p->newsgroups;
		ll += 12;
	    } else if (strncasecmp(l, "Subject: ", 9) == 0) {
		h = &p->subject;
		ll += 9;
	    } else if (strncasecmp(l, "Date: ", 6) == 0) {
		h = &p->date;
		ll += 6;
	    } else if (strncasecmp(l, "References: ", 12) == 0) {
		h = &p->references;
		ll += 12;
	    } else if (strncasecmp(l, "Lines: ", 7) == 0) {
		h = &p->lines;
		ll += 7;
	    } else if (strncasecmp(l, "Bytes: ", 7) == 0) {
		h = &p->bytes;
		ll += 7;
	    } else if (strncasecmp(l, "Xref: ", 6) == 0) {
		h = &p->xref;
		ll += 6;
	    } else if (strncasecmp(l, "Supersedes: ", 12) == 0) {
		h = &p->supersedes;
		ll += 12;
	    } else {
		h = NULL;
	    }
	    /* a second occurrence is treated like any "other header" */
	    if (h && *h)
		h = NULL;
	    /* save it */
	    if (h) {
		stripspace(ll);
		len = strlen(ll);
		if (*ll) {
		    int hlen = *h ? strlen(*h) : 0;
		    *h = critrealloc(*h, hlen + len + 1,
				     "Fetching article header");
		    strcpy(&(*h)[hlen], ll);
		}
		if (verbose > 4)
		    printf("...saw header %s\n", l);
	    }
	}
    }
    /* the empty line that separates header and body counts as well */
    if (l && *l == '\0') {
	lines++;
	bytes += 2;
    }
    p->n_lines = lines;
    p->n_bytes = bytes;
    return p;
}

/*
 * Parse the header of the article on fp with parse_header(), then read
 * on until end of input or a line consisting of a single "." and add the
 * remaining lines to the n_lines / n_bytes totals (each line counts as
 * its length plus two bytes).
 *
 * Returns the header_info produced by parse_header().
 */
struct header_info* parse_all(FILE* fp)
{
    struct header_info* info = parse_header(fp);
    int nlines = info->n_lines;
    int nbytes = info->n_bytes;
    const char* line;

    for (;;) {
	line = getaline(fp);
	if (line == NULL || strcmp(line, ".") == 0)
	    break;
	/* one more body line */
	lines_done:
	nlines++;
	nbytes += strlen(line) + 2;
    }

    info->n_lines = nlines;
    info->n_bytes = nbytes;
    return info;
}

/*
 * Release a header_info together with every header string it holds.
 * Passing NULL is allowed and does nothing.
 */
void free_header_info(struct header_info* p)
{
    if (p == NULL)
	return;

    /* free(NULL) is a no-op, so unset fields need no special casing */
    free(p->path);
    free(p->msgid);
    free(p->from);
    free(p->newsgroups);
    free(p->subject);
    free(p->date);
    free(p->references);
    free(p->lines);
    free(p->bytes);
    free(p->xref);
    free(p->supersedes);
    free(p->all);
    free(p);
}

/*
 * Turn a year number as found in a date header into a full year.
 *
 *   - 1000 and above: taken as is.
 *   - 100 .. 999:     assumed to be a leaked struct tm year (years since
 *                     1900), so 1900 is added.
 *   - below 100:      a two-digit year, mapped into the window that ends
 *                     five years after the current local year and starts
 *                     95 years before it.
 */
static int estimate_year(int year)
{
    time_t now;
    const struct tm* lt;
    int horizon;

    if (year > 999)
	return year;
    if (year >= 100)
	return year + 1900;

    /* current local year plus five: the newest year the window covers */
    now = time(0);
    lt = localtime(&now);
    horizon = lt->tm_year + 1905;

    /* two-digit values beyond the horizon belong to the previous century */
    if (year > horizon % 100)
	return year + (horizon - 100) / 100 * 100;
    return year + horizon / 100 * 100;
}

/*
 * Return the approximate age, in days, of an article from the value of
 * its Date: header ("[Www, ]DD Mon YYYY ...").
 *
 * Only day, month and year are used; time of day, time zone and leap
 * days are ignored, so the result is an estimate.
 *
 * Values of 1000 and above signal a date that could not be used:
 *   1000  date is NULL
 *   1001  unknown month name
 *   1003  day / month / year could not be scanned
 *   1005  month name is not exactly three letters (strict mode only)
 */
static int age(const char* date)
{
    char monthname[11];
    /*
     * Days of a (non-leap) year that precede the first day of each month,
     * so that daysbeforemonth[m] + mday is the day of the year.  (A table
     * of cumulative days at the END of each month was used here before,
     * which skewed the result by the difference in length of the two
     * months involved.)
     */
    static const int daysbeforemonth[12] = {
	  0,  31,  59,  90, 120, 151,
	181, 212, 243, 273, 304, 334
    };
    static const char* const monthabbr[12] = {
	"jan", "feb", "mar", "apr", "may", "jun",
	"jul", "aug", "sep", "oct", "nov", "dec"
    };
    int month;
    int year;
    int day;
    const char* d;
    time_t now;
    struct tm gmt;

    if (!date)
	return 1000; /* large number: OLD */
    d = skipspaces((char*)date);

#ifndef NOTSTRICTDATEFORMAT
    /*
     * XXX: Leafnode+ doesn't support "Monday," format since
     * it is not suitable with RFC.
     */
    /* casts: <ctype.h> functions need values in unsigned char range */
    if (isalpha((unsigned char)d[0]) && isalpha((unsigned char)d[1]) &&
	isalpha((unsigned char)d[2]) &&
	d[3] == ',' && isspace((unsigned char)d[4]))
	d += 5; /* skip "XXX," as day of the week */
#else
    /* Skip all XXXXXX, stuff. */
    {
	const char* comma = strchr(d, ',');
	if (comma != NULL)
	    d = comma + 1;
    }
#endif

    monthname[0] = '\0';
    if (sscanf(d, "%d %10s %d", &day, monthname, &year) < 3)
	return 1003;

    /* Adjust year. */
    year = estimate_year(year);

    /* Check year, month, dates, etc. */
#ifndef NOTSTRICTDATEFORMAT
    if (strlen(monthname) != 3)
	return 1005;
#endif
    /* only the first three letters are compared, case-insensitively */
    for (month = 0; month < 12; month++) {
	if (!strncasecmp(monthname, monthabbr[month], 3))
	    break;
    }
    if (month == 12)
	return 1001;

    now = time(0);
    gmt = *(gmtime(&now));
    /*
     * This calculation is not exactly correct: leap days, the time of
     * day and the time zone of the article are not taken into account.
     */
    return (((gmt.tm_year + 1900) - year) * 365 +
	    (daysbeforemonth[gmt.tm_mon] - daysbeforemonth[month]) +
	    (gmt.tm_mday - day));
}

/*
 * Decide whether the article described by hi should be stored.
 *
 * artno is only used in the diagnostic messages.  Returns 1 if the
 * article passes every check and 0 if it is to be discarded.  The checks
 * are made in this order, the first failing one wins:
 *   1. a non-empty Message-ID and a From header must be present;
 *   2. no file may exist yet under getmsgidfname(msgid);
 *   3. the subject must not match killsubject / killsubjecti;
 *   4. the From line must not match killfrom, and the address extracted
 *      from it must not match killfromaddress;
 *   5. the age computed from the Date header must not exceed maxold;
 *   6. the Bytes header must not exceed maxbytes (if maxbytes > 0);
 *   7. the Lines header must lie within minlines .. maxlines (each limit
 *      applies only if > 0);
 *   8. the number of groups counted in Xref must not exceed maxgroups
 *      (if maxgroups > 0).
 * Headers that are absent are not checked (apart from step 1).
 */
int storep(unsigned long artno, const struct header_info* hi)
{
    int i;
    struct stat st;

    if (hi->msgid == NULL || *hi->msgid == '\0') {
	if (verbose)
	    printf("discarding %lu: no message-id found\n", artno);
	return 0;
    } else if (hi->from == NULL) {
	if (verbose)
	    printf("discarding %lu: no from found\n", artno);
	return 0;
    }

    /* check message-id */
    if (stat(getmsgidfname(hi->msgid), &st) == 0) {
	/* an article with the same message-id is already present */
	if (verbose > 1)
	    printf("discarding %lu: it is in local spool: %s\n", artno, hi->msgid);
	return 0;
    }

    /* check subject */
    if (hi->subject && (killsubject.num > 0 || killsubjecti.num > 0)) {
	/* presumably skipre() steps over a leading "Re:" - confirm */
	char* p = skipspaces(skipre(skipspaces(hi->subject)));

	if (killsubject.num > 0) {
	    /* pattern set built on first use and kept across calls */
	    static struct patterns pats = { 0, 0, 0 };

	    if (pats.num == 0) {
		initpatterns(&pats, &killsubject, 0);
	    }
	    if ((i = matchpatterns(&pats, p)) >= 0) {
		if (verbose)
		    printf("discarding %lu: it has subject line matches "
			   "with '%s'\n", artno, killsubject.strarray[i]);
		return 0;
	    }
	}
	if (killsubjecti.num > 0) {
	    /*
	     * Same as above for killsubjecti; the third initpatterns()
	     * argument is 1 here (presumably "ignore case" - confirm).
	     */
	    static struct patterns pats = { 0, 0, 0 };

	    if (pats.num == 0) {
		initpatterns(&pats, &killsubjecti, 1);
	    }
	    if ((i = matchpatterns(&pats, p)) >= 0) {
		if (verbose)
		    printf("discarding %lu: it has subject line matches "
			   "with '%s'\n", artno, killsubjecti.strarray[i]);
		return 0;
	    }
	}
    }
    /* check from: the whole From line against killfrom */
    if (hi->from && killfrom.num > 0) {
	static struct patterns pats = { 0, 0, 0 };

	if (pats.num == 0) {
	    initpatterns(&pats, &killfrom, 0);
	}
	if ((i = matchpatterns(&pats, hi->from)) >= 0) {
	    if (verbose)
		printf("discarding %lu: it has from line matches "
		       "with '%s'\n", artno, killfrom.strarray[i]);
	    return 0;
	}
    }
    /* check from: only the address part against killfromaddress */
    if (hi->from && killfromaddress.num > 0) {
	/* scratch copy of the From line; it is edited in place below */
	static struct string a = { 0, 0 };
	char* cp;
	static struct patterns pats = { 0, 0, 0 };

	if (pats.num == 0) {
	    initpatterns(&pats, &killfromaddress, 0);
	}

	setstring(&a, hi->from);
	cp = a.str;
	/*
	 * Skip leading blanks and control characters.
	 * NOTE(review): where plain char is signed, bytes >= 0x80 also
	 * compare <= ' ' and are skipped here - confirm this is intended.
	 */
	while (*cp && *cp <= ' ')
	    cp++;
	/* skip a leading "(comment)" and the blanks that follow it */
	if (*cp == '(') {
	    for (cp++; *cp; cp++) {
		if (*cp == ')') {
		    cp++;
		    break;
		}
	    }
	    while (*cp && *cp <= ' ')
		cp++;
	}
	/*
	 * Find the end of the address.  If a '<' is met, the address is
	 * the text between it and the next '>' (or the end of the
	 * string); otherwise it ends at the first '(' or at the end of
	 * the string.  Afterwards cp[0 .. i-1] is the address.
	 */
	for (i = 0; cp[i]; i++) {
	    if (cp[i] == '<') {
		cp += i + 1;
		for (i = 0; cp[i]; i++) {
		    if (cp[i] == '>')
			break;
		}
		break;
	    }
	    if (cp[i] == '(')
		break;
	}
	/* drop trailing blanks and terminate the address */
	while (i > 0 && cp[i - 1] <= ' ')
	    i--;
	cp[i] = '\0';

	/* an empty address is not matched; i is reused as match index */
	if (i > 0) {
	    if ((i = matchpatterns(&pats, cp)) >= 0) {
		if (verbose)
		    printf("discarding %lu: it has from line matches "
			   "with '%s'\n", artno, killfromaddress.strarray[i]);
		return 0;
	    }
	}
    }
    /* check date */
    if (hi->date && (i = age(hi->date)) > maxold) {
	/* values above 1000 are age()'s error codes: always report them */
	if (verbose || i > 1000)
	    printf("discarding %lu: it is posted more than %d days ago: %s\n",
		   artno, i, hi->date);
	return 0;
    }
    /* check references */
    /* (no check is implemented for references) */
    /* check bytes */
    if (hi->bytes) {
	i = strtol(hi->bytes, NULL, 10);
	if (maxbytes > 0 && i > maxbytes) {
	    if (verbose)
		printf("discarding %lu: it has %d bytes\n",
		       artno, i);
	    return 0;
	}
    }
    /* check lines */
    if (hi->lines) {
	i = strtol(hi->lines, NULL, 10);
	if (maxlines > 0 && i > maxlines) {
	    if (verbose)
		printf("discarding %lu: it has %d lines\n",
		       artno, i);
	    return 0;
	}
	if (minlines > 0 && i < minlines) {
	    if (verbose)
		printf("discarding %lu: it has %d lines\n",
		       artno, i);
	    return 0;
	}
    }
    /* xref is an optional field, so check it only if it is present */
    if (hi->xref && *hi->xref) {
	const char* p = hi->xref;
	/*
	 * Count the ':' characters, starting at -1 so that the colon of
	 * a leading "Xref:" is not counted as a group.
	 * NOTE(review): parse_header() stores the value without the
	 * "Xref: " prefix, in which case this undercounts by one -
	 * confirm where hi->xref comes from.
	 */
	i = -1;		/* skip "Xref:" part */
	while ((p = strchr(p, ':')) != NULL) {
	    p++;
	    i++;
	}
	if (maxgroups > 0 && i > maxgroups) {
	    if (verbose)
		printf("discarding %lu: it is posted in %d news groups\n",
		       artno, i);
	    return 0;
	}
    }
    return 1;
}

/*
 * Link the article file `filename' into every known group listed in
 * `newsgroups' and write the resulting Xref: header line to `filehandle'.
 *
 * filename    existing article file that is hard-linked into the groups
 * filehandle  stream that receives the "Xref: host group:number ..." line
 * newsgroups  comma separated list of group names; it is MODIFIED (the
 *             commas are overwritten with NUL characters)
 * hi          parsed header, used for the progress messages only
 *
 * For every group found by findgroup() the group directory is entered
 * with chdirgroup() and the article is linked under the next free
 * article number (cg->last is incremented until link() no longer fails
 * with EEXIST).  Groups that are unknown are skipped.  Link errors are
 * reported through mysyslog() and leave the group out of the Xref line.
 */
void  store(const char* filename,
	    FILE* filehandle,
	    char* newsgroups,
	    const struct header_info* hi)
{
    char artname[32];	   /* decimal article number; fits any unsigned long */
    char xrefincase[4096]; /* 1024 for newsgroups, plus article numbers */
    size_t used = 0;	   /* characters of xrefincase in use */
    char* p;
    char* q;
    static struct newsgroup* cg = NULL; /* last group used, kept as a cache */

    /* the Xref line must be a valid string even if nothing gets linked */
    xrefincase[0] = '\0';

    if (verbose == 3)
	printf("storing %s\n", hi->msgid);
    else if (verbose > 3)
	printf("storing %s: %s\n", hi->msgid, hi->newsgroups);

    p = newsgroups;
    while (p && *p) {
	/* cut the current group name off the list */
	q = strchr(p, ',');
	if (q)
	    *q++ = '\0';
	if (*p) {
	    if (cg == NULL || strcmp(cg->name, p) != 0) {
		cg = findgroup(p);
	    }
	    if (cg) {
		int rc;
		int err;

		/*
		 * NOTE(review): linking goes ahead even when chdirgroup()
		 * reports failure, as it always did - confirm whether the
		 * group should be skipped in that case.
		 */
		if (chdirgroup(cg, 1) == 0) {
		    mysyslog(LOG_ERR, "chdir to %s: %s", p, strerror(errno));
		}
		do {
		    snprintf(artname, sizeof(artname), "%lu", ++cg->last);
		    if (verbose > 2)
			printf("..as article %lu in %s\n",
			       cg->last, cg->name);
		    rc = link(filename, artname);
		    /* save errno at once; later calls may overwrite it */
		    err = (rc < 0) ? errno : 0;
		} while (rc < 0 && err == EEXIST);
		if (err) {
		    mysyslog(LOG_ERR, "error linking %s into %s: %s",
			   filename, p, strerror(err));
		} else {
		    size_t room = sizeof(xrefincase) - used;
		    int w = snprintf(xrefincase + used, room, " %s:%lu",
				     cg->name, cg->last);

		    if (w < 0 || (size_t)w >= room) {
			/* does not fit: drop the partial entry again */
			xrefincase[used] = '\0';
			mysyslog(LOG_ERR, "Xref line too long, omitting %s",
				 cg->name);
		    } else {
			used += (size_t)w;
		    }
		}
	    } else {
		if (verbose > 1)
		    printf(".. discarding unknown group %s\n", p);
	    }
	}
	p = q;
    }
    fprintf(filehandle, "Xref: %s%s\n", fqdn ? fqdn : "nowhere", xrefincase);
}