/*
* Copyright (c) 1998,1999,2000 Kazushi (Jam) Marukawa
* All rights of my changes are reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice in the documentation and/or other materials provided with
* the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* $Orig-Id: util.c,v 1.22 1997/07/23 18:35:18 agulbra Exp $ */
/* $Orig-Id: fetch.c,v 1.37 1997/07/20 00:33:38 agulbra Exp $ */
/*
Written by Arnt Gulbrandsen <agulbra@troll.no> and copyright 1995
Troll Tech AS, Postboks 6133 Etterstad, 0602 Oslo, Norway, fax +47
22646949.
Use, modification and distribution is allowed without limitation,
warranty, or liability of any kind. */
/*
This code is derived solely from leafnode+, but is laid out with the same
structure as Cornelius's leafnode in preparation for merging with
Cornelius's code.
*/
#ifdef SOCKS
#include <socks.h>
#endif
#include <sys/types.h>
#ifdef BSD
#include <sys/errno.h>
#endif
#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <pwd.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include "leafnode.h"
/*
 * Build the placeholder ("pseudo") article for group grp.
 *
 * The article is written into an anonymous temporary file created with
 * tmpfile() and the stream is rewound before it is returned, so the
 * caller can read the article from the beginning.
 *
 * grp     name of the newsgroup, inserted into the Newsgroups:, Subject:
 *         and Message-ID: headers
 * returns the open stream, or NULL if tmpfile() failed
 *
 * The global fqdn supplies the host name; "nowhere" is used when it is
 * not set.
 */
FILE* buildpseudoart(const char* grp)
{
    /* Fixed article body; it contains no conversion specifications. */
    static const char body[] =
        "\n"
        "This server is running leafnode+, which is a dynamic NNTP proxy\n"
        "This means that it does not retrieve newsgroups unless someone is\n"
        "actively reading them.\n"
        "\n"
        "If you do an operation on a group - such as reading an article,\n"
        "looking at the group table of contents or similar, then leafnode+\n"
        "will go and fetch articles from that group when it next updates.\n"
        "\n"
        "If you see articles in groups you do not read, that is almost\n"
        "always because of cross-posting. These articles do not occupy any\n"
        "more space - they are hard-linked into each newsgroup directory\n"
        "\n"
        /* this line used to lack its newline, producing "thisgroup" */
        "Since you have read this dummy article, leafnode+ will retrieve this\n"
        "group on the next update. Please look in this group a little later\n"
        "and you should see some articles.\n"
        "\n"
        "If you have any queries about this, please talk to your newsmaster.\n"
        "\n"
        "Leafnode+ can be found at\n"
        "\thttp://www.pobox.com/~jam/leafnode+/\n"
        "\n"
        "\n";
    const char* name;
    FILE* f;

    f = tmpfile();
    if (!f)
        return f;

    name = (fqdn != NULL) ? fqdn : "nowhere";

    /* header */
    fprintf(f, "Path: %s\n", name);
    fprintf(f, "Newsgroups: %s\n", grp);
    fprintf(f, "From: Leafnode+ <nobody@%s>\n", name);
    fprintf(f, "Subject: Leafnode+ placeholder for group %s\n", grp);
    fprintf(f, "Date: %s\n", rfctime());
    fprintf(f, "Message-ID: <leafnode:placeholder:%s@%s>\n", grp, name);

    /* separator line and body */
    fputs(body, f);

    rewind(f);
    return f;
}
/*
 * Read an article header from fp and return it in a newly allocated
 * struct header_info (release it with free_header_info()).
 *
 * Lines are read with getaline() until end of input, an empty line or a
 * line consisting of a single ".".
 *
 *  - Every header line except Xref: (and its continuation lines) is
 *    appended, newline-terminated, to p->all.
 *  - The first occurrence of each recognised header (Path, Message-ID,
 *    From, Newsgroups, Subject, Date, References, Lines, Bytes, Xref,
 *    Supersedes) is stored, without its "Name: " prefix and after
 *    stripspace(), in the matching field.  Continuation lines are
 *    appended to the field of the header they continue.
 *  - p->n_lines / p->n_bytes count the lines read and their length plus
 *    two bytes per line; the terminating empty line is counted as well.
 *
 * Character classification casts to unsigned char: header bytes >= 0x80
 * are negative where plain char is signed, and passing such values to
 * isspace() is undefined behaviour.
 */
struct header_info* parse_header(FILE* fp)
{
    struct header_info* p =
        (struct header_info*)critmalloc(sizeof(struct header_info),
                                        "parsing header");
    char* l;
    char** h = 0;       /* field being filled by the current header */
    int alllen = 0;     /* length of p->all, excluding the final NUL */
    int len = 0;
    int lines = 0;
    int bytes = 0;

    memset(p, 0, sizeof(struct header_info));

    while ((l = getaline(fp)) != NULL && *l && strcmp(l, ".") != 0) {
        /* a line starting with whitespace continues the previous header;
         * evaluated once, before stripspace() may modify the line */
        int continuation = isspace((unsigned char)*l);

        len = strlen(l);

        /* count lines and bytes */
        lines++;
        bytes += len + 2;

        /* put all headers except Xref: into p->all */
        if ((continuation && h != &p->xref) ||
            (!continuation && strncasecmp(l, "Xref: ", 6) != 0)) {
            p->all = critrealloc(p->all, alllen + len + 2,
                                 "Fetching article header");
            strcpy(&p->all[alllen], l);
            p->all[alllen + len] = '\n';
            p->all[alllen + len + 1] = '\0';
            alllen += len + 1;
        }

        /* put each specified header into its own field */
        if (continuation) {
            /* append to the header currently being collected, if any */
            if (h) {
                stripspace(l);
                len = strlen(l);
                if (*l) {
                    int hlen = *h ? strlen(*h) : 0;
                    *h = critrealloc(*h, hlen + len + 1,
                                     "Fetching article header");
                    strcpy(&(*h)[hlen], l);
                }
            }
        } else {
            char* ll = l;   /* start of the header value */

            if (strncasecmp(l, "Path: ", 6) == 0) {
                h = &p->path;
                ll += 6;
            } else if (strncasecmp(l, "Message-ID: ", 12) == 0) {
                h = &p->msgid;
                ll += 12;
            } else if (strncasecmp(l, "From: ", 6) == 0) {
                h = &p->from;
                ll += 6;
            } else if (strncasecmp(l, "Newsgroups: ", 12) == 0) {
                h = &p->newsgroups;
                ll += 12;
            } else if (strncasecmp(l, "Subject: ", 9) == 0) {
                h = &p->subject;
                ll += 9;
            } else if (strncasecmp(l, "Date: ", 6) == 0) {
                h = &p->date;
                ll += 6;
            } else if (strncasecmp(l, "References: ", 12) == 0) {
                h = &p->references;
                ll += 12;
            } else if (strncasecmp(l, "Lines: ", 7) == 0) {
                h = &p->lines;
                ll += 7;
            } else if (strncasecmp(l, "Bytes: ", 7) == 0) {
                h = &p->bytes;
                ll += 7;
            } else if (strncasecmp(l, "Xref: ", 6) == 0) {
                h = &p->xref;
                ll += 6;
            } else if (strncasecmp(l, "Supersedes: ", 12) == 0) {
                h = &p->supersedes;
                ll += 12;
            } else {
                h = NULL;
            }

            /* a second occurrence is treated like any "other header" */
            if (h && *h)
                h = NULL;

            /* save it */
            if (h) {
                stripspace(ll);
                len = strlen(ll);
                if (*ll) {
                    int hlen = *h ? strlen(*h) : 0;
                    *h = critrealloc(*h, hlen + len + 1,
                                     "Fetching article header");
                    strcpy(&(*h)[hlen], ll);
                }
                if (verbose > 4)
                    printf("...saw header %s\n", l);
            }
        }
    }

    /* the empty line separating header and body counts too */
    if (l && *l == '\0') {
        lines++;
        bytes += 2;
    }
    p->n_lines = lines;
    p->n_bytes = bytes;
    return p;
}
/*
 * Parse the header of the article on fp with parse_header(), then read
 * the remaining lines up to end of input or a line consisting of a
 * single ".", adding them to the line and byte totals.
 *
 * Each remaining line counts as one line and as its length plus two
 * bytes.  Returns the header_info produced by parse_header() with
 * n_lines and n_bytes covering the whole article.
 */
struct header_info* parse_all(FILE* fp)
{
    struct header_info* hdr = parse_header(fp);
    int nlines = hdr->n_lines;
    int nbytes = hdr->n_bytes;
    const char* text;

    for (;;) {
        text = getaline(fp);
        if (text == NULL || strcmp(text, ".") == 0)
            break;
        /* count lines and bytes */
        nlines += 1;
        nbytes += strlen(text) + 2;
    }

    hdr->n_lines = nlines;
    hdr->n_bytes = nbytes;
    return hdr;
}
/*
 * Release a header_info together with every header string it holds.
 * A NULL argument is accepted and ignored.
 */
void free_header_info(struct header_info* p)
{
    if (p == NULL)
        return;

    /* free(NULL) is a no-op, so unset fields need no special casing */
    free(p->path);
    free(p->msgid);
    free(p->from);
    free(p->newsgroups);
    free(p->subject);
    free(p->date);
    free(p->references);
    free(p->lines);
    free(p->bytes);
    free(p->xref);
    free(p->supersedes);
    free(p->all);

    free(p);
}
/*
 * Turn a year written in any of the usual forms into a full year number.
 *
 *   4 or more digits  taken as already correct
 *   3 digits          taken as a struct tm style "years since 1900"
 *   2 digits or less  mapped into the window that ends 5 years after the
 *                     current local year and starts 95 years before it
 */
static int estimate_year(int year)
{
    time_t clock_now;
    struct tm* lt;
    int horizon;    /* last year of the two-digit window */
    int century;

    if (year > 999)
        return year;
    if (year >= 100)
        return year + 1900;

    clock_now = time(0);
    lt = localtime(&clock_now);
    horizon = lt->tm_year + 1905;

    /* two-digit values beyond the horizon belong to the previous century */
    century = (year > horizon % 100) ? (horizon - 100) / 100 * 100
                                     : horizon / 100 * 100;
    return year + century;
}
/*
 * Return the approximate age, in days, of an article whose Date: header
 * value is date ("[Www, ]DD Mon YYYY ...").
 *
 * The result is the difference between today's (GMT) day-of-year and the
 * article's, plus 365 days per year of difference; leap days and the
 * time of day are ignored, so it is an estimate only.
 *
 * Values that cannot be interpreted yield a large number so that callers
 * treat the article as old:
 *   1000  date is NULL
 *   1001  unknown month name
 *   1003  day / month / year could not be scanned
 *   1005  month name is not exactly three characters (strict mode only)
 */
static int age(const char* date)
{
    /* days of a non-leap year that precede the first day of each month */
    static const int daysbeforemonth[12] = {
        0, 31, 59, 90, 120, 151,
        181, 212, 243, 273, 304, 334
    };
    static const char* const monthnames[12] = {
        "jan", "feb", "mar", "apr", "may", "jun",
        "jul", "aug", "sep", "oct", "nov", "dec"
    };
    char monthname[11];
    int month;
    int year;
    int day;
    const char* d;
    time_t now;
    struct tm gmt;

    if (!date)
        return 1000;            /* large number: OLD */

    d = skipspaces((char*)date);

#ifndef NOTSTRICTDATEFORMAT
    /*
     * Only the three-letter "Www, " day-of-week prefix is accepted;
     * spelled-out names such as "Monday," are not skipped here.
     * The && chain stops at the first mismatch, so a short string is
     * never read past its terminating NUL.
     */
    if (isalpha((unsigned char)d[0]) && isalpha((unsigned char)d[1]) &&
        isalpha((unsigned char)d[2]) &&
        d[3] == ',' && isspace((unsigned char)d[4]))
        d += 5;                 /* skip "XXX," as day of the week */
#else
    /* Skip everything up to and including the first comma. */
    {
        const char* comma = strchr(d, ',');
        if (comma != NULL)
            d = comma + 1;
    }
#endif

    monthname[0] = '\0';
    if (sscanf(d, "%d %10s %d", &day, monthname, &year) < 3)
        return 1003;

    /* Adjust year. */
    year = estimate_year(year);

    /* Check year, month, dates, etc. */
#ifndef NOTSTRICTDATEFORMAT
    if (strlen(monthname) != 3)
        return 1005;
#endif
    for (month = 0; month < 12; month++) {
        if (strncasecmp(monthname, monthnames[month], 3) == 0)
            break;
    }
    if (month == 12)
        return 1001;

    now = time(0);
    gmt = *(gmtime(&now));

    /*
     * This calculation is not exactly correct: leap years are ignored.
     */
    return (((gmt.tm_year + 1900) - year) * 365 +
            (daysbeforemonth[gmt.tm_mon] - daysbeforemonth[month]) +
            (gmt.tm_mday - day));
}
/*
 * Decide whether the article described by hi should be stored.
 *
 * artno  article number, used only in the diagnostic messages
 * hi     parsed header fields of the article
 *
 * Returns 1 if the article passes every filter below and 0 as soon as
 * one of them rejects it.  A rejection is reported on stdout depending
 * on the global verbose level.  The filters, in order:
 *   - Message-ID missing or empty, or From missing
 *   - a file for this Message-ID already exists (stat succeeds)
 *   - Subject matches killsubject / killsubjecti
 *   - From matches killfrom, or its address part matches killfromaddress
 *   - age(Date) exceeds maxold
 *   - Bytes exceeds maxbytes; Lines is outside minlines..maxlines
 *   - the group count derived from Xref exceeds maxgroups
 * The size and group limits are only applied when they are > 0.
 *
 * The compiled pattern sets are kept in function-local statics and are
 * built on the first call that needs them (while pats.num is still 0).
 */
int storep(unsigned long artno, const struct header_info* hi)
{
int i;
struct stat st;
/* an article without Message-ID or From is never stored */
if (hi->msgid == NULL || *hi->msgid == '\0') {
if (verbose)
printf("discarding %lu: no message-id found\n", artno);
return 0;
} else if (hi->from == NULL) {
if (verbose)
printf("discarding %lu: no from found\n", artno);
return 0;
}
/* check message-id */
if (stat(getmsgidfname(hi->msgid), &st) == 0) {
/* an article with the same message-id is already present */
if (verbose > 1)
printf("discarding %lu: it is in local spool: %s\n", artno, hi->msgid);
return 0;
}
/* check subject */
if (hi->subject && (killsubject.num > 0 || killsubjecti.num > 0)) {
/* match against the subject after skipspaces/skipre/skipspaces;
 * presumably this drops a leading "Re:" -- confirm in skipre() */
char* p = skipspaces(skipre(skipspaces(hi->subject)));
if (killsubject.num > 0) {
static struct patterns pats = { 0, 0, 0 };
if (pats.num == 0) {
initpatterns(&pats, &killsubject, 0);
}
/* a result >= 0 is used as the index of the matching pattern string */
if ((i = matchpatterns(&pats, p)) >= 0) {
if (verbose)
printf("discarding %lu: it has subject line matches "
"with '%s'\n", artno, killsubject.strarray[i]);
return 0;
}
}
if (killsubjecti.num > 0) {
static struct patterns pats = { 0, 0, 0 };
if (pats.num == 0) {
/* NOTE(review): third argument is 1 here and 0 elsewhere; presumably
 * it selects case-insensitive matching -- confirm in initpatterns() */
initpatterns(&pats, &killsubjecti, 1);
}
if ((i = matchpatterns(&pats, p)) >= 0) {
if (verbose)
printf("discarding %lu: it has subject line matches "
"with '%s'\n", artno, killsubjecti.strarray[i]);
return 0;
}
}
}
/* check from: the whole From line */
if (hi->from && killfrom.num > 0) {
static struct patterns pats = { 0, 0, 0 };
if (pats.num == 0) {
initpatterns(&pats, &killfrom, 0);
}
if ((i = matchpatterns(&pats, hi->from)) >= 0) {
if (verbose)
printf("discarding %lu: it has from line matches "
"with '%s'\n", artno, killfrom.strarray[i]);
return 0;
}
}
/* check from: only the address part, extracted into the scratch string a */
if (hi->from && killfromaddress.num > 0) {
static struct string a = { 0, 0 };
char* cp;
static struct patterns pats = { 0, 0, 0 };
if (pats.num == 0) {
initpatterns(&pats, &killfromaddress, 0);
}
/* work on a.str rather than hi->from, since the text is cut below
 * (presumably setstring() copies its argument -- confirm) */
setstring(&a, hi->from);
cp = a.str;
/* skip leading blanks and control characters.
 * NOTE(review): where plain char is signed, bytes >= 0x80 also
 * compare <= ' ' and are skipped as well -- confirm this is intended */
while (*cp && *cp <= ' ')
cp++;
/* skip a leading "(...)" group and the blanks that follow it */
if (*cp == '(') {
for (cp++; *cp; cp++) {
if (*cp == ')') {
cp++;
break;
}
}
while (*cp && *cp <= ' ')
cp++;
}
/* either take the text between '<' and '>', or, when a '(' comes
 * first (or there is no '<' at all), the text up to that '(' or to
 * the end of the string; afterwards cp[0..i) is the address */
for (i = 0; cp[i]; i++) {
if (cp[i] == '<') {
cp += i + 1;
for (i = 0; cp[i]; i++) {
if (cp[i] == '>')
break;
}
break;
}
if (cp[i] == '(')
break;
}
/* trim trailing blanks and terminate the address */
while (i > 0 && cp[i - 1] <= ' ')
i--;
cp[i] = '\0';
/* an empty address is not matched at all */
if (i > 0) {
if ((i = matchpatterns(&pats, cp)) >= 0) {
if (verbose)
printf("discarding %lu: it has from line matches "
"with '%s'\n", artno, killfromaddress.strarray[i]);
return 0;
}
}
}
/* check date; the message is printed regardless of verbose when the
 * value returned by age() is above 1000 */
if (hi->date && (i = age(hi->date)) > maxold) {
if (verbose || i > 1000)
printf("discarding %lu: it is posted more than %d days ago: %s\n",
artno, i, hi->date);
return 0;
}
/* check references: no check is implemented */
/* check bytes */
if (hi->bytes) {
i = strtol(hi->bytes, NULL, 10);
if (maxbytes > 0 && i > maxbytes) {
if (verbose)
printf("discarding %lu: it has %d bytes\n",
artno, i);
return 0;
}
}
/* check lines, against both the upper and the lower limit */
if (hi->lines) {
i = strtol(hi->lines, NULL, 10);
if (maxlines > 0 && i > maxlines) {
if (verbose)
printf("discarding %lu: it has %d lines\n",
artno, i);
return 0;
}
if (minlines > 0 && i < minlines) {
if (verbose)
printf("discarding %lu: it has %d lines\n",
artno, i);
return 0;
}
}
/* xref is an optional field, so it is only checked when present */
if (hi->xref && *hi->xref) {
const char* p = hi->xref;
/* i ends up as the number of ':' characters minus one.
 * NOTE(review): the -1 is meant to discount the colon of an "Xref:"
 * prefix; confirm callers pass the field with that prefix, since
 * parse_header() in this file stores xref without it */
i = -1; /* skip "Xref:" part */
while ((p = strchr(p, ':')) != NULL) {
p++;
i++;
}
if (maxgroups > 0 && i > maxgroups) {
if (verbose)
printf("discarding %lu: it is posted in %d news groups\n",
artno, i);
return 0;
}
}
return 1;
}
/*
 * Link an article file into every known group it was posted to and write
 * the resulting Xref: header line to filehandle.
 *
 * filename    existing article file that is hard-linked into the groups
 * filehandle  stream that receives the generated "Xref: ..." line
 * newsgroups  comma separated list of group names; MODIFIED IN PLACE
 *             (each comma is overwritten with a NUL)
 * hi          parsed header, used only for the verbose messages
 *
 * For every group found with findgroup(), chdirgroup() is called and the
 * file is linked under the next free article number (cg->last is
 * incremented until link() no longer fails with EEXIST).  Groups that
 * are unknown, or for which linking fails, are left out of the Xref
 * line; link failures are reported through mysyslog().
 *
 * The most recently used group is cached in a function-local static.
 */
void store(const char* filename,
           FILE* filehandle,
           char* newsgroups,
           const struct header_info* hi)
{
    char tmp[32];           /* article number as text; fits any %lu value */
    char xrefincase[4096];  /* 1024 for newsgroups, plus article numbers */
    size_t used = 0;        /* bytes of xrefincase in use, excluding NUL */
    char* p;
    char* q;
    static struct newsgroup* cg = NULL;

    /* must be a valid (empty) string even if no link succeeds */
    xrefincase[0] = '\0';

    if (verbose == 3)
        printf("storing %s\n", hi->msgid);
    else if (verbose > 3)
        printf("storing %s: %s\n", hi->msgid, hi->newsgroups);

    for (p = newsgroups; p && *p; p = q) {
        int linkerr;
        int written;

        /* cut the next group name out of the list */
        q = strchr(p, ',');
        if (q)
            *q++ = '\0';
        if (!*p)
            continue;

        if (cg == NULL || strcmp(cg->name, p) != 0)
            cg = findgroup(p);
        if (!cg) {
            if (verbose > 1)
                printf(".. discarding unknown group %s\n", p);
            continue;
        }

        /* NOTE(review): a zero return is reported as a failure here, as
         * in the original code -- confirm against chdirgroup() */
        if (chdirgroup(cg, 1) == 0) {
            mysyslog(LOG_ERR, "chdir to %s: %s", p, strerror(errno));
        }

        /* take the next article number that is not in use yet */
        do {
            snprintf(tmp, sizeof tmp, "%lu", ++cg->last);
            if (verbose > 2)
                printf("..as article %lu in %s\n",
                       cg->last, cg->name);
            /* judge success by link()'s own result: errno may have been
             * changed by the printf above even when link() succeeds */
            linkerr = (link(filename, tmp) < 0) ? errno : 0;
        } while (linkerr == EEXIST);

        if (linkerr) {
            mysyslog(LOG_ERR, "error linking %s into %s: %s",
                     filename, p, strerror(linkerr));
            continue;
        }

        written = snprintf(xrefincase + used, sizeof xrefincase - used,
                           " %s:%lu", cg->name, cg->last);
        if (written < 0 || (size_t)written >= sizeof xrefincase - used) {
            /* entry does not fit: drop it rather than overrun the buffer */
            xrefincase[used] = '\0';
            mysyslog(LOG_ERR, "Xref line too long, omitting %s:%lu",
                     cg->name, cg->last);
        } else {
            used += (size_t)written;
        }
    }

    fprintf(filehandle, "Xref: %s%s\n", fqdn ? fqdn : "nowhere", xrefincase);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */