/* * Copyright (c) 1998,1999,2000 Kazushi (Jam) Marukawa * All rights of my changes are reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice in the documentation and/or other materials provided with * the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $Orig-Id: util.c,v 1.22 1997/07/23 18:35:18 agulbra Exp $ */ /* $Orig-Id: fetch.c,v 1.37 1997/07/20 00:33:38 agulbra Exp $ */ /* Written by Arnt Gulbrandsen and copyright 1995 Troll Tech AS, Postboks 6133 Etterstad, 0602 Oslo, Norway, fax +47 22646949. Use, modification and distribution is allowed without limitation, warranty, or liability of any kind. */ /* This code is derived from only leafnode+ by using same structure of Cornelius's leafnode to prepare for merging with Cornelius's code. */ #ifdef SOCKS #include #endif #include #ifdef BSD #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "leafnode.h" /* build and return an open fd to pseudoart in group */ FILE* buildpseudoart(const char* grp) { FILE* f; char* name; f = tmpfile(); if (!f) return f; if (fqdn != NULL) { name = fqdn; } else { name = "nowhere"; } fprintf(f, "Path: %s\n", name); fprintf(f, "Newsgroups: %s\n", grp); fprintf(f, "From: Leafnode+ \n", name); fprintf(f, "Subject: Leafnode+ placeholder for group %s\n", grp); fprintf(f, "Date: %s\n", rfctime()); fprintf(f, "Message-ID: \n", grp, name); fprintf(f, "\n"); fprintf(f, "This server is running leafnode+, which is a dynamic NNTP proxy\n"); fprintf(f, "This means that it does not retrieve newsgroups unless someone is\n"); fprintf(f, "actively reading them.\n"); fprintf(f, "\n"); fprintf(f, "If you do an operation on a group - such as reading an article,\n"); fprintf(f, "looking at the group table of contents or similar, then leafnode+\n"); fprintf(f, "will go and fetch articles from that group when it next updates.\n"); fprintf(f, "\n"); fprintf(f, "If you see articles in groups you do not read, that is almost\n"); fprintf(f, "always because of cross-posting. These articles do not occupy any\n"); fprintf(f, "more space - they are hard-linked into each newsgroup directory\n"); fprintf(f, "\n"); fprintf(f, "Since you have read this dummy article, leafnode+ will retrieve this"); fprintf(f, "group on the next update. Please look in this group a little later\n"); fprintf(f, "and you should see some articles.\n"); fprintf(f, "\n"); fprintf(f, "If you have any queries about this, please talk to your newsmaster.\n"); fprintf(f, "\n"); fprintf(f, "Leafnode+ can be found at\n"); fprintf(f, "\thttp://www.pobox.com/~jam/leafnode+/\n"); fprintf(f, "\n"); fprintf(f, "\n"); rewind(f); return f; } struct header_info* parse_header(FILE* fp) { struct header_info* p = (struct header_info*)critmalloc(sizeof(struct header_info), "parsing header"); char* l; char** h = 0; int alllen = 0; int len = 0; int lines = 0; int bytes = 0; memset(p, 0, sizeof(struct header_info)); while ((l = getaline(fp)) != NULL && *l && strcmp(l, ".") != 0) { len = strlen(l); /* count lines and bytes */ lines++; bytes += len + 2; /* put all headers except Xref: into p->all */ if ((isspace(*l) && h != &p->xref) || (!isspace(*l) && strncasecmp(l, "Xref: ", 6) != 0)) { p->all = critrealloc(p->all, alllen + len + 2, "Fetching article header"); strcpy(&p->all[alllen], l); p->all[alllen + len] = '\n'; p->all[alllen + len + 1] = '\0'; alllen += len + 1; } /* put each specified header into p->each */ if (isspace(*l)) { /* save it */ if (h) { stripspace(l); len = strlen(l); if (*l) { int hlen = *h ? strlen(*h) : 0; *h = critrealloc(*h, hlen + len + 1, "Fetching article header"); strcpy(&(*h)[hlen], l); } } } else { char* ll = l; if (strncasecmp(l, "Path: ", 6) == 0) { h = &p->path; ll += 6; } else if (strncasecmp(l, "Message-ID: ", 12) == 0) { h = &p->msgid; ll += 12; } else if (strncasecmp(l, "From: ", 6) == 0) { h = &p->from; ll += 6; } else if (strncasecmp(l, "Newsgroups: ", 12) == 0) { h = &p->newsgroups; ll += 12; } else if (strncasecmp(l, "Subject: ", 9) == 0) { h = &p->subject; ll += 9; } else if (strncasecmp(l, "Date: ", 6) == 0) { h = &p->date; ll += 6; } else if (strncasecmp(l, "References: ", 12) == 0) { h = &p->references; ll += 12; } else if (strncasecmp(l, "Lines: ", 7) == 0) { h = &p->lines; ll += 7; } else if (strncasecmp(l, "Bytes: ", 7) == 0) { h = &p->bytes; ll += 7; } else if (strncasecmp(l, "Xref: ", 6) == 0) { h = &p->xref; ll += 6; } else if (strncasecmp(l, "Supersedes: ", 12) == 0) { h = &p->supersedes; ll += 12; } else { h = NULL; } /* second occurance is "other header" */ if (h && *h) h = NULL; /* save it */ if (h) { stripspace(ll); len = strlen(ll); if (*ll) { int hlen = *h ? strlen(*h) : 0; *h = critrealloc(*h, hlen + len + 1, "Fetching article header"); strcpy(&(*h)[hlen], ll); } if (verbose > 4) printf("...saw header %s\n", l); } } } if (l && *l == '\0') { lines++; bytes += 2; } p->n_lines = lines; p->n_bytes = bytes; return p; } struct header_info* parse_all(FILE* fp) { struct header_info* p = parse_header(fp); int lines; int bytes; const char* l; lines = p->n_lines; bytes = p->n_bytes; while ((l = getaline(fp)) != NULL && strcmp(l, ".") != 0) { /* count lines and bytes */ lines++; bytes += strlen(l) + 2; } p->n_lines = lines; p->n_bytes = bytes; return p; } void free_header_info(struct header_info* p) { if (!p) return; if (p->path) free(p->path); if (p->msgid) free(p->msgid); if (p->from) free(p->from); if (p->newsgroups) free(p->newsgroups); if (p->subject) free(p->subject); if (p->date) free(p->date); if (p->references) free(p->references); if (p->lines) free(p->lines); if (p->bytes) free(p->bytes); if (p->xref) free(p->xref); if (p->supersedes) free(p->supersedes); if (p->all) free(p->all); free(p); } /* * Estimate correct number for year from any representations */ static int estimate_year(int year) { if (year > 999) { /* Assume all 4 or more digits forms are correct */ return year; } else if (year >= 100) { /* Assume all 3 digits forms are caused by mis-use of struct tm. */ return year + 1900; } else { /* Assume all 2 digits forms are used around current year */ /* Assume next 5 years or last 95 years are represented in this form */ time_t now; struct tm local; int y; /* get local and Greenwich times */ now = time(0); local = *(localtime(&now)); /* get next 5 year in 2 digits form */ y = (local.tm_year + 1905) % 100; /* estimate year */ if (year > y) { year += (local.tm_year + 1905 - 100) / 100 * 100; } else { year += (local.tm_year + 1905) / 100 * 100; } return year; } } static int age(const char* date) { char monthname[11]; static int datesofmonth[12] = { 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }; int month; int year; int day; const char* d; time_t tmp; struct tm gmt; if (!date) return 1000; /* large number: OLD */ d = skipspaces((char*)date); #ifndef NOTSTRICTDATEFORMAT /* * XXX: Leafnode+ doesn't support "Monday," format since * it is not suitable with RFC. */ if (isalpha(d[0]) && isalpha(d[1]) && isalpha(d[2]) && d[3] == ',' && isspace(d[4])) d += 5; /* skip "XXX," as day of the week */ #else /* Skip all XXXXXX, stuff. */ { char* tmp; tmp = strchr(d, ','); if (tmp != NULL) d = tmp + 1; } #endif monthname[0] = '\0'; if (sscanf(d, "%d %10s %d", &day, monthname, &year) < 3) return 1003; /* Adjust year. */ year = estimate_year(year); /* Check year, month, dates, etc. */ #ifndef NOTSTRICTDATEFORMAT if (strlen(monthname) != 3) return 1005; #endif if (!strncasecmp(monthname, "jan", 3)) month = 0; else if (!strncasecmp(monthname, "feb", 3)) month = 1; else if (!strncasecmp(monthname, "mar", 3)) month = 2; else if (!strncasecmp(monthname, "apr", 3)) month = 3; else if (!strncasecmp(monthname, "may", 3)) month = 4; else if (!strncasecmp(monthname, "jun", 3)) month = 5; else if (!strncasecmp(monthname, "jul", 3)) month = 6; else if (!strncasecmp(monthname, "aug", 3)) month = 7; else if (!strncasecmp(monthname, "sep", 3)) month = 8; else if (!strncasecmp(monthname, "oct", 3)) month = 9; else if (!strncasecmp(monthname, "nov", 3)) month = 10; else if (!strncasecmp(monthname, "dec", 3)) month = 11; else return 1001; tmp = time(0); gmt = *(gmtime(&tmp)); /* * This calculation is not exactly correct */ return (((gmt.tm_year + 1900) - year) * 365 + (datesofmonth[gmt.tm_mon] - datesofmonth[month]) + (gmt.tm_mday - day)); } int storep(unsigned long artno, const struct header_info* hi) { int i; struct stat st; if (hi->msgid == NULL || *hi->msgid == '\0') { if (verbose) printf("discarding %lu: no message-id found\n", artno); return 0; } else if (hi->from == NULL) { if (verbose) printf("discarding %lu: no from found\n", artno); return 0; } /* check message-id */ if (stat(getmsgidfname(hi->msgid), &st) == 0) { /* find an article same message-id */ if (verbose > 1) printf("discarding %lu: it is in local spool: %s\n", artno, hi->msgid); return 0; } /* check subject */ if (hi->subject && (killsubject.num > 0 || killsubjecti.num > 0)) { char* p = skipspaces(skipre(skipspaces(hi->subject))); if (killsubject.num > 0) { static struct patterns pats = { 0, 0, 0 }; if (pats.num == 0) { initpatterns(&pats, &killsubject, 0); } if ((i = matchpatterns(&pats, p)) >= 0) { if (verbose) printf("discarding %lu: it has subject line matches " "with '%s'\n", artno, killsubject.strarray[i]); return 0; } } if (killsubjecti.num > 0) { static struct patterns pats = { 0, 0, 0 }; if (pats.num == 0) { initpatterns(&pats, &killsubjecti, 1); } if ((i = matchpatterns(&pats, p)) >= 0) { if (verbose) printf("discarding %lu: it has subject line matches " "with '%s'\n", artno, killsubjecti.strarray[i]); return 0; } } } /* check from */ if (hi->from && killfrom.num > 0) { static struct patterns pats = { 0, 0, 0 }; if (pats.num == 0) { initpatterns(&pats, &killfrom, 0); } if ((i = matchpatterns(&pats, hi->from)) >= 0) { if (verbose) printf("discarding %lu: it has from line matches " "with '%s'\n", artno, killfrom.strarray[i]); return 0; } } if (hi->from && killfromaddress.num > 0) { static struct string a = { 0, 0 }; char* cp; static struct patterns pats = { 0, 0, 0 }; if (pats.num == 0) { initpatterns(&pats, &killfromaddress, 0); } setstring(&a, hi->from); cp = a.str; while (*cp && *cp <= ' ') cp++; if (*cp == '(') { for (cp++; *cp; cp++) { if (*cp == ')') { cp++; break; } } while (*cp && *cp <= ' ') cp++; } for (i = 0; cp[i]; i++) { if (cp[i] == '<') { cp += i + 1; for (i = 0; cp[i]; i++) { if (cp[i] == '>') break; } break; } if (cp[i] == '(') break; } while (i > 0 && cp[i - 1] <= ' ') i--; cp[i] = '\0'; if (i > 0) { if ((i = matchpatterns(&pats, cp)) >= 0) { if (verbose) printf("discarding %lu: it has from line matches " "with '%s'\n", artno, killfromaddress.strarray[i]); return 0; } } } /* check date */ if (hi->date && (i = age(hi->date)) > maxold) { if (verbose || i > 1000) printf("discarding %lu: it is posted more than %d days ago: %s\n", artno, i, hi->date); return 0; } /* check references */ /* check bytes */ if (hi->bytes) { i = strtol(hi->bytes, NULL, 10); if (maxbytes > 0 && i > maxbytes) { if (verbose) printf("discarding %lu: it has %d bytes\n", artno, i); return 0; } } /* check lines */ if (hi->lines) { i = strtol(hi->lines, NULL, 10); if (maxlines > 0 && i > maxlines) { if (verbose) printf("discarding %lu: it has %d lines\n", artno, i); return 0; } if (minlines > 0 && i < minlines) { if (verbose) printf("discarding %lu: it has %d lines\n", artno, i); return 0; } } /* xref is optional field, so check it if it is */ if (hi->xref && *hi->xref) { const char* p = hi->xref; i = -1; /* skip "Xref:" part */ while ((p = strchr(p, ':')) != NULL) { p++; i++; } if (maxgroups > 0 && i > maxgroups) { if (verbose) printf("discarding %lu: it is posted in %d news groups\n", artno, i); return 0; } } return 1; } void store(const char* filename, FILE* filehandle, char* newsgroups, const struct header_info* hi) { char tmp[10]; char xrefincase[4096]; /* 1024 for newsgroups, plus article numbers */ char* p; char* q; char* x; int n; static struct newsgroup* cg = NULL; x = xrefincase; n = 0; if (verbose == 3) printf("storing %s\n", hi->msgid); else if (verbose > 3) printf("storing %s: %s\n", hi->msgid, hi->newsgroups); p = newsgroups; while (p && *p) { n++; q = strchr(p, ','); if (q) *q++ = '\0'; if (*p) { if (cg == 0 || strcmp(cg->name, p) != 0) { cg = findgroup(p); } if (cg) { if (chdirgroup(cg, 1) == 0) { mysyslog(LOG_ERR, "chdir to %s: %s", p, strerror(errno)); } } if (cg) { do { sprintf(tmp, "%lu", ++cg->last); errno = 0; if (verbose > 2) printf("..as article %lu in %s\n", cg->last, cg->name); } while (link(filename, tmp) < 0 && errno == EEXIST); if (errno) { mysyslog(LOG_ERR, "error linking %s into %s: %s", filename, p, strerror(errno)); } else { sprintf(x, " %s:%lu", cg->name, cg->last); x += strlen(x); } } else { if (verbose > 1) printf(".. discarding unknown group %s\n", p); } } p = q; } fprintf(filehandle, "Xref: %s%s\n", fqdn ? fqdn : "nowhere", xrefincase); }