/* * Copyright (c) 1998,1999,2000 Kazushi (Jam) Marukawa * All rights of my changes are reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice in the documentation and/or other materials provided with * the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $Orig-Id: fetch.c,v 1.37 1997/07/20 00:33:38 agulbra Exp $ Written by Arnt Gulbrandsen and copyright 1995 Troll Tech AS, Postboks 6133 Etterstad, 0602 Oslo, Norway, fax +47 22646949. Use, modification and distribution is allowed without limitation, warranty, or liability of any kind. */ #ifdef SOCKS #include #endif #include #ifdef BSD #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "leafnode.h" time_t now; int extraarticles = 0; int writeserver_eachgroup = 0; unsigned long getnewsgroup(struct newsgroup*, unsigned long); void fixxover(void); static void checkgroups(void) { DIR* d; struct dirent* de; struct newsgroup* g; static struct newsgroup** stufftoget = NULL; static int stufftogetsize = 0; int window; int last; int outstanding; const char* s = getinterestingdname(); d = opendir(s); if (!d) { mysyslog(LOG_ERR, "opendir %s: %s", s, strerror(errno)); return; } last = 0; while ((de = readdir(d))) { if (de->d_name[0] != '.') { /* retrive articles from specified news groups only */ if (servers->newsgroups.num > 0) { /* should we feed this news group? */ int i; for (i = 0; i < servers->newsgroups.num; i++) { if (myfnmatch(servers->newsgroups.strarray[i], de->d_name) == 0) break; } /* no, skip this news group. */ if (i >= servers->newsgroups.num) { if (verbose > 1) printf("skip %s since it isn't in newsgroups list\n", de->d_name); continue; } } if (servers->filteredngs.num > 0) { /* should we filter this news group? */ int i; for (i = 0; i < servers->filteredngs.num; i++) { if (myfnmatch(servers->filteredngs.strarray[i], de->d_name) == 0) break; } /* yes, skip this news group. */ if (i < servers->filteredngs.num) { if (verbose > 1) printf("skip %s since it is in filteredngs list\n", de->d_name); continue; } } /* yes, going on retrieving. */ /* mark each news group to retrive articles */ g = findgroup(de->d_name); if (g) { if (last >= stufftogetsize) { stufftogetsize += 128; stufftoget = (struct newsgroup**) realloc(stufftoget, stufftogetsize * sizeof(struct newsgroup*)); if (stufftoget == NULL) { mysyslog(LOG_ERR, "allocating newsgroup buffer %s: %s", s, strerror(errno)); return; } } /* marked */ stufftoget[last++] = g; } } } closedir(d); /* * check new articles number for each marked news group by using * GROUP command asynchronously. */ window = 0; outstanding = 0; while (window < last || outstanding) { while (outstanding < 47 && window < last) { if (window < last) { outstanding++; if (verbose > 1) printf("GROUP %s (%d up in the air)\n", stufftoget[window]->name, outstanding); sprintf(lineout, "GROUP %s\r\n", stufftoget[window++]->name); putaline(); } } if (outstanding > 0) { int n; unsigned long count; unsigned long first; unsigned long last; char newsgroup[512 + 1]; char *l; l = getaline(nntpin); if (!l) return; g = stufftoget[window - outstanding]; outstanding--; if (sscanf(l, "%3d %lu %lu %lu %512s", &n, &count, &first, &last, newsgroup) < 5 || n != 211 || strcmp(newsgroup, g->name) != 0) { if (n == 480) printf("%s: need authentication, %s\n", g->name, l); if (verbose > 1) printf("%s: ignore errors, %s\n", g->name, l); continue; /* simply ignore error messages */ } if ((first == 0 && last == 0) || last < first || count == 0) { if (verbose > 1) printf("%s: mark to not read\n", g->name); ; /* no articles on server */ continue; } if (g->server <= last) { if (verbose > 1) printf("%s: mark to read\n", g->name); g->newarticles = 1; /* marked to retrive new articles */ continue; } if (extraarticles) { int i = g->server - extraarticles; if (g->server < extraarticles) i = 0; if (i < last) { if (verbose > 1) printf("%s: mark to read\n", g->name); g->newarticles = 1; /* marked to retrive new articles */ continue; } } } } } unsigned long getnewsgroup(struct newsgroup* g, unsigned long server) { int n; unsigned long last; unsigned long window; /* last ARTICLE n command sent */ unsigned long art; char* l; FILE* f; const char* c; struct stat st; int outstanding; static char* stufftoget; struct header_info* hi; if (!g) return server; sprintf(lineout, "GROUP %s\r\n", g->name); putaline(); l = getaline(nntpin); if (!l) return server; if (sscanf(l, "%3d %lu %lu %lu ", &n, &art, &window, &last) < 4 || n != 211) return server; if (extraarticles && server != 0) { int i; i = server - extraarticles; if (server < extraarticles || i < window) i = window; if (verbose > 1) printf("backing up from %lu to %d\n", server, i); server = i; } if (window == 0 && last == 0) { /* there is no article */ } else if (server > last + 1) { mysyslog(LOG_INFO, "last seen article was %lu, server now has %lu-%lu", server, window, last); #if 1 if (server > last + 20) { if (verbose) printf("switched upstream servers? %lu > %lu\n", server - 1, last); server = window; /* insane - recover thoroughly */ } else { if (verbose) printf("rampant spam cancel? %lu > %lu\n", server - 1, last); server = last > 20 ? last - 20 : 1; /* a little bit too much */ } #else server = last + 1; #endif } if (initiallimit && server == 0 && last - server > initiallimit) { if (verbose) printf("skipping articles %lu-%lu inclusive (initial limit)\n", server, last-initiallimit - 1); server = last - initiallimit; } if (artlimit && last-server > artlimit) { if (verbose) printf("skipping articles %lu-%lu inclusive (article limit)\n", server, last-artlimit - 1); server = last - artlimit; } c = getinterestingngfname(g); if (stat(c, &st) < 0) return last; /* race condition: I hope this is proper */ if (server <= last && ((timeout_short > 0 && st.st_atime == st.st_mtime && now - (timeout_short * 86400) > st.st_mtime) || (timeout_long > 0 && now - (timeout_long * 86400) > st.st_atime))) { if (verbose) { printf("skipping %s from now on\n", g->name); if (timeout_short > 0 && st.st_atime == st.st_mtime && now - (timeout_short * 86400) > st.st_mtime) printf("since now %lu - %lu > %lu\n", now, timeout_short * 86400, st.st_mtime); if (timeout_long > 0 && now - (timeout_long * 86400) > st.st_atime) printf("since now %lu - %lu > %lu\n", now, timeout_long * 86400, st.st_atime); } mysyslog(LOG_INFO, "skip %s: %lu, %lu", g->name, server, last); unlink(c); g->last++; /* increment last to let users know */ return server; } if (window == 0 && last == 0) { if (verbose > 1) printf("%s: no new articles\n", g->name); return server; } if (window < server) window = server; if (window < 1) window = 1; server = window; if (server <= last) { if (verbose) printf("%s: considering articles %lu-%lu\n", g->name, server, last); sprintf(lineout, "XOVER %lu-%lu\r\n", server, last); putaline(); if (nntpreply() == 224) { stufftoget = (char*)realloc(stufftoget, last + 1 - server); if (stufftoget) memset(stufftoget, 0, last + 1 - server); while ((l = getaline(nntpin)) && strcmp(l, ".")) { char* p; unsigned long art; char* xover[10]; /* xover[9] is used to keep rest */ struct header_info hi; int i; memset(&hi, 0, sizeof(struct header_info)); /* if array cannot be allocated, drain all inputs */ if (!stufftoget) continue; /* * Whole line is following format. * ART<\t>SUBJ<\t>FROM<\t>DATE<\t>MSG-ID<\t>REFS * <\t>BYTES<\t>LINES<\t>XREF(opt) */ p = l; for (i = 0; i < 10; i++) { xover[i] = p; while (*p && *p != '\t') p++; if (*p == '\t') *p++ = '\0'; } /* get article number */ art = strtoul(xover[0], &p, 10); if (!p || *p != '\0') continue; /* check article number */ if (art < server || art > last) continue; /* check xover fields */ hi.subject = xover[1]; hi.from = xover[2]; hi.date = xover[3]; hi.msgid = xover[4]; hi.references = xover[5]; hi.bytes = xover[6]; hi.lines = xover[7]; hi.xref = xover[8]; if (storep(art, &hi) == 0) continue; stufftoget[art - server] = 'y'; /* anything */ if (verbose > 1) printf("%s: will fetch %lu (%s)\n", g->name, art, xover[4]); } if (!l) return server; } else { stufftoget = realloc(stufftoget, 0); } } else if (verbose > 1) { printf("%s: no new articles\n", g->name); } outstanding = 0; while (window <= last || outstanding) { /* 47 is based on SO_SNDWIN and the NNTP maximum line length */ while (outstanding < 47 && window <= last) { while (stufftoget && window <= last && !stufftoget[window - server]) { if (verbose > 3) printf("%s: skipping %lu - not available or too old\n", g->name, window); window++; } if (window <= last) { outstanding++; if (verbose > 1) printf("%s: ARTICLE %lu (%d up in the air)\n", g->name, window, outstanding); sprintf(lineout, "ARTICLE %lu\r\n", window++); putaline(); } } if (outstanding > 0) { l = getaline(nntpin); if (!l) return art; outstanding--; if (sscanf(l, "%3d %lu", &n, &art) < 2 || n != 220) { if (verbose) printf("%s: receiving article (reply %03d)\n", g->name, n); continue; } if (verbose) printf("%s: receiving article %lu (%ld are left)\n", g->name, art, last - window + 1 + outstanding); hi = parse_header(nntpin); f = NULL; if (hi->msgid == NULL || *hi->msgid == '\0' || hi->from == NULL || hi->newsgroups == NULL) { if (verbose) printf("discarding it - no message-id found\n"); } else { c = getmsgidfname(hi->msgid); if (stat(c, &st) < 0 && errno == ENOENT) f = fopen(c, "w"); else if (verbose) printf("discarding it - already have it or " "file system error\n"); } if (f) { fprintf(f, "%s", hi->all); store(c, f, hi->newsgroups, hi); fprintf(f, "\n"); } while ((l = getaline(nntpin)) != NULL && strcmp(l, ".") != 0) { if (l && *l == '.') l++; if (f) fprintf(f, "%s\n", l); } if (f) { fclose(f); if (!l) { /* * unlink file since writing error. */ unlink(c); } else { if (hi->supersedes) { /* do supersede */ c = skipspaces(hi->supersedes); if (*c == '<') { char* c2 = strchr(c, '>'); if (c2) { c2[1] = '\0'; removearts(c); } } } } } free_header_info(hi); } } return last +1; } static void parsesuckfile(FILE* fp) { unsigned long art = 1; int numprocessed = 0; char* l; FILE* f; const char* c; struct stat st; struct header_info* hi; do { hi = parse_header(fp); f = NULL; if (hi->msgid == NULL || *hi->msgid == '\0' || hi->from == NULL || hi->newsgroups == NULL) { if (verbose && hi->n_lines > 0) printf("discarding %lu - no message-id found\n", art); } else if (storep(art, hi) == 0) { /* discard it */ } else { c = getmsgidfname(hi->msgid); if (stat(c, &st) < 0 && errno == ENOENT) f = fopen(c, "w"); else if (verbose) printf("discarding it - already have it or " "file system error\n"); } if (f) { if (verbose) printf("%s: receiving article %lu\n", hi->newsgroups, art); fprintf(f, "%s", hi->all); store(c, f, hi->newsgroups, hi); fprintf(f, "\n"); } while ((l = getaline(fp)) != NULL && strcmp(l, ".") != 0) { if (l && *l == '.') l++; if (f) fprintf(f, "%s\n", l); } if (f) { fclose(f); if (!l) { /* * unlink file since writing error. */ unlink(c); } else { numprocessed++; if (hi->supersedes) { /* do supersede */ c = skipspaces(hi->supersedes); if (*c == '<') { char* c2 = strchr(c, '>'); if (c2) { c2[1] = '\0'; removearts(c); } } } } } free_header_info(hi); art++; } while (l != NULL); if (verbose) printf("processed %d out of %lu\n", numprocessed, art); } /* * get active file from remote server */ static void checkactive(void) { const char* s; struct stat st; s = getactivefname(servers->servername); if (active && stat(s, &st) == 0 && now - (timeout_active * 86400) < st.st_mtime ) { if (verbose) printf("LIST ACTIVE done only %d seconds ago, skipping\n", (int)(now-st.st_mtime)); return; } nntpactive(); s = getactivefname(servers->servername); unlink(s); close(open(s, O_WRONLY|O_CREAT, 0664)); writeactive(); /* write groupinfo file */ } /* * post all spooled articles * * if all postings succeed, returns 1 * if there are no postings to post, returns 1 * if a posting is strange for some reason, closes the nntp connection * and returns 0. this is to recover from unknown states * * a posting is deleted once it has been posted, whether it succeeded * or not: we don't want to re-do an error. * */ static int postarticles(void) { char* line; DIR* d; struct dirent* de; FILE* f; struct stat st; int r; const char* outgoing = getoutgoingdname(); const char* failedpostings = getfailedpostingsdname(); d = opendir(outgoing); if (!d) { mysyslog(LOG_ERR, "Unable to opendir out.going: %s", strerror(errno)); return 1; } while ((de = readdir(d)) != NULL) { const char* fname = getoutgoingfname(de->d_name); if (stat(fname, &st) == 0 && S_ISREG(st.st_mode) && (f = fopen(fname, "r")) != NULL) { if (verbose) printf("Posting %s...\n", fname); sprintf(lineout, "POST\r\n"); putaline(); r = nntpreply(); if (r == 340) { do { line = getaline(f); if (line) { sprintf(lineout, "%s\r\n", line); putaline(); } } while (line && strcmp(line, ".")); if (line) { /* done correctly, so receive server's reply */ line = getaline(nntpin); } if (line && !strncmp(line, "240", 3)) { if (verbose) printf(" - OK\n"); mysyslog(LOG_INFO, "posted article"); /* useless */ unlink(fname); } else if (line && !strncmp(line, "441 435", 7)) { if (verbose) printf(" - upstream server had that message-id\n"); mysyslog(LOG_INFO, "duplicated article"); unlink(fname); } else { const char* s = getfailedpostingsfname(de->d_name); if (line) { /* posting is rejected, so skip this article */ /* dump if all servers rejected article */ if (verbose) printf(" - article rejected: %s\n", line); if (servers->postable <= 1) { mkdir(failedpostings, 0775); mysyslog(LOG_ERR, "unable to post (%s), moving to %s", line, s); if (rename(fname, s)) mysyslog(LOG_ERR, "unable to move failed posting " "to %s: %s", s, strerror(errno)); } } else { /* posting is stopped by the server, so dump */ /* this article */ if (verbose) printf(" - failed: %03d reply to POST\n", r); mkdir(failedpostings, 0775); mysyslog(LOG_ERR, "unable to post (%s), moving to %s", line, s); if (rename(fname, s)) mysyslog(LOG_ERR, "unable to move failed posting " "to %s: %s", s, strerror(errno)); fclose(f); closedir(d); /* strange state, so re-connect */ if (nntpreconnect()) /* continue posting */ return 0; else /* stop posting since cannot make connection */ return 1; } } } else if (r == 480) { mysyslog(LOG_ERR, "unable to post: %03d (need authentication)", r); } else { mysyslog(LOG_ERR, "unable to post: %03d", r); } fclose(f); } } closedir(d); return 1; } static void helpgetnewsgroups(struct newsgroup* g) { if (g) { helpgetnewsgroups(g->right); if (g->newarticles) { g->alive = 1; g->server = getnewsgroup(g, g->server); if (writeserver_eachgroup) writeserver(); mysyslog(LOG_INFO, "%s: %s: fetched to %lu", servers->servername, g->name, g->server); } helpgetnewsgroups(g->left); } } static void getnewsgroups(void) { checkgroups(); helpgetnewsgroups(active); #if DOTNGFILE { DIR* d; struct dirent* de; struct newsgroup* g; const char* s = getinterestingdname(); d = opendir(s); if (!d) { mysyslog(LOG_ERR, "opendir %s: %s", s, strerror(errno)); return; } while ((de = readdir(d))) { if (de->d_name[0] == '.') { g = findgroup(de->d_name + 1); if (verbose > 3 && (!g || !g->alive)) printf("considering %s - %s upstream\n", de->d_name+1, g ? " exists" : "does not exist"); if (g && !g->alive) { g->alive = 1; g->last++; } } } closedir(d); } #endif } void fixxover(void) { DIR* d; struct dirent* de; const char* s; s = getinterestingdname(); d = opendir(s); if (!d) { mysyslog(LOG_ERR, "opendir %s: %s", s, strerror(errno)); return; } while ((de = readdir(d))) { struct newsgroup* g; if (de->d_name[0] != '.' && (g = findgroup(de->d_name)) != 0) { chdirgroup(g, 1); getxover(); } } closedir(d); } static void usage(void) { fprintf(stderr, "Usage: fetchnews [-v] [-f] [-n] [-x #] [-s [suck-data-file]]\n" " -v: more verbose (may be repeated)\n" " -f: force re-read of newsgroups list from server\n" " -n: write server everytime for new server (long connection)\n" " -x: check for # extra articles in each group\n" " -s: parse sucked data and put them\n"); exit(1); } int main(int argc, char** argv) { struct passwd* pw; int option; int opt_s = 0; int force = 0; verbose = 0; pw = getpwnam("news"); if (!pw) { fprintf(stderr, "no such user: news\n"); exit(1); } setregid(-1, pw->pw_gid); setreuid(-1, pw->pw_uid); if (geteuid() != pw->pw_uid || getegid() != pw->pw_gid) { fprintf(stderr, "%s: must be run as news or root\n", argv[0]); exit(1); } while ((option = getopt(argc, argv, "nfvx:s")) != -1) { if (option == 'v') { verbose++; } else if (option == 'n') { writeserver_eachgroup = 1; } else if (option == 'f') { force = 1; } else if (option == 'x') { char* nptr = optarg; char* endptr = NULL; extraarticles = strtol(nptr, &endptr, 0); if (!nptr || !*nptr || !endptr || *endptr || !extraarticles) { usage(); } } else if (option == 's') { opt_s = 1; } else if (option == 'h' || option == '?') { usage(); } } argc -= optind - 1; argv += optind - 1; if (opt_s && argc > 1) usage(); else if (!opt_s && argc != 1) usage(); whoami(); now = time(NULL); umask(2); openlog("fetchnews", LOG_PID|LOG_CONS, LOG_NEWS); readconfig(); if (force) { timeout_active = 0; printf("Forced LIST ACTIVE\n"); } if (verbose) { printf("%s: verbosity level is %d\n", version, verbose); if (verbose > 3) { printf("Don't fetch threads that nobody has read after %d " "days\n", (int)timeout_long); printf("Don't fetch threads that have been read once after %d " "days\n", (int)timeout_short); } } if (!opt_s && servers->servername == NULL) { mysyslog(LOG_ERR, "no server name in %s", getconfigfname()); exit(2); } lockactive(); /* lock whole information */ readactive(); /* read groupinfo file */ if (opt_s) { /* fetch articles from sucked file */ FILE* fp; if (argc == 1) { if (verbose) printf("suck from stdio\n"); fp = stdin; } else { if (verbose) printf("suck from %s\n", argv[1]); fp = fopen(argv[1], "r"); } if (fp == NULL) { mysyslog(LOG_ERR, "cannot open file: %s: %s", argv[1], strerror(errno)); } parsesuckfile(fp); if (argc != 1) fclose(fp); } else { /* fetch articles */ while (servers) { if (verbose) printf("Connecting to %s.\n", servers->servername); fflush(stdout); if (nntpreconnect()) { /* post articles if they are. */ if (servers->postable) { do { sprintf(lineout, "MODE READER\r\n"); putaline(); fflush(nntpout); if (nntpreply() == 498) continue; } while (!postarticles()); } /* get articles if they are. */ checkactive(); /* get active from remote on demand */ readserver(); /* read server info file */ getnewsgroups(); /* fetch all */ writeserver(); /* write server info file */ sprintf(lineout, "QUIT\r\n"); /* say it, then just exit :) */ putaline(); if (servers->next) writeactive(); /* write groupinfo file */ } else { if (verbose) printf("Connection failed to %s.\n", servers->servername); } servers = servers->next; } } writeactive(); /* write groupinfo file */ mysyslog(LOG_INFO, "done"); if (verbose || fork() <= 0) { #if defined(PRIO_MAX) setpriority(PRIO_PROCESS, 0, PRIO_MAX/2); #endif fixxover(); } exit(0); }