/*
* Copyright (c) 1998,1999,2000 Kazushi (Jam) Marukawa
* All rights of my changes are reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice in the documentation and/or other materials provided with
* the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* $Orig-Id: fetch.c,v 1.37 1997/07/20 00:33:38 agulbra Exp $
Written by Arnt Gulbrandsen <agulbra@troll.no> and copyright 1995
Troll Tech AS, Postboks 6133 Etterstad, 0602 Oslo, Norway, fax +47
22646949.
Use, modification and distribution is allowed without limitation,
warranty, or liability of any kind. */
#ifdef SOCKS
#include <socks.h>
#endif
#include <sys/types.h>
#ifdef BSD
#include <sys/errno.h>
#endif
#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <pwd.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include "leafnode.h"
/* Wall-clock time; main() sets it once from time(NULL) and it is compared
 * against file timestamps in getnewsgroup() and checkactive(). */
time_t now;
/* Number of already-seen articles to re-check in each group; set from the
 * -x command line option in main(), 0 disables the re-check. */
int extraarticles = 0;
/* Non-zero when -n was given: helpgetnewsgroups() then calls writeserver()
 * after every fetched group, in addition to the once-per-server call in
 * main(). */
int writeserver_eachgroup = 0;
/* Forward declarations for functions defined further down in this file. */
unsigned long getnewsgroup(struct newsgroup*, unsigned long);
void fixxover(void);
/*
 * Decide which locally interesting newsgroups have new articles on the
 * currently connected upstream server.
 *
 * Pass 1 walks the directory named by getinterestingdname().  Every entry
 * not starting with '.' is matched against the server's "newsgroups"
 * (allow) and "filteredngs" (deny) pattern lists; surviving names that
 * findgroup() knows are collected into a candidate array.
 *
 * Pass 2 sends a GROUP command for each candidate, keeping at most 47
 * commands in flight, and sets g->newarticles = 1 on every group whose
 * reply shows articles at or beyond g->server (or within the last
 * `extraarticles` articles when that option is active).
 *
 * Errors are logged and end the function early; nothing is returned.
 */
static void checkgroups(void)
{
    /* Candidate buffer; static so it is reused on later calls. */
    static struct newsgroup** stufftoget = NULL;
    static int stufftogetsize = 0;
    DIR* d;
    struct dirent* de;
    struct newsgroup* g;
    int window;         /* index of the next candidate to send GROUP for */
    int last;           /* number of candidates collected */
    int outstanding;    /* GROUP commands sent but not yet answered */
    const char* s = getinterestingdname();

    d = opendir(s);
    if (!d) {
        mysyslog(LOG_ERR, "opendir %s: %s", s, strerror(errno));
        return;
    }
    last = 0;
    while ((de = readdir(d))) {
        int i;

        if (de->d_name[0] == '.')
            continue;
        /* retrieve articles from specified news groups only */
        if (servers->newsgroups.num > 0) {
            /* should we feed this news group? */
            for (i = 0; i < servers->newsgroups.num; i++) {
                if (myfnmatch(servers->newsgroups.strarray[i],
                              de->d_name) == 0)
                    break;
            }
            /* no, skip this news group. */
            if (i >= servers->newsgroups.num) {
                if (verbose > 1)
                    printf("skip %s since it isn't in newsgroups list\n",
                           de->d_name);
                continue;
            }
        }
        if (servers->filteredngs.num > 0) {
            /* should we filter this news group? */
            for (i = 0; i < servers->filteredngs.num; i++) {
                if (myfnmatch(servers->filteredngs.strarray[i],
                              de->d_name) == 0)
                    break;
            }
            /* yes, skip this news group. */
            if (i < servers->filteredngs.num) {
                if (verbose > 1)
                    printf("skip %s since it is in filteredngs list\n",
                           de->d_name);
                continue;
            }
        }
        /* the group passed both lists: remember it if it is known */
        g = findgroup(de->d_name);
        if (!g)
            continue;
        if (last >= stufftogetsize) {
            /*
             * Grow through a temporary so the old buffer and its recorded
             * size stay valid if realloc() fails.
             */
            int newsize = stufftogetsize + 128;
            struct newsgroup** grown =
                realloc(stufftoget, newsize * sizeof *grown);

            if (grown == NULL) {
                mysyslog(LOG_ERR, "allocating newsgroup buffer %s: %s",
                         s, strerror(errno));
                closedir(d);
                return;
            }
            stufftoget = grown;
            stufftogetsize = newsize;
        }
        stufftoget[last++] = g;
    }
    closedir(d);

    /*
     * check new articles number for each marked news group by using
     * GROUP command asynchronously.
     */
    window = 0;
    outstanding = 0;
    while (window < last || outstanding) {
        while (outstanding < 47 && window < last) {
            outstanding++;
            if (verbose > 1)
                printf("GROUP %s (%d up in the air)\n",
                       stufftoget[window]->name, outstanding);
            sprintf(lineout, "GROUP %s\r\n", stufftoget[window++]->name);
            putaline();
        }
        if (outstanding > 0) {
            int n = 0;          /* reply code; 0 when the line has none */
            unsigned long count;
            unsigned long first;
            unsigned long high; /* highest article number on the server */
            char newsgroup[512 + 1];
            char* l;

            l = getaline(nntpin);
            if (!l)
                return;
            /* replies arrive in the order the commands were sent */
            g = stufftoget[window - outstanding];
            outstanding--;
            if (sscanf(l, "%3d %lu %lu %lu %512s", &n, &count, &first, &high,
                       newsgroup) < 5 ||
                n != 211 ||
                strcmp(newsgroup, g->name) != 0) {
                if (n == 480)
                    printf("%s: need authentication, %s\n", g->name, l);
                if (verbose > 1)
                    printf("%s: ignore errors, %s\n", g->name, l);
                continue;       /* simply ignore error messages */
            }
            if ((first == 0 && high == 0) ||
                high < first ||
                count == 0) {
                /* no articles on server */
                if (verbose > 1)
                    printf("%s: mark to not read\n", g->name);
                continue;
            }
            if (g->server <= high) {
                if (verbose > 1)
                    printf("%s: mark to read\n", g->name);
                g->newarticles = 1; /* marked to retrieve new articles */
                continue;
            }
            if (extraarticles) {
                /*
                 * Start of the re-check range, clamped at 0; kept in
                 * unsigned long so large article numbers are not truncated.
                 */
                unsigned long from = 0;

                if (extraarticles > 0 &&
                    g->server >= (unsigned long)extraarticles)
                    from = g->server - (unsigned long)extraarticles;
                if (from < high) {
                    if (verbose > 1)
                        printf("%s: mark to read\n", g->name);
                    g->newarticles = 1; /* marked to retrieve new articles */
                    continue;
                }
            }
        }
    }
}
unsigned long getnewsgroup(struct newsgroup* g, unsigned long server)
{
int n;
unsigned long last;
unsigned long window; /* last ARTICLE n command sent */
unsigned long art;
char* l;
FILE* f;
const char* c;
struct stat st;
int outstanding;
static char* stufftoget;
struct header_info* hi;
if (!g)
return server;
sprintf(lineout, "GROUP %s\r\n", g->name);
putaline();
l = getaline(nntpin);
if (!l)
return server;
if (sscanf(l, "%3d %lu %lu %lu ", &n, &art, &window, &last) < 4 ||
n != 211)
return server;
if (extraarticles && server != 0) {
int i;
i = server - extraarticles;
if (server < extraarticles || i < window)
i = window;
if (verbose > 1)
printf("backing up from %lu to %d\n", server, i);
server = i;
}
if (window == 0 && last == 0) {
/* there is no article */
} else if (server > last + 1) {
mysyslog(LOG_INFO, "last seen article was %lu, server now has %lu-%lu",
server, window, last);
#if 1
if (server > last + 20) {
if (verbose)
printf("switched upstream servers? %lu > %lu\n",
server - 1, last);
server = window; /* insane - recover thoroughly */
} else {
if (verbose)
printf("rampant spam cancel? %lu > %lu\n",
server - 1, last);
server = last > 20 ? last - 20 : 1; /* a little bit too much */
}
#else
server = last + 1;
#endif
}
if (initiallimit && server == 0 &&
last - server > initiallimit) {
if (verbose)
printf("skipping articles %lu-%lu inclusive (initial limit)\n",
server, last-initiallimit - 1);
server = last - initiallimit;
}
if (artlimit && last-server > artlimit) {
if (verbose)
printf("skipping articles %lu-%lu inclusive (article limit)\n",
server, last-artlimit - 1);
server = last - artlimit;
}
c = getinterestingngfname(g);
if (stat(c, &st) < 0)
return last; /* race condition: I hope this is proper */
if (server <= last &&
((timeout_short > 0 && st.st_atime == st.st_mtime &&
now - (timeout_short * 86400) > st.st_mtime) ||
(timeout_long > 0 &&
now - (timeout_long * 86400) > st.st_atime))) {
if (verbose) {
printf("skipping %s from now on\n", g->name);
if (timeout_short > 0 && st.st_atime == st.st_mtime &&
now - (timeout_short * 86400) > st.st_mtime)
printf("since now %lu - %lu > %lu\n",
now, timeout_short * 86400, st.st_mtime);
if (timeout_long > 0 && now - (timeout_long * 86400) > st.st_atime)
printf("since now %lu - %lu > %lu\n",
now, timeout_long * 86400, st.st_atime);
}
mysyslog(LOG_INFO, "skip %s: %lu, %lu", g->name, server, last);
unlink(c);
g->last++; /* increment last to let users know */
return server;
}
if (window == 0 && last == 0) {
if (verbose > 1)
printf("%s: no new articles\n", g->name);
return server;
}
if (window < server)
window = server;
if (window < 1)
window = 1;
server = window;
if (server <= last) {
if (verbose)
printf("%s: considering articles %lu-%lu\n",
g->name, server, last);
sprintf(lineout, "XOVER %lu-%lu\r\n",
server, last);
putaline();
if (nntpreply() == 224) {
stufftoget = (char*)realloc(stufftoget, last + 1 - server);
if (stufftoget)
memset(stufftoget, 0, last + 1 - server);
while ((l = getaline(nntpin)) && strcmp(l, ".")) {
char* p;
unsigned long art;
char* xover[10]; /* xover[9] is used to keep rest */
struct header_info hi;
int i;
memset(&hi, 0, sizeof(struct header_info));
/* if array cannot be allocated, drain all inputs */
if (!stufftoget)
continue;
/*
* Whole line is following format.
* ART<\t>SUBJ<\t>FROM<\t>DATE<\t>MSG-ID<\t>REFS
* <\t>BYTES<\t>LINES<\t>XREF(opt)
*/
p = l;
for (i = 0; i < 10; i++) {
xover[i] = p;
while (*p && *p != '\t')
p++;
if (*p == '\t')
*p++ = '\0';
}
/* get article number */
art = strtoul(xover[0], &p, 10);
if (!p || *p != '\0')
continue;
/* check article number */
if (art < server || art > last)
continue;
/* check xover fields */
hi.subject = xover[1];
hi.from = xover[2];
hi.date = xover[3];
hi.msgid = xover[4];
hi.references = xover[5];
hi.bytes = xover[6];
hi.lines = xover[7];
hi.xref = xover[8];
if (storep(art, &hi) == 0)
continue;
stufftoget[art - server] = 'y'; /* anything */
if (verbose > 1)
printf("%s: will fetch %lu (%s)\n", g->name, art, xover[4]);
}
if (!l)
return server;
} else {
stufftoget = realloc(stufftoget, 0);
}
} else if (verbose > 1) {
printf("%s: no new articles\n", g->name);
}
outstanding = 0;
while (window <= last || outstanding) {
/* 47 is based on SO_SNDWIN and the NNTP maximum line length */
while (outstanding < 47 && window <= last) {
while (stufftoget &&
window <= last &&
!stufftoget[window - server]) {
if (verbose > 3)
printf("%s: skipping %lu - not available or too old\n",
g->name, window);
window++;
}
if (window <= last) {
outstanding++;
if (verbose > 1)
printf("%s: ARTICLE %lu (%d up in the air)\n",
g->name, window, outstanding);
sprintf(lineout, "ARTICLE %lu\r\n", window++);
putaline();
}
}
if (outstanding > 0) {
l = getaline(nntpin);
if (!l)
return art;
outstanding--;
if (sscanf(l, "%3d %lu", &n, &art) < 2 || n != 220) {
if (verbose)
printf("%s: receiving article (reply %03d)\n",
g->name, n);
continue;
}
if (verbose)
printf("%s: receiving article %lu (%ld are left)\n",
g->name, art, last - window + 1 + outstanding);
hi = parse_header(nntpin);
f = NULL;
if (hi->msgid == NULL || *hi->msgid == '\0' ||
hi->from == NULL || hi->newsgroups == NULL) {
if (verbose)
printf("discarding it - no message-id found\n");
} else {
c = getmsgidfname(hi->msgid);
if (stat(c, &st) < 0 && errno == ENOENT)
f = fopen(c, "w");
else if (verbose)
printf("discarding it - already have it or "
"file system error\n");
}
if (f) {
fprintf(f, "%s", hi->all);
store(c, f, hi->newsgroups, hi);
fprintf(f, "\n");
}
while ((l = getaline(nntpin)) != NULL && strcmp(l, ".") != 0) {
if (l && *l == '.')
l++;
if (f)
fprintf(f, "%s\n", l);
}
if (f) {
fclose(f);
if (!l) {
/*
* unlink <msg-id> file since writing error.
*/
unlink(c);
} else {
if (hi->supersedes) {
/* do supersede */
c = skipspaces(hi->supersedes);
if (*c == '<') {
char* c2 = strchr(c, '>');
if (c2) {
c2[1] = '\0';
removearts(c);
}
}
}
}
}
free_header_info(hi);
}
}
return last +1;
}
/*
 * Import a sequence of articles from an already opened stream.
 *
 * Each article is a header block (read by parse_header()) followed by body
 * lines up to a line consisting of a single ".".  Articles that lack a
 * message-id, From or Newsgroups header, that storep() rejects, or whose
 * message-id file already exists are read and thrown away.  The loop ends
 * when getaline() reports end of input.
 */
static void parsesuckfile(FILE* fp)
{
    unsigned long art = 1;      /* running article counter */
    int numprocessed = 0;       /* articles completely stored */
    char* l;

    for (;;) {
        struct header_info* hi = parse_header(fp);
        FILE* f = NULL;
        const char* c = NULL;
        struct stat st;
        int usable;

        usable = hi->msgid != NULL && *hi->msgid != '\0' &&
                 hi->from != NULL && hi->newsgroups != NULL;
        if (!usable) {
            if (verbose && hi->n_lines > 0)
                printf("discarding %lu - no message-id found\n", art);
        } else if (storep(art, hi) != 0) {
            c = getmsgidfname(hi->msgid);
            if (stat(c, &st) < 0 && errno == ENOENT)
                f = fopen(c, "w");
            else if (verbose)
                printf("discarding it - already have it or "
                       "file system error\n");
        }
        /* else: storep() said no, discard silently */

        if (f != NULL) {
            if (verbose)
                printf("%s: receiving article %lu\n", hi->newsgroups, art);
            fprintf(f, "%s", hi->all);
            store(c, f, hi->newsgroups, hi);
            fprintf(f, "\n");
        }

        /* copy (or just skip) the body, dropping one leading '.' per line */
        for (l = getaline(fp);
             l != NULL && strcmp(l, ".") != 0;
             l = getaline(fp)) {
            if (f != NULL)
                fprintf(f, "%s\n", *l == '.' ? l + 1 : l);
        }

        if (f != NULL) {
            fclose(f);
            if (l == NULL) {
                /* input ended inside the article: drop the partial file */
                unlink(c);
            } else {
                numprocessed++;
                if (hi->supersedes) {
                    /* do supersede */
                    const char* id = skipspaces(hi->supersedes);
                    char* gt = (*id == '<') ? strchr(id, '>') : NULL;

                    if (gt != NULL) {
                        gt[1] = '\0';
                        removearts(id);
                    }
                }
            }
        }
        free_header_info(hi);
        art++;
        if (l == NULL)
            break;
    }
    if (verbose)
        printf("processed %d out of %lu\n", numprocessed, art);
}
/*
* get active file from remote server
*/
static void checkactive(void)
{
const char* s;
struct stat st;
s = getactivefname(servers->servername);
if (active && stat(s, &st) == 0 &&
now - (timeout_active * 86400) < st.st_mtime ) {
if (verbose)
printf("LIST ACTIVE done only %d seconds ago, skipping\n",
(int)(now-st.st_mtime));
return;
}
nntpactive();
s = getactivefname(servers->servername);
unlink(s);
close(open(s, O_WRONLY|O_CREAT, 0664));
writeactive(); /* write groupinfo file */
}
/*
 * Post all spooled articles found in the out.going directory.
 *
 * Each regular file is offered with POST.  What happens to it afterwards:
 *   - reply 240, or "441 435" (duplicate): the file is deleted;
 *   - any other reply after the article was sent: the article is moved to
 *     the failed-postings directory, but only if servers->postable <= 1;
 *     otherwise it stays in place;
 *   - no reply at all, or the file ended without a "." line: the article
 *     is moved to the failed-postings directory and the connection is
 *     re-established with nntpreconnect();
 *   - POST itself refused (reply other than 340): logged, file untouched.
 *
 * Returns 1 when the directory was processed (or could not be opened), and
 * also when re-connecting failed.  Returns 0 after a successful re-connect
 * so that the caller runs the function again for the remaining articles.
 */
static int postarticles(void)
{
    char* line;
    DIR* d;
    struct dirent* de;
    FILE* f;
    struct stat st;
    int r;
    const char* outgoing = getoutgoingdname();
    const char* failedpostings = getfailedpostingsdname();

    d = opendir(outgoing);
    if (!d) {
        mysyslog(LOG_ERR, "Unable to opendir out.going: %s", strerror(errno));
        return 1;
    }
    while ((de = readdir(d)) != NULL) {
        const char* fname = getoutgoingfname(de->d_name);

        /* only readable regular files are postings */
        if (stat(fname, &st) != 0 || !S_ISREG(st.st_mode) ||
            (f = fopen(fname, "r")) == NULL)
            continue;

        if (verbose)
            printf("Posting %s...\n", fname);
        sprintf(lineout, "POST\r\n");
        putaline();
        r = nntpreply();
        if (r == 340) {
            /* send the file up to and including its "." line */
            do {
                line = getaline(f);
                if (line) {
                    sprintf(lineout, "%s\r\n", line);
                    putaline();
                }
            } while (line && strcmp(line, "."));
            if (line) {
                /* done correctly, so receive server's reply */
                line = getaline(nntpin);
            }
            if (line && !strncmp(line, "240", 3)) {
                if (verbose)
                    printf(" - OK\n");
                mysyslog(LOG_INFO, "posted article");   /* useless */
                unlink(fname);
            } else if (line && !strncmp(line, "441 435", 7)) {
                if (verbose)
                    printf(" - upstream server had that message-id\n");
                mysyslog(LOG_INFO, "duplicated article");
                unlink(fname);
            } else {
                const char* s = getfailedpostingsfname(de->d_name);

                if (line) {
                    /* posting is rejected, so skip this article */
                    /* dump if all servers rejected article */
                    if (verbose)
                        printf(" - article rejected: %s\n", line);
                    if (servers->postable <= 1) {
                        mkdir(failedpostings, 0775);
                        mysyslog(LOG_ERR,
                                 "unable to post (%s), moving to %s",
                                 line, s);
                        if (rename(fname, s))
                            mysyslog(LOG_ERR,
                                     "unable to move failed posting "
                                     "to %s: %s", s, strerror(errno));
                    }
                } else {
                    /* posting is stopped by the server, so dump */
                    /* this article */
                    if (verbose)
                        printf(" - failed: %03d reply to POST\n", r);
                    mkdir(failedpostings, 0775);
                    /* line is NULL here and must not be given to %s */
                    mysyslog(LOG_ERR, "unable to post (%s), moving to %s",
                             "no reply", s);
                    if (rename(fname, s))
                        mysyslog(LOG_ERR, "unable to move failed posting "
                                 "to %s: %s", s, strerror(errno));
                    fclose(f);
                    closedir(d);
                    /* strange state, so re-connect */
                    if (nntpreconnect())
                        /* continue posting */
                        return 0;
                    else
                        /* stop posting since cannot make connection */
                        return 1;
                }
            }
        } else if (r == 480) {
            mysyslog(LOG_ERR, "unable to post: %03d (need authentication)",
                     r);
        } else {
            mysyslog(LOG_ERR, "unable to post: %03d", r);
        }
        fclose(f);
    }
    closedir(d);
    return 1;
}
/*
 * Walk the group tree rooted at g, visiting the right subtree first, then
 * the node, then the left subtree.  Every node flagged with newarticles is
 * marked alive and fetched with getnewsgroup(); the returned position is
 * stored in its `server` field and logged.
 */
static void helpgetnewsgroups(struct newsgroup* g)
{
    struct newsgroup* node;

    /* recursion handles the right child, the loop descends to the left */
    for (node = g; node != NULL; node = node->left) {
        helpgetnewsgroups(node->right);
        if (!node->newarticles)
            continue;
        node->alive = 1;
        node->server = getnewsgroup(node, node->server);
        if (writeserver_eachgroup)
            writeserver();
        mysyslog(LOG_INFO, "%s: %s: fetched to %lu", servers->servername,
                 node->name, node->server);
    }
}
/*
 * Fetch every group that has news on the current server: checkgroups()
 * flags the groups, helpgetnewsgroups() fetches them.
 *
 * When built with DOTNGFILE, the "interesting groups" directory is then
 * scanned for dot-prefixed entries; a known group of that name (without
 * the dot) which is not yet alive is marked alive and has its `last`
 * counter incremented.
 */
static void getnewsgroups(void)
{
    checkgroups();
    helpgetnewsgroups(active);
#if DOTNGFILE
    {
        const char* dname = getinterestingdname();
        DIR* dir = opendir(dname);
        struct dirent* ent;

        if (dir == NULL) {
            mysyslog(LOG_ERR, "opendir %s: %s", dname, strerror(errno));
            return;
        }
        while ((ent = readdir(dir)) != NULL) {
            struct newsgroup* grp;
            int notalive;

            if (ent->d_name[0] != '.')
                continue;
            grp = findgroup(ent->d_name + 1);
            notalive = (grp == NULL || !grp->alive);
            if (verbose > 3 && notalive)
                printf("considering %s - %s upstream\n", ent->d_name+1,
                       grp ? " exists" : "does not exist");
            if (grp != NULL && notalive) {
                grp->alive = 1;
                grp->last++;
            }
        }
        closedir(dir);
    }
#endif
}
/*
 * For every entry in the "interesting groups" directory that does not
 * start with '.' and names a group known to findgroup(), change into that
 * group with chdirgroup(g, 1) and call getxover().
 */
void fixxover(void)
{
    const char* dname = getinterestingdname();
    DIR* dir = opendir(dname);
    struct dirent* ent;

    if (dir == NULL) {
        mysyslog(LOG_ERR, "opendir %s: %s", dname, strerror(errno));
        return;
    }
    for (ent = readdir(dir); ent != NULL; ent = readdir(dir)) {
        struct newsgroup* grp;

        if (ent->d_name[0] == '.')
            continue;
        grp = findgroup(ent->d_name);
        if (grp == NULL)
            continue;
        chdirgroup(grp, 1);
        getxover();
    }
    closedir(dir);
}
/*
 * Write the command line synopsis to stderr and terminate the program
 * with exit status 1.  Never returns.
 */
static void usage(void)
{
    static const char text[] =
        "Usage: fetchnews [-v] [-f] [-n] [-x #] [-s [suck-data-file]]\n"
        " -v: more verbose (may be repeated)\n"
        " -f: force re-read of newsgroups list from server\n"
        " -n: write server everytime for new server (long connection)\n"
        " -x: check for # extra articles in each group\n"
        " -s: parse sucked data and put them\n";

    fputs(text, stderr);
    exit(1);
}
/*
 * fetchnews entry point.
 *
 * Switches the effective uid/gid to user "news", parses the options
 * (-v, -f, -n, -x N, -s [file]), reads the configuration and the groupinfo
 * file, and then either imports articles from a suck file/stdin (-s) or
 * talks to each configured server in turn: post spooled articles, refresh
 * the group list, fetch new articles.  Finally the groupinfo file is
 * written and fixxover() is run (in a forked child unless verbose).
 *
 * Exit status: 0 normally, 1 for usage or privilege errors, 2 when no
 * server is configured or the suck file cannot be opened.
 */
int main(int argc, char** argv)
{
    struct passwd* pw;
    int option;
    int opt_s = 0;
    int force = 0;
    int status = 0;

    verbose = 0;
    pw = getpwnam("news");
    if (!pw) {
        fprintf(stderr, "no such user: news\n");
        exit(1);
    }
    /* failures of the set*id calls are caught by the check below */
    setregid(-1, pw->pw_gid);
    setreuid(-1, pw->pw_uid);
    if (geteuid() != pw->pw_uid || getegid() != pw->pw_gid) {
        fprintf(stderr, "%s: must be run as news or root\n", argv[0]);
        exit(1);
    }

    while ((option = getopt(argc, argv, "nfvx:s")) != -1) {
        switch (option) {
        case 'v':
            verbose++;
            break;
        case 'n':
            writeserver_eachgroup = 1;
            break;
        case 'f':
            force = 1;
            break;
        case 'x': {
            char* nptr = optarg;
            char* endptr = NULL;

            extraarticles = strtol(nptr, &endptr, 0);
            /* reject empty, non-numeric and zero arguments */
            if (!nptr || !*nptr || !endptr || *endptr || !extraarticles) {
                usage();
            }
            break;
        }
        case 's':
            opt_s = 1;
            break;
        case 'h':
        case '?':
            usage();
            break;
        default:
            break;
        }
    }
    /* keep argv[0] in place so argv[1] is the first non-option argument */
    argc -= optind - 1;
    argv += optind - 1;
    if (opt_s && argc > 1)
        usage();
    else if (!opt_s && argc != 1)
        usage();

    whoami();
    now = time(NULL);
    umask(2);
    openlog("fetchnews", LOG_PID|LOG_CONS, LOG_NEWS);
    readconfig();
    if (force) {
        timeout_active = 0;
        printf("Forced LIST ACTIVE\n");
    }
    if (verbose) {
        printf("%s: verbosity level is %d\n", version, verbose);
        if (verbose > 3) {
            printf("Don't fetch threads that nobody has read after %d "
                   "days\n", (int)timeout_long);
            printf("Don't fetch threads that have been read once after %d "
                   "days\n", (int)timeout_short);
        }
    }
    if (!opt_s && servers->servername == NULL) {
        mysyslog(LOG_ERR, "no server name in %s", getconfigfname());
        exit(2);
    }

    lockactive();       /* lock whole information */
    readactive();       /* read groupinfo file */
    if (opt_s) {
        /* fetch articles from sucked file */
        FILE* fp;

        if (argc == 1) {
            if (verbose) printf("suck from stdio\n");
            fp = stdin;
        } else {
            if (verbose) printf("suck from %s\n", argv[1]);
            fp = fopen(argv[1], "r");
        }
        if (fp == NULL) {
            /*
             * Only reachable when a file name was given.  Do not hand the
             * NULL stream to the parser; continue with the normal
             * shutdown below and report the failure via the exit status.
             */
            mysyslog(LOG_ERR, "cannot open file: %s: %s",
                     argv[1], strerror(errno));
            status = 2;
        } else {
            parsesuckfile(fp);
            if (argc != 1)
                fclose(fp);
        }
    } else {
        /* fetch articles */
        while (servers) {
            if (verbose)
                printf("Connecting to %s.\n", servers->servername);
            fflush(stdout);
            if (nntpreconnect()) {
                /* post articles if they are. */
                if (servers->postable) {
                    do {
                        sprintf(lineout, "MODE READER\r\n");
                        putaline();
                        fflush(nntpout);
                        /*
                         * "continue" in a do-while jumps to the loop
                         * condition, so the reply code changes nothing.
                         */
                        if (nntpreply() == 498)
                            continue;
                    } while (!postarticles());
                }
                /* get articles if they are. */
                checkactive();      /* get active from remote on demand */
                readserver();       /* read server info file */
                getnewsgroups();    /* fetch all */
                writeserver();      /* write server info file */
                sprintf(lineout, "QUIT\r\n");   /* say it, then just exit :) */
                putaline();
                if (servers->next)
                    writeactive();  /* write groupinfo file */
            } else {
                if (verbose)
                    printf("Connection failed to %s.\n", servers->servername);
            }
            servers = servers->next;
        }
    }
    writeactive();      /* write groupinfo file */
    mysyslog(LOG_INFO, "done");

    /* verbose: stay in the foreground; otherwise the child (or this
     * process, if fork() failed) runs fixxover() and the parent exits */
    if (verbose || fork() <= 0) {
#if defined(PRIO_MAX)
        setpriority(PRIO_PROCESS, 0, PRIO_MAX/2);
#endif
        fixxover();
    }
    exit(status);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */