/* $Id: http_rsp.c,v 1.12 2004/03/07 12:48:20 ossi Exp $ *
*
* puf 0.9 Copyright (C) 2000-2004 by Oswald Buddenhagen <puf@ossi.cjb.net>
* based on puf 0.1.x (C) 1999,2000 by Anders Gavare <gavare@hotmail.com>
*
* You may modify and distribute this code under the terms of the GPL.
* There is NO WARRANTY of any kind. See COPYING for details.
*
* http_rsp.c - receive and process http response message
*
*/
#include "puf.h"
/* When non-zero, mmfopen() keeps the handle it opens only until its own
   next call (instead of storing it in the caller's slot) and, like the
   header-file open in handle_reply(), never calls free_fd() to recover
   from ENFILE/EMFILE. */
int economize_files;
/* Make sure every directory named in the path exists.
   Each '/' (except one at the very start of the string) is temporarily
   replaced by a terminator so that the prefix can be handed to mkdir();
   the separator is put back afterwards, so buf is unchanged on return.
   mkdir() failures (e.g. directory already present) are ignored. */
static void
create_dir(char *buf)
{
    char *cur;

    if (!*buf)
        return;
    /* start behind the first character: a leading '/' is never cut */
    for (cur = buf + 1; *cur; cur++) {
        if (*cur != '/')
            continue;
        *cur = '\0';
        mkdir(buf, 0777);
        *cur = '/';
    }
}
/* Open the file `name` with the given open(2) flags (mode 0666).
   - A handle remembered from a previous call in economize mode is
     closed first.
   - If the process/system is out of handles (ENFILE/EMFILE), free_fd()
     is tried once - but never in economize mode - before giving up
     with -2.
   - If the path does not exist (ENOENT), the directories are created
     once via create_dir() and the open is repeated.
   - Any other failure yields -1.
   On success the descriptor is returned; it is additionally stored in
   *f, or - in economize mode - only remembered internally so that the
   next call can close it. */
int
mmfopen(char *name, int flags, int *f)
{
    static int cf = -1;
    int fd;
    int freed = 0, made_dirs = 0;

    if (cf != -1) {
        close(cf);
        cf = -1;
    }

    for (;;) {
        fd = open(name, flags, 0666);
        if (fd >= 0)
            break;
        if (errno == ENFILE || errno == EMFILE) {
            if (freed || economize_files || !free_fd())
                return -2;
            freed = 1;
            continue;
        }
        if (errno != ENOENT || made_dirs)
            return -1;
        create_dir(name);
        made_dirs = 1;
    }

    if (economize_files) {
        cf = fd;
        return fd;
    }
    *f = fd;
    return fd;
}
/* Open the target file of a request (au->disposition) through
   mmfopen(), letting it record the handle in au->f.
   Returns whatever mmfopen() returns. */
static int
mfopen(aurl_t *au, int flags)
{
    int *slot = &au->f;

    return mmfopen(au->disposition, flags, slot);
}
/* try to "steal" a handle from an open target file.
   Closes the first open file handle (au->f != -1) found among the
   entries visited, marks it as closed (-1) and returns 1.
   Returns 0 if no entry had an open handle. */
int
free_fd()
{
/* NOTE(review): lnq_iterate is a project macro; presumably it walks
   queue_urls_reply binding each element to `au` (of type aurl_t) -
   confirm against its definition. */
lnq_iterate(queue_urls_reply, aurl_t, au, {
if (au->f != -1) {
close(au->f);
au->f = -1;
/* one freed handle is enough - stop at the first hit */
return 1;
}
});
return 0;
}
/* Save `len` bytes starting at `buf` to the overlap buffer of `au`.
   Note that `buf` may point INTO the current overlap buffer (we are
   possibly saving contents of the previous buffer), therefore:
   - when a bigger buffer is needed, the data is copied to the new
     buffer before the old one is released;
   - when the existing buffer is reused, memmove() is used because
     source and destination may overlap.
   The buffer is never made smaller than OVERLAPLEN.
   au->size is only updated once the allocation has succeeded, so it
   never claims more room than au->buffer really has.
   Returns 1 on success, 0 if memory could not be allocated (an
   existing overlap buffer is released and reset in that case). */
static int
save_buff(aurl_t *au, char *buf, int len)
{
    char *bp;
    int siz;

    siz = len < OVERLAPLEN ? OVERLAPLEN : len;
    if (siz > au->size) {
        if (!(bp = mmalloc(siz))) {
            if (au->buffer) {
                free(au->buffer);
                au->buffer = 0;
                au->size = au->offset = 0;
            }
            return 0;
        }
        /* copy first: buf may live inside the old buffer */
        memcpy(bp, buf, len);
        if (au->buffer)
            free(au->buffer);
        au->buffer = bp;
        au->size = siz;
        au->offset = len;
    } else {
        /* in-place: regions may overlap, memcpy() would be undefined */
        memmove(au->buffer, buf, len);
        au->offset = len;
    }
    return 1;
}
/* Tell whether the document of this request has to be scanned for
   further references. Returns 1 if all of the following hold, else 0:
   - the recursion limit is unset (0) or not reached yet,
   - following of hrefs or fetching of srcs is enabled,
   - the content is html (or is forced to be treated as such). */
static int
needs_recurse(aurl_t *au)
{
    /* depth limit set and already reached? */
    if (au->url->parm->opt->max_recurse &&
        au->url->link_depth >= au->url->parm->opt->max_recurse)
        return 0;

    /* neither kind of reference is to be followed? */
    if (au->url->parm->opt->follow_href <= NOT_RECURSIVE &&
        au->url->parm->opt->follow_src <= DONT_FETCH)
        return 0;

    return au->content_is_html || au->url->parm->opt->force_html;
}
/* handle http reply message
 *
 * Reads one chunk of data from au->socket and processes it:
 *  1. data left over from the previous call (overlap buffer) is put in
 *     front of the freshly read data;
 *  2. as long as the header is not complete, it is parsed line by line
 *     (status line, then header fields); an incomplete line is saved
 *     to the overlap buffer for the next call;
 *  3. body data is scanned for references (if recursion applies) and
 *     written to the target file at au->file_off;
 *  4. on end of data (read returned 0) or when the per-file byte limit
 *     is reached, the file is renamed to its final name and the
 *     collected headers, if any, are written to "<name>.hdr".
 *
 * Returns RT_OK when more data is expected, RT_DONE when the file is
 * complete, RT_RETRY when the overlap buffer could not be allocated,
 * RT_AGAIN / RT_REFUSED / RT_TIMEOUT / RT_SKIP depending on the status
 * code resp. redirection, or the result of errm() on errors.
 */
int
handle_reply(aurl_t *au)
{
    char *bufp, *nbuf;
    ptrarr_t *sh;
    int fi, a, e, l, o, p, len, alen, orglen, nsiz;
    unsigned u;
    char databuf[OVERLAPLEN + MAXBUFSIZE], buf[SHORTSTR];

    /* Receive some data: */
    bufp = databuf + OVERLAPLEN;
    if ((orglen = read(au->socket, bufp, MAXBUFSIZE)) < 0)
        return errm(au->url, "data read for $u failed");
    len = orglen + au->offset;
    if (orglen) {
        /* Copy overlap buffer from last read */
        if (au->offset) {
            if (au->offset > OVERLAPLEN) {
                /* saved data does not fit into the space reserved in
                   front of databuf: append the new data to the saved
                   data instead and work on the heap buffer */
                if (len > MAXHEADERLEN || !(nbuf = mrealloc(au->buffer, len))) {
                    return errm(au->url, "reply header for $u has insane length");
                } else {
                    memcpy(nbuf + au->offset, bufp, orglen);
                    au->buffer = bufp = nbuf;
                    au->size = len;
                }
            } else {
                bufp -= au->offset;
                memcpy(bufp, au->buffer, au->offset);
            }
        }
    } else {
        /* note: close with size_fetched < size_total is accepted.
           this is basically incorrect, but common practice. */
        /* handle remaining data in overlap buffer */
        bufp = au->buffer;
    }

    /* first the http message header */
    if (!au->http_done_header) {
        /* p: read position, a: start of current line,
           e: end of current line (trailing blanks stripped) */
        for (p = 0;;) {
            for (a = p;;) {
                /* reached end-of-buffer before header end? */
                if (p >= len) {
                    if (!orglen)
                        return errm(au->url, "broken reply header for $u");
                    return save_buff(au, bufp + a, len - a) ? RT_OK : RT_RETRY;
                }
                if (bufp[p++] == '\n')
                    break;
            }
            for (e = p - 1; e > a && bufp[e - 1] <= ' '; e--)
                ;
            l = e - a;
            bufp[e] = '\0';
            dbg(HDR, ("read header: %s\n", bufp + a));
            if (!au->http_result_code) {
                /* empty lines before reply header are incorrect,
                   but should be handled for robustness */
                if (l) {
                    /* get result code. the code is expected at offset 9
                       ("HTTP/x.y "), so a shorter line cannot be a valid
                       status line - reject it instead of reading behind
                       the end of the line. */
                    if (l < 10 || strncasecmp(bufp + a, "http/", 5))
                        return errm(au->url, "broken reply header for $u");
                    au->http_result_code = atoi(bufp + a + 9);
                    /* 1xx responses are handled at the end of their header */
                    if (bufp[a + 9] != '1')
                        switch (au->http_result_code) {
                        case 200: /* ok */
                            au->file_off = 0;
                            /* fallthrough */
                        case 206: /* partial content */
                        case 304: /* not modified */
                            break;
                        case 300: /* multiple choices */
                        case 301: /* moved permanently */
                        case 302: /* moved temporarily */
                        case 307: /* temporary redirect (new 302) */
                            au->reloc = 1;
                            break;
                        case 400: /* bad request */
                        case 505: /* http version not supported */
                            if (!au->url->host->info->is_http11)
                                return errm(au->url, "!sever failed to parse request for $u");
                            prx(NFO,
                                "falling back to HTTP/1.0 for host '%s'\n",
                                au->url->host->name);
                            au->url->host->info->is_http11 = 0;
                            return RT_AGAIN;
                        case 401:
                            return errm(au->url, "!need authorization for $u");
                        /* the following two are theoretically fatal errors,
                           but on some servers they indicate temporary
                           failure ... strange ... */
                        case 403: /* access denied */
                            {
                                static const char msg[] = "!access to $u denied";
                                return errm(au->url,
                                            au->url->parm->opt->http_err_trans ?
                                            msg + 1 : msg);
                            }
                        case 404: /* not found */
                            {
                                static const char msg[] = "!$u not found";
                                return errm(au->url,
                                            au->url->parm->opt->http_err_trans ?
                                            msg + 1 : msg);
                            }
                        case 407: /* proxy auth required */
                            au->proxy->host = 0; /* mark dead */
                            return RT_AGAIN;
                        case 503: /* service unavailable - connection refused, etc. */
                            return RT_REFUSED;
                        case 504: /* gateway timeout - server not responding */
                            return RT_TIMEOUT;
                        default:
                            return errm(au->url,
                                        "unrecognised HTTP status '%s' for $u",
                                        bufp + a);
                        }
                }
            } else { /* have_result */
                /* end of headers? */
                if (!l) {
                    if (au->http_result_code >= 200)
                        break;
                    else {
                        au->http_result_code = 0;
                        dbg(HDR, ("awaiting next header after 1xx response.\n"));
                        continue;
                    }
                }
                /* continued header? */
                if (bufp[a] <= ' ')
                    continue;
                /* save requested */
                sh = &au->url->parm->opt->save_headers;
                for (u = 0; u < sh->nents; u++)
                    if (!strncasecmp(bufp + a, ((char **)sh->ents)[u],
                                     strlen(((char **)sh->ents)[u]))) {
                        if (au->hdrssiz < au->hdrslen + e - a + 1) {
                            nsiz = au->hdrslen * 2 + e - a + 1;
                            if (!(nbuf = mrealloc(au->headers, nsiz)))
                                break;
                            au->headers = nbuf;
                            au->hdrssiz = nsiz;
                        }
                        memcpy(au->headers + au->hdrslen, bufp + a, e - a);
                        au->hdrslen += e - a;
                        au->headers[au->hdrslen++] = '\n';
                        break;
                    }
                /* split header name and content.
                   NOTE(review): the split is done at the first blank, so
                   a field written without blank after the colon is not
                   recognised below. */
                for (o = a; o < e && bufp[o] > ' '; o++)
                    ;
                /* only step over a separator that is really there; for a
                   line without content o stays at the terminator (e), so
                   that bufp + o is an empty string within this line */
                if (o < e)
                    bufp[o++] = '\0';
                for (; o < e && bufp[o] <= ' '; o++)
                    ;
                /* handle header */
                if (au->reloc) {
                    if (!strcasecmp(bufp + a, "Location:")) {
                        prx(NFO, "relocation from http://%s/%s to %s\n",
                            au->url->host->name, au->url->local_part,
                            bufp + o);
                        if (au->url->relocs < 5) {
                            parse_add_url(bufp + o, e - o,
                                          au->url->referer, au->url->parm,
                                          au->url->is_top_dir,
                                          au->url->is_requisite,
                                          au->url->relocs + 1,
                                          au->url->link_depth);
                        } else
                            prx(ERR,
                                "%s exceeds maximal redirection count!\n",
                                bufp + o);
                        return RT_SKIP;
                    }
                    /* needn't check other headers when redirect encountered */
                } else if (!strcasecmp(bufp + a, "Last-Modified:")) {
                    if ((au->file_time = parseHTTPdate(bufp + o)) == BAD_DATE)
                        prx(WRN, "unrecognised date format '%s'", bufp + o);
                } else if (!strcasecmp(bufp + a, "Content-Length:"))
                    sscanf(bufp + o, SOFFT, &(au->size_total));
                else if (!strcasecmp(bufp + a, "Content-Type:")) {
                    /* <ctype.h> functions need an unsigned char value */
                    if (!strncasecmp(bufp + o, "text/html", 9) &&
                        !isalpha((unsigned char)bufp[o + 9]))
                        au->content_is_html = 1;
                } else if (!strcasecmp(bufp + a, "Content-Range:")) {
                    /* The Content-Range string should look somewhat like
                       this: "bytes 250260-664041471/664041472" */
                    off_t rs, re, rt;
                    if (sscanf(bufp + o, "bytes "SOFFT"-"SOFFT"/"SOFFT,
                               &rs, &re, &rt) != 3) {
                        return errm(au->url,
                                    "unrecognized Content-Range for $u");
                    }
                }
            } /* have_result */
        } /* main header loop */

        if (au->reloc) /* no relocation url found */
            return errm(au->url, "missing new location while redirecting $u");
        if (au->http_result_code == 304) { /* Not Modified */
            if (needs_recurse(au))
                recurse_file(au->url, au->disposition);
            return RT_DONE; /* would HR_SKIP be more appropriate? */
        }
        if (au->size_total) {
            if (au->url->parm->opt->max_bytes &&
                au->size_total > au->url->parm->opt->max_bytes)
                au->size_total = au->url->parm->opt->max_bytes;
            total_bytes += au->size_total; /* update statistics */
        }
        au->http_done_header = 1;
        bufp += p; /* let the header vanish */
        len -= p;
        alen = len;
    } else /* done_header */
        alen = orglen;

    /* http message body */
    if (au->file_created) {
        if (au->f != -1)
            /* If the file is already open, let's just write to it ... */
            fi = au->f;
        else
            /* file is switched */
            if ((fi = mfopen(au, O_WRONLY | _O_BINARY)) < 0)
                return errm(au->url, "!$u: cannot open %s for appending: %s",
                            au->disposition, strerror(errno));
    } else { /* no attempt to open the file till now */
        au->file_created = 1;
        if (au->file_off && needs_recurse(au)) {
            if ((fi = mfopen(au, O_RDWR)) < 0)
                return errm(au->url,
                            "!$u: cannot open %s for reading and appending: %s",
                            au->disposition, strerror(errno));
            recurse_pfile(au->url, fi, &bufp, &len);
        } else {
            if ((fi = mfopen(au, au->file_off ? O_WRONLY | _O_BINARY :
                             O_WRONLY | O_CREAT | O_TRUNC | _O_BINARY)) < 0)
                return errm(au->url, "!$u: cannot create %s: %s",
                            au->disposition, strerror(errno));
        }
    }

    /* scan the buffer for references */
    if (needs_recurse(au)) {
        int done = recurse_buff(au->url, bufp, len, orglen);
        if (orglen && !save_buff(au, bufp + done, len - done))
            return RT_RETRY;
    }

    if (orglen) {
        int retaf;

        /* point at "fresh" data */
        bufp += len - alen;
        /* hard file size limitation */
        if (au->url->parm->opt->max_bytes &&
            au->file_off + alen >= au->url->parm->opt->max_bytes) {
            alen = au->url->parm->opt->max_bytes - au->file_off;
            /* file already beyond the limit: write nothing rather than
               passing a negative length to write() */
            if (alen < 0)
                alen = 0;
            retaf = 0;
        } else
            retaf = 1;
        /* write the buffer to disk */
        lseek(fi, au->file_off, SEEK_SET);
        if (write(fi, bufp, alen) != alen)
            die(1, "Write error! Disk full?\n");
        /* Update the counters and statistics: */
        au->file_off += alen;
        au->size_fetched += alen;
        if (!au->size_total)
            total_bytes += alen;
        fetched_bytes += alen;
        if (max_bytes && fetched_bytes > max_bytes)
            byebye("byte quota exceeded!");
        if (retaf)
            return RT_OK;
    }

    /* file is complete -> rename it.
       the final name is the first au->displen characters of the
       working name, optionally followed by a running number. */
    touch(au);
    memcpy(buf, au->disposition, au->displen);
    if (au->url->parm->opt->enumerate_files)
        sprintf(buf + au->displen, "%d.puf",
                ++au->url->parm->opt->disp_path->file_num);
    else
        buf[au->displen] = '\0';
    rename(au->disposition, buf);

    /* save headers */
    if (au->headers) {
        strcat(buf, ".hdr");
        /* on handle shortage try once more after stealing a handle */
        if ((fi = open(buf, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0 &&
            ((errno != ENFILE && errno != EMFILE) || economize_files || !free_fd() ||
             (fi = open(buf, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0))
            return errm(au->url, "!$u: cannot create %s: %s",
                        buf, strerror(errno));
        write(fi, au->headers, au->hdrslen);
        close(fi);
    }
    return RT_DONE;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */