/* $Id: http_rsp.c,v 1.12 2004/03/07 12:48:20 ossi Exp $ *
 *
 * puf 0.9  Copyright (C) 2000-2004 by Oswald Buddenhagen <puf@ossi.cjb.net>
 * based on puf 0.1.x (C) 1999,2000 by Anders Gavare <gavare@hotmail.com>
 *
 * You may modify and distribute this code under the terms of the GPL.
 * There is NO WARRANTY of any kind. See COPYING for details.
 *
 * http_rsp.c - receive and process http response message
 *
 */

#include "puf.h"


/*  when non-zero, mmfopen() keeps the handle it opens only until its
    next call (closing it there) instead of storing it for the caller,
    and no attempt is made to steal handles via free_fd()  */
int economize_files;

/*  make sure every directory component of a path exists.
    the string is cut at each '/' (except one in the very first
    position) just long enough to mkdir() the prefix, then restored.
    the result of mkdir() is ignored on purpose - most of the time
    the directory is already there.  */
static void 
create_dir(char *buf)
{
    char *cur;

    for (cur = buf; *cur; cur++) {
	/*  a leading slash has no prefix worth creating  */
	if (cur == buf || *cur != '/')
	    continue;
	*cur = '\0';
	mkdir(buf, 0777);
	*cur = '/';
    }
}


/*  open a file.
    if the open fails for lack of handles, one handle is stolen via
    free_fd() (once, and never in economize mode) and the open is
    repeated; if it fails with ENOENT, the directories in the path
    are created (once) and the open is repeated.
    in economize mode the handle is remembered internally and closed
    on the next call; otherwise it is stored to *f.
    returns the handle, -2 when out of handles, -1 on other errors.  */
int 
mmfopen(char *name, int flags, int *f)
{
    static int kept_fd = -1;
    int fd, stole_fd = 0, made_dirs = 0;

    /*  drop the handle left over from the previous economize-mode call  */
    if (kept_fd != -1) {
	close(kept_fd);
	kept_fd = -1;
    }

    for (;;) {
	fd = open(name, flags, 0666);
	if (fd >= 0)
	    break;

	if (errno == ENFILE || errno == EMFILE) {
	    if (stole_fd || economize_files || !free_fd())
		return -2;
	    stole_fd = 1;
	    continue;
	}

	if (errno != ENOENT || made_dirs)
	    return -1;
	create_dir(name);
	made_dirs = 1;
    }

    if (economize_files) {
	kept_fd = fd;
	return fd;
    }
    *f = fd;
    return fd;
}


/*  open the target file of a transfer (au->disposition) through
    mmfopen(), handing it au->f as the place to store the handle.
    returns whatever mmfopen() returns.  */
static int 
mfopen(aurl_t *au, int flags)
{
    int *slot = &au->f;

    return mmfopen(au->disposition, flags, slot);
}

/*  try to "steal" a handle from an open target file.
    goes over the entries of queue_urls_reply (lnq_iterate is presumably
    the list iteration macro - its definition is not in this file) and
    closes the first file handle found open, marking it as closed (-1)
    in its aurl_t.
    returns 1 if a handle was released, 0 if none was open.  */
int 
free_fd()
{
    lnq_iterate(queue_urls_reply, aurl_t, au, {
        if (au->f != -1) {
            close(au->f);
            /*  handle_reply() reopens the file when it finds f == -1  */
            au->f = -1;
            return 1;
        }
    });
    return 0;
}


/*  save data to the overlap buffer (au->buffer) and record its length
    in au->offset.
    note, that we possibly are saving contents of the previous buffer,
    i.e. buf may point into au->buffer itself - therefore the in-place
    copy has to tolerate overlapping regions.
    the buffer is (re)allocated when it is smaller than
    max(len, OVERLAPLEN); au->size is only updated once the allocation
    succeeded, so size and buffer never disagree.
    returns 1 on success; on allocation failure the overlap buffer is
    dropped completely (buffer, size and offset reset) and 0 is
    returned.  */
static int 
save_buff(aurl_t *au, char *buf, int len)
{
    char *bp;
    int siz;

    siz = len < OVERLAPLEN ? OVERLAPLEN : len;
    if (siz > au->size) {
	if (!(bp = mmalloc(siz))) {
	    free(au->buffer);
	    au->buffer = 0;
	    au->size = au->offset = 0;
	    return 0;
	}
	/*  copy before freeing: buf may live inside the old buffer  */
	memcpy(bp, buf, len);
	free(au->buffer);
	au->buffer = bp;
	au->size = siz;
    } else {
	/*  source may overlap the destination  */
	memmove(au->buffer, buf, len);
    }
    au->offset = len;
    return 1;
}


/*  tell whether the document of this transfer has to be scanned for
    references. that is the case if the recursion depth limit (if any)
    is not reached yet, following of hrefs or fetching of sources is
    enabled, and the content is html (or treated as such by force).
    returns 1 or 0.  */
static int 
needs_recurse(aurl_t *au)
{
    /*  depth limit set and already reached  */
    if (au->url->parm->opt->max_recurse &&
	au->url->link_depth >= au->url->parm->opt->max_recurse)
	return 0;

    /*  neither links nor embedded sources are to be followed  */
    if (au->url->parm->opt->follow_href <= NOT_RECURSIVE &&
	au->url->parm->opt->follow_src <= DONT_FETCH)
	return 0;

    return au->content_is_html || au->url->parm->opt->force_html;
}

/*  handle http reply message.

    reads one chunk from au->socket and processes it. as long as the
    header is not complete, header lines are parsed; a partial line at
    the end of the chunk is kept in the overlap buffer (au->buffer) and
    glued in front of the next chunk. body data is written to the target
    file at au->file_off and, if needs_recurse() says so, scanned for
    references. a read of zero bytes ends the transfer; the file is then
    renamed to its final name and the saved headers are written to a
    ".hdr" file next to it.

    returns RT_OK when more data is expected, RT_DONE when the file is
    finished (or was not modified), otherwise one of the other RT_*
    codes or the result of errm().  */
int 
handle_reply(aurl_t *au)
{
    char *bufp, *nbuf;
    ptrarr_t *sh;
    int fi, a, e, l, o, p, len, alen, orglen, nsiz;
    unsigned u;
    char databuf[OVERLAPLEN + MAXBUFSIZE], buf[SHORTSTR];

    /*  Receive some data; room for the overlap is left in front of it  */
    bufp = databuf + OVERLAPLEN;
    if ((orglen = read(au->socket, bufp, MAXBUFSIZE)) < 0)
	return errm(au->url, "data read for $u failed");

    len = orglen + au->offset;
    if (orglen) {
	/*  Copy overlap buffer from last read  */
	if (au->offset) {
	    if (au->offset > OVERLAPLEN) {
		/*  overlap does not fit in front of databuf: append
		    the fresh data to the heap buffer and work there  */
		if (len > MAXHEADERLEN || !(nbuf = mrealloc(au->buffer, len))) {
		    return errm(au->url, "reply header for $u has insane length");
		} else {
		    memcpy(nbuf + au->offset, bufp, orglen);
		    au->buffer = bufp = nbuf;
		    au->size = len;
		}
	    } else {
		bufp -= au->offset;
		memcpy(bufp, au->buffer, au->offset);
	    }
	}
    } else {
	/* note: close with size_fetched < size_total is accepted.
	   this is basically incorrect, but common practice. */
	/* handle remaining data in overlap buffer  */
	bufp = au->buffer;
    }

    /*  first the http message header  */
    if (!au->http_done_header) {
	/*  p: scan position, a: start of current line,
	    e: end of current line with trailing whitespace stripped  */
	for (p = 0;;) {
	    for (a = p;;) {
		/*  reached end-of-buffer before header end?  */
		if (p >= len) {
		    if (!orglen)
			return errm(au->url, "broken reply header for $u");
		    return save_buff(au, bufp + a, len - a) ? RT_OK : RT_RETRY;
		}
		if (bufp[p++] == '\n')
		    break;
	    }
	    for (e = p - 1; e > a && bufp[e - 1] <= ' '; e--);
	    l = e - a;
	    bufp[e] = '\0';
	    dbg(HDR, ("read header: %s\n", bufp + a));
	    if (!au->http_result_code) {
		/* empty lines before reply header are incorrect, 
		   but should be handled for robustness */
		if (l) {
		    /*  get result code. the code is expected at offset 9
			("HTTP/x.y "), so a shorter line cannot be valid
			and must not be indexed there  */
		    if (l < 10 || strncasecmp(bufp + a, "http/", 5))
			return errm(au->url, "broken reply header for $u");
		    au->http_result_code = atoi(bufp + a + 9);
		    /*  1xx responses are skipped, see end of headers below  */
		    if (bufp[a + 9] != '1')
		    switch (au->http_result_code) {
			case 200:	/*  ok  */
			    au->file_off = 0;
			    /* fallthrough */
			case 206:	/*  partial content  */
			case 304:	/*  not modified  */
			    break;
			case 300:	/*  multiple choices  */
			case 301:	/*  moved permanently  */
			case 302:	/*  moved temporarily  */
			case 307:	/*  temporary redirect (new 302)  */
			    au->reloc = 1;
			    break;
			case 400:	/*  bad request  */
			case 505:	/*  http version not supported  */
			    if (!au->url->host->info->is_http11)
				return errm(au->url, "!sever failed to parse request for $u");
			    prx(NFO,
				"falling back to HTTP/1.0 for host '%s'\n",
				au->url->host->name);
			    au->url->host->info->is_http11 = 0;
			    return RT_AGAIN;
			case 401:
			    return errm(au->url, "!need authorization for $u");
			/*  the following two are theoretically fatal errors,
			    but on some servers they indicate temporary
			    failure ... strange ...  */
			case 403:	/*  access denied  */
			  {
			    static const char msg[] = "!access to $u denied";
			    return errm(au->url,
					au->url->parm->opt->http_err_trans ?
						msg + 1 : msg);
			  }
			case 404:	/*  not found  */
			  {
			    static const char msg[] = "!$u not found";
			    return errm(au->url,
					au->url->parm->opt->http_err_trans ?
						msg + 1 : msg);
			  }
			case 407:	/*  proxy auth required  */
			    au->proxy->host = 0;	/* mark dead  */
			    return RT_AGAIN;
			case 503:	/*  service unavailable - connection refused, etc.  */
			    return RT_REFUSED;
			case 504:	/*  gateway timeout - server not responding  */
			    return RT_TIMEOUT;
			default:
			    return errm(au->url, 
					"unrecognised HTTP status '%s' for $u",
					bufp + a);
		    }
		}
	    } else {		/*  have_result  */
		/*  end of headers?  */
		if (!l) {
		    if (au->http_result_code >= 200)
			break;
		    else {
			au->http_result_code = 0;
			dbg(HDR, ("awaiting next header after 1xx response.\n"));
			continue;
		    }
		}
		/*  continued header?  */
		if (bufp[a] <= ' ')
		    continue;
		/*  save requested  */
		sh = &au->url->parm->opt->save_headers;
		for (u = 0; u < sh->nents; u++)
		    if (!strncasecmp(bufp + a, ((char **)sh->ents)[u],
				    strlen(((char **)sh->ents)[u]))) {
			/*  room for the line plus the newline appended to it  */
			if (au->hdrssiz < au->hdrslen + e - a + 1) {
			    nsiz = au->hdrslen * 2 + e - a + 1;
			    if (!(nbuf = mrealloc(au->headers, nsiz)))
				break;
			    au->headers = nbuf;
			    au->hdrssiz = nsiz;
			}
			memcpy(au->headers + au->hdrslen, bufp + a, e - a);
			au->hdrslen += e - a;
			au->headers[au->hdrslen++] = '\n';
			break;
		    }
		/*  split header name and content.
		    if the line has no whitespace at all, o stays at e
		    (where the terminator already is), so that the content
		    is the empty string instead of whatever follows
		    the line  */
		for (o = a; o < e && bufp[o] > ' '; o++);
		if (o < e)
		    bufp[o++] = '\0';
		for (; o < e && bufp[o] <= ' '; o++);
		/*  handle header  */
		if (au->reloc) {
		    /*  an empty Location is treated like a missing one  */
		    if (o < e && !strcasecmp(bufp + a, "Location:")) {
			prx(NFO, "relocation from http://%s/%s to %s\n",
			    au->url->host->name, au->url->local_part,
			    bufp + o);
			if (au->url->relocs < 5) {
			    parse_add_url(bufp + o, e - o,
					  au->url->referer, au->url->parm,
					  au->url->is_top_dir,
					  au->url->is_requisite,
					  au->url->relocs + 1, 
					  au->url->link_depth);
			} else
			    prx(ERR,
				"%s exceeds maximal redirection count!\n",
				bufp + o);
			return RT_SKIP;
		    }
		    /*  needn't check other headers when redirect encountered  */
		} else if (!strcasecmp(bufp + a, "Last-Modified:")) {
		    if ((au->file_time = parseHTTPdate(bufp + o)) == BAD_DATE)
			prx(WRN, "unrecognised date format '%s'", bufp + o);
		} else if (!strcasecmp(bufp + a, "Content-Length:"))
		    sscanf(bufp + o, SOFFT, &(au->size_total));
		else if (!strcasecmp(bufp + a, "Content-Type:")) {
		    /*  isalpha() must not be fed a negative char  */
		    if (!strncasecmp(bufp + o, "text/html", 9) &&
			!isalpha((unsigned char)bufp[o + 9]))
			au->content_is_html = 1;
		} else if (!strcasecmp(bufp + a, "Content-Range:")) {
		    /* The Content-Range string should look somewhat like
		       this: "bytes 250260-664041471/664041472" */
		    off_t rs, re, rt;

		    /*  only the format is verified, the values are unused  */
		    if(sscanf(bufp + o, "bytes "SOFFT"-"SOFFT"/"SOFFT, 
			      &rs, &re, &rt) != 3) {
			return errm(au->url,
				    "unrecognized Content-Range for $u");
		    }
		}
	    }			/*  have_result  */
	}			/*  main header loop  */

	if (au->reloc)	/*  no relocation url found  */
	    return errm(au->url, "missing new location while redirecting $u");

	if (au->http_result_code == 304) {	/*  Not Modified  */
	    if (needs_recurse(au))
		recurse_file(au->url, au->disposition);
	    return RT_DONE;	/*  would HR_SKIP be more appropriate?  */
	}

	if (au->size_total) {
	    if (au->url->parm->opt->max_bytes && 
		au->size_total > au->url->parm->opt->max_bytes)
		au->size_total = au->url->parm->opt->max_bytes;
	    total_bytes += au->size_total;	/*  update statistics  */
	}
	au->http_done_header = 1;
	bufp += p;		/*  let the header vanish  */
	len -= p;
	alen = len;
    } else	/*  done_header  */
	alen = orglen;

    /*  http message body  */

    if (au->file_created) {
	if (au->f != -1)
	    /*  If the file is already open, let's just write to it ...  */
	    fi = au->f;
	else
	    /*  file is switched  */
	    if ((fi = mfopen(au, O_WRONLY | _O_BINARY)) < 0)
		return errm(au->url, "!$u: cannot open %s for appending: %s", 
			    au->disposition, strerror(errno));
    } else {	/*  no attempt to open the file till now  */
	au->file_created = 1;
	if (au->file_off && needs_recurse(au)) {
	    if ((fi = mfopen(au, O_RDWR)) < 0)
		return errm(au->url, 
			    "!$u: cannot open %s for reading and appending: %s", 
			    au->disposition, strerror(errno));
	    recurse_pfile(au->url, fi, &bufp, &len);
	} else {
	    if ((fi = mfopen(au, au->file_off ? O_WRONLY | _O_BINARY : 
			     O_WRONLY | O_CREAT | O_TRUNC | _O_BINARY)) < 0)
		return errm(au->url, "!$u: cannot create %s: %s", 
			    au->disposition, strerror(errno));
	}
    }

    /*  scan the buffer for references  */
    if (needs_recurse(au)) {
	int done = recurse_buff(au->url, bufp, len, orglen);
	if (orglen && !save_buff(au, bufp + done, len - done))
	    return RT_RETRY;
    }

    if (orglen) {
	int retaf;

	/*  point at "fresh" data  */
	bufp += len - alen;

	/*  hard file size limitation  */
	if (au->url->parm->opt->max_bytes && 
	    au->file_off + alen >= au->url->parm->opt->max_bytes) {
	    alen = au->url->parm->opt->max_bytes - au->file_off;
	    /*  file is already beyond the limit: write nothing rather
		than handing a negative length to write()  */
	    if (alen < 0)
		alen = 0;
	    retaf = 0;
	} else
	    retaf = 1;

	/*  write the buffer to disk */
	lseek(fi, au->file_off, SEEK_SET);
	if (write(fi, bufp, alen) != alen)
	    die(1, "Write error! Disk full?\n");

	/*  Update the counters and statistics:  */
	au->file_off += alen;
	au->size_fetched += alen;
	if (!au->size_total)
	    total_bytes += alen;
	fetched_bytes += alen;
	if (max_bytes && fetched_bytes > max_bytes)
	    byebye("byte quota exceeded!");

	if (retaf)
	    return RT_OK;
    }

    /*  file is complete -> rename it  */
    /*  NOTE(review): nothing here checks that displen plus the suffixes
	appended below fits into buf[SHORTSTR] - confirm that the bound
	is enforced where the disposition is built  */
    touch(au);
    memcpy(buf, au->disposition, au->displen);
    if (au->url->parm->opt->enumerate_files) 
	sprintf(buf + au->displen, "%d.puf", 
		++au->url->parm->opt->disp_path->file_num);
    else
	buf[au->displen] = '\0';
    rename(au->disposition, buf);
    /*  save headers  */
    if (au->headers) {
	strcat(buf, ".hdr");
	/*  same handle stealing as in mmfopen(), but without
	    storing the handle anywhere  */
	if ((fi = open(buf, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0 &&
	    ((errno != ENFILE && errno != EMFILE) || economize_files || !free_fd() ||
	     (fi = open(buf, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0))
	    return errm(au->url, "!$u: cannot create %s: %s", 
			buf, strerror(errno));
	write(fi, au->headers, au->hdrslen);
	close(fi);
    }

    return RT_DONE;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */