/* $Id: http_conn.c,v 1.10 2004/02/13 15:36:23 ossi Exp $ *
 *
 * puf 0.9  Copyright (C) 2000-2004 by Oswald Buddenhagen <puf@ossi.cjb.net>
 * based on puf 0.1.x (C) 1999,2000 by Anders Gavare <gavare@hotmail.com>
 *
 * You may modify and distribute this code under the terms of the GPL.
 * There is NO WARRANTY of any kind. See COPYING for details.
 *
 * http_conn.c - prepare creation of a connection
 *
 */

#include "puf.h"

#include <string.h>

/*  find a (working) ip for a host  */
static int 
get_ip(host_t *h, int fail_no_wait)
{
    hinfo_t *hi = h->info;
    int i, j;

    dbg(CON, ("Want to connect '%s' ... ", hi->name));
    if (!hi->num_ips) {
	dbge(CON, ("already given up!\n"));
	return -2;
    }
    for (i = j = hi->num_ips; i; i--) {
	if (++hi->cur_ip >= hi->num_ips)
	    hi->cur_ip = 0;
	if (hi->ips[hi->cur_ip].last_errt == 3)
	    j--;
	else {
	    if (hi->ips[hi->cur_ip].retry_time <= cur_tv.tv_sec) {
		dbge(CON, ("ok\n"));
		return hi->cur_ip;
	    }
	}
    }
    if (fail_no_wait) {
	if (++hi->cur_ip >= hi->num_ips)
	    hi->cur_ip = 0;
	dbge(CON, ("forced\n"));
	return hi->cur_ip;
    }
    dbge(CON, ("\n"));
    if (!j) {
	prx(ERR, "Giving up host '%s'!\n", hi->name);
	hi->num_ips = 0;
	return -2;
    }
/*    prx(WRN, "No valid IP address for '%s' by now!\n", hi->name); */
    return -1;
}

/*  characters that activate_url() rewrites as "#xx" (two hex digits)
    when quoting local file names. The user prepends "#" to this list
    and passes sizeof(FAT_BAD_CHARS) as search length, which covers
    exactly the '#' plus the characters below and leaves out the
    terminating NUL.  */
#define FAT_BAD_CHARS "\\:*?\"<>|"

/*  prepare url for connection:
    - find working host
    - find working proxy
    - create a "shadow" element
    - calculate a local disposition for the file

    u    the url to activate
    aup  receives the newly allocated shadow element on RT_OK

    returns:
      RT_OK      *aup is set; the element carries the chosen address
		 indices, the proxy (or 0) and the local file name with
		 PART_EXT appended
      RT_AGAIN   the url's own proxy is not ready yet, or the host (or
		 a strict proxy) has no usable address at the moment
      RT_RETRY   still waiting for proxies from the proxy list, or the
		 allocation of the shadow element failed
      RT_SKIP    the local file exists and the update mode says to
		 leave it alone
      RT_GIVEUP  host or strict proxy is dead, no proxy from the list
		 is left, clone_parm() failed, or the local file name
		 does not fit into the name buffer
*/
int
activate_url(url_t *u, aurl_t **aup)
{
    static int cur_tnum;	/*  counter for enumerated file names  */
    char buf[SHORTSTR], *disp;
    struct stat statbuf;
    unsigned dl, wil;		/*  unsigned will catch -1 also  */
    int ipidx, pipidx, i, ql;
    proxy_t *proxy, *prox;
    aurl_t *au;
    off_t foff;
    time_t ftime;

    checken("activate_url (top)");

    /*  first try the proxy that is bound to this url, if any  */
    if (u->parm->proxy) {
	proxy = u->parm->proxy;
	if (!proxy->ready)
	    return RT_AGAIN;
	if (!proxy->host ||	/* proxy dead (needed auth) */
	    !proxy->host->info)	/* host dead (lookup failed) */
	    goto pfail;
	else {
	    pipidx = get_ip(proxy->host, u->parm->opt->fail_no_wait);
	    if (pipidx < 0) {
		if (pipidx == -1 && u->parm->strictproxy)
		    return RT_AGAIN;
	      pfail:
		if (u->parm->strictproxy)
		    return RT_GIVEUP;
		/*  not strict: detach the proxy from this url's parms
		    and continue without it  */
		if (!clone_parm(u))
		    return RT_GIVEUP;
		u->parm->proxy = 0;
		proxy = 0;
		pipidx = 0;
	    }
	}
    } else {
	proxy = 0;
	pipidx = 0;
    }

    /*  no proxy so far: pick one from the configured proxy list  */
    if (!proxy && u->parm->opt->proxies.nents) {
	int score, tscor, round, rtval;
	u_int pidx;

	if (all_proxy_wait && waiting_proxies)
	    return RT_RETRY;
	/* 
	 * we try the best proxy left in every pass until we find one 
	 * that actually works.
	 */
	for (round = 0; ; round++) {
	    for (proxy = 0, score = INT_MAX, rtval = RT_GIVEUP,
		 pidx = 0; pidx < u->parm->opt->proxies.nents; pidx++)
	    {
		prox = ((proxy_t **)u->parm->opt->proxies.ents)[pidx];
		if (!prox->ready) {
		    rtval = RT_RETRY;
		    continue;
		}
		if (!prox->host ||		/* proxy dead (needed auth) */
		    !prox->host->info ||	/* host dead (lookup failed) */
		    !prox->host->info->num_ips)	/* host dead (connects failed) */
		    continue;
		/*  the first pass marks every live proxy as candidate;
		    later passes skip those that lost the mark below  */
		if (!round)
		    prox->host->info->maybe = 1;
		else if (!prox->host->info->maybe) {
		    rtval = RT_RETRY;
		    continue;
		}
		/*  lowest weighted score wins  */
		tscor = prox->score * 100 / prox->ratio;
		if (tscor < score) {
		    score = tscor;
		    proxy = prox;
		}
	    }
	    if (!proxy)
		return rtval;
	    pipidx = get_ip(proxy->host, u->parm->opt->fail_no_wait);
	    if (pipidx >= 0)
		break;
	    /*  no address for this one; exclude it from the next pass  */
	    proxy->host->info->maybe = 0;
	}
    }

    ipidx = get_ip(u->host, u->parm->opt->fail_no_wait);
    if (ipidx == -1)
	return RT_AGAIN;	/*  transient server problem  */
    if (ipidx < 0) {
	errm(u, "!Giving up $u (host given up)");
	return RT_GIVEUP;
    }

    foff = 0;
    ftime = 0;

    /*  start the local name with the disposition directory, if set  */
    dl = 0;
    if (u->parm->opt->disp_path->path[0]) {
	dl = snprintf(buf, sizeof(buf), "%s/", u->parm->opt->disp_path->path);
	if (dl >= sizeof(buf) - 1)
	    return RT_GIVEUP;
    }

    /*  decide about disk file name for the url  */
    if (u->parm->opt->enumerate_files) {
	int nl;

	wil = dl;
	nl = snprintf(buf + dl, sizeof(buf) - dl, "t%d", ++cur_tnum);
	/*  the name was truncated or leaves no room for PART_EXT;
	    going on would make the PART_EXT copy below overrun buf  */
	if (nl < 0 || dl + nl >= sizeof(buf) - sizeof(PART_EXT))
	    return RT_GIVEUP;
	dl += nl;
    } else {
	if (u->parm->disposition) {
	    /*  explicit disposition; an absolute one drops the prefix  */
	    if (u->parm->disposition[0] == '/')
		dl = 0;
	    wil = strlen(u->parm->disposition) + 1;
	    if (dl + wil > sizeof(buf) - sizeof(PART_EXT))
		return RT_GIVEUP;
	    memcpy(buf + dl, u->parm->disposition, wil);
	    dl += wil - 1;
	} else {
	    /*  directory part; an overlong result is caught by the
		length check at the top of the copy loop below  */
	    if (u->parm->opt->dir_mode == DIRS_NONE);
	    else if (u->parm->opt->dir_mode == DIRS_ALWAYS ||
		     u->parm->opt->follows_max >= HOST_RECURSIVE ||
		     (u->parm->opt->follows_max >= SUBDIR_RECURSIVE && 
		      u->disp_pathoff < 0))
		dl +=
		    snprintf(buf + dl, sizeof(buf) - dl, "%s/%.*s",
			     u->host->info->lname, u->path_len,
			     u->local_part);
	    else if (u->parm->opt->follows_max >= SUBDIR_RECURSIVE)
		dl +=
		    snprintf(buf + dl, sizeof(buf) - dl, "%.*s",
			     u->path_len - u->disp_pathoff,
			     u->local_part + u->disp_pathoff);

	    /*  file part; fall back to the index file name if the url
		has nothing behind its path  */
	    disp = u->local_part[u->path_len] ?
		u->local_part + u->path_len : 
		u->parm->opt->index_filename ? 
		u->parm->opt->index_filename : DEFAULT_INDEX_FILE_NAME;

	    /*  append it, turning slashes into '!'  */
	    for (;; disp++) {
		if (dl >= sizeof(buf) - sizeof(PART_EXT))
		    return RT_GIVEUP;
		if (!*disp)
		    break;
		buf[dl++] = *disp == '/' ? '!' : *disp;
	    }
#ifndef HAVE_CYGWIN
	    if (u->parm->opt->fat_quotes)
#endif
	    {
		/*  quote '#' and FAT_BAD_CHARS as "#xx". The name is
		    rewritten back to front into the tail of buf, so
		    growing it never overwrites unread input; ql is the
		    start of the quoted copy.  */
		for (i = dl, ql = sizeof(buf) - sizeof(PART_EXT); --i >= 0; )
		{
		    unsigned char c = buf[i];
		    static const char hextab[] = "0123456789abcdef";
		    if (memchr("#"FAT_BAD_CHARS, c, sizeof(FAT_BAD_CHARS))) {
			if ((ql -= 3) < i)
			    return RT_GIVEUP;
			buf[ql + 2] = hextab[c & 15];
			buf[ql + 1] = hextab[c >> 4];
			buf[ql] = '#';
		    } else {
			if (--ql < i)
			    return RT_GIVEUP;
			buf[ql] = c;
		    }
		}
		dl = sizeof(buf) - sizeof(PART_EXT) - ql;
		/*  source and destination overlap once the name fills
		    more than half of the usable buffer, so memcpy()
		    must not be used here  */
		memmove(buf, buf + ql, dl);
	    }
	    buf[dl] = 0;
	}
	wil = dl;
    }

    /*  decide if we want to continue a download or skip the file at all  */
    if (u->parm->opt->update_mode != EX_CLOBBER) {
	if (!stat(buf, &statbuf)) {
	    if (u->parm->opt->update_mode == EX_UPDATE)
		/*  mark it as update candidate  */
		ftime = statbuf.st_mtime;
	    else {		/*  EX_CONTINUE & EX_NO_CLOBBER  */
		if (u->parm->opt->update_mode == EX_NO_CLOBBER)
		    prx(WRN, "file %s exists\n", buf);
		else if (u->parm->opt->follows_max > NOT_RECURSIVE)
		    /*  scan the file for links. we scan the file with no
		       regard to its content type - we simply don't know it.
		       extension-based type detection is too unreliable.  */
		    recurse_file(u, buf);
		return RT_SKIP;
	    }
	} else {
	    /*  no complete file; look for a partial download  */
	    memcpy(buf + dl, PART_EXT, sizeof(PART_EXT));
	    if (!stat(buf, &statbuf)) {
		if (u->parm->opt->update_mode == EX_NO_CLOBBER) {
		    prx(WRN, "file %s exists\n", buf);
		    return RT_SKIP;
		} else {	/*  EX_UPDATE & EX_CONTINUE  */
		    /*  mark as continuation candidate  */
		    foff = statbuf.st_size;
		    ftime = statbuf.st_mtime;
		}
	    }
	}
    }

    /*  the element carries the name plus PART_EXT behind it  */
    if (!(au = mmalloc(sizeof(*au) + dl + sizeof(PART_EXT))))
	return RT_RETRY;

    au->file_off = foff;
    au->file_time = ftime;
    au->url = u;
    /*  for enumerated names wil is the length of the directory prefix,
	otherwise the length of the whole name  */
    au->displen = wil;
    memcpy(au->disposition, buf, dl);
    memcpy(au->disposition + dl, PART_EXT, sizeof(PART_EXT));

    au->f = -1;
    au->size_total = 0;
    au->size_fetched = 0;
    au->buffer = NULL;
    au->offset = 0;
    au->size = 0;
    au->http_result_code = 0;
    au->http_done_header = 0;
    au->content_is_html = 0;
    au->reloc = 0;
    au->file_created = 0;

    au->headers = NULL;
    au->hdrslen = au->hdrssiz = 0;

    au->ipidx = ipidx;
    au->pipidx = pipidx;
    au->proxy = proxy;

    checken("activate_url (pre-end)");

    dbg(CON, ("activated %s/%s - ipidx: %d  proxy: '%s'  pipidx: %d\n", au->url->host->name, au->url->local_part, ipidx, proxy ? proxy->host->name : "(null)", pipidx));

    *aup = au;
    return RT_OK;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */