/* $Id: http_conn.c,v 1.10 2004/02/13 15:36:23 ossi Exp $ *
 *
 * puf 0.9  Copyright (C) 2000-2004 by Oswald Buddenhagen
 * based on puf 0.1.x (C) 1999,2000 by Anders Gavare
 *
 * You may modify and distribute this code under the terms of the GPL.
 * There is NO WARRANTY of any kind. See COPYING for details.
 *
 * http_conn.c - prepare creation of a connection
 *
 */

#include "puf.h"

/*
 * Find a (working) ip for a host.
 *
 * Advances h->info->cur_ip round-robin over the host's address list and
 * picks the first address whose retry_time has already passed.
 *
 * h            - host whose address list (h->info) is examined
 * fail_no_wait - if non-zero and no address is usable right now, the next
 *                address in the rotation is returned anyway instead of -1/-2
 *
 * Returns:
 *   >= 0  index into h->info->ips of the address to use
 *   -1    no address is usable at the moment (retry times not yet reached)
 *   -2    the host has been given up (num_ips is, or has just been set to, 0)
 */
static int
get_ip(host_t *h, int fail_no_wait)
{
	hinfo_t *hi = h->info;
	int i, j;

	dbg(CON, ("Want to connect '%s' ... ", hi->name));

	if (!hi->num_ips) {
		dbge(CON, ("already given up!\n"));
		return -2;
	}

	/* i counts the addresses still to visit, j the ones not written off.
	   NOTE(review): last_errt == 3 presumably marks a permanent failure
	   of that address - confirm against the definition of last_errt. */
	for (i = j = hi->num_ips; i; i--) {
		if (++hi->cur_ip >= hi->num_ips)
			hi->cur_ip = 0;
		if (hi->ips[hi->cur_ip].last_errt == 3)
			j--;
		else {
			if (hi->ips[hi->cur_ip].retry_time <= cur_tv.tv_sec) {
				dbge(CON, ("ok\n"));
				return hi->cur_ip;
			}
		}
	}

	if (fail_no_wait) {
		/* caller does not want to wait: hand out the next address
		   in the rotation regardless of its state */
		if (++hi->cur_ip >= hi->num_ips)
			hi->cur_ip = 0;
		dbge(CON, ("forced\n"));
		return hi->cur_ip;
	}

	dbge(CON, ("\n"));

	if (!j) {
		/* every address was skipped by the last_errt test above */
		prx(ERR, "Giving up host '%s'!\n", hi->name);
		hi->num_ips = 0;
		return -2;
	}

	/* prx(WRN, "No valid IP address for '%s' by now!\n", hi->name); */
	return -1;
}

#define FAT_BAD_CHARS "\\:*?\"<>|"

/*
 * Prepare url for connection:
 *  - find working host
 *  - find working proxy
 *  - create a "shadow" element
 *  - calculate a local disposition for the file
 *
 * u   - the url to activate
 * aup - on RT_OK receives the newly allocated aurl_t; untouched otherwise
 *
 * Returns:
 *   RT_OK      *aup has been set up
 *   RT_SKIP    the local file already exists and the update mode says
 *              not to fetch it again
 *   RT_AGAIN   proxy not ready yet, or no address of the proxy/host is
 *              usable right now
 *   RT_RETRY   proxies are still pending, or memory allocation failed
 *   RT_GIVEUP  host/proxy given up, or the local file name does not fit
 *              into the name buffer
 */
int
activate_url(url_t *u, aurl_t **aup)
{
	static int cur_tnum;	/* running number for enumerate_files names */
	char buf[SHORTSTR], *disp;
	struct stat statbuf;
	unsigned dl, wil;	/* unsigned will catch -1 also */
	int ipidx, pipidx, i, ql;
	proxy_t *proxy, *prox;
	aurl_t *au;
	off_t foff;
	time_t ftime;

	checken("activate_url (top)");

	/* 1. a proxy explicitly attached to this url's parameters */
	if (u->parm->proxy) {
		proxy = u->parm->proxy;
		if (!proxy->ready)
			return RT_AGAIN;
		if (!proxy->host ||		/* proxy dead (needed auth) */
		    !proxy->host->info)		/* host dead (lookup failed) */
			goto pfail;
		else {
			pipidx = get_ip(proxy->host,
					u->parm->opt->fail_no_wait);
			if (pipidx < 0) {
				if (pipidx == -1 && u->parm->strictproxy)
					return RT_AGAIN;
			  pfail:
				if (u->parm->strictproxy)
					return RT_GIVEUP;
				/* drop the proxy from this url's parameters
				   and fall through to the generic selection */
				if (!clone_parm(u))
					return RT_GIVEUP;
				u->parm->proxy = 0;
				proxy = 0;
				pipidx = 0;
			}
		}
	} else {
		proxy = 0;
		pipidx = 0;
	}

	/* 2. no proxy yet: choose one from the configured proxy list */
	if (!proxy && u->parm->opt->proxies.nents) {
		int score, tscor, round, rtval;
		u_int pidx;

		if (all_proxy_wait && waiting_proxies)
			return RT_RETRY;

		/*
		 * we try the best proxy left in every pass until we find one
		 * that actually works.
		 */
		for (round = 0; ; round++) {
			for (proxy = 0, score = INT_MAX, rtval = RT_GIVEUP,
			     pidx = 0;
			     pidx < u->parm->opt->proxies.nents; pidx++)
			{
				prox = ((proxy_t **)
					u->parm->opt->proxies.ents)[pidx];
				if (!prox->ready) {
					rtval = RT_RETRY;
					continue;
				}
				if (!prox->host ||	/* proxy dead (needed auth) */
				    !prox->host->info || /* host dead (lookup failed) */
				    !prox->host->info->num_ips) /* host dead (connects failed) */
					continue;
				/* first pass marks every live proxy as a
				   candidate; later passes skip the ones that
				   were tried and had no usable address */
				if (!round)
					prox->host->info->maybe = 1;
				else if (!prox->host->info->maybe) {
					rtval = RT_RETRY;
					continue;
				}
				/* lowest weighted score wins */
				tscor = prox->score * 100 / prox->ratio;
				if (tscor < score) {
					score = tscor;
					proxy = prox;
				}
			}
			if (!proxy)
				return rtval;
			pipidx = get_ip(proxy->host,
					u->parm->opt->fail_no_wait);
			if (pipidx >= 0)
				break;
			proxy->host->info->maybe = 0;
		}
	}

	/* 3. address of the target host itself */
	ipidx = get_ip(u->host, u->parm->opt->fail_no_wait);
	if (ipidx == -1)
		return RT_AGAIN;	/* transient server problem */
	if (ipidx < 0) {
		errm(u, "!Giving up $u (host given up)");
		return RT_GIVEUP;
	}

	foff = 0;
	ftime = 0;

	/* 4. build the local file name in buf; dl is its current length */
	dl = 0;
	if (u->parm->opt->disp_path->path[0]) {
		dl = snprintf(buf, sizeof(buf), "%s/",
			      u->parm->opt->disp_path->path);
		if (dl >= sizeof(buf) - 1)
			return RT_GIVEUP;
	}

	/* decide about disk file name for the url */
	if (u->parm->opt->enumerate_files) {
		wil = dl;
		dl += snprintf(buf + dl, sizeof(buf) - dl, "t%d", ++cur_tnum);
		/* snprintf reports the untruncated length; make sure the
		   name and the PART_EXT suffix appended later really fit */
		if (dl >= sizeof(buf) - sizeof(PART_EXT))
			return RT_GIVEUP;
	} else {
		if (u->parm->disposition) {
			/* an absolute disposition replaces the path prefix */
			if (u->parm->disposition[0] == '/')
				dl = 0;
			wil = strlen(u->parm->disposition) + 1;
			if (dl + wil > sizeof(buf) - sizeof(PART_EXT))
				return RT_GIVEUP;
			memcpy(buf + dl, u->parm->disposition, wil);
			dl += wil - 1;
		} else {
			if (u->parm->opt->dir_mode == DIRS_NONE) {
				/* no directory component at all */
			} else if (u->parm->opt->dir_mode == DIRS_ALWAYS ||
				   u->parm->opt->follows_max >= HOST_RECURSIVE ||
				   (u->parm->opt->follows_max >= SUBDIR_RECURSIVE &&
				    u->disp_pathoff < 0)) {
				/* host name plus full remote directory;
				   an oversized result is caught by the
				   length check in the loop below */
				dl += snprintf(buf + dl, sizeof(buf) - dl,
					       "%s/%.*s",
					       u->host->info->lname,
					       u->path_len, u->local_part);
			} else if (u->parm->opt->follows_max >= SUBDIR_RECURSIVE) {
				/* remote directory from disp_pathoff on */
				dl += snprintf(buf + dl, sizeof(buf) - dl,
					       "%.*s",
					       u->path_len - u->disp_pathoff,
					       u->local_part + u->disp_pathoff);
			}

			/* file part: what follows the directory in the url,
			   else the configured or default index file name */
			disp = u->local_part[u->path_len] ?
				   u->local_part + u->path_len :
			       u->parm->opt->index_filename ?
				   u->parm->opt->index_filename :
				   DEFAULT_INDEX_FILE_NAME;
			for (;; disp++) {
				if (dl >= sizeof(buf) - sizeof(PART_EXT))
					return RT_GIVEUP;
				if (!*disp)
					break;
				/* slashes in the file part become '!' */
				buf[dl++] = *disp == '/' ? '!' : *disp;
			}

#ifndef HAVE_CYGWIN
			if (u->parm->opt->fat_quotes)
#endif
			{
				/*
				 * Quote '#' and the FAT_BAD_CHARS as #xx (hex).
				 * The name is rebuilt back to front at the top
				 * end of buf (ql is the write position) so no
				 * second buffer is needed.
				 */
				for (i = dl,
				     ql = sizeof(buf) - sizeof(PART_EXT);
				     --i >= 0; )
				{
					unsigned char c = buf[i];
					static const char hextab[] =
						"0123456789abcdef";

					/* the length is that of the string
					   without '#', which makes memchr
					   stop before the terminating NUL */
					if (memchr("#"FAT_BAD_CHARS, c,
						   sizeof(FAT_BAD_CHARS))) {
						if ((ql -= 3) < i)
							return RT_GIVEUP;
						buf[ql + 2] = hextab[c & 15];
						buf[ql + 1] = hextab[c >> 4];
						buf[ql] = '#';
					} else {
						if (--ql < i)
							return RT_GIVEUP;
						buf[ql] = c;
					}
				}
				dl = sizeof(buf) - sizeof(PART_EXT) - ql;
				/* source and destination lie in the same
				   array and overlap once dl > ql, so this
				   must be memmove, not memcpy */
				memmove(buf, buf + ql, dl);
			}
			buf[dl] = 0;
		}
		wil = dl;
	}

	/* decide if we want to continue a download or skip the file at all */
	if (u->parm->opt->update_mode != EX_CLOBBER) {
		if (!stat(buf, &statbuf)) {
			/* the complete file already exists */
			if (u->parm->opt->update_mode == EX_UPDATE) {
				/* mark it as update candidate */
				ftime = statbuf.st_mtime;
			} else {
				/* EX_CONTINUE & EX_NO_CLOBBER */
				if (u->parm->opt->update_mode == EX_NO_CLOBBER)
					prx(WRN, "file %s exists\n", buf);
				else if (u->parm->opt->follows_max > NOT_RECURSIVE)
					/* scan the file for links. we scan
					   the file with no regard to its
					   content type - we simply don't know
					   it. extension-based type detection
					   is too unreliable. */
					recurse_file(u, buf);
				return RT_SKIP;
			}
		} else {
			/* look for a partial download (name + PART_EXT) */
			memcpy(buf + dl, PART_EXT, sizeof(PART_EXT));
			if (!stat(buf, &statbuf)) {
				if (u->parm->opt->update_mode == EX_NO_CLOBBER) {
					prx(WRN, "file %s exists\n", buf);
					return RT_SKIP;
				} else {
					/* EX_UPDATE & EX_CONTINUE */
					/* mark as continuation candidate */
					foff = statbuf.st_size;
					ftime = statbuf.st_mtime;
				}
			}
		}
	}

	/* 5. create the "shadow" element; the name incl. PART_EXT is stored
	   in the trailing disposition array */
	if (!(au = mmalloc(sizeof(*au) + dl + sizeof(PART_EXT))))
		return RT_RETRY;

	au->file_off = foff;
	au->file_time = ftime;

	au->url = u;
	au->displen = wil;
	memcpy(au->disposition, buf, dl);
	memcpy(au->disposition + dl, PART_EXT, sizeof(PART_EXT));

	au->f = -1;
	au->size_total = 0;
	au->size_fetched = 0;
	au->buffer = NULL;
	au->offset = 0;
	au->size = 0;
	au->http_result_code = 0;
	au->http_done_header = 0;
	au->content_is_html = 0;
	au->reloc = 0;
	au->file_created = 0;
	au->headers = NULL;
	au->hdrslen = au->hdrssiz = 0;

	au->ipidx = ipidx;
	au->pipidx = pipidx;
	au->proxy = proxy;

	checken("activate_url (pre-end)");

	dbg(CON, ("activated %s/%s - ipidx: %d  proxy: '%s'  pipidx: %d\n",
		  au->url->host->name, au->url->local_part, ipidx,
		  proxy ? proxy->host->name : "(null)", pipidx));

	*aup = au;
	return RT_OK;
}