/*
 * $Id: getopts.c,v 1.15 2004/02/13 16:09:29 ossi Exp $
 *
 * puf 0.9 Copyright (C) 2000-2004 by Oswald Buddenhagen
 * based on puf 0.1.x (C) 1999,2000 by Anders Gavare
 *
 * You may modify and distribute this code under the terms of the GPL.
 * There is NO WARRANTY of any kind. See COPYING for details.
 *
 * getopts.c - command line parsing
 */

#include "puf.h"

/* Global string options set via O_RF_S entries of the table below. */
static char *url_prefix;	/* -B: prepended to every SPEC */
static char *disposition;	/* -O: file name for the next SPEC only */
static int enumerate_urls;	/* -xE: name files 1.puf, 2.puf, ... */

/*
 * What to do for an option.  The order must match onams[] below, which is
 * indexed by these values.
 */
enum {
    O_ST_I, O_ST_O, O_ST_CI, O_RF_S,
    O_LST_I, O_LST_O, O_LST_CI, O_LRF_S,
    O_HELP, O_DPATH, O_URLF, O_AHDR, O_SHDR, O_THROT,
    O_PRX, O_PRXF, O_BIND, O_BINDF, O_AGENT, O_AGENTF
};

/* Argument placeholder shown in the help screen, one per O_* value. */
static const char *const onams[] = {
    "NR", "NR", "", "STR",
    "NR", "NR", "", "STR",
    "", "STR", "FILE", "STR", "STR", "NR",
    "PRX", "FILE", "IP", "FILE", "STR", "FILE"
};

/* Some compilers, like SunOS4 cc, don't have offsetof in <stddef.h>. */
#ifndef offsetof
# define offsetof(type,ident) ((size_t)&(((type*)0)->ident))
#endif

/* Offset of a member of options_t, smuggled through the void* argptr slot. */
#define uo(o) ((void*)offsetof(options_t, o))

#define stringify(s) tostring(s)
#define tostring(s) #s

/* *INDENT-OFF* */
/*
 * Option table.  Entries with a null opt are section headings for the help
 * screen (todo is -1 for those).  For O_ST_* / O_RF_S entries argptr is the
 * address of a global; for O_L* entries it is an offset into *lopt (see uo).
 * argval is stored by flag options; defargval is what "<option>-" and
 * initlopt() restore for O_LST_CI / O_LST_I entries.
 */
static struct {
    const char *opt;
    int todo;
    void *argptr;
    int argval;
    int defargval;
    const char *desc;
} options[] = {
    {0, -1, 0, 0, 0,
     "\nSPEC format: URL[*disposition][^[^]proxy-URL]\n"
     "URL format: [http://][user:pass@]host[.domain][:port][/path]\n"
     "\nAll options except those marked as global have effect only on the following\n"
     "URLs. Their effect can be cancelled by specifying <original option>- without\n"
     "any parameters possibly required by the original option or by overriding them\n"
     "with another option with an opposite effect. All URL-local options can be\n"
     "reverted to their default state by specifying a single comma as an argument.\n"
     "\nWhat to download:"},
    {"p", O_LST_CI, uo(follow_src), NOT_RECURSIVE, DONT_FETCH,
     "Download page requisites from same directory"},
    {"pr", O_LST_CI, uo(follow_src), SUBDIR_RECURSIVE, DONT_FETCH,
     "Download page requisites also from subdirectories (implies -p)"},
    {"pr+", O_LST_CI, uo(follow_src), HOST_RECURSIVE, DONT_FETCH,
     "Download page requisites from whole server (implies -pr)"},
    {"pr++", O_LST_CI, uo(follow_src), GLOBAL_RECURSIVE, DONT_FETCH,
     "Download page requisites from whole net (implies -pr+)"},
    {"r", O_LST_CI, uo(follow_href), SUBDIR_RECURSIVE, NOT_RECURSIVE,
     "Recurse download into subdirectories (implies -pr)"},
    {"r+", O_LST_CI, uo(follow_href), HOST_RECURSIVE, NOT_RECURSIVE,
     "Recurse download across whole server (implies -r & -pr+)"},
    {"r++", O_LST_CI, uo(follow_href), GLOBAL_RECURSIVE, NOT_RECURSIVE,
     "Recurse download across whole net (implies -r+ & -pr++; caution!)"},
    {"ld", O_LST_I, uo(max_depth), 0, -1,
     "Limit directory nesting level to NR (with -r)"},
    {"l", O_LST_I, uo(max_recurse), 0, 0,
     "Limit recursion depth to NR (with -r)"},
    {"lb", O_LST_O, uo(max_bytes), 0, 0,
     "Download only first NR bytes of every SPEC"},
    {"xg", O_LST_CI, uo(inhibit_cgiget), -1, 0,
     "Allow recursion into URLs with ? signs (i.e., CGIs)"},
    {"ng", O_LST_CI, uo(inhibit_cgiget), 1, 0,
     "Disallow ?-URLs, even if given on the command line"},
    {"F", O_LST_CI, uo(force_html), 1, 0,
     "Treat all files as HTML (scan for links)"},
    {"B", O_RF_S, &url_prefix, 0, 0,
     "Prefix to add to every SPEC on the command line"},
    {"i", O_URLF, 0, 0, 0,
     "Read SPECs from FILE"},

    {0, -1, 0, 0, 0, "\nWhat to do with existing files:"},
    {"u", O_LST_CI, uo(update_mode), EX_UPDATE, EX_CLOBBER,
     "Update existing "/* "and delete obsolete "*/"files, continue partial"},
    {"c", O_LST_CI, uo(update_mode), EX_CONTINUE, EX_CLOBBER,
     "Continue download of partial files"},
    {"nc", O_LST_CI, uo(update_mode), EX_NO_CLOBBER, EX_CLOBBER,
     "Don't clobber existing files"},

    {0, -1, 0, 0, 0, "\nStorage of files:"},
    {"na", O_ST_CI, &always_primary_name, 1, 0,
     "Don't use hostname aliases for directory names (global)"},
    {"nd", O_LST_CI, uo(dir_mode), DIRS_NONE, DIRS_NORMAL,
     "Don't create subdirectories"},
    {"xd", O_LST_CI, uo(dir_mode), DIRS_ALWAYS, DIRS_NORMAL,
     "Create all subdirectories (default for -r+ & -r++)"},
    {"O", O_RF_S, &disposition, 0, 0,
     "Save next SPEC to file STR"},
    {"P", O_DPATH, 0, 0, 0,
     "Save files to directory STR/"},
    {"xi", O_LRF_S, uo(index_filename), 0, 0,
     "Set the name for anonymous index files (default is " DEFAULT_INDEX_FILE_NAME ")"},
    {"xe", O_LST_CI, uo(enumerate_files), 1, 0,
     "Enumerate files (1.puf, ...) in download order. Implies -nd"},
    {"xE", O_ST_CI, &enumerate_urls, 1, 0,
     "Enumerate files in command line order. Implies -nd"},
#ifndef HAVE_CYGWIN
    {"xq", O_LST_CI, uo(fat_quotes), 1, 0,
     "Quote file names suitably for storage on FAT file systems"},
#endif
    {"nt", O_LST_CI, uo(no_touch), 1, 0,
     "Don't timestamp files according to server response"},
    {"nb", O_LST_CI, uo(delete_broken), 1, 0,
     "Delete partial files from broken downloads"},
/*  {"xr", O_LST_CI, uo(regard_disposition), 1, 0,
     "Regard \"Disposition:\" HTTP headers"}, */
    {"xh", O_SHDR, 0, 0, 0,
     "Save HTTP headers starting with STR"},

    {0, -1, 0, 0, 0, "\nNetwork options:"},
    {"ni", O_LST_CI, uo(send_if_range), 0, 1,
     "Don't send \"If-Range:\" (assume up-to-date partial files)"},
    {"nR", O_LST_CI, uo(send_referer), 0, 1,
     "Don't send \"Referer:\""},
    {"U", O_AGENT, 0, 0, 0,
     "Send \"User-Agent: STR\" (use \"\" for none)"},
    {"iU", O_AGENTF, 0, 0, 0,
     "Choose User-Agent strings from FILE"},
    {"xH", O_AHDR, 0, 0, 0,
     "Add arbitrary header STR to HTTP requests"},
    {"Tl", O_ST_I, &timeout_dns, 0, 0,
     "Set DNS lookup timeout to NR seconds (global; default is " stringify(DEFAULT_TIMEOUT_DNS) ")"},
    {"Tc", O_LST_I, uo(timeout_connect), 0, DEFAULT_TIMEOUT_CONNECT,
     "Set connect timeout to NR seconds (default is " stringify(DEFAULT_TIMEOUT_CONNECT) ")"},
    {"Td", O_LST_I, uo(timeout_data), 0, DEFAULT_TIMEOUT_DATA,
     "Set data timeout to NR seconds (default is " stringify(DEFAULT_TIMEOUT_DATA) ")"},
    {"t", O_LST_I, uo(max_attempts), 0, DEFAULT_MAX_ATTEMPTS,
     "Set maximum number of download attempts per URL (default is " stringify(DEFAULT_MAX_ATTEMPTS) ")"},
    {"nw", O_LST_CI, uo(fail_no_wait), 1, 0,
     "Don't wait before connecting a busy/dead host"},
    {"xT", O_LST_CI, uo(http_err_trans), 1, 0,
     "Treat HTTP errors 403 and 404 as transient"},
    {"xb", O_BIND, 0, 0, 0,
     "Bind outgoing connections to IP"},
    {"ib", O_BINDF, 0, 0, 0,
     "Bind outgoing connections to random IPs from FILE"},
    {"y", O_PRX, 0, 0, 0,
     "Use proxy PRX. Multiple -y's are allowed"},
    {"yi", O_PRXF, 0, 0, 0,
     "Read proxies from FILE. PRX format: URL[*load ratio]"},

    {0, -1, 0, 0, 0, "\nResource usage quotas (global):"},
    {"Q", O_ST_O, &max_bytes, 0, 0,
     "Abort puf after NR bytes (unlimited by default)"},
    {"Qu", O_ST_I, &max_urls, 0, 0,
     "Abort puf after NR URLs (unlimited by default)"},
    {"Qt", O_ST_I, &max_time, 0, 0,
     "Abort puf after NR seconds (unlimited by default)"},
    {"lc", O_ST_I, &max_urls_active, 0, 0,
     "Max NR simultaneous connections (default is " stringify(DEFAULT_MAX_ACTIVE) ")"},
    {"ll", O_ST_I, &max_dnss_active, 0, 0,
     "Max NR simultaneous DNS lookups (default is " stringify(DEFAULT_MAX_DNS_FORKS) ")"},
    {"nf", O_ST_CI, &economize_files, 1, 0,
     "Use fewer file descriptors. Slightly slower"},
    {"nh", O_ST_CI, &economize_dns, 1, 0,
     "Do fewer DNS lookups. May miss some references"},
    {"dc", O_THROT, 0, 0, 0,
     "Delay consecutive connects by NR milliseconds"},

    {0, -1, 0, 0, 0, "\nLogging (global):"},
    {"ns", O_ST_CI, &show_stat, 0, 0,
     "Disable download progress statistics"},
    {"v", O_ST_CI, &verbose, ERR, 0,
     "Be verbose (show errors). Implies -ns"},
    {"vv", O_ST_CI, &verbose, WRN, 0,
     "Be very verbose (show warnings). Implies -v"},
    {"vvv", O_ST_CI, &verbose, NFO, 0,
     "Be extremely verbose (show infos). Implies -vv"},
    {"d", O_ST_I, &debug, 0, 0,
     "Debug: URL=1 DNS=2 QUE=4 CON=8 HDR=16 CHK=32 MEM=64"},
    {"h", O_HELP, 0, 0, 0,
     "This help screen"},

    {0, -1, 0, 0, 0,
     "\nExample:\n"
     "puf -P stuff -r+ www.foo.com -r www.bar.com -r- www.some.org , www.blub.de"},
};
/* *INDENT-ON* */

/* Number of entries in options[], headings included. */
#define NUM_OPTIONS (sizeof(options) / sizeof(options[0]))

/* Did user specify URL? */
static int tried_url;
/* Counter used to build "<n>.puf" names when -xE is in effect. */
static int cur_url;


/* realloc() that terminates the program via die() instead of returning 0. */
static void *
irealloc(void *ptr, size_t size)
{
    void *ret = realloc(ptr, size);

    if (!ret)
	die(2, "out of memory.");
    return ret;
}

/* malloc() counterpart of irealloc(). */
static void *
imalloc(size_t size)
{
    return irealloc(0, size);
}

/* strdup() that terminates the program via die() on failure. */
static char *
istrdup(char *str)
{
    char *ret = strdup(str);

    if (!ret)
	die(2, "out of memory.");
    return ret;
}


/*
 * Empty a pointer array.  The entry storage is released only when the array
 * is not flagged "cow", i.e. when it is not shared with an older options_t.
 */
static void
clr_ptrarr(ptrarr_t *arr)
{
    arr->nents = arr->rents = 0;
    if (arr->ents) {
	if (!arr->cow)
	    free(arr->ents);
	arr->ents = 0;
    }
    arr->cow = 0;
}

/*
 * Append the pointer `data' to `arr'.  If the array is flagged "cow", a
 * fresh entry vector is allocated and the old entries are copied into it,
 * so the vector the array previously pointed at is left untouched.
 * Otherwise the vector grows in place (capacity * 2 + 1) when full.
 */
static void
dext_ptrarr(ptrarr_t *arr, void *data)
{
    if (arr->cow) {
	void *ents;

	arr->rents = arr->nents * 2 + 1;
	ents = imalloc(arr->rents * sizeof(void *));
	/* An empty array may have a null vector; memcpy must not see it. */
	if (arr->nents)
	    memcpy(ents, arr->ents, arr->nents * sizeof(void *));
	arr->ents = ents;
	arr->cow = 0;
    } else if (arr->nents == arr->rents) {
	arr->rents = arr->rents * 2 + 1;
	arr->ents = irealloc(arr->ents, arr->rents * sizeof(void *));
    }
    ((void **)arr->ents)[arr->nents++] = data;
}

/* Allocate `size' bytes, append the new object to `arr' and return it. */
static void *
ext_ptrarr(ptrarr_t *arr, int size)
{
    void *data = imalloc(size);

    dext_ptrarr(arr, data);
    return data;
}


/* URL-local options currently being assembled. */
static options_t *lopt;
/* Non-zero once *lopt has been handed to a URL and must not be modified. */
static int lopt_cow;

/* Access the URL-local field of *lopt described by options[idx]. */
#define lopt_field(type, idx) \
    (*(type *)((char *)lopt + (size_t)options[idx].argptr))

/*
 * Start a fresh, zeroed set of URL-local options and load the table
 * defaults of all O_LST_CI / O_LST_I entries into it.
 */
static void
initlopt(void)
{
    unsigned i;

    lopt = imalloc(sizeof(*lopt));
    memset(lopt, 0, sizeof(*lopt));
    for (i = 0; i < NUM_OPTIONS; i++)
	switch (options[i].todo) {
	case O_LST_CI:
	case O_LST_I:
	    lopt_field(int, i) = options[i].defargval;
	    break;
	default:
	    break;
	}
    lopt_cow = 0;
}

/*
 * Make *lopt writable.  If the current option set is already referenced by
 * a URL (lopt_cow), work on a copy from now on.  The copy shares the entry
 * vectors of all pointer arrays with the original, so each of them is
 * flagged "cow"; dext_ptrarr()/clr_ptrarr() then leave the shared vectors
 * alone.  save_headers must be flagged as well: it is extended and cleared
 * through the same helpers (-xh / -xh-).
 */
static void
localize(void)
{
    options_t *nlopt;

    if (!lopt_cow)
	return;

    nlopt = imalloc(sizeof(*nlopt));
    memcpy(nlopt, lopt, sizeof(*nlopt));
    lopt = nlopt;
    lopt->user_agents.cow = 1;
    lopt->aux_headers.cow = 1;
    lopt->save_headers.cow = 1;
    lopt->bind_addrs.cow = 1;
    lopt->proxies.cow = 1;
    lopt_cow = 0;
}

/* Install a new destination path object (path copied, file_num reset). */
static void
set_dpath(char *path)
{
    int len = strlen(path) + 1;

    lopt->disp_path = imalloc(sizeof(*lopt->disp_path) + len);
    lopt->disp_path->file_num = 0;
    memcpy(&lopt->disp_path->path, path, len);
}


/*
 * Queue one SPEC ("URL[*disposition][^[^]proxy-URL]") with the current
 * URL-local options.  `url' is modified in place (the '*' and '^' parts
 * are cut off) but is not referenced after return, so it may live in a
 * reusable line buffer.
 */
static void
adden(char *url)
{
    url_parm_t *parm;
    char *ptr;
    char buf[SHORTSTR], dbuf[20];
    int own_disp = 0;	/* parm->disposition was allocated here */

    if (!lopt->disp_path)
	set_dpath("");

    /* -r implies at least the same level for page requisites. */
    if (lopt->follow_href > NOT_RECURSIVE &&
	lopt->follow_src < lopt->follow_href)
	lopt->follow_src = lopt->follow_href;
    lopt->follows_max = lopt->follow_src > lopt->follow_href ?
			lopt->follow_src : lopt->follow_href;
    checken_updgen(lopt);

    parm = imalloc(sizeof(*parm));
    parm->ref_count = 0;
    parm->opt = lopt;
    parm->http_auth = 0;
    parm->strictproxy = 0;
    parm->proxy = 0;

    if ((ptr = strchr(url, '^'))) {
	*ptr++ = '\0';
	if (*ptr == '^') {
	    ptr++;
	    parm->strictproxy = 1;
	}
	parm->proxy = parse_proxy(ptr, 1);
    }

    if ((ptr = strchr(url, '*'))) {
	*ptr = '\0';
	/* Copy: url may be a line buffer that is overwritten later. */
	parm->disposition = istrdup(ptr + 1);
	own_disp = 1;
    } else if (enumerate_urls) {
	snprintf(dbuf, sizeof(dbuf), "%d.puf", ++cur_url);
	parm->disposition = istrdup(dbuf);
	own_disp = 1;
    } else
	parm->disposition = disposition;
    disposition = 0;	/* -O applies to one SPEC only */

    if (parm->disposition &&
	(lopt->follow_src > DONT_FETCH || lopt->follow_href > NOT_RECURSIVE))
	die(2, "-r and -p are mutually exclusive to -O.");
    checken_updgen(parm);

    snprintf(buf, SHORTSTR, "%s%s", url_prefix ? url_prefix : "", url);
    dbg(URL, ("Trying URL '%s' from command line\n", buf));
    if (!parse_add_url(buf, strlen(buf), 0, parm, 2, 0, 0, 0)) {
	prx(ERR, "Invalid URL '%s'.\n", buf);
	if (own_disp)
	    free(parm->disposition);
	free(parm);
    } else
	lopt_cow = 1;	/* *lopt is in use now; see localize() */
    tried_url = 1;
}

/* Add one proxy ("URL[*load ratio]", ratio defaults to 100) to *lopt. */
static void
prx_adden(char *proxy)
{
    proxy_t *prox;
    char *ptr;
    int ratio = 100;

    if ((ptr = strchr(proxy, '*'))) {
	*ptr = '\0';
	ratio = atoi(ptr + 1);
	if (ratio <= 0)
	    die(1, "invalid load ratio '%s'.", ptr + 1);
    }
    if (!(prox = parse_proxy(proxy, ratio)))
	die(1, "invalid proxy specification '%s'.", proxy);
    dext_ptrarr(&lopt->proxies, prox);
}

/*
 * Add a local address to bind outgoing connections to.  The address is
 * validated by binding a throw-away socket to it first.
 */
static void
add_bind_ip(char *ip)
{
    int s;

    if ((bind_addr.sin_addr.s_addr = inet_addr(ip)) == (unsigned)-1)
	die(2, "'%s' is not a valid IP address.", ip);
    if ((s = socket(PF_INET, SOCK_STREAM, 0)) < 0)
	die(2, "cannot open test socket.");
    if (bind(s, (struct sockaddr *)&bind_addr, sizeof(struct sockaddr)))
	die(2, "cannot bind to %s.", ip);
    close(s);
    *(struct in_addr *)ext_ptrarr(&lopt->bind_addrs, sizeof(struct in_addr)) =
	bind_addr.sin_addr;
}

/* Add a User-Agent string with weight `perc' to *lopt. */
static void
adden_agent(char *agent, int perc)
{
    agent_t *ag;
    int len = strlen(agent) + 1;

    ag = (agent_t *)ext_ptrarr(&lopt->user_agents, sizeof(*ag) + len);
    ag->ratio = perc;
    lopt->uar_total += perc;
    memcpy(ag->agent, agent, len);
}

/* Parse one "<weight> <agent string>" line of a User-Agent list file. */
static void
add_agent(char *agent)
{
    int perc = -1, nch = 0;

    sscanf(agent, "%i %n", &perc, &nch);
    if (perc < 0)
	die(2, "invalid percentage/agent spec '%s'.", agent);
    adden_agent(agent + nch, perc);
}


/* Print the option list: "-opt ARG" column followed by the description. */
static void
showhelp_advanced(void)
{
    char ona[SHORTSTR];
    unsigned i;

    for (i = 0; i < NUM_OPTIONS; i++) {
	if (options[i].opt) {
	    snprintf(ona, sizeof(ona), "%s %s",
		     options[i].opt, onams[options[i].todo]);
	    printf(" -%-9s", ona);
	}
	puts(options[i].desc);
    }
}

/* Print the one-line usage synopsis. */
static void
showhelp_basic(void)
{
    printf("Usage: %s [options] [SPEC...]\n", progname);
}

/* Print the short help shown when no URL was given. */
static void
showhelp(void)
{
    showhelp_basic();
    printf("\nTry '%s -h' for more information.\n", progname);
}


/*
 * Read the next meaningful line from `f' into `buf'.  Blank lines and
 * lines whose first non-blank character is '#' are skipped.  Everything
 * up to and including ' ' counts as blank.  Returns a pointer into `buf'
 * with leading and trailing blanks removed, or 0 at end of file.
 */
static char *
mfgets(char *buf, int len, FILE *f)
{
    while (fgets(buf, len, f)) {
	/* Compare as unsigned so bytes >= 0x80 are never taken as blanks. */
	const unsigned char *s = (const unsigned char *)buf;
	int ba, bp, be;

	for (ba = 0; s[ba] && s[ba] <= ' '; ba++)
	    ;
	if (!s[ba] || s[ba] == '#')
	    continue;

	be = ba;
	for (bp = ba; s[bp]; bp++)
	    if (s[bp] > ' ')
		be = bp;
	buf[be + 1] = 0;
	return buf + ba;
    }
    return 0;
}

/*
 * Parse a non-negative size: decimal, or hexadecimal with a "0x" prefix,
 * optionally followed by one of the binary multipliers 'k', 'm' or 'g'.
 * Dies with a message naming `opt' if `val' has no digits or trailing junk.
 */
static off_t
matoll(char *val, char *opt)
{
    const char *p = val;
    off_t oll = 0;
    int ndigits = 0;

    if (p[0] == '0' && p[1] == 'x') {
	for (p += 2; isxdigit((unsigned char)*p); p++, ndigits++) {
	    int c = (unsigned char)*p;

	    oll = oll * 16 + (isdigit(c) ? c - '0' : tolower(c) - 'a' + 10);
	}
    } else {
	for (; isdigit((unsigned char)*p); p++, ndigits++)
	    oll = oll * 10 + (*p - '0');
    }

    if (*p == 'k') {
	oll *= 1024;
	p++;
    } else if (*p == 'm') {
	oll *= 1024 * 1024;
	p++;
    } else if (*p == 'g') {
	oll *= (off_t)1024 * 1024 * 1024;
	p++;
    }

    if (!ndigits || *p)
	die(2, "invalid numeric argument '%s' to option '%s'.", val, opt);
    return oll;
}

/*
 * Parse a strictly positive int (any base strtol() accepts with base 0).
 * Dies with a message naming `opt' on junk, values < 1 or values that do
 * not fit into an int.
 */
static int
matoi(char *val, char *opt)
{
    char *fbad;
    long l = strtol(val, &fbad, 0);
    int oint = (int)l;

    if (*fbad || l < 1 || (long)oint != l)
	die(2, "invalid numeric argument '%s' to option '%s'.", val, opt);
    return oint;
}


/* Return the index of the option named exactly `name', or -1. */
static int
find_option(const char *name)
{
    unsigned i;

    for (i = 0; i < NUM_OPTIONS; i++)
	if (options[i].opt && !strcmp(name, options[i].opt))
	    return (int)i;
    return -1;
}

/*
 * Feed every meaningful line (see mfgets) of the list file `fname' to
 * `handler'.  With `allow_stdin', the name "-" selects standard input.
 * `what' names the list kind in the error message.
 */
static void
read_list(const char *fname, const char *what, int allow_stdin,
	  void (*handler)(char *))
{
    char buf[SHORTSTR], *line;
    FILE *f;

    if (allow_stdin && !strcmp(fname, "-"))
	f = stdin;
    else if (!(f = fopen(fname, "r")))
	die(2, "cannot open %s list '%s'.", what, fname);

    while ((line = mfgets(buf, sizeof(buf), f)) != 0)
	handler(line);

    if (f != stdin)
	fclose(f);
}

/*
 * Handle "<option>-": put the URL-local setting described by options[idx]
 * back to its default.  `arg' is the option as typed (without the trailing
 * '-') and is only used for the error message.
 */
static void
revert_option(int idx, const char *arg)
{
    localize();
    switch (options[idx].todo) {
    case O_LST_CI:
    case O_LST_I:
	lopt_field(int, idx) = options[idx].defargval;
	break;
    case O_LST_O:
	lopt_field(off_t, idx) = 0;
	break;
    case O_LRF_S:
	lopt_field(char *, idx) = 0;
	break;
    case O_DPATH:
	lopt->disp_path = 0;
	break;
    case O_BIND:
    case O_BINDF:
	clr_ptrarr(&lopt->bind_addrs);
	break;
    case O_AGENT:
    case O_AGENTF:
	clr_ptrarr(&lopt->user_agents);
	break;
    case O_PRX:
    case O_PRXF:
	clr_ptrarr(&lopt->proxies);
	break;
    case O_AHDR:
	clr_ptrarr(&lopt->aux_headers);
	break;
    case O_SHDR:
	clr_ptrarr(&lopt->save_headers);
	break;
    default:
	die(2, "'%s' has no inverse option.", arg);
	break;
    }
}

/*
 * Apply an option that takes an argument.  `optname' is the option as
 * typed (for error messages), `val' its argument.
 */
static void
apply_option_arg(int idx, char *optname, char *val)
{
    int t;

    switch (options[idx].todo) {
    case O_ST_I:
	*(int *)options[idx].argptr = matoi(val, optname);
	break;
    case O_ST_O:
	*(off_t *)options[idx].argptr = matoll(val, optname);
	break;
    case O_RF_S:
	*(char **)options[idx].argptr = val;
	break;
    case O_LST_I:
	localize();
	lopt_field(int, idx) = matoi(val, optname);
	break;
    case O_LST_O:
	localize();
	lopt_field(off_t, idx) = matoll(val, optname);
	break;
    case O_LRF_S:
	localize();
	lopt_field(char *, idx) = val;
	break;
    case O_DPATH:
	localize();
	set_dpath(val);
	break;
    case O_BIND:
	localize();
	add_bind_ip(val);
	break;
    case O_BINDF:
	localize();
	read_list(val, "IP", 0, add_bind_ip);
	break;
    case O_AGENT:
	localize();
	adden_agent(val, 1);
	break;
    case O_AGENTF:
	localize();
	read_list(val, "User-Agent", 0, add_agent);
	break;
    case O_PRX:
	localize();
	prx_adden(val);
	break;
    case O_PRXF:
	localize();
	read_list(val, "proxy", 0, prx_adden);
	break;
    case O_AHDR:
	localize();
	dext_ptrarr(&lopt->aux_headers, val);
	break;
    case O_SHDR:
	localize();
	dext_ptrarr(&lopt->save_headers, val);
	break;
    case O_URLF:
	read_list(val, "URL", 1, adden);
	break;
    case O_THROT:
	t = matoi(val, optname);
	throttle.tv_sec = t / 1000;
	throttle.tv_usec = t % 1000 * 1000;
	break;
    default:
	break;
    }
}

/*
 * Parse the command line.
 *
 * Arguments are processed left to right:
 *   --         everything after it is a SPEC, even if it starts with '-'
 *   -opt       set an option (the next argument is consumed if the option
 *              takes one); option names must match exactly
 *   -opt-      revert a URL-local option to its default
 *   ,          revert all URL-local options to their defaults (this also
 *              ends the effect of "--")
 *   other      a SPEC, queued with the URL-local options in effect
 *
 * Option arguments from argv are stored by reference, and a trailing '-'
 * of an inverted option is cut off in place, so argv must stay valid and
 * writable.  Exits with the short help if no SPEC was given at all.
 */
void
getopts(int argc, char *argv[])
{
    int oind, dopts = 0;

    initlopt();

    for (oind = 1; oind < argc; oind++) {
	char *arg = argv[oind];

	if (!dopts && !strcmp(arg, "--")) {
	    /* Must be tested before the generic leading-'-' case. */
	    dopts = 1;
	} else if (!dopts && arg[0] == '-') {
	    char *name = arg + 1;
	    size_t nlen = strlen(name);
	    int idx, inv = 0;

	    if (nlen && name[nlen - 1] == '-') {
		name[nlen - 1] = 0;
		inv = 1;
	    }

	    if ((idx = find_option(name)) < 0)
		die(2, "unrecognized option '%s', try \"%s -h\".",
		    arg, progname);

	    if (inv) {
		revert_option(idx, arg);
		continue;
	    }

	    switch (options[idx].todo) {
	    case O_ST_CI:
		*(int *)options[idx].argptr = options[idx].argval;
		break;
	    case O_LST_CI:
		localize();
		lopt_field(int, idx) = options[idx].argval;
		break;
	    case O_HELP:
		showhelp_basic();
		showhelp_advanced();
		exit(0);
	    default:
		if (++oind >= argc)
		    die(2, "missing argument to option '%s', try \"%s -h\".",
			arg, progname);
		apply_option_arg(idx, arg, argv[oind]);
		break;
	    }
	} else if (!strcmp(arg, ",")) {
	    initlopt();
	    dopts = 0;
	} else
	    adden(arg);
    }

    /* If no url was given, show help message: */
    if (!tried_url) {
	showhelp();
	exit(2);
    }

    /* Progress statistics would garble messages written to a terminal. */
    if ((verbose
#ifdef DEBUG
	 || debug
#endif
	) && isatty(2))
	show_stat = 0;
}