/* $Id: getopts.c,v 1.15 2004/02/13 16:09:29 ossi Exp $ *
*
* puf 0.9 Copyright (C) 2000-2004 by Oswald Buddenhagen <puf@ossi.cjb.net>
* based on puf 0.1.x (C) 1999,2000 by Anders Gavare <gavare@hotmail.com>
*
* You may modify and distribute this code under the terms of the GPL.
* There is NO WARRANTY of any kind. See COPYING for details.
*
* getopts.c - command line parsing
*
*/
#include "puf.h"

#include <errno.h>
#include <limits.h>
static char *url_prefix;
static char *disposition;
static int enumerate_urls;
/*
 * Action codes for the "todo" column of options[], as interpreted by getopts():
 *   O_ST_I / O_ST_O / O_ST_CI / O_RF_S      write to the global variable whose
 *                                           address is stored in argptr
 *   O_LST_I / O_LST_O / O_LST_CI / O_LRF_S  write to the field of the current
 *                                           lopt at the offset stored in argptr
 *   ..._I   int parsed from the option argument
 *   ..._O   off_t parsed from the option argument
 *   ..._CI  the constant argval; the option takes no argument
 *   ..._S   the option argument string itself
 * The remaining codes each have their own case in getopts().
 * The order must stay in sync with onams[], which is indexed by these values.
 */
enum { O_ST_I, O_ST_O, O_ST_CI, O_RF_S,
O_LST_I, O_LST_O, O_LST_CI, O_LRF_S,
O_HELP, O_DPATH, O_URLF, O_AHDR, O_SHDR, O_THROT,
O_PRX, O_PRXF, O_BIND, O_BINDF, O_AGENT, O_AGENTF };
/* Argument placeholder printed after the option name by showhelp_advanced(). */
static char *onams[] = {
"NR", "NR", "", "STR",
"NR", "NR", "", "STR",
"", "STR", "FILE", "STR", "STR", "NR",
"PRX", "FILE", "IP", "FILE", "STR", "FILE" };
/* Some compilers, like SunOS4 cc, don't have offsetof in <stddef.h>. */
#ifndef offsetof
# define offsetof(type,ident) ((size_t)&(((type*)0)->ident))
#endif
/* Byte offset of field o within options_t, carried as a void* so it fits the argptr column. */
#define uo(o) ((void*)offsetof(options_t, o))
/* Two-level stringification: the argument is macro-expanded before being turned into a string. */
#define stringify(s) tostring(s)
#define tostring(s) #s
/* *INDENT-OFF* */
/*
 * Command line option table.  It drives option parsing in getopts(), the
 * defaults set by initlopt() and the help screen printed by
 * showhelp_advanced().  Rows with opt == 0 and todo == -1 are plain help text.
 */
static struct {
    char *opt;          /* option name without the leading '-'; 0 for help-text rows */
    int todo;           /* O_* action code; -1 for help-text rows */
    void *argptr;       /* address of a global, or uo() offset into options_t */
    int argval;         /* constant stored by the O_ST_CI / O_LST_CI actions */
    int defargval;      /* value restored for O_LST_CI / O_LST_I by initlopt() and "-opt-" */
    char *desc;         /* help text */
} options[] = {
    {0, -1, 0, 0, 0,
     "\nSPEC format: URL[*disposition][^[^]proxy-URL]\n"
     "URL format: [http://][user:pass@]host[.domain][:port][/path]\n"
     "\nAll options except those marked as global have effect only on the following\n"
     "URLs. Their effect can be cancelled by specifying <original option>- without\n"
     "any parameters possibly required by the original option or by overriding them\n"
     "with another option with an opposite effect. All URL-local options can be\n"
     "reverted to their default state by specifying a single comma as an argument.\n"
     "\nWhat to download:"},
    {"p", O_LST_CI, uo(follow_src), NOT_RECURSIVE, DONT_FETCH, "Download page requisites from same directory"},
    {"pr", O_LST_CI, uo(follow_src), SUBDIR_RECURSIVE, DONT_FETCH, "Download page requisites also from subdirectories (implies -p)"},
    {"pr+", O_LST_CI, uo(follow_src), HOST_RECURSIVE, DONT_FETCH, "Download page requisites from whole server (implies -pr)"},
    {"pr++", O_LST_CI, uo(follow_src), GLOBAL_RECURSIVE, DONT_FETCH, "Download page requisites from whole net (implies -pr+)"},
    {"r", O_LST_CI, uo(follow_href), SUBDIR_RECURSIVE, NOT_RECURSIVE, "Recurse download into subdirectories (implies -pr)"},
    {"r+", O_LST_CI, uo(follow_href), HOST_RECURSIVE, NOT_RECURSIVE, "Recurse download across whole server (implies -r & -pr+)"},
    {"r++", O_LST_CI, uo(follow_href), GLOBAL_RECURSIVE, NOT_RECURSIVE, "Recurse download across whole net (implies -r+ & -pr++; caution!)"},
    {"ld", O_LST_I, uo(max_depth), 0, -1, "Limit directory nesting level to NR (with -r)"},
    {"l", O_LST_I, uo(max_recurse), 0, 0, "Limit recursion depth to NR (with -r)"},
    {"lb", O_LST_O, uo(max_bytes), 0, 0, "Download only first NR bytes of every SPEC"},
    {"xg", O_LST_CI, uo(inhibit_cgiget), -1, 0, "Allow recursion into URLs with ? signs (i.e., CGIs)"},
    {"ng", O_LST_CI, uo(inhibit_cgiget), 1, 0, "Disallow ?-URLs, even if given on the command line"},
    {"F", O_LST_CI, uo(force_html), 1, 0, "Treat all files as HTML (scan for links)"},
    {"B", O_RF_S, &url_prefix, 0, 0, "Prefix to add to every SPEC on the command line"},
    {"i", O_URLF, 0, 0, 0, "Read SPECs from FILE"},
    {0, -1, 0, 0, 0, "\nWhat to do with existing files:"},
    {"u", O_LST_CI, uo(update_mode), EX_UPDATE, EX_CLOBBER, "Update existing "/* "and delete obsolete "*/"files, continue partial"},
    {"c", O_LST_CI, uo(update_mode), EX_CONTINUE, EX_CLOBBER, "Continue download of partial files"},
    {"nc", O_LST_CI, uo(update_mode), EX_NO_CLOBBER, EX_CLOBBER, "Don't clobber existing files"},
    {0, -1, 0, 0, 0, "\nStorage of files:"},
    {"na", O_ST_CI, &always_primary_name, 1, 0, "Don't use hostname aliases for directory names (global)"},
    {"nd", O_LST_CI, uo(dir_mode), DIRS_NONE, DIRS_NORMAL, "Don't create subdirectories"},
    {"xd", O_LST_CI, uo(dir_mode), DIRS_ALWAYS, DIRS_NORMAL, "Create all subdirectories (default for -r+ & -r++)"},
    {"O", O_RF_S, &disposition, 0, 0, "Save next SPEC to file STR"},
    {"P", O_DPATH, 0, 0, 0, "Save files to directory STR/"},
    {"xi", O_LRF_S, uo(index_filename), 0, 0, "Set the name for anonymous index files (default is " DEFAULT_INDEX_FILE_NAME ")"},
    {"xe", O_LST_CI, uo(enumerate_files), 1, 0, "Enumerate files (1.puf, ...) in download order. Implies -nd"},
    {"xE", O_ST_CI, &enumerate_urls, 1, 0, "Enumerate files in command line order. Implies -nd"},
#ifndef HAVE_CYGWIN
    {"xq", O_LST_CI, uo(fat_quotes), 1, 0, "Quote file names suitably for storage on FAT file systems"},
#endif
    {"nt", O_LST_CI, uo(no_touch), 1, 0, "Don't timestamp files according to server response"},
    {"nb", O_LST_CI, uo(delete_broken), 1, 0, "Delete partial files from broken downloads"},
/* {"xr", O_LST_CI, uo(regard_disposition), 1, 0, "Regard \"Disposition:\" HTTP headers"}, */
    {"xh", O_SHDR, 0, 0, 0, "Save HTTP headers starting with STR"},
    {0, -1, 0, 0, 0, "\nNetwork options:"},
    {"ni", O_LST_CI, uo(send_if_range), 0, 1, "Don't send \"If-Range:\" (assume up-to-date partial files)"},
    {"nR", O_LST_CI, uo(send_referer), 0, 1, "Don't send \"Referer:\""},
    {"U", O_AGENT, 0, 0, 0, "Send \"User-Agent: STR\" (use \"\" for none)"},
    {"iU", O_AGENTF, 0, 0, 0, "Choose User-Agent strings from FILE"},
    {"xH", O_AHDR, 0, 0, 0, "Add arbitrary header STR to HTTP requests"},
    {"Tl", O_ST_I, &timeout_dns, 0, 0, "Set DNS lookup timeout to NR seconds (global; default is " stringify(DEFAULT_TIMEOUT_DNS) ")"},
    {"Tc", O_LST_I, uo(timeout_connect), 0, DEFAULT_TIMEOUT_CONNECT, "Set connect timeout to NR seconds (default is " stringify(DEFAULT_TIMEOUT_CONNECT) ")"},
    {"Td", O_LST_I, uo(timeout_data), 0, DEFAULT_TIMEOUT_DATA, "Set data timeout to NR seconds (default is " stringify(DEFAULT_TIMEOUT_DATA) ")"},
    {"t", O_LST_I, uo(max_attempts), 0, DEFAULT_MAX_ATTEMPTS, "Set maximum number of download attempts per URL (default is " stringify(DEFAULT_MAX_ATTEMPTS) ")"},
    {"nw", O_LST_CI, uo(fail_no_wait), 1, 0, "Don't wait before connecting a busy/dead host"},
    {"xT", O_LST_CI, uo(http_err_trans), 1, 0, "Treat HTTP errors 403 and 404 as transient"},
    {"xb", O_BIND, 0, 0, 0, "Bind outgoing connections to IP"},
    {"ib", O_BINDF, 0, 0, 0, "Bind outgoing connections to random IPs from FILE"},
    {"y", O_PRX, 0, 0, 0, "Use proxy PRX. Multiple -y's are allowed"},
    {"yi", O_PRXF, 0, 0, 0, "Read proxies from FILE. PRX format: URL[*load ratio]"},
    {0, -1, 0, 0, 0, "\nResource usage quotas (global):"},
    {"Q", O_ST_O, &max_bytes, 0, 0, "Abort puf after NR bytes (unlimited by default)"},
    {"Qu", O_ST_I, &max_urls, 0, 0, "Abort puf after NR URLs (unlimited by default)"},
    {"Qt", O_ST_I, &max_time, 0, 0, "Abort puf after NR seconds (unlimited by default)"},
    {"lc", O_ST_I, &max_urls_active, 0, 0, "Max NR simultaneous connections (default is " stringify(DEFAULT_MAX_ACTIVE) ")"},
    {"ll", O_ST_I, &max_dnss_active, 0, 0, "Max NR simultaneous DNS lookups (default is " stringify(DEFAULT_MAX_DNS_FORKS) ")"},
    {"nf", O_ST_CI, &economize_files, 1, 0, "Use fewer file descriptors. Slightly slower"},
    {"nh", O_ST_CI, &economize_dns, 1, 0, "Do fewer DNS lookups. May miss some references"},
    {"dc", O_THROT, 0, 0, 0, "Delay consecutive connects by NR milliseconds"},
    {0, -1, 0, 0, 0, "\nLogging (global):"},
    {"ns", O_ST_CI, &show_stat, 0, 0, "Disable download progress statistics"},
    {"v", O_ST_CI, &verbose, ERR, 0, "Be verbose (show errors). Implies -ns"},
    {"vv", O_ST_CI, &verbose, WRN, 0, "Be very verbose (show warnings). Implies -v"},
    {"vvv", O_ST_CI, &verbose, NFO, 0, "Be extremely verbose (show infos). Implies -vv"},
    {"d", O_ST_I, &debug, 0, 0, "Debug: URL=1 DNS=2 QUE=4 CON=8 HDR=16 CHK=32 MEM=64"},
    {"h", O_HELP, 0, 0, 0, "This help screen"},
    {0, -1, 0, 0, 0, "\nExample:\n"
     "puf -P stuff -r+ www.foo.com -r www.bar.com -r- www.some.org , www.blub.de"},
};
/* *INDENT-ON* */
/* Did user specify URL? */
static int tried_url;
static int cur_url;
/*
 * realloc() wrapper: calls die(2, "out of memory.") when realloc() returns
 * a null pointer, otherwise hands back the (possibly moved) allocation.
 */
static void *
irealloc (void *ptr, size_t size)
{
    void *mem = realloc(ptr, size);

    if (mem)
        return mem;
    die(2, "out of memory.");
    return mem;
}
/* Allocate SIZE bytes through irealloc(), inheriting its out-of-memory handling. */
static void *
imalloc (size_t size)
{
    void *fresh;

    fresh = irealloc(NULL, size);
    return fresh;
}
/* strdup() wrapper: calls die(2, "out of memory.") when the copy cannot be allocated. */
static char *
istrdup (char *str)
{
    char *copy = strdup(str);

    if (copy == NULL)
        die(2, "out of memory.");
    return copy;
}
/*
 * Reset ARR to the empty state.  The entry vector is released only when the
 * cow flag is clear; a vector flagged cow is left alone and merely forgotten.
 */
static void
clr_ptrarr (ptrarr_t *arr)
{
    if (arr->ents && !arr->cow)
        free (arr->ents);
    arr->ents = 0;
    arr->rents = 0;
    arr->nents = 0;
    arr->cow = 0;
}
/*
 * Append the pointer DATA to ARR, growing the entry vector as needed.
 *
 * When the cow flag is set the current vector is not modified: a private
 * vector is allocated, the existing entries are copied over and the flag is
 * cleared.  Otherwise the vector is grown in place with irealloc() once it
 * is full.  Capacity grows as 2 * n + 1, so an empty array gets one slot.
 */
static void
dext_ptrarr (ptrarr_t *arr, void *data)
{
    void *ents;

    if (arr->cow) {
        arr->rents = arr->nents * 2 + 1;
        ents = imalloc(arr->rents * sizeof(void *));
        /*
         * localize() flags empty arrays as cow too; their ents pointer is
         * null, and memcpy() must not be given a null pointer even for a
         * zero-length copy.
         */
        if (arr->nents)
            memcpy(ents, arr->ents, arr->nents * sizeof(void *));
        arr->ents = ents;
        arr->cow = 0;
    } else if (arr->nents == arr->rents) {
        arr->rents = arr->rents * 2 + 1;
        arr->ents = irealloc(arr->ents, arr->rents * sizeof(void *));
    }
    ((void **)arr->ents)[arr->nents++] = data;
}
/*
 * Allocate a new element of SIZE bytes, append its address to ARR and
 * return it so the caller can fill it in.
 */
static void *
ext_ptrarr (ptrarr_t *arr, int size)
{
    void *elem;

    elem = imalloc(size);
    dext_ptrarr(arr, elem);
    return elem;
}
/* URL-local option set that adden() attaches to each new URL; replaced by initlopt() and localize(). */
static options_t *lopt;
/* Set by adden() once a URL uses the current lopt; localize() then copies it before any change. */
static int lopt_cow;
/*
 * Start a fresh URL-local option set: allocate a zeroed options_t, then
 * store the table default (defargval) for every O_LST_CI / O_LST_I option.
 * The new set is not yet referenced by any URL, so lopt_cow is cleared.
 */
static void
initlopt(void)
{
    const size_t nopts = sizeof(options) / sizeof(options[0]);
    size_t idx;
    char *base;

    lopt = imalloc(sizeof(*lopt));
    memset (lopt, 0, sizeof(*lopt));
    base = (char *)lopt;
    for (idx = 0; idx < nopts; idx++) {
        if (options[idx].todo != O_LST_CI && options[idx].todo != O_LST_I)
            continue;
        /* argptr holds the field's byte offset inside options_t (see uo()) */
        *(int *)(base + (size_t)options[idx].argptr) = options[idx].defargval;
    }
    lopt_cow = 0;
}
/*
 * Make the current option set safe to modify.
 *
 * Once adden() has attached lopt to a URL (lopt_cow != 0) it must not be
 * changed any more, so a byte-wise copy is made and becomes the new lopt.
 * The copy still points at the pointer-array vectors of the old set, hence
 * every ptrarr_t member is flagged cow: dext_ptrarr() then copies the vector
 * before appending and clr_ptrarr() does not free it.
 */
static void
localize(void)
{
    options_t *nlopt;

    if (!lopt_cow)
        return;
    nlopt = imalloc(sizeof(*nlopt));
    memcpy(nlopt, lopt, sizeof(*nlopt));
    lopt = nlopt;
    lopt->user_agents.cow = 1;
    lopt->aux_headers.cow = 1;
    /*
     * save_headers is appended to (-xh) and cleared (-xh-) like the other
     * arrays, so it needs the same protection; without it the vector still
     * used by earlier URLs could be reallocated or freed.
     */
    lopt->save_headers.cow = 1;
    lopt->bind_addrs.cow = 1;
    lopt->proxies.cow = 1;
    lopt_cow = 0;
}
/*
 * Give the current option set a new target directory record: the record is
 * allocated with room for PATH (including its terminating NUL), its file
 * counter starts at 0 and PATH is copied into its path member.
 */
static void
set_dpath(char *path)
{
    int size = strlen(path) + 1;

    lopt->disp_path = imalloc(sizeof(*lopt->disp_path) + size);
    lopt->disp_path->file_num = 0;
    memcpy (&lopt->disp_path->path, path, size);
}
/*
 * Queue one SPEC ("URL[*disposition][^[^]proxy-URL]") for download using the
 * current URL-local options.
 *
 * URL is modified in place: the '^' and '*' separators are overwritten with
 * NUL.  The caller may reuse the buffer afterwards (getopts() does so when
 * reading SPECs from a file), therefore nothing that outlives this call may
 * point into it.
 *
 * Sets tried_url for every SPEC, valid or not, and sets lopt_cow once a URL
 * has been accepted so that later option changes go to a private copy.
 */
static void
adden(char *url)
{
    url_parm_t *parm;
    char *ptr;
    char buf[SHORTSTR], dbuf[20];
    int len;

    if (!lopt->disp_path)
        set_dpath("");
    /* -r implies at least the same level for page requisites. */
    if (lopt->follow_href > NOT_RECURSIVE &&
        lopt->follow_src < lopt->follow_href)
        lopt->follow_src = lopt->follow_href;
    lopt->follows_max = lopt->follow_src > lopt->follow_href ?
                        lopt->follow_src : lopt->follow_href;
    checken_updgen(lopt);

    parm = imalloc(sizeof(*parm));
    parm->ref_count = 0;
    parm->opt = lopt;
    parm->http_auth = 0;
    /* Defined values even when the SPEC names no proxy. */
    parm->strictproxy = 0;
    parm->proxy = 0;
    if ((ptr = strchr(url, '^'))) {
        *ptr++ = '\0';
        if (*ptr == '^') {
            ptr++;
            parm->strictproxy = 1;
        }
        parm->proxy = parse_proxy(ptr, 1);
    }

    if ((ptr = strchr(url, '*'))) {
        *ptr = '\0';
        /*
         * Copy the name: URL may live in a line buffer that is overwritten
         * by the next SPEC read from a file.  Like the enumerated name
         * below, the copy is not released if the URL is rejected.
         */
        parm->disposition = istrdup(ptr + 1);
    } else if (enumerate_urls) {
        snprintf(dbuf, sizeof(dbuf), "%d.puf", ++cur_url);
        parm->disposition = istrdup(dbuf);
    } else
        parm->disposition = disposition;
    /* -O applies to a single SPEC only. */
    disposition = 0;
    if (parm->disposition &&
        (lopt->follow_src > DONT_FETCH || lopt->follow_href > NOT_RECURSIVE))
        die(2, "-r and -p are mutually exclusive to -O.");
    checken_updgen(parm);

    len = snprintf(buf, SHORTSTR, "%s%s", url_prefix ? url_prefix : "", url);
    if (len < 0 || len >= (int)sizeof(buf)) {
        /* A truncated URL would silently fetch something else. */
        prx(ERR, "URL '%s' is too long.\n", url);
        free(parm);
    } else {
        dbg(URL, ("Trying URL '%s' from command line\n", buf));
        if (!parse_add_url(buf, strlen(buf), 0, parm, 2, 0, 0, 0)) {
            prx(ERR, "Invalid URL '%s'.\n", buf);
            free(parm);
        } else
            lopt_cow = 1;
    }
    tried_url = 1;
}
/*
 * Add one proxy given as "URL[*load ratio]" to the current option set.
 * The '*' separator in PROXY is overwritten with NUL.  Without a ratio the
 * value 100 is used; a ratio that atoi() reads as <= 0 and a proxy that
 * parse_proxy() rejects are both reported through die().
 */
static void
prx_adden(char *proxy)
{
    proxy_t *entry;
    char *star = strchr(proxy, '*');
    int ratio = 100;

    if (star) {
        *star = '\0';
        ratio = atoi(star + 1);
        if (ratio <= 0)
            die(1, "invalid load ratio '%s'.", star + 1);
    }
    entry = parse_proxy(proxy, ratio);
    if (!entry)
        die(1, "invalid proxy specification '%s'.", proxy);
    else
        dext_ptrarr(&lopt->proxies, entry);
}
/*
 * Add the local address IP to the bind address list of the current option
 * set.  The address is parsed with inet_addr() into the global bind_addr
 * and verified by binding a throw-away TCP socket to it; any failure is
 * reported through die().
 */
static void
add_bind_ip(char *ip)
{
    struct in_addr *slot;
    int sock;

    bind_addr.sin_addr.s_addr = inet_addr(ip);
    if (bind_addr.sin_addr.s_addr == (unsigned)-1)
        die(2, "'%s' is not a valid IP address.", ip);

    sock = socket(PF_INET, SOCK_STREAM, 0);
    if (sock < 0)
        die(2, "cannot open test socket.");
    if (bind(sock, (struct sockaddr *)&bind_addr, sizeof(struct sockaddr)) != 0)
        die(2, "cannot bind to %s.", ip);
    close(sock);

    slot = (struct in_addr *)ext_ptrarr(&lopt->bind_addrs, sizeof(struct in_addr));
    *slot = bind_addr.sin_addr;
}
/*
 * Append a User-Agent string with weight PERC to the current option set and
 * add the weight to the running total uar_total.  The string is copied into
 * the newly allocated entry.
 */
static void
adden_agent(char *agent, int perc)
{
    int size = strlen(agent) + 1;
    agent_t *entry;

    entry = (agent_t *)ext_ptrarr(&lopt->user_agents, sizeof(*entry) + size);
    entry->ratio = perc;
    lopt->uar_total += perc;
    memcpy(entry->agent, agent, size);
}
/*
 * Parse one "<weight> <User-Agent string>" line and add it to the current
 * option set.  A line that does not start with a non-negative number is
 * reported through die().
 */
static void
add_agent(char *agent)
{
    int weight = -1;
    int skip;

    /* %n records where the agent string starts, after the number and blanks */
    sscanf(agent, "%i %n", &weight, &skip);
    if (weight < 0)
        die(2, "invalid percentage/agent spec '%s'.", agent);
    adden_agent(agent + skip, weight);
}
static void
showhelp_advanced()
{
char ona[SHORTSTR];
unsigned i;
for (i = 0; i < sizeof(options) / sizeof(options[0]); i++) {
if (options[i].opt) {
sprintf(ona, "%s %s", options[i].opt, onams[options[i].todo]);
printf(" -%-9s", ona);
}
puts(options[i].desc);
}
}
/* Print the one-line usage synopsis. */
static void
showhelp_basic()
{
    printf("Usage: %s [options] [SPEC...]\n",
           progname);
}
/* Short help shown when no URL was given: the synopsis plus a pointer to -h. */
static void
showhelp()
{
    showhelp_basic();
    printf("\nTry '%s -h' for more information.\n",
           progname);
}
/*
 * Read the next meaningful line from F into BUF (capacity LEN).
 *
 * Blank lines and lines whose first non-blank character is '#' are skipped.
 * Leading and trailing blanks and control characters (bytes <= ' ') are
 * stripped from the line that is returned.
 *
 * Returns a pointer into BUF at the first significant character, or 0 at
 * end of file / read error.  Lines longer than LEN - 1 bytes are delivered
 * in several pieces because fgets() is used for reading.
 */
static char *
mfgets(char *buf, int len, FILE *f)
{
    unsigned char *start, *scan, *end;

    while (fgets(buf, len, f)) {
        /*
         * Compare as unsigned char: with a signed plain char, bytes >= 0x80
         * (e.g. UTF-8 text) are negative and would be taken for blanks.
         */
        start = (unsigned char *)buf;
        while (*start && *start <= ' ')
            start++;
        if (!*start || *start == '#')
            continue;
        /* end tracks the position just past the last significant byte */
        end = start;
        for (scan = start; *scan; scan++)
            if (*scan > ' ')
                end = scan + 1;
        *end = '\0';
        return (char *)start;
    }
    return 0;
}
/*
 * Parse the byte-count argument VAL of option OPT.
 *
 * Accepted forms: decimal digits, or "0x" followed by hexadecimal digits,
 * optionally followed by one of the suffixes 'k', 'm' or 'g', which
 * multiply by 1024, 1024^2 and 1024^3.  Anything else, including an
 * argument without any digit, is reported through die().
 * Overflow of off_t is not detected.
 */
static off_t
matoll(char *val, char *opt)
{
    char *fbad = val;
    char *digits;
    off_t oll = 0;
    int have_digits;

    /* <ctype.h> functions need an unsigned char value, not a plain char */
    if (fbad[0] == '0' && fbad[1] == 'x') {
        fbad += 2;
        digits = fbad;
        while (isxdigit((unsigned char)*fbad)) {
            oll = oll * 16 + (isdigit((unsigned char)*fbad) ?
                              *fbad - '0' :
                              tolower((unsigned char)*fbad) - 'a' + 10);
            fbad++;
        }
    } else {
        digits = fbad;
        while (isdigit((unsigned char)*fbad)) {
            oll = oll * 10 + (*fbad - '0');
            fbad++;
        }
    }
    have_digits = fbad != digits;

    if (*fbad == 'k') {
        oll *= 1024;
        fbad++;
    } else if (*fbad == 'm') {
        oll *= 1024 * 1024;
        fbad++;
    } else if (*fbad == 'g') {
        oll *= 1024 * 1024 * 1024;
        fbad++;
    }

    if (!have_digits || *fbad)
        die(2, "invalid numeric argument '%s' to option '%s'.", val, opt);
    return oll;
}
/*
 * Parse the positive integer argument VAL of option OPT.
 *
 * The number may be decimal, octal (leading 0) or hexadecimal (leading 0x),
 * as understood by strtol() with base 0.  Trailing garbage, an empty
 * argument, values below 1 and values that do not fit into an int are
 * reported through die().
 */
static int
matoi(char *val, char *opt)
{
    char *fbad;
    long num;

    errno = 0;
    num = strtol(val, &fbad, 0);
    /*
     * Check the range on the long value: converting first would silently
     * wrap large inputs into a seemingly valid int.
     */
    if (fbad == val || *fbad || errno == ERANGE || num < 1 || num > INT_MAX)
        die(2, "invalid numeric argument '%s' to option '%s'.", val, opt);
    return (int)num;
}
void
getopts(int argc, char *argv[])
{
char buf[SHORTSTR], *fbad;
int oind, dopts = 0, t;
unsigned i;
FILE *f;
initlopt();
for (oind = 1; oind < argc; oind++) {
if (!dopts && argv[oind][0] == '-') {
int olen = strlen (argv[oind] + 1);
int inv = 0;
if (argv[oind][olen] == '-') {
argv[oind][olen] = 0;
inv = 1;
}
for (i = 0; i < sizeof(options) / sizeof(options[0]); i++)
if (options[i].opt &&
!memcmp(argv[oind] + 1, options[i].opt, olen))
goto fopt;
die(2, "unrecognized option '%s', try \"%s -h\".", argv[oind],
progname);
fopt:
if (inv) {
localize();
switch (options[i].todo) {
case O_LST_CI:
case O_LST_I:
*(int *)((char *)lopt + (size_t)options[i].argptr) =
options[i].defargval;
break;
case O_LST_O:
*(off_t *)((char *)lopt + (size_t)options[i].argptr) =
0;
break;
case O_LRF_S:
*(char **)((char *)lopt + (size_t)options[i].argptr) =
0;
break;
case O_DPATH:
lopt->disp_path = 0;
break;
case O_BIND:
case O_BINDF:
clr_ptrarr(&lopt->bind_addrs);
break;
case O_AGENT:
case O_AGENTF:
clr_ptrarr(&lopt->user_agents);
break;
case O_PRX:
case O_PRXF:
clr_ptrarr(&lopt->proxies);
break;
case O_AHDR:
clr_ptrarr(&lopt->aux_headers);
break;
case O_SHDR:
clr_ptrarr(&lopt->save_headers);
break;
default:
die(2, "'%s' has no inverse option.", argv[oind]);
break;
}
continue;
}
if (options[i].todo == O_ST_CI)
*(int *)options[i].argptr = options[i].argval;
else if (options[i].todo == O_LST_CI) {
localize();
*(int *)((char *)lopt + (size_t)options[i].argptr) =
options[i].argval;
} else if (options[i].todo == O_HELP) {
showhelp_basic();
showhelp_advanced();
exit(0);
} else {
if (++oind >= argc)
die(2, "missing argument to option '%s', try \"%s -h\".",
argv[oind - 1], progname);
switch (options[i].todo) {
case O_ST_I:
*(int *)options[i].argptr = matoi(argv[oind],
argv[oind - 1]);
break;
case O_ST_O:
*(off_t *)options[i].argptr = matoll(argv[oind],
argv[oind - 1]);
break;
case O_RF_S:
*(char **)options[i].argptr = argv[oind];
break;
case O_LST_I:
localize();
*(int *)((char *)lopt + (size_t)options[i].argptr) =
matoi(argv[oind], argv[oind - 1]);
break;
case O_LST_O:
localize();
*(off_t *)((char *)lopt + (size_t)options[i].argptr) =
matoll(argv[oind], argv[oind - 1]);
break;
case O_LRF_S:
localize();
*(char **)((char *)lopt + (size_t)options[i].argptr) =
argv[oind];
break;
case O_DPATH:
localize();
set_dpath(argv[oind]);
break;
case O_BIND:
localize();
add_bind_ip(argv[oind]);
break;
case O_BINDF:
localize();
if (!(f = fopen(argv[oind], "r")))
die(2, "cannot open IP list '%s'.", argv[oind]);
while ((fbad = mfgets(buf, sizeof(buf), f)) != 0)
add_bind_ip(fbad);
fclose(f);
break;
case O_AGENT:
localize();
adden_agent(argv[oind], 1);
break;
case O_AGENTF:
localize();
if (!(f = fopen(argv[oind], "r")))
die(2, "cannot open User-Agent list '%s'.", argv[oind]);
while ((fbad = mfgets(buf, sizeof(buf), f)) != 0)
add_agent(fbad);
fclose(f);
break;
case O_PRX:
localize();
prx_adden(argv[oind]);
break;
case O_PRXF:
localize();
if (!(f = fopen(argv[oind], "r")))
die(2, "cannot open proxy list '%s'.", argv[oind]);
while ((fbad = mfgets(buf, sizeof(buf), f)) != 0)
prx_adden(fbad);
fclose(f);
break;
case O_AHDR:
localize();
dext_ptrarr(&lopt->aux_headers, argv[oind]);
break;
case O_SHDR:
localize();
dext_ptrarr(&lopt->save_headers, argv[oind]);
break;
case O_URLF:
if (!strcmp(argv[oind], "-"))
f = stdin;
else if (!(f = fopen(argv[oind], "r")))
die(2, "cannot open URL list '%s'.", argv[oind]);
while ((fbad = mfgets(buf, sizeof(buf), f)) != 0)
adden(fbad);
if (f != stdin)
fclose(f);
break;
case O_THROT:
t = matoi(argv[oind], argv[oind - 1]);
throttle.tv_sec = t / 1000;
throttle.tv_usec = t % 1000 * 1000;
break;
}
}
} else if (!strcmp(argv[oind], ",")) {
initlopt();
dopts = 0;
} else if (!dopts && !strcmp(argv[oind], "--"))
dopts++;
else
adden(argv[oind]);
}
/* If no url was given, show help message: */
if (!tried_url) {
showhelp();
exit(2);
}
if ((verbose
#ifdef DEBUG
|| debug
#endif
) && isatty(2))
show_stat = 0;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */