/* $Id: puf.h,v 1.17 2004/03/07 12:48:20 ossi Exp $ * * * puf 0.9 Copyright (C) 2000-2004 by Oswald Buddenhagen * based on puf 0.1.x (C) 1999,2000 by Anders Gavare * * You may modify and distribute this code under the terms of the GPL. * There is NO WARRANTY of any kind. See COPYING for details. * * puf.h - global definitions * */ #ifndef _PUF_H #define _PUF_H #include #ifdef USE_LL # define _FILE_OFFSET_BITS 64 # define SSOFFT "lli" #else # define SSOFFT "li" #endif #define SOFFT "%"SSOFFT #ifndef _O_BINARY # define _O_BINARY 0 #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Undef this, if you don't want the -d switch */ #define DEBUG /* This may be defined if you want to, but they are not neccessary: */ /*#define HTTP_ACCEPT "Accept: * / *" */ /* Undef this, if you want no message about incorrect DNS resolver configs */ #define CORRECT_DNS /* String length limit: */ #define SHORTSTR 512 /* Max buf size when reading from the network. */ #define MAXBUFSIZE 0x8000 /* Overlapping lenght of consecutive buffer scans */ #define OVERLAPLEN 200 /* Maximal allowed header length */ #define MAXHEADERLEN 0x10000 /* Filename of a url ending with a slash: */ #define DEFAULT_INDEX_FILE_NAME "index.html" /* Extension to append to patial files */ #define PART_EXT ".part" /* This could be set higher than 0 if you like the debug output... */ #define DEFAULT_VERBOSITY 0 /* Default max nr of connections: */ #define DEFAULT_MAX_ACTIVE 20 /* Default max of ansynchronous dns lookups: */ #define DEFAULT_MAX_DNS_FORKS 10 /* Default timeout when awaiting DNS lookup completion */ #define DEFAULT_TIMEOUT_DNS 60 /* Default timeout when connecting a host */ #define DEFAULT_TIMEOUT_CONNECT 60 /* Default timeout when awaiting http reply data */ #define DEFAULT_TIMEOUT_DATA 120 /* How long the bandwidht averaging timeframe should be */ #define AVERAGING_TIMEFRAME 100 /* Default Max number of connection attempts per url: */ #define DEFAULT_MAX_ATTEMPTS 5 #ifndef HAVE_STRDUP char *strdup(const char *s); #endif #if 1 # define RND(m) ((rand() >> 15) * m / ((RAND_MAX >> 15) + 1)) /* m < 32768 */ #else # define RND(m) ((int)(rand() * 1.0 * m / (RAND_MAX + 1.0))) /* needs float */ #endif /* Structures for hostlist.c */ typedef struct haddr_t { struct in_addr addr; time_t retry_time; u_int attempt:8, last_errt:2, err_wait:16; } haddr_t; typedef struct hinfo_t { char *name; /* back link to primary name */ char *lname; /* back link to local storage name */ u_int is_http11:1, /* 1 till opposite proven */ maybe:1; /* proxy finder state flag */ short num_ips; /* 0 if hostname not found */ short cur_ip; /* for round-robin */ haddr_t ips[0]; /* actually num_ips */ } hinfo_t; typedef struct host_t { struct host_t *next; hinfo_t *info; char name[0]; } host_t; /* proxy spec. */ typedef struct proxy_t { struct proxy_t *next; /* next proxy in chain */ host_t *host; /* Proxy host */ u_int have_auth:1, /* User:Password (BASE64) after cgi_path? */ ready:1; /* DNS lookup complete? */ u_short port; /* Proxy port */ u_short ratio; /* Requested load ratio */ int score; /* Connection/byte count - for capacity-driven balancing */ /* u_short cur_conn;*/ /* Currently open connections - for load-driven balancing - now unused */ char cgi_path[0]; /* Only for "cgi-proxies" */ } proxy_t; /* fake user agent */ typedef struct agent_t { int ratio; char agent[0]; } agent_t; /* generic array of pointers */ typedef struct ptrarr_t { void *ents; u_int nents, rents, cow:1; } ptrarr_t; /* disposition path spec. for -P */ typedef struct disp_path_t { int file_num; /* for -xe */ char path[0]; } disp_path_t; /* command line options */ typedef struct options_t { #ifdef USE_MAGIC int chk; #endif int follow_src; /* -p[r[+[+]]] :3 */ int follow_href; /* -r[+[+]] :3 */ int follows_max; /* [-{p,{{pr,r}{,+{,+}}}}] :3 */ int dir_mode; /* -nd & -xd :2 */ int max_depth; /* -ld short */ int max_recurse; /* -l short */ off_t max_bytes; /* -lb */ int inhibit_cgiget; /* -xg & -ng :2 */ int force_html; /* -F :1 */ int update_mode; /* -u, -c & -nc :2 */ disp_path_t *disp_path; /* -P */ char *index_filename; /* -xi */ int enumerate_files; /* -xe :1 */ #ifndef HAVE_CYGWIN int fat_quotes; /* -xq :1 */ #endif int no_touch; /* -nt :1 */ int delete_broken; /* -nb :1 */ int send_if_range; /* -ni :1 */ int send_referer; /* -nR :1 */ int uar_total; /* [-U & -iU] */ ptrarr_t user_agents; /* -U & -iU */ ptrarr_t aux_headers; /* -xH */ ptrarr_t save_headers; /* -xh */ int timeout_connect; /* -Tc short */ int timeout_data; /* -Td short */ int max_attempts; /* -t short */ int fail_no_wait; /* -nw :1 */ int http_err_trans; /* -xT :1 */ ptrarr_t bind_addrs; /* -xb & ib */ ptrarr_t proxies; /* -y & -iy */ } options_t; /* shared url parameters */ typedef struct url_parm_t { #ifdef USE_MAGIC int chk; #endif options_t *opt; char *http_auth; /* User:Password in BASE64 encoding */ char *disposition; /* -O, [-xE] & URL*disp */ proxy_t *proxy; /* URL^proxy */ uint strictproxy:1; /* URL^^proxy */ uint ref_count:31; /* >1 -> actually copy in clone_parm() */ } url_parm_t; typedef struct url_t { #ifdef USE_MAGIC u_int attempt; /* Nr of attempt to download this URL */ int chk; /* Structure checksum */ /* checked data begins here */ u_int len; /* Structure lenght */ #endif struct url_t *next; /* next url in global chain */ struct url_t *referer; /* Referer URL (or NULL) */ url_parm_t *parm; /* URL parameters */ u_int #ifndef USE_MAGIC attempt:8, /* Nr of attempt to download this URL */ #endif is_top_dir:1, /* Get disp_off from this url? */ is_requisite:1, /* 1=comes from src=, otherwise href= */ relocs:3; /* How often this URL was redirected */ /* Url: host, port and local part: */ int url_hash; /* (For quick search) */ host_t *host; /* Host of URL */ u_short port; /* Host port */ short link_depth; /* For -l */ short path_len; /* Length of the path in the local part */ short disp_pathoff; /* Disposition path; points into local_part */ char local_part[0]; /* Variable length local part */ } url_t; /* queue element for waiting urls in download queues */ typedef struct wurl_t { struct wurl_t *next; /* next url in current queue */ url_t *url; /* the associated url */ } wurl_t; /* queue element for waiting objects */ typedef struct wobj_t { struct wobj_t *next; /* next object in current queue */ url_t *url; /* the associated url (if any) */ proxy_t *proxy; /* the associated proxy (if any) */ } wobj_t; /* queue element for active urls */ typedef struct aurl_t { struct aurl_t *next; /* next url in current queue */ url_t *url; /* the associated url */ off_t file_off; /* Length of partial content */ off_t size_total; /* Size according to host (0=?) */ off_t size_fetched; /* Size fetched so far */ time_t file_time; /* Partial content creation time */ time_t timeout; /* Next timeout */ int socket; /* TCP socket */ int ipidx; /* Index of the used haddr_t */ /* int bipidx; */ /* Index of the bound outgoing ip */ proxy_t *proxy; /* Pointer to used proxy */ int pipidx; /* Index of the proxy's used haddr_t */ int f; /* File descriptor (-1 if switched) */ u_int file_created:1, /* Did we already create the file? */ http_done_header:1, /* End of header reached? */ content_is_html:1, /* Try recursive search on file? */ reloc:1; /* Is this url being redirected? */ char *headers; /* HTTP headers for saving to file */ int hdrslen, hdrssiz; /* Actual and allocated lenght of above */ /* Input data overlap buffer */ char *buffer; /* Overlap buffer */ short size; /* Current size of overlap buffer */ short offset; /* Current fill state of overlap buffer */ short http_result_code; /* 200, 400 etc. 0 means no status code by now */ short displen; /* Length of >> */ char disposition[0]; /* Local file name */ } aurl_t; typedef struct whost_t { struct whost_t *next; /* fork/read queue link */ wobj_t *objq; /* urls/proxies depending on this lookup */ host_t *host; /* host being looked up */ } whost_t; typedef struct dnsproc_t { struct dnsproc_t *next; whost_t *whost; /* host being looked up */ int fd; /* control socket */ int pid; /* pid of the DNS helper process */ long timeout; /* lookup or idle timeout */ } dnsproc_t; /* set: prepend, remove random */ #define ex_linear_na_queue(q, t) extern t *q #define linear_na_queue(q, t) t *q = 0 #define lnq_append(q, e) do {e->next = q; q = e;} while(0) /* #define lnq_rm1st(q) do {q = q->next;} while(0) */ #define lnq_remove(q, e) do {*e##p = (*e##p)->next;} while(0) #define lnq_iterate(q, t, e, a) do {t *e; for (e = q; e; e = e->next) a;} while(0) #define lnq_iterate_rm(q, t, e, a, c) do {t *e, **e##p; for (e##p = &q; (e = *e##p); ) {a; c: e##p = &(e->next); }} while(0) /* queue: append, remove first */ #define ex_linear_queue(q, t) extern t *q, **q##_app #define linear_queue(q, t) t *q = 0, **q##_app = &q #define lq_append(q, e) do {e->next = 0; *q##_app = e; q##_app = &(e->next);} while(0) #define lq_prepend(q, e) do {e->next = q; q = e; if (q##_app == &q) q##_app = &(q->next);} while(0) #define lq_rm1st(q) do {q = q->next; if (!q) q##_app = &q; } while(0) /* #define lq_remove(q, e) do {if (q##_app == &((*e##p)->next)) q##_app = e##p; *e##p = (*e##p)->next;} while(0) */ #define lq_consume(q, t, e, a) do {t *e; while (q) {e = q; a;}} while(0) /* circular queue: append, remove first, move first to end */ #define ex_circular_queue(q, t) extern t *q #define circular_queue(q, t) t *q = 0 #define cq_append(q, e) do {if(q) {e->next = q->next; q->next = e;} else e->next=e; q = e;} while(0) #define cq_rm1st(q) do {if(q->next == q) q = 0; else q->next = q->next->next;} while(0) /* #define cq_remove(q, e) do {if(q->next == q) q = 0; else {if(e##p->next == q) q = e##p; e##p->next = e##p->next->next;}} while(0) */ #define cq_consume(q, t, e, a) do {t *e, *e##fp; if (q) {e##fp = q; do {e = q->next; a;} while (e != e##fp);}} while(0) /* Update modes for already existing files */ #define EX_CLOBBER 0 #define EX_UPDATE 1 #define EX_CONTINUE 2 #define EX_NO_CLOBBER 3 /* Directory hierarchy creation modes */ #define DIRS_NONE 0 #define DIRS_NORMAL 1 #define DIRS_ALWAYS 2 /* Levels of url recursion: */ #define DONT_FETCH 0 #define NOT_RECURSIVE 1 #define SUBDIR_RECURSIVE 2 #define HOST_RECURSIVE 3 #define GLOBAL_RECURSIVE 4 /* Generic return codes */ #define RT_OK 0 /* go on */ #define RT_SKIP 1 /* deny existence */ #define RT_AGAIN 2 /* non-error retry */ #define RT_RETRY 3 /* error retry */ #define RT_GIVEUP 4 /* fatal error */ #define RT_DONE 5 /* finished */ #define RT_TIMEOUT 6 /* proxy->server timeout */ #define RT_REFUSED 7 /* proxy->server refusal */ /* Functions: */ /* main.c: */ extern char *progname; extern int verbose; int calc_hash(u_char *data, int len); void *mmalloc(size_t size); void *mrealloc(void *ptr, size_t size); void die(int ret, const char *msg, ...); int errm(url_t *u, const char *txt, ...); void prx(int lev, const char *txt, ...); #define NFO 3 #define WRN 2 #define ERR 1 #ifdef DEBUG extern int debug; void dbp(const char *txt, ...); void dbpe(const char *txt, ...); # define dbg(wht, tdo) do { if (wht & debug) dbp tdo; } while (0) # define dbge(wht, tdo) do { if (wht & debug) dbpe tdo; } while (0) #else # define dbg(wht, tdo) # define dbge(wht, tdo) #endif #define URL 1 #define DNS 2 #define QUE 4 #define CON 8 #define HDR 16 #define CHK 32 #define MEM 64 #if 0 # define ierr(m) die(3, m) #else # define ierr(m) *(char *)0 = 0 #endif #ifdef USE_MAGIC void magck(const char *); # define checken(m) magck(m) # define ichecken_hashurl(p) calc_hash((char *)&(p->len), p->len) # define ichecken_hashgen(p) calc_hash((char *)p+sizeof(int), sizeof(*p)-sizeof(int)) # define checken_updurl(p) p->chk = ichecken_hashurl(p) # define checken_updgen(p) p->chk = ichecken_hashgen(p) # define ichecken_url(p) (p->chk == ichecken_hashurl(p)) # define ichecken_gen(p) (p->chk == ichecken_hashgen(p)) # define checken_url(p,m) do { if(!ichecken_url(p)) checken(m); } while(0) # define checken_gen(p,m) do { if(!ichecken_gen(p)) checken(m); } while(0) #else # define checken(m) do ; while(0) # define checken_updurl(p) do ; while(0) # define checken_updgen(p) do ; while(0) # define checken_url(p,m) do ; while(0) # define checken_gen(p,m) do ; while(0) #endif /* hostlist.c: */ extern int always_primary_name; extern host_t *hostlist; ex_linear_queue(queue_dns_lookup, whost_t); ex_linear_na_queue(queue_dns_busy, dnsproc_t); ex_linear_na_queue(queue_dns_idle, dnsproc_t); host_t *host_lookup_fast(char *name, int namlen); host_t *host_lookup_full(char *name, int namlen, url_t *u, proxy_t *prx); int start_lookup(dnsproc_t *pr); int finish_lookup(dnsproc_t *pr); dnsproc_t *fork_dnsproc(void); void reap_dnsproc(dnsproc_t *pr); /* getopts.c: */ void getopts(int argc, char *argv[]); /* fetch.c: */ extern off_t max_bytes, fetched_bytes, total_bytes; extern int max_dnss_active; extern int max_urls_active; extern int timeout_dns; extern int max_time; extern int max_urls; extern int num_urls; extern int num_urls_done; extern int num_urls_fail; extern int num_errors; extern int show_stat; extern int waiting_proxies; extern int all_proxy_wait; /* unused */ extern struct timeval cur_tv, throttle; extern struct sockaddr_in bind_addr; ex_circular_queue(queue_urls_connect, wurl_t); ex_linear_na_queue(queue_urls_request, aurl_t); ex_linear_na_queue(queue_urls_reply, aurl_t); int touch(aurl_t *au); void byebye(char *msg); void fetch_all(void); /* url.c: */ extern int economize_dns; extern url_t *urllist; extern proxy_t *proxylist; int same_dir(char *path, int len, url_t *referer, int is_req); int find_url(char *path, int len, hinfo_t *hinfo, u_short port, int *hashp); proxy_t *parse_proxy (char *proxy, int ratio); int parse_add_url(char *url, int len, url_t *referer, url_parm_t *parm, int istopdir, int isreq, int relocs, int link_depth); int queue_url(url_t *u); void add_url(url_t *u); void free_url(url_t *u); int clone_parm(url_t *u); /* recurse.c: */ int recurse_buff(url_t *u, char *databuf, int len, int notlast); void recurse_pfile(url_t *u, int fi, char **bupo, int *lepo); void recurse_file(url_t *u, char *name); /* http_conn.c: */ int activate_url(url_t *u, aurl_t **au); /* http_req.c: */ extern void init_user_agent(void); #define len_enc_auth(x) (4 * (((x) + 2) / 3) + 1) void encode_auth(char *buf, char *auth, int len); int send_http_get(aurl_t *au); /* http_rsp.c: */ extern int economize_files; int mmfopen(char *name, int flags, int *f); int free_fd(void); int handle_reply(aurl_t *au); /* util_date.c: */ #define BAD_DATE 0 time_t parseHTTPdate(const char *date); #endif /* _PUF_H */