/* $Id: puf.h,v 1.17 2004/03/07 12:48:20 ossi Exp $ *
*
* puf 0.9 Copyright (C) 2000-2004 by Oswald Buddenhagen <puf@ossi.cjb.net>
* based on puf 0.1.x (C) 1999,2000 by Anders Gavare <gavare@hotmail.com>
*
* You may modify and distribute this code under the terms of the GPL.
* There is NO WARRANTY of any kind. See COPYING for details.
*
* puf.h - global definitions
*
*/
#ifndef _PUF_H
#define _PUF_H
#include <config.h>
/*
 * printf conversion for file offsets: SSOFFT is the length modifier plus
 * conversion character, SOFFT the complete "%..." specification.
 * With USE_LL defined, _FILE_OFFSET_BITS is set to 64 as well; this has
 * to happen ahead of the system header includes that follow.
 */
#if defined(USE_LL)
# define _FILE_OFFSET_BITS 64
# define SSOFFT "lli"
#else /* !USE_LL */
# define SSOFFT "li"
#endif
#define SOFFT "%" SSOFFT

/* Supply a zero _O_BINARY when nothing has defined it so far. */
#if !defined(_O_BINARY)
# define _O_BINARY 0
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <stdio.h>
#include <ctype.h>
#include <strings.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <utime.h>
#include <fcntl.h>
#include <errno.h>
#include <stdarg.h>
#include <stddef.h>
#include <limits.h>
#include <locale.h>
/* ---- compile-time switches ---- */

/* Remove this definition to build without the -d switch. */
#define DEBUG

/* The following may be defined, but is not necessary: */
/*#define HTTP_ACCEPT "Accept: * / *" */

/* Remove this definition to drop the message about incorrect DNS
 * resolver configurations. */
#define CORRECT_DNS

/* ---- fixed limits ---- */

#define SHORTSTR     512        /* string length limit */
#define MAXBUFSIZE   0x8000     /* largest buffer used when reading from the network */
#define OVERLAPLEN   200        /* overlap length of consecutive buffer scans */
#define MAXHEADERLEN 0x10000    /* longest header that is accepted */

/* ---- file naming ---- */

/* File name used for a url that ends with a slash. */
#define DEFAULT_INDEX_FILE_NAME "index.html"
/* Extension appended to partial files. */
#define PART_EXT ".part"

/* ---- defaults ---- */

/* May be raised above 0 by those who like the debug output. */
#define DEFAULT_VERBOSITY       0
/* Maximum number of connections. */
#define DEFAULT_MAX_ACTIVE      20
/* Maximum number of asynchronous DNS lookups. */
#define DEFAULT_MAX_DNS_FORKS   10
/* Timeout while waiting for a DNS lookup to complete. */
#define DEFAULT_TIMEOUT_DNS     60
/* Timeout while connecting to a host. */
#define DEFAULT_TIMEOUT_CONNECT 60
/* Timeout while waiting for http reply data. */
#define DEFAULT_TIMEOUT_DATA    120
/* Length of the bandwidth averaging time frame. */
#define AVERAGING_TIMEFRAME     100
/* Maximum number of connection attempts per url. */
#define DEFAULT_MAX_ATTEMPTS    5
/* Declare strdup() here whenever HAVE_STRDUP is not defined. */
#if !defined(HAVE_STRDUP)
char *strdup(const char *s);
#endif
/*
 * RND(m): pseudo-random integer in the range [0, m), derived from rand().
 *
 * The integer variant scales the upper bits of rand() and therefore needs
 * m < 32768 to keep the intermediate product small; the disabled variant
 * does the same through floating point.  The argument is parenthesized so
 * that expressions such as RND(a + b) scale the whole expression instead
 * of only its first operand.
 */
#if 1
# define RND(m) ((rand() >> 15) * (m) / ((RAND_MAX >> 15) + 1)) /* m < 32768 */
#else
# define RND(m) ((int)(rand() * 1.0 * (m) / (RAND_MAX + 1.0))) /* needs float */
#endif
/* Structures for hostlist.c */

/*
 * One IPv4 address plus per-address state.
 * NOTE(review): the state members are not described in this header;
 * judging by their names they track retries -- confirm in hostlist.c.
 */
typedef struct haddr_t {
    struct in_addr addr;
    time_t retry_time;
    u_int attempt:8;
    u_int last_errt:2;
    u_int err_wait:16;
} haddr_t;
/* Information record of a host; ends in a variable number of addresses. */
typedef struct hinfo_t {
    char *name;             /* points back to the primary name */
    char *lname;            /* points back to the local storage name */
    u_int is_http11:1;      /* stays 1 until the opposite is proven */
    u_int maybe:1;          /* state flag of the proxy finder */
    short num_ips;          /* 0 when the hostname was not found */
    short cur_ip;           /* used for round-robin */
    haddr_t ips[0];         /* really num_ips entries */
} hinfo_t;
/* Chained host entry: a link, the info record and the trailing name. */
typedef struct host_t {
    struct host_t *next;
    hinfo_t *info;
    char name[0];
} host_t;
/* Proxy specification. */
typedef struct proxy_t {
    struct proxy_t *next;   /* following proxy in the chain */
    host_t *host;           /* the proxy's host */
    u_int have_auth:1;      /* is User:Password (BASE64) stored after cgi_path? */
    u_int ready:1;          /* has the DNS lookup completed? */
    u_short port;           /* the proxy's port */
    u_short ratio;          /* load ratio that was requested */
    int score;              /* connection/byte count, for capacity-driven balancing */
    /* u_short cur_conn;*/  /* currently open connections, for load-driven balancing; unused now */
    char cgi_path[0];       /* used for "cgi-proxies" only */
} proxy_t;
/* Fake user agent: a ratio followed directly by the agent string. */
typedef struct agent_t {
    int ratio;
    char agent[0];
} agent_t;
/*
 * Generic array of pointers.
 * NOTE(review): nents/rents look like used vs. reserved entry counts and
 * cow like a copy-on-write marker -- confirm against the users.
 */
typedef struct ptrarr_t {
    void *ents;
    u_int nents;
    u_int rents;
    u_int cow:1;
} ptrarr_t;
/* Disposition path specification as given with -P. */
typedef struct disp_path_t {
    int file_num;   /* used by -xe */
    char path[0];
} disp_path_t;
/* command line options */
/*
 * Every member is annotated with the switch(es) it belongs to.  The
 * trailing ":N" / "short" notes appear to record how wide the value
 * really is (NOTE(review): inferred from the annotations only).
 * With USE_MAGIC the record starts with a checksum member; the generic
 * checksum macros hash everything behind the first int of a record.
 */
typedef struct options_t {
#ifdef USE_MAGIC
int chk;
#endif
int follow_src; /* -p[r[+[+]]] :3 */
int follow_href; /* -r[+[+]] :3 */
int follows_max; /* [-{p,{{pr,r}{,+{,+}}}}] :3 */
int dir_mode; /* -nd & -xd :2 */
int max_depth; /* -ld short */
int max_recurse; /* -l short */
off_t max_bytes; /* -lb */
int inhibit_cgiget; /* -xg & -ng :2 */
int force_html; /* -F :1 */
int update_mode; /* -u, -c & -nc :2 */
disp_path_t *disp_path; /* -P */
char *index_filename; /* -xi */
int enumerate_files; /* -xe :1 */
#ifndef HAVE_CYGWIN
int fat_quotes; /* -xq :1 */
#endif
int no_touch; /* -nt :1 */
int delete_broken; /* -nb :1 */
int send_if_range; /* -ni :1 */
int send_referer; /* -nR :1 */
int uar_total; /* [-U & -iU] */
ptrarr_t user_agents; /* -U & -iU */
ptrarr_t aux_headers; /* -xH */
ptrarr_t save_headers; /* -xh */
int timeout_connect; /* -Tc short */
int timeout_data; /* -Td short */
int max_attempts; /* -t short */
int fail_no_wait; /* -nw :1 */
int http_err_trans; /* -xT :1 */
ptrarr_t bind_addrs; /* -xb & ib */
ptrarr_t proxies; /* -y & -iy */
} options_t;
/* shared url parameters */
typedef struct url_parm_t {
#ifdef USE_MAGIC
int chk;
#endif
options_t *opt;
char *http_auth; /* User:Password in BASE64 encoding */
char *disposition; /* -O, [-xE] & URL*disp */
proxy_t *proxy; /* URL^proxy */
uint strictproxy:1; /* URL^^proxy */
uint ref_count:31; /* >1 -> actually copy in clone_parm() */
} url_parm_t;
/*
 * A url known to the program, chained into the global url list.
 *
 * With USE_MAGIC the record is prefixed by a full-width attempt counter,
 * a checksum and the record length; the url checksum macros hash `len`
 * bytes starting at the `len` member.  Without USE_MAGIC the attempt
 * counter is an 8-bit bit-field next to the flags.
 * The local part is stored directly behind the fixed members.
 */
typedef struct url_t {
#ifdef USE_MAGIC
u_int attempt; /* Nr of attempt to download this URL */
int chk; /* Structure checksum */
/* checked data begins here */
u_int len; /* Structure length */
#endif
struct url_t *next; /* next url in global chain */
struct url_t *referer; /* Referer URL (or NULL) */
url_parm_t *parm; /* URL parameters */
u_int
#ifndef USE_MAGIC
attempt:8, /* Nr of attempt to download this URL */
#endif
is_top_dir:1, /* Get disp_off from this url? */
is_requisite:1, /* 1=comes from src=, otherwise href= */
relocs:3; /* How often this URL was redirected */
/* Url: host, port and local part: */
int url_hash; /* (For quick search) */
host_t *host; /* Host of URL */
u_short port; /* Host port */
short link_depth; /* For -l */
short path_len; /* Length of the path in the local part */
short disp_pathoff; /* Disposition path; points into local_part */
char local_part[0]; /* Variable length local part */
} url_t;
/* Element of a download queue holding a url that is still waiting. */
typedef struct wurl_t {
    struct wurl_t *next;    /* successor within the current queue */
    url_t *url;             /* url this element stands for */
} wurl_t;
/* Element of a queue of waiting objects. */
typedef struct wobj_t {
    struct wobj_t *next;    /* successor within the current queue */
    url_t *url;             /* url this element stands for, if any */
    proxy_t *proxy;         /* proxy this element stands for, if any */
} wobj_t;
/*
 * Element of a queue of active urls.  The local file name is stored
 * directly behind the fixed members.
 */
typedef struct aurl_t {
    struct aurl_t *next;        /* successor within the current queue */
    url_t *url;                 /* url this element stands for */

    off_t file_off;             /* length of the partial content */
    off_t size_total;           /* size as reported by the host (0=?) */
    off_t size_fetched;         /* size fetched up to now */
    time_t file_time;           /* creation time of the partial content */
    time_t timeout;             /* next timeout */

    int socket;                 /* TCP socket */
    int ipidx;                  /* index of the haddr_t in use */
    /* int bipidx; */           /* index of the bound outgoing ip */
    proxy_t *proxy;             /* proxy in use */
    int pipidx;                 /* index of the proxy's haddr_t in use */
    int f;                      /* file descriptor (-1 if switched) */

    u_int file_created:1;       /* has the file been created already? */
    u_int http_done_header:1;   /* has the end of the header been reached? */
    u_int content_is_html:1;    /* try a recursive search on the file? */
    u_int reloc:1;              /* is this url being redirected? */

    char *headers;              /* HTTP headers to be saved to file */
    int hdrslen;                /* actual length of headers */
    int hdrssiz;                /* allocated length of headers */

    /* Input data overlap buffer */
    char *buffer;               /* the overlap buffer */
    short size;                 /* current size of the overlap buffer */
    short offset;               /* current fill state of the overlap buffer */

    short http_result_code;     /* 200, 400 etc.; 0 = no status code so far */
    short displen;              /* length of disposition */
    char disposition[0];        /* local file name */
} aurl_t;
/* A host whose name lookup is pending. */
typedef struct whost_t {
    struct whost_t *next;   /* link for the fork/read queue */
    wobj_t *objq;           /* urls/proxies that depend on this lookup */
    host_t *host;           /* the host being looked up */
} whost_t;
/* Bookkeeping for one DNS helper process. */
typedef struct dnsproc_t {
    struct dnsproc_t *next;
    whost_t *whost;     /* the host being looked up */
    int fd;             /* control socket */
    int pid;            /* process id of the DNS helper */
    long timeout;       /* timeout of the lookup or of idling */
} dnsproc_t;
/*
 * set: prepend, remove random
 *
 * Singly linked list addressed through a bare head pointer.
 *   ex_linear_na_queue / linear_na_queue  declare / define the head.
 *   lnq_append(q, e)      puts e in front of the list.
 *   lnq_iterate(...)      declares `e` of type `t *` and runs statement
 *                         `a` for every element.
 *   lnq_iterate_rm(...)   like lnq_iterate, but also maintains `e##p`,
 *                         the address of the link leading to `e`, so that
 *                         `a` may call lnq_remove() and `continue`.  `c`
 *                         names a label placed in front of the advance
 *                         step; `a` can jump to it to move on.
 *   lnq_remove(q, e)      unlinks the current element; only valid inside
 *                         lnq_iterate_rm, as it uses `e##p`.
 *
 * Queue and element arguments are parenthesized wherever they are not
 * token-pasted or declared, so pointer expressions such as `&obj` can be
 * passed.  Arguments may still be evaluated more than once.
 */
#define ex_linear_na_queue(q, t) extern t *q
#define linear_na_queue(q, t) t *q = 0
#define lnq_append(q, e) do {(e)->next = (q); (q) = (e);} while(0)
/* #define lnq_rm1st(q) do {q = q->next;} while(0) */
#define lnq_remove(q, e) do {*e##p = (*e##p)->next;} while(0)
#define lnq_iterate(q, t, e, a) do {t *e; for (e = (q); e; e = e->next) a;} while(0)
#define lnq_iterate_rm(q, t, e, a, c) do {t *e, **e##p; for (e##p = &(q); (e = *e##p); ) {a; c: e##p = &(e->next); }} while(0)
/*
 * queue: append, remove first
 *
 * Singly linked FIFO consisting of the head pointer `q` and the append
 * pointer `q_app`, which addresses the link the next append will write
 * (the head itself while the queue is empty).
 *   lq_append(q, e)    adds e at the end.
 *   lq_prepend(q, e)   adds e at the front, fixing q_app if the queue
 *                      was empty.
 *   lq_rm1st(q)        drops the first element, resetting q_app when the
 *                      queue becomes empty.
 *   lq_consume(...)    declares `e` and runs `a` with e set to the head
 *                      for as long as the queue is non-empty; `a` has to
 *                      shorten the queue itself (e.g. with lq_rm1st).
 *
 * `q` must be the identifier the queue was declared with, because
 * `q##_app` is formed by token pasting.  Element arguments are
 * parenthesized so that pointer expressions such as `&obj` can be passed;
 * they may be evaluated more than once.
 */
#define ex_linear_queue(q, t) extern t *q, **q##_app
#define linear_queue(q, t) t *q = 0, **q##_app = &q
#define lq_append(q, e) do {(e)->next = 0; *q##_app = (e); q##_app = &((e)->next);} while(0)
#define lq_prepend(q, e) do {(e)->next = (q); (q) = (e); if (q##_app == &(q)) q##_app = &((q)->next);} while(0)
#define lq_rm1st(q) do {(q) = (q)->next; if (!(q)) q##_app = &(q); } while(0)
/* #define lq_remove(q, e) do {if (q##_app == &((*e##p)->next)) q##_app = e##p; *e##p = (*e##p)->next;} while(0) */
#define lq_consume(q, t, e, a) do {t *e; while (q) {e = (q); a;}} while(0)
/*
 * circular queue: append, remove first, move first to end
 *
 * Circular singly linked list; `q` points at the element appended last,
 * so `q->next` is the first element (0 means empty).
 *   cq_append(q, e)   links e behind the last element and makes it the
 *                     new last one.
 *   cq_rm1st(q)       unlinks the first element; q must not be empty.
 *   cq_consume(...)   declares `e`, remembers the last element on entry
 *                     and runs `a` with e set to the current first
 *                     element until that remembered element has been
 *                     handled; `a` has to take e off the front itself.
 *
 * Queue and element arguments are parenthesized so that pointer
 * expressions such as `&obj` can be passed; they may be evaluated more
 * than once.
 */
#define ex_circular_queue(q, t) extern t *q
#define circular_queue(q, t) t *q = 0
#define cq_append(q, e) do {if(q) {(e)->next = (q)->next; (q)->next = (e);} else (e)->next = (e); (q) = (e);} while(0)
#define cq_rm1st(q) do {if((q)->next == (q)) (q) = 0; else (q)->next = (q)->next->next;} while(0)
/* #define cq_remove(q, e) do {if(q->next == q) q = 0; else {if(e##p->next == q) q = e##p; e##p->next = e##p->next->next;}} while(0) */
#define cq_consume(q, t, e, a) do {t *e, *e##fp; if (q) {e##fp = (q); do {e = (q)->next; a;} while (e != e##fp);}} while(0)
/* How files that exist already are updated */
#define EX_CLOBBER      0
#define EX_UPDATE       1
#define EX_CONTINUE     2
#define EX_NO_CLOBBER   3
/* How the directory hierarchy is created */
#define DIRS_NONE       0
#define DIRS_NORMAL     1
#define DIRS_ALWAYS     2
/* Url recursion levels, in ascending order */
#define DONT_FETCH          0
#define NOT_RECURSIVE       1
#define SUBDIR_RECURSIVE    2
#define HOST_RECURSIVE      3
#define GLOBAL_RECURSIVE    4
/* Return codes shared by several functions */
#define RT_OK       0   /* carry on */
#define RT_SKIP     1   /* deny existence */
#define RT_AGAIN    2   /* retry, no error involved */
#define RT_RETRY    3   /* retry after an error */
#define RT_GIVEUP   4   /* fatal error */
#define RT_DONE     5   /* finished */
#define RT_TIMEOUT  6   /* timeout between proxy and server */
#define RT_REFUSED  7   /* refusal between proxy and server */
/* Functions: */
/* main.c: */
/* Globals and helpers that the section heading above attributes to main.c. */
extern char *progname;
extern int verbose;
/* Hash over len bytes at data; the checksum macros of this header use it. */
int calc_hash(u_char *data, int len);
/* Allocation helpers with malloc()/realloc()-like signatures.
 * NOTE(review): presumably they do not return on failure -- confirm. */
void *mmalloc(size_t size);
void *mrealloc(void *ptr, size_t size);
/* printf-style reporting functions.
 * NOTE(review): die() presumably terminates with status ret, and prx()
 * presumably filters on lev -- confirm in main.c. */
void die(int ret, const char *msg, ...);
int errm(url_t *u, const char *txt, ...);
void prx(int lev, const char *txt, ...);
/* Message levels (presumably the `lev` argument of prx() -- confirm). */
#define ERR 1
#define WRN 2
#define NFO 3
/*
 * Debug output, available only when DEBUG is defined.
 *
 * dbg(wht, tdo) / dbge(wht, tdo): call dbp / dbpe with the parenthesized
 * argument list `tdo` if any bit of `wht` is set in `debug`, e.g.
 *     dbg(URL | DNS, ("looking up %s\n", name));
 * `wht` is parenthesized before it is masked: `&` binds tighter than `|`,
 * so without the parentheses a combined mask like the one above would
 * make the test always true.
 * Without DEBUG both macros expand to nothing.
 */
#ifdef DEBUG
extern int debug;
void dbp(const char *txt, ...);
void dbpe(const char *txt, ...);
# define dbg(wht, tdo) do { if ((wht) & debug) dbp tdo; } while (0)
# define dbge(wht, tdo) do { if ((wht) & debug) dbpe tdo; } while (0)
#else
# define dbg(wht, tdo)
# define dbge(wht, tdo)
#endif
/* One bit per debug topic (presumably the masks handed to dbg()/dbge()). */
#define URL 0x01
#define DNS 0x02
#define QUE 0x04
#define CON 0x08
#define HDR 0x10
#define CHK 0x20
#define MEM 0x40
/*
 * ierr(m): react to an internal error.
 *
 * The active variant ignores the message and deliberately stores through
 * a null pointer to bring the process down on the spot.  The pointer is
 * volatile-qualified because a store through a plain null pointer is
 * undefined behavior that compilers may silently delete instead of
 * trapping.  The disabled variant would report the message through
 * die() instead.
 */
#if 0
# define ierr(m) die(3, m)
#else
# define ierr(m) (*(volatile char *)0 = 0)
#endif
/*
 * Structure checksums, active only with USE_MAGIC.
 *
 *   checken(m)                        report a failed check via magck().
 *   checken_updurl(p), checken_url(p, m)
 *       store / verify the checksum of a record that has a `len` member:
 *       `len` bytes starting at that member are hashed.
 *   checken_updgen(p), checken_gen(p, m)
 *       store / verify the checksum of a fixed-size record: everything
 *       behind its first int is hashed.
 *
 * All of these are statements.  The pointer argument is parenthesized,
 * the update macros are wrapped like their no-op counterparts, and the
 * data pointer is cast to u_char * to match calc_hash()'s prototype.
 * Without USE_MAGIC every macro is an empty statement that does not
 * evaluate its arguments.
 */
#ifdef USE_MAGIC
void magck(const char *);
# define checken(m) magck(m)
# define ichecken_hashurl(p) calc_hash((u_char *)&((p)->len), (p)->len)
# define ichecken_hashgen(p) calc_hash((u_char *)(p) + sizeof(int), sizeof(*(p)) - sizeof(int))
# define checken_updurl(p) do { (p)->chk = ichecken_hashurl(p); } while(0)
# define checken_updgen(p) do { (p)->chk = ichecken_hashgen(p); } while(0)
# define ichecken_url(p) ((p)->chk == ichecken_hashurl(p))
# define ichecken_gen(p) ((p)->chk == ichecken_hashgen(p))
# define checken_url(p,m) do { if(!ichecken_url(p)) checken(m); } while(0)
# define checken_gen(p,m) do { if(!ichecken_gen(p)) checken(m); } while(0)
#else
# define checken(m) do ; while(0)
# define checken_updurl(p) do ; while(0)
# define checken_updgen(p) do ; while(0)
# define checken_url(p,m) do ; while(0)
# define checken_gen(p,m) do ; while(0)
#endif
/* hostlist.c: */
extern int always_primary_name;
extern host_t *hostlist;
/* Append/remove-first queue of whost_t; also declares queue_dns_lookup_app. */
ex_linear_queue(queue_dns_lookup, whost_t);
/* Plain head-pointer lists of dnsproc_t records. */
ex_linear_na_queue(queue_dns_busy, dnsproc_t);
ex_linear_na_queue(queue_dns_idle, dnsproc_t);
/* Host lookup by name; the name is passed with an explicit length.
 * NOTE(review): presumably the full variant can start a DNS lookup on
 * behalf of u or prx -- confirm in hostlist.c. */
host_t *host_lookup_fast(char *name, int namlen);
host_t *host_lookup_full(char *name, int namlen, url_t *u, proxy_t *prx);
/* Operations on a DNS helper process record. */
int start_lookup(dnsproc_t *pr);
int finish_lookup(dnsproc_t *pr);
dnsproc_t *fork_dnsproc(void);
void reap_dnsproc(dnsproc_t *pr);
/* getopts.c: */
/* Takes main()-style argc/argv (presumably parses the command line). */
void getopts(int argc, char *argv[]);
/* fetch.c: */
/* Byte counters and limits. */
extern off_t max_bytes, fetched_bytes, total_bytes;
/* Limits, timeouts and counters (meaning as suggested by the names;
 * see fetch.c for the exact semantics). */
extern int max_dnss_active;
extern int max_urls_active;
extern int timeout_dns;
extern int max_time;
extern int max_urls;
extern int num_urls;
extern int num_urls_done;
extern int num_urls_fail;
extern int num_errors;
extern int show_stat;
extern int waiting_proxies;
extern int all_proxy_wait; /* unused */
extern struct timeval cur_tv, throttle;
extern struct sockaddr_in bind_addr;
/* Url queues: one circular queue of waiting urls, two plain lists of
 * active urls. */
ex_circular_queue(queue_urls_connect, wurl_t);
ex_linear_na_queue(queue_urls_request, aurl_t);
ex_linear_na_queue(queue_urls_reply, aurl_t);
int touch(aurl_t *au);
void byebye(char *msg);
void fetch_all(void);
/* url.c: */
extern int economize_dns;
/* Heads of the url and proxy chains (linked through their next members). */
extern url_t *urllist;
extern proxy_t *proxylist;
/* Paths are passed together with an explicit length. */
int same_dir(char *path, int len, url_t *referer, int is_req);
int find_url(char *path, int len, hinfo_t *hinfo, u_short port, int *hashp);
proxy_t *parse_proxy (char *proxy, int ratio);
int parse_add_url(char *url, int len, url_t *referer, url_parm_t *parm,
int istopdir, int isreq, int relocs, int link_depth);
int queue_url(url_t *u);
void add_url(url_t *u);
void free_url(url_t *u);
/* See url_parm_t.ref_count: a reference count above 1 makes this copy. */
int clone_parm(url_t *u);
/* recurse.c: */
/* Entry points taking the url plus a buffer, an open file or a file name
 * (NOTE(review): presumably they scan the content for further urls). */
int recurse_buff(url_t *u, char *databuf, int len, int notlast);
void recurse_pfile(url_t *u, int fi, char **bupo, int *lepo);
void recurse_file(url_t *u, char *name);
/* http_conn.c: */
/* NOTE(review): presumably hands back the new active-url record through
 * *au and returns one of the RT_* codes -- confirm in http_conn.c. */
int activate_url(url_t *u, aurl_t **au);
/* http_req.c: */
/* Takes no arguments and returns nothing. */
extern void init_user_agent(void);
/* Four output bytes per started group of three input bytes, plus one
 * (presumably the buffer size encode_auth() needs, terminator included). */
#define len_enc_auth(x) ((((x) + 2) / 3) * 4 + 1)
/* NOTE(review): presumably writes the encoded form of len bytes of auth
 * to buf; size buf with len_enc_auth(len) -- confirm in http_req.c. */
void encode_auth(char *buf, char *auth, int len);
int send_http_get(aurl_t *au);
/* http_rsp.c: */
extern int economize_files;
/* NOTE(review): presumably opens name with the given flags and stores
 * the descriptor in *f -- confirm in http_rsp.c. */
int mmfopen(char *name, int flags, int *f);
int free_fd(void);
int handle_reply(aurl_t *au);
/* util_date.c: */
/* NOTE(review): presumably what parseHTTPdate() returns for a date it
 * cannot parse -- confirm in util_date.c. */
#define BAD_DATE 0
time_t parseHTTPdate(const char *date);
#endif /* _PUF_H */
/* syntax highlighted by Code2HTML, v. 0.9.1 */