/*____________________ Main file for mGet ________________*/
/* some comments are stale - please bear with me or if you
* can find the stale ones please remove them and then send
* me the patch file :)
*/
/* Copyright 2001 2002 Debajyoti Bera */
/* This file is part of mGet.
* mGet is free Software; please refer to COPYING for terms and conditions */
#define _REENTRANT
#include "mget.h"
/*
 * Program entry point.
 *
 * Parses the command line, prepares the server address, fetches the header
 * information for the requested document and then runs the download threads.
 * When built with TRY_SIGNAL, verbose mode is forced on, "mget.log" is read
 * before option parsing, and a SIGINT handler is installed just before the
 * download starts.
 *
 * argc, argv: the usual command line, forwarded to get_options().
 * Does not return to its caller: it terminates through exit(0).
 */
int main(int argc, char **argv)
{
#ifdef TRY_SIGNAL
    VERBOSE = TRUE;
    read_log_file("mget.log");
#endif

    /* option parsing first: it fills the globals used by every later step */
    get_options(argc, argv);

    /* fill in the address used by the main socket */
    init_socket();

    /* fetch the header: length, version and status of the document */
    get_head(&main_request);

#ifdef TRY_SIGNAL
    /* from here on an interrupt is handed to catch_interrupt_signal() */
    signal(SIGINT, catch_interrupt_signal);
#endif

    process_thread(&main_request);

    fprintf(outfile, "\n%sThank you for using mGet/"VERSION"\n%s", sep, sep);
    fclose(outfile);

    exit(EXIT_SUCCESS);
}
/*
 * Print the command line synopsis on stderr and terminate the program.
 *
 * The text is a single format string assembled from adjacent string literals;
 * its only conversion (%s) receives VERSION.  Which literals take part is
 * decided at compile time:
 *   - the --use_ftp_proxy / --use_http_proxy lines are included when
 *     HAVE_GETOPT_LONG is defined, or otherwise when HAVE_GETOPT_GETOPT_H
 *     evaluates to non-zero;
 *   - the -l line is included only when TRY_SIGNAL is defined.
 *
 * The continuation lines inside the literals start in column 0 on purpose:
 * any leading whitespace added there would become part of the output.
 *
 * Never returns: always calls exit(1).
 */
void print_usage(void){
fprintf( \
stderr,
"Usage: mget %s \n\
[-n number_segments{1}] \n\
-f file_name\n"
#ifdef HAVE_GETOPT_LONG
" [--use_ftp_proxy[=<ftp_proxy_name:ftp_proxy_port{80}>]] \n\
[--use_http_proxy[=<http_proxy_name:http_proxy_port{80}>]]\n"
#elif HAVE_GETOPT_GETOPT_H
" [--use_ftp_proxy[=<ftp_proxy_name:ftp_proxy_port{80}>]] \n\
[--use_http_proxy[=<http_proxy_name:http_proxy_port{80}>]]\n"
#endif
" [-F<ftp_proxy_name:ftp_proxy_port{80}>] \n\
[-x<ftp_proxy_name:ftp_proxy_port{80}>] \n\
[-H<http_proxy_name:http_proxy_port{80}>] \n\
[-h<http_proxy_name:http_proxy_port{80}>] \n\
[-p port{80}] \n\
[-O Output_file_name] \n\
[-C]\n"
#ifdef TRY_SIGNAL
" [-l {log_file}]\n"
#endif
" [-t timeout_seconds{60}] \n\
[-R referrer] \n\
[-r rollback{0} bytes] \n\
[-v] \n\
[-V] \n\
------------------------------------------------------------------------- \n\
proxy option can be given as -H(F) proxyhost:proxyport or \n\
as --use_http(or ftp)_proxy=proxyhost:proxyport. If port is not given, \n\
port is assumed to be 80. [Works differently in Solaris, refer manpage]. \n\
On some machines getopt doesnot allow '-H <proxy_name>' i.e. the space is not allowed between H and hostname. \n\
\n", VERSION);
/* usage is only shown for invalid or informational invocations: stop here */
exit(1);
}
/* get the command line options and set up the option variables and other global variables */
void get_options(int argc, char **argv){
extern int optind;
extern char *optarg;
#if HAVE_GETOPT_LONG
int option_index;
static struct option long_options[]= {
{"use_http_proxy", optional_argument, NULL, 'H'},
{"use_ftp_proxy", optional_argument, NULL, 'F'},
{"use_http_proxy_noauth", optional_argument, NULL, 'h'},
{"use_ftp_proxy_noauth", optional_argument, NULL, 'x'}
};
#endif
int c;
if(argc<2)
print_usage();
/* allocate space for the protocol field. Why this different treatment
* to protocol whereas others are not allocated likewise :
* this is because initially protocol was statically allocated -
* as an array; but then while enabling resume support, the easiest
* thing to do was make it dynamically allocated */
protocol=(char *)calloc(5, sizeof(char)); //'http' - size max 4
/* a common mistake: donot try to print any information in this while block
* if needed, directly print on stdout, using outfile will lead to segmentation fault
* as outfile may not have been initialized until late */
#ifdef HAVE_GETOPT_GETOPT_H
while((c=getopt(argc, argv, "n:f:p:Cc:o:O:t:H::F::R:r:vl:h::x::V"))>0){
#elif HAVE_GETOPT_STDLIB_H
while((c=getopt(argc, argv, "n:f:p:Cc:o:O:t:H::h::F::R:r:vl:h::x::V"))>0){
#elif HAVE_GETOPT_LONG
while((c=getopt_long(argc, argv, "n:f:p:Cc:o:O:t:H::h::F::x::R:r:vl:V", long_options, &option_index))>0){
#else
error getopt not supported
#endif
switch (c){
case 'n':
num_segment=min(MAX_SEGMENT, atoi(optarg));
break;
case 'f':
file_name=
(char *)calloc(sizeof(char),1+strlen(optarg));
strcpy(file_name,optarg);/* XXX:should USE str*n*cpy */
file_name=_remove_http(file_name, protocol);
if(strncmp(protocol, "ftp", 3)==0)
PROTO_HTTP=FALSE; /* XXX:SHOULD be using ftp protocol */
break;
case 'p':
port=atoi(optarg);
break;
case 'o':
outfile=(FILE *)malloc(sizeof(FILE));
outfile=fopen(optarg,"w");
break;
case 'c': /* XXX: to add configuration file support */
fprintf(stderr,"no configuration file support till now\n");
break;
case 'C':
USE_HTTP_CONNECT=TRUE;
VERBOSEPRINT(fprintf(stderr, "Will use CONNECT to tunnel through the HTTP proxy.\n"))
break;
case 'O': /* XXX: to add output file support */
save_file_name=(char *)calloc(sizeof(char),1+strlen(optarg));
strcpy(save_file_name,optarg);/* XXX: to USE str*n*cpy */
break;
case 't':
thread_timeout_second=atoi(optarg);
break;
case 'r':
rollback=atoi(optarg);
#ifdef DEBUG
VERBOSEPRINT(printf("Using rollback of:%d.\n",rollback))
#endif
break;
case 'H':
#ifdef DEBUG
DEBUGPRINT(fprintf(stderr, "Debug:using http_proxy\n"))
#endif
if(proxy==NULL){
char *proxy_name;
if(optarg!=NULL)
proxy_name=get_proxy_name_port(optarg, &proxy_port, proxy_name);
else
proxy_name=get_proxy_from_env(&proxy_port, proxy_name, 'h');
if(proxy_name==NULL){
fprintf(stderr, "Invalid value of proxy. Using no-proxy setting.\n");
break;
}
proxy=(char *)calloc(sizeof(char),1+strlen(proxy_name));
/* XXX: to use str*n*cpy */
strcpy(proxy,proxy_name);
free(proxy_name);
PROXY=TRUE;
PROXY_AUTH=TRUE;
/* user has asked for proxy usage
* so ask for user name and password */
get_name_passwd();
}
break;
case 'h':
#ifdef DEBUG
DEBUGPRINT(fprintf(stderr, "Debug:using http_proxy but with no authentication\n"))
#endif
if(proxy==NULL){
char *proxy_name;
if(optarg!=NULL)
proxy_name=get_proxy_name_port(optarg, &proxy_port, proxy_name);
else
proxy_name=get_proxy_from_env(&proxy_port, proxy_name, 'h');
if(proxy_name==NULL){
fprintf(stderr, "Invalid value of proxy. Using no-proxy setting.\n");
break;
}
proxy=(char *)calloc(sizeof(char),1+strlen(proxy_name));
/* XXX: to use str*n*cpy */
strcpy(proxy,proxy_name);
free(proxy_name);
PROXY=TRUE;
PROXY_AUTH=FALSE;
/* user has asked for proxy usage, but the non-authentication type */
}
break;
case 'F':
#ifdef DEBUG
DEBUGPRINT(fprintf(stderr, "Debug:using ftp_proxy\n"))
#endif
if(proxy==NULL){
char *proxy_name;
if(optarg!=NULL)
proxy_name=get_proxy_name_port(optarg, &proxy_port, proxy_name);
else
proxy_name=get_proxy_from_env(&proxy_port, proxy_name, 'f');
if(proxy_name==NULL){
fprintf(stderr, "Invalid value of proxy. Using no-proxy setting.\n");
break;
}
proxy=(char *)calloc(sizeof(char),1+strlen(proxy_name));
/* XXX: to use str*n*cpy */
strcpy(proxy,proxy_name);
free(proxy_name);
PROXY=TRUE;
PROXY_AUTH=TRUE;
/* user has asked for proxy usage
* so ask for user name and password */
get_name_passwd();
}
break;
case 'x':
#ifdef DEBUG
DEBUGPRINT(fprintf(stderr, "Debug:using ftp_proxy with no proxy settings\n"))
#endif
if(proxy==NULL){
char *proxy_name;
if(optarg!=NULL)
proxy_name=get_proxy_name_port(optarg, &proxy_port, proxy_name);
else
proxy_name=get_proxy_from_env(&proxy_port, proxy_name, 'f');
if(proxy_name==NULL){
fprintf(stderr, "Invalid value of proxy. Using no-proxy setting.\n");
break;
}
proxy=(char *)calloc(sizeof(char),1+strlen(proxy_name));
/* XXX: to use str*n*cpy */
strcpy(proxy,proxy_name);
free(proxy_name);
PROXY=TRUE;
PROXY_AUTH=FALSE;
}
break;
case 'R':
referrer=(char *)calloc(sizeof(char),3+strlen(optarg));
strcpy(referrer,optarg);/* XXX: to USE str*n*cpy */
VERBOSEPRINT(fprintf(stderr, "Using referrer:%s\n", referrer))
referrer[strlen(optarg)]='\r';
referrer[strlen(optarg)+1]='\n';
referrer[strlen(optarg)+2]='\0';
REFERRER=TRUE;
break;
case 'v':
VERBOSE=TRUE;
break;
case 'V':
printf("mGet-"VERSION"\n");
print_usage();
break;
default:
print_usage();
}
}
/* first check if arguments etc. are all right, then go to other checkings */
if(file_name==NULL||optind > argc)
print_usage();
if(proxy==NULL){
/* direct connection
* => get the hostname and put it in proxy */
proxy=_get_site_name(file_name,proxy);
host_name=proxy;
/* no meaning of proxy_port, just kept as it was from before */
proxy_port=port;
}
else
host_name=_get_site_name(file_name,host_name);
/* fix the output file */
if(outfile==NULL){
outfile=(FILE *)malloc(sizeof(FILE));
outfile=stdout;
}
/* no ftp support without proxy, XXX: should try this out */
if(PROXY==FALSE && PROTO_HTTP==FALSE)
err_exit("No FTP allowed except through proxy.\n", 0, s);
/* check if some output filename is given, if not deduce the file name from site name */
if((save_file_name==NULL) && (save_file_name=_get_file_name(file_name,save_file_name))==NULL)
err_exit("No file name given",0,s);
/* fprintf(stderr,"\tDebug: file name is:%s\n",save_file_name); */
VERBOSEPRINT(fprintf(outfile, "Output file is:%s\n", save_file_name))
if(PROXY==FALSE && USE_HTTP_CONNECT==TRUE){
fprintf(stderr, "Tunnelling only allowed while using proxy. Will not use HTTP CONNECT.\n");
USE_HTTP_CONNECT=FALSE;
}
if(PROTO_HTTP==FALSE)
port=21;
VERBOSEPRINT(fprintf(outfile,"\n\
%sDocument %s requested from %s,with address %s in %d segments,\n,proxy:%s:%d\n%s",
sep,save_file_name,host_name,file_name,num_segment,proxy,proxy_port,sep))
}
/*
 * Fatal error exit.
 *
 * msg    : text handed to perror(), which appends the description of errno.
 * option : when equal to CLOSE_SOCK the socket is closed before exiting.
 * socket : the socket to close in that case.
 *
 * Never returns: the process ends with exit status 1.
 */
void err_exit(char *msg, int option, SOCKET socket)
{
    const int must_close = (option == CLOSE_SOCK);

    perror(msg);

    if (must_close) {
        closesocket(socket);
    }

    exit(1);
}
/*
 * Resolve the host named by the global `proxy` and fill the global `saddr`
 * (family, port taken from `proxy_port`, address) used by later connect()
 * calls.  No socket is created here.
 *
 * On a lookup failure the resolver error is reported through herror() and the
 * process exits with status 1.
 */
void init_socket()
{
#ifdef DEBUG
    char str[INET_ADDRSTRLEN];
    int i;
#endif
    HOST *host;

    /* get the hostent structure for the machine we will connect to */
    host = gethostbyname(proxy);
    if (host == NULL) {
        herror("Server lookup error");
        /* nothing to close: main() calls this before any socket is created */
        exit(1);
    }

#ifdef DEBUG
    /* print debug information about the resolved host */
    fprintf(outfile,"%s\t:=)Document requested from: %s\n",sep,host->h_name);
    fprintf(outfile,"Debug Information:\n");
    i = 0;
    while (host->h_aliases[i] != NULL) {
        fprintf(outfile,"\t:=)Other interfaces: %s\n",(*host).h_aliases[i]);
        i++;
    }
    fprintf(outfile,"\t:=)Proxy(Host) address: %s\n",inet_ntop(AF_INET,((INADDR *)host->h_addr),str,sizeof(str)));
#endif

    /* now fill saddr */
    saddr.sin_family = AF_INET;
    saddr.sin_port = htons(proxy_port);
    saddr.sin_addr = *((INADDR *)host->h_addr);
    memset(saddr.sin_zero, 0, sizeof(saddr.sin_zero));
}
/*
 * Send the initial request that gathers information about the document
 * (status, protocol version, length) and derive the download strategy.
 *
 * req : receives version, status_code and size_file parsed from the reply.
 *
 * Effects on globals, depending on the reply:
 *   - size reported as 0      : num_segment = 1, SUPPORT_RESUME = FALSE,
 *                               HAS_LENGTH = FALSE;
 *   - status 200 with a size  : num_segment = 1, SUPPORT_RESUME = FALSE and a
 *                               longer thread timeout (no range support);
 *   - status 206 with a size  : num_segment is re-validated against the size.
 * Any other status is reported and the process exits.  Socket, connect, send
 * and receive failures also terminate the process.
 *
 * Returns 1 whenever it returns.
 *
 * Ideally this request would already fetch some data while the length is
 * being determined; that is NOT IMPLEMENTED so far.
 */
int get_head(REQUEST *req)
{
    int ret;
    char *request_string, buffer[LENGTH];
    char *HEAD_REQUEST = (PROTO_HTTP == TRUE ? HEAD_STR_HTTP : HEAD_STR_FTP);

    /* create the main socket */
    s = socket(PF_INET, SOCK_STREAM, 0);
    if (s == -1)
        err_exit("Socket create error", NOCLOSE_SOCK, s);
    DEBUGPRINT(fprintf(outfile,"\t:=)Socket successfully created: descriptor is %d\n",s))

    /* connect to the host (or proxy) prepared by init_socket() */
    ret = connect(s, (SADDR *)&saddr, sizeof(SADDR));
    if (ret != 0)
        err_exit("Connect error", 1, s);

    /* connected: open the tunnel first when HTTP CONNECT was asked for */
    if (USE_HTTP_CONNECT == TRUE) {
        ret = do_http_CONNECT(s);
        if (ret != 200) {
            fprintf(outfile, "Disabling use of HTTP_CONNECT:");
            _process_http_status(ret);
            if (ret == 206)
                USE_HTTP_CONNECT = FALSE;
            else
                exit(1);
        }
    }

    /* build the request; the buffer is sized from every piece inserted */
    request_string =
        (char *)calloc(sizeof(char),
            4 + strlen(HEAD_REQUEST)
            + strlen(file_name)
            + strlen(host_name)
            + (PROXY_AUTH == TRUE ? strlen(PROXY_AUTH_STR) : 0)
            + (PROXY_AUTH == TRUE ? strlen(user_passwd) : 0)
            + (REFERRER == TRUE ? strlen(REFERRER_DECL) : 0)
            + (REFERRER == TRUE ? strlen(referrer) : 0));
    if (request_string == NULL)
        err_exit("Out of memory", 1, s);
    sprintf(request_string, HEAD_REQUEST,
        protocol,
        file_name,
        host_name,
        (PROXY_AUTH == TRUE ? PROXY_AUTH_STR : ""),
        (PROXY_AUTH == TRUE ? user_passwd : ""),
        (REFERRER == TRUE ? REFERRER_DECL : ""),
        (REFERRER == TRUE ? referrer : ""));

    /* now send the request; checked right away so errno is still valid */
    ret = send(s, request_string, strlen(request_string), 0);
    if (ret < 0)
        err_exit("Send error", 1, s);
    DEBUGPRINT(fprintf(outfile,"=> %d bytes sent:\n%s%s%sResponse awaited:\n",ret,sep,request_string,sep))
    free(request_string); /* not needed once it has been sent and logged */

    /* read the reply header */
    memset(buffer, '\0', LENGTH);
    ret = recv(s, buffer, LENGTH - 1, 0);
    if (ret < 0)
        err_exit("Receive error", 1, s);
    buffer[ret] = '\0';
    VERBOSEPRINT(fprintf(outfile,"\nReceived =>%d bytes\n%s",ret,sep))
    DEBUGPRINT(fprintf(outfile, "%s\n", buffer))

    /* calloc() already zero-fills the version buffer */
    req->version = (char *)calloc(sizeof(char), 10);
    if (req->version == NULL)
        err_exit("Out of memory", 1, s);

    /* the whole buffer is handed over: passing only the first line would
     * still pass a pointer into the same storage */
    _process_status_line(buffer, req->version, &(req->status_code));

    if (req->status_code == 200 || req->status_code == 206) {
        /* status is usable: release the connection before anything else */
        closesocket(s);
        _process_header_length(buffer, &(req->size_file));
        if (req->size_file == 0) {
            DEBUGPRINT(fprintf(outfile, "Size of file is 0 !!! Status is:%d, Some problem. Trying to get file in single thread.\n", req->status_code))
            fprintf(outfile, "Unable to get size of file, using single thread.\n");
            num_segment = 1; /* size not known... so can't segment */
            SUPPORT_RESUME = FALSE;
            /* the timeout is deliberately left alone here: a request to
             * continue a broken download may still be possible (unverified) */
            HAS_LENGTH = FALSE;
            return 1;
        }
        fprintf(outfile, "Size of requested file %s : %ld bytes\n", save_file_name, req->size_file);
        DEBUGPRINT(fprintf(outfile,"\tDebug:Size of file=%ld\n\tDebug:Version=%s\n\tDebug:Status=%d\n",
            req->size_file,
            req->version,
            req->status_code))
        if (req->status_code == 200) {
            VERBOSEPRINT(fprintf(outfile,"%sServer does not resume: using single thread, increasing timeout\n%s",sep,sep))
            num_segment = 1;
            SUPPORT_RESUME = FALSE;
            /* ad-hoc policy: at least 300 seconds, otherwise doubled */
            thread_timeout_second = (thread_timeout_second < 300 ? 300 : thread_timeout_second * 2);
        } else {
            num_segment = check_file_segment_num(req->size_file, num_segment);
        }
    } else {
        closesocket(s);
        strncpy(error_buffer, buffer, 1023);
        /* at least tell the user where the document has moved to */
        if (req->status_code == 301 || req->status_code == 302)
            get_redirected_address(buffer);
        _process_http_status(req->status_code);
        exit(1);
    }
    return 1;
}
/* function to get user name and password */
void get_name_passwd(void){
char *tmp_string;
char *tmp;
int name_size, pass_size; /* size of name and password */
int encode_length;
tmp_string=(char *)calloc(sizeof(char),20);/* No justification of 20, from experience */
tmp=(char *)calloc(sizeof(char),40);/* No justification of 40,again */
/* username */
fprintf(stderr,"Please give the proxy user-name:");
scanf("%s",tmp_string);
name_size=strlen(tmp_string);
user_name=(char *)calloc(sizeof(char),1+name_size);
strncpy(user_name, tmp_string, name_size+1);
DEBUGPRINT(fprintf(stderr,"\tDebug:Username given=%s\n",user_name))
tmp_string=getpass("Give the proxy password:");
pass_size=strlen(tmp_string);
/* Use encoding technique : TO BE DONE */
encode_length=((name_size+1+pass_size+2)/3)*4;
user_passwd=(char *)calloc(sizeof(char),3+encode_length); //ad-hoc
/* add user_name:passwd */
sprintf(tmp,"%s:%s",user_name,tmp_string);
user_passwd=_encode(tmp);
user_passwd[encode_length]='\r';
user_passwd[encode_length+1]='\n';
user_passwd[encode_length+2]='\0';
/* fprintf(outfile,"\tDebug: Password=%s\n",user_passwd); */
}
/*
 * Split the download into num_segment ranges, start one thread per range and
 * wait until all of them are done.
 *
 * req : the request filled by get_head(); size_file drives the ranges.
 *
 * Steps:
 *   - initialise the mutexes and the condition variable shared by the threads;
 *   - create (truncate) the output file and, when the length is known, extend
 *     it to its final size so that a lack of disk space shows up early;
 *   - fill one PPAGE per segment: every segment gets `interval` bytes, the
 *     last one gets whatever is left up to the end of the file;
 *   - start the threads, wait for curr_threads to drop to 0, join them.
 *
 * ASSUMPTION: the server accepts byte ranges.
 *
 * Exits through err_exit() when an initialisation, an allocation or the
 * creation of the output file fails.  A thread that cannot be started is
 * reported (verbose mode) and skipped.
 */
void process_thread(REQUEST *req)
{
    int i;
    long temp, interval;  /* long: offsets beyond INT_MAX must not truncate */
    char *created;        /* created[i] != 0 once thread i really exists */

    if (pthread_mutex_init(&thread_done_mutex, NULL) != 0)
        err_exit("Mutex Init Error.", 0, s);
    if (pthread_cond_init(&thread_done_cond, NULL) != 0)
        err_exit("Conditional Init Error.", 0, s);
    if (pthread_mutex_init(&file_mutex, NULL) != 0)
        err_exit("File mutex Init Error.", 0, s);

    /* num_segment is used as a divisor and as an array size below */
    if (num_segment < 1)
        num_segment = 1;

    temp = -1; /* last byte handed out so far; segment starts at temp+1 */
    interval = (HAS_LENGTH == TRUE ? req->size_file / num_segment : 1);

    /* one descriptor and one thread id per segment */
    ppage = (PPAGE *)calloc(sizeof(PPAGE), num_segment);
    thread_id = (pthread_t *)calloc(sizeof(pthread_t), num_segment);
    created = (char *)calloc(sizeof(char), num_segment);
    if (ppage == NULL || thread_id == NULL || created == NULL)
        err_exit("Memory allocation error", 0, s);

    /* create the output file; every thread re-opens it on its own later */
    file = fopen(save_file_name, "w");
    if (file == NULL)
        err_exit("File creation error", 0, s);
    if (HAS_LENGTH == TRUE && req->size_file > 0) {
        /* reserve the space: write one byte at the last position of the
         * file; it is overwritten by the real data later on */
        if (fseek(file, (long)(req->size_file - 1), SEEK_SET) != 0)
            err_exit("File creation error", 0, s);
        if (fputc('0', file) == EOF)
            err_exit("File creation error", 0, s);
    }
    if (fclose(file) != 0)
        err_exit("File creation error", 0, s);

    for (i = 0; i < num_segment; ++i) {
        PPAGE *seg = ppage + i;
        int rc;

        seg->page_id = i + 1; /* id: 1 to num_segment */
        seg->size_total = req->size_file;
        seg->range_start = temp + 1;
        if (i == num_segment - 1) {
            /* last segment: its range is whatever is left; the end is one
             * less than the size because ranges start from 0 */
            seg->range_end = req->size_file - 1;
            seg->range_size = req->size_file - (num_segment - 1) * interval;
        } else {
            seg->range_end = temp + interval;
            seg->range_size = interval;
            temp += interval;
        }

        /* count the thread before it starts: it decrements the counter under
         * the same mutex when it finishes, possibly before we get here again */
        pthread_mutex_lock(&thread_done_mutex);
        curr_threads++;
        pthread_mutex_unlock(&thread_done_mutex);

        rc = pthread_create(thread_id + i, NULL, start_thread, (void *)seg);
        if (rc == 0) {
#ifdef TRY_SIGNAL
            fprintf(outfile,"Debug: Created thread %d with tid:%d\n",i+1,(int)*(thread_id+i));
#endif
            created[i] = 1;
        } else {
            pthread_mutex_lock(&thread_done_mutex);
            curr_threads--;
            pthread_mutex_unlock(&thread_done_mutex);
            /* pthread_create() reports the error as its return value */
            VERBOSEPRINT(fprintf(outfile,"Thread %d create error:%s\n",i+1,strerror(rc)))
        }
    }

    /* sleep until the last running thread has signalled its end */
    pthread_mutex_lock(&thread_done_mutex);
    while (curr_threads != 0)
        pthread_cond_wait(&thread_done_cond, &thread_done_mutex);
    pthread_mutex_unlock(&thread_done_mutex);

    for (i = 0; i < num_segment; i++) {
        void *thread_return;

        if (!created[i])
            continue; /* never started: there is nothing to join */
        pthread_join(thread_id[i], &thread_return);
#ifdef TRY_SIGNAL
        if (thread_return == PTHREAD_CANCELED)
            fprintf(outfile, "Debug: Thread %d cancelled.\n", i);
        else
            fprintf(outfile,"Debug: Thread %d joined\n",i);
#endif
    }
    free(created);
    return;
}
/*
 * Start routine of a download thread.
 *
 * input : the PPAGE describing the segment this thread is responsible for.
 *
 * The thread switches to asynchronous cancellation (needed so that ctrl-c can
 * stop it for the resume feature), then repeatedly connects and requests its
 * range until the read position is no longer below the end of the range.  A
 * failed connection is retried after RETRY_TIME seconds.  On the way out the
 * socket is closed, curr_threads is decremented under thread_done_mutex and
 * the condition variable is signalled.
 *
 * Always yields NULL.
 */
void* start_thread(void *input)
{
    PPAGE *seg = (PPAGE *)input;

    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

    /* both the read and the write pointer start at the head of the range */
    seg->present_read_pos = seg->range_start;
    seg->present_seek_pos = seg->range_start;

    do {
        if (start_thread_sock(seg) == ERR_M_OK) {
            /* connected: ask for (the rest of) the range */
            if (get_thread_request(seg) == -1) {
                VERBOSEPRINT(fprintf(outfile, "\nTimeout in connection. Resending request for thread %d\n",seg->page_id))
#ifdef DEBUG
                DEBUGPRINT(fprintf(outfile, "Rolling back from %ld to %ld bytes.\n",seg->present_read_pos-rollback, seg->present_read_pos))
#endif
            }
        } else {
            VERBOSEPRINT(fprintf(outfile, "Thread %d failed to start. Retrying after some time.\n", seg->page_id))
            sleep(RETRY_TIME);
        }
    } while (seg->present_read_pos < (int)seg->range_end);

    /* close quickly: the other end may still be sending data */
    closesocket(seg->sock);

    /* tell the waiting main thread that one more worker is done */
    pthread_mutex_lock(&thread_done_mutex);
    curr_threads--;
    pthread_cond_signal(&thread_done_cond);
    pthread_mutex_unlock(&thread_done_mutex);

    VERBOSEPRINT(fprintf(outfile,"... Exiting thread:%d\n",seg->page_id))

    pthread_exit(NULL);
    return NULL;
}
/*
 * Create and connect the socket of one download thread.
 *
 * page : the segment descriptor; page->sock receives the new socket.
 *
 * The socket is created, lingering is switched off explicitly, SO_REUSEADDR
 * is set on Linux, and the socket is connected to the address in the global
 * `saddr`.  Failures to set an option are only reported (verbose mode).
 *
 * Returns ERR_M_OK on success, ERR_M_SOCKERR when the socket cannot be
 * created and ERR_M_CONNERR when the connection fails (the socket is closed
 * again in that case).
 */
int start_thread_sock(PPAGE *page)
{
    int ret;
#ifdef __linux__
    int reuseaddr_optval = 1;
#endif
    /* zero-initialised so that l_linger is never passed uninitialised */
    struct linger l = {0};
    socklen_t optlen = sizeof(l);

    page->sock = socket(PF_INET, SOCK_STREAM, 0);
    if (page->sock == -1) {
        perror("Socket create error");
        return ERR_M_SOCKERR;
    }

    /* no-linger is the default, but it is set again explicitly */
    l.l_onoff = 0;
    ret = setsockopt(page->sock, SOL_SOCKET, SO_LINGER, &l, optlen);
    if (ret == -1) {
        VERBOSEPRINT(fprintf(outfile, "SO_LINGER set error:%s\n", strerror(errno)))
    }

#ifdef __linux__
    /* set reuseaddr option */
    ret = setsockopt(page->sock, SOL_SOCKET, SO_REUSEADDR, &reuseaddr_optval, sizeof(reuseaddr_optval));
    if (ret == -1) {
        VERBOSEPRINT(fprintf(outfile, "SO_REUSEADDR set error:%s\n", strerror(errno)))
    }
#endif

    ret = connect(page->sock, (SADDR *)&saddr, sizeof(SADDR));
    if (ret != 0) {
        perror("Connect error");
        closesocket(page->sock);
        return ERR_M_CONNERR;
    }
    return ERR_M_OK;
}
/*
 * Request the (remaining) range of one segment over page->sock and write the
 * received data into the output file.
 *
 * page : the segment descriptor; present_seek_pos / present_read_pos are
 *        advanced as data is written, range_end is updated when the total
 *        length is unknown.
 *
 * The request asks for bytes present_read_pos..range_end when the length is
 * known, or for the whole document otherwise.  Each received chunk is written
 * under file_mutex at present_seek_pos; the header is stripped from the first
 * chunk.  Data beyond range_end is discarded and ends the transfer.
 *
 * Returns 1 when the receive loop ends, or the value of do_timeout(page) when
 * no data arrives within thread_timeout_second seconds or the server answers
 * 408, 503 or 504.  The process exits on any other error status, on a failed
 * CONNECT, and on file open/write errors.
 */
int get_thread_request(PPAGE *page)
{
    char *get_request, *start_data;
    int ret, effective_size = 0, written = 0, is_header = 1;
    long ret_ftell;
    byte buffer[LENGTH];
    FILE *fp;
    fd_set fds;
    struct timeval timeout_struct;

    get_request = (char *)calloc(sizeof(char),
        4 + (HAS_LENGTH == TRUE ? strlen(GET_STR) : strlen(GET_STR_NO_RANGE))
        + strlen(file_name)
        + strlen(host_name)
        + 33
        + (PROXY_AUTH == TRUE ? strlen(PROXY_AUTH_STR) : 0)
        + (PROXY_AUTH == TRUE ? strlen(user_passwd) : 0)
        + (REFERRER == TRUE ? strlen(REFERRER_DECL) : 0)
        + (REFERRER == TRUE ? strlen(referrer) : 0));
    if (get_request == NULL) {
        perror("Out of memory");
        exit(1);
    }
    if (HAS_LENGTH == TRUE)
        sprintf(get_request, GET_STR,
            protocol,
            file_name,
            host_name,
            page->present_read_pos,
            page->range_end,
            (PROXY_AUTH == TRUE ? PROXY_AUTH_STR : ""),
            (PROXY_AUTH == TRUE ? user_passwd : ""),
            (REFERRER == TRUE ? REFERRER_DECL : ""),
            (REFERRER == TRUE ? referrer : ""));
    else
        sprintf(get_request, GET_STR_NO_RANGE,
            protocol,
            file_name,
            host_name,
            (PROXY_AUTH == TRUE ? PROXY_AUTH_STR : ""),
            (PROXY_AUTH == TRUE ? user_passwd : ""),
            (REFERRER == TRUE ? REFERRER_DECL : ""),
            (REFERRER == TRUE ? referrer : ""));
    DEBUGPRINT(fprintf(outfile,"\tDebug:request string for thread:%d\n%s\n%s",page->page_id,get_request,sep))

    /* open the tunnel first when HTTP CONNECT is in use */
    if (USE_HTTP_CONNECT == TRUE) {
        ret = do_http_CONNECT(page->sock);
        if (ret != 200) {
            fprintf(outfile, "Disabling use of HTTP_CONNECT:");
            _process_http_status(ret);
            if (ret == 206)
                USE_HTTP_CONNECT = FALSE;
            else
                exit(1);
        }
    }

    /* send the request; the string is not needed afterwards */
    ret = send(page->sock, get_request, strlen(get_request), 0);
    free(get_request);

    /* receive loop */
    do {
        memset(buffer, 0, LENGTH);

        /* the timeout is implemented with select(); the timeval is set again
         * on every round because select() may modify it (see the NOTES
         * section of its manual page) */
        FD_ZERO(&fds);
        FD_SET(page->sock, &fds);
        timeout_struct.tv_sec = thread_timeout_second;
        timeout_struct.tv_usec = 0;
        select(page->sock + 1, &fds, NULL, NULL, &timeout_struct);
        if (!FD_ISSET(page->sock, &fds)) {
            /* nothing became readable in time */
            return do_timeout(page);
        }

        ret = recv(page->sock, buffer, LENGTH, 0);
        DEBUGPRINT(fprintf(outfile,"\nData amount:%d ",ret))
        if (ret <= 0)
            continue; /* 0 ends the loop, a negative value tries again */

        /* write to file.
         * REMEMBER: file_mutex is held from here on; every way out of this
         * round has to release it (and close fp). */
        pthread_mutex_lock(&file_mutex);
        /* opened read/write for random-access writing; binary mode makes no
         * difference on POSIX systems and is kept for portability */
        fp = fopen(save_file_name, "r+b");
        if (fp == NULL) {
            perror("File open error");
            pthread_mutex_unlock(&file_mutex);
            exit(1);
        }
        fseek(fp, sizeof(byte) * page->present_seek_pos, SEEK_SET);
        ret_ftell = ftell(fp);
        DEBUGPRINT(fprintf(outfile,"Thread no%d now positioned at %ld, present_seek_pos:%ld \n",page->page_id,ret_ftell, page->present_seek_pos))

        effective_size = ret;
        start_data = buffer;
        if (is_header) {
            start_data = _remove_header(buffer, &effective_size);
            /* effective_size is the payload size for a proper http packet,
             * otherwise it is -1 * status_code.  Only 200 and 206 deliver
             * content, but some other answers merely say that the request
             * should be placed again:
             *   408 : Request timeout
             *   503 : Service unavailable (ideally the retry time should be
             *         taken from the header)
             *   504 : Gateway timeout
             */
            if (effective_size < 0) {
                if (effective_size == -1) {
                    ret = 1; /* lost packet: keep the loop going */
                } else if (effective_size == -408 ||
                           effective_size == -503 ||
                           effective_size == -504) {
                    VERBOSEPRINT(fprintf(stderr, "Some other timeout:%d\n", -(effective_size)))
                    _process_http_status(-1*effective_size);
                    /* leave cleanly: no lock or open file may be left
                     * behind, and the timeout concerns THIS segment */
                    fclose(fp);
                    pthread_mutex_unlock(&file_mutex);
                    return do_timeout(page);
                } else {
                    ret = 0;
                    strncpy(error_buffer, buffer, 1023);
                    VERBOSEPRINT(fprintf(outfile, "Error in connection %d",page->page_id))
                    fflush(outfile);
                    _process_http_status(-1*effective_size);
                    exit(1);
                }
                /* essential */
                fclose(fp);
                pthread_mutex_unlock(&file_mutex);
                continue; /* move out of this request */
            }
            is_header = 0;
        }

        /* Some servers (or proxies) ignore the end of the requested range and
         * keep sending until the end of the file.  The data is therefore cut
         * at range_end and the transfer is ended here.  The range end is
         * never considered while the length is unknown.  The "- 1" is needed
         * because range_end is an offset counted from 0: after writing
         * effective_size bytes the position is one past range_end. */
        if (HAS_LENGTH == TRUE && ((page->present_seek_pos + effective_size - 1) > page->range_end)) {
#ifdef DEBUG
            DEBUGPRINT(fprintf(outfile, "\nstrange:%ld+%d>%ld,%ld,%d\n", page->present_seek_pos, effective_size, page->range_end,
                (page->present_seek_pos + effective_size - 1),
                ((page->present_seek_pos + effective_size - 1)>page->range_end)?1:0))
#endif
            effective_size = (page->range_end - page->present_seek_pos) + 1; /* both ends inclusive, so +1 */
#ifdef DEBUG
            DEBUGPRINT(fprintf(outfile, " now eff size:%d ",effective_size))
#endif
            ret = 0; /* ret=0 says that this thread ends here */
        }

        written = fwrite(start_data, sizeof(byte), effective_size, fp);
        if (written < effective_size) {
            VERBOSEPRINT(fprintf(outfile,"File Write Error %d<%d\n",written,effective_size))
            exit(1);
        }
        page->present_seek_pos += written;
        fclose(fp);
        pthread_mutex_unlock(&file_mutex);

        page->present_read_pos = page->present_seek_pos;
        if (HAS_LENGTH == FALSE)
            page->range_end = page->present_read_pos; /* length unknown: it is at least this long */
        DEBUGPRINT(if(page->present_read_pos>page->range_end)
            fprintf(outfile, "\nStrange\n"))
        print_segments();
        fflush(outfile);
    } while (ret != 0);
    return 1;
}
/*
 * Print the auto-updated line of download statistics on `outfile`.
 *
 * The line starts with a carriage return so that it overwrites the previous
 * one.  With a known length every segment is shown as a percentage of its own
 * range, followed by the average over all segments; without a known length
 * the number of bytes read per segment is shown instead (and the total stays
 * at 0).
 *
 * In DEBUG builds a segment whose progress exceeds 100% is reported with its
 * positions; when several exceed it, the last one found is reported.
 */
inline void print_segments(void)
{
    int i, strange = -1;
    float totdownload = 0; /* by Joseph: sum of the per-segment fractions */

    fprintf(outfile, "\r");
    for (i = 0; i < num_segment; ++i) {
        float downloaded;

        if (HAS_LENGTH == TRUE) {
            downloaded = (float)((ppage+i)->present_read_pos - (ppage+i)->range_start)/(float)(ppage+i)->range_size;
            totdownload += downloaded;
            fprintf(outfile, "%5.1f%% ", downloaded*100);
            if (downloaded > 1.0) /* strange segment */
                strange = i;
        } else {
            downloaded = ((float)((ppage+i)->present_read_pos - (ppage+i)->range_start));
            fprintf(outfile, "%d\t", (int)downloaded);
        }
    }
    fprintf(outfile, "= %.1f%%", (totdownload*100)/num_segment);
#ifdef DEBUG
    /* report the offending segment itself; after the loop `i` is one past
     * the last valid index and must not be used here */
    if (strange != -1)
        fprintf(outfile, "\nStrange:read pos:%ld,start:%ld,size:%ld\n",(ppage+strange)->present_read_pos,(ppage+strange)->range_start,(ppage+strange)->range_size);
#endif
}
/* check if the number of segments requested are allright wrt the file size
* doesnot make sense to download very small files with arbitrary large segments
*/
int check_file_segment_num(int total, int segment){
/* heuristics... atleast 50 kilobytes per segment, allowance of 5kb */
if(segment*SIZE_PER_SEGMENT + ALLOWANCE > total){ /* so number of segments is too large */
segment = max(total/SIZE_PER_SEGMENT,1);
}
VERBOSEPRINT(fprintf(outfile, "resetting number of segments to %d.\n", segment))
return segment;
}
/* What all to do for adding resume feature ? assume that writing to log file is done:
* Normally main calls these in succession:
* o get_options: no need to call "in full"
* o init_socket: no need as we already have the length
* o get_head: " " " " " " " "
* o process_thread: this is needed to some extent
* - the mutex etc. has to be initialized
* - ppage allocated memory and filled
* - the file opening and writing part has to be done with care as it may already exist
* - better, just fill the req structure and let process_thread do the rest ...
* - reading/writing the structures in log file would have been an idea
* - but it was criticized at many places
*/
/*
 * SIGINT handler: stop the download threads and write the log file needed to
 * resume later.
 *
 * sig : the signal number (unused).
 *
 * Every thread recorded in thread_id other than the calling one is counted as
 * finished (curr_threads is decremented and the condition variable signalled)
 * and then cancelled.  After a pause of 3 seconds the log file obtained from
 * _get_log_file(NULL) is written through write_log_file().  When no log file
 * can be obtained the handler returns without writing anything.
 *
 * NOTE(review): this handler calls functions that POSIX does not list as
 * async-signal-safe (fprintf, pthread_mutex_lock, sleep, ...).  That is kept
 * as it was; a safer design needs a dedicated signal-handling thread.
 */
void catch_interrupt_signal(int sig)
{
    FILE *logfile;
    int count;
    pthread_t self_thread_id, *tmp_thread;

    (void)sig;

    DEBUGPRINT(fprintf(outfile, "Writing log file ... \n"))
    self_thread_id = pthread_self();
    /* cast: pthread_t need not be a long, the format expects one */
    fprintf( outfile, "exiting ... in %ld\n", (long)self_thread_id);

    /* LinuxThreads deliver SIGINT (ctrl-c) to all threads, whereas on other
     * POSIX platforms only one thread gets the signal; that thread has to
     * cancel the others and write the log file. */
    if (thread_id != NULL) { /* the signal may arrive before the threads exist */
        for (count = 0, tmp_thread = thread_id; count < num_segment; ++count, ++tmp_thread) {
            /* thread ids must be compared with pthread_equal() */
            if (!pthread_equal(*tmp_thread, self_thread_id)) {
                pthread_mutex_lock(&thread_done_mutex);
                curr_threads--;
                pthread_cond_signal(&thread_done_cond);
                pthread_mutex_unlock(&thread_done_mutex);
                pthread_cancel(*tmp_thread);
            }
        }
    }

    /* maybe a bad trick: on Linux the other threads may be notified of the
     * signal too, but empirically they are cancelled before they get a
     * chance to enter the loop above */
    sleep(3);
    VERBOSEPRINT(fprintf(outfile, "done ... %d\n", (int)self_thread_id))

    /* NULL as file name: the choice of the log file name is left to
     * _get_log_file() for now */
    if ((logfile = _get_log_file(NULL)) == NULL)
        return;
    write_log_file(logfile);
    fclose(logfile);
    fflush(outfile);
    return;
}
/*
 * Placeholder for teardown work.  There is nothing to release so far, so the
 * function does nothing.
 */
void cleanup(void)
{
    /* intentionally empty */
}
/*
 * Write one string entry of the log file as "<length> <text>\n".
 * A string that was never set (NULL) is written as an empty one.
 */
static void write_log_string(FILE *fp, const char *str)
{
    if (str == NULL)
        str = "";
    /* strlen() yields a size_t; the format expects an int */
    fprintf(fp, "%d %s\n", (int)strlen(str), str);
}

/*
 * Write all the information needed to resume a download.
 *
 * fp : the log file, already open for writing; it is not closed here.
 *
 * Layout, one entry per line unless noted:
 *   magic header line
 *   num_segment, proxy_port, port
 *   file_name, save_file_name, user_passwd, host_name   (length + text)
 *   thread_timeout_second
 *   protocol, referrer                                  (length + text)
 *   rollback
 *   PROXY REFERRER SUPPORT_RESUME HAS_LENGTH PROTO_HTTP VERBOSE
 *   USE_HTTP_CONNECT                                    (one line)
 *   main_request: size_file, version (length + text), status_code
 *   one line per segment: page_id size_total range_start range_end
 *   range_size present_seek_pos present_read_pos
 *
 * user_passwd is stored the way it is sent in a request.
 */
void write_log_file(FILE *fp)
{
    int counter;
    PPAGE *tmp_ppage;

    fprintf(fp, "##### This file is autogenerated by mGet-"VERSION" DONOT EDIT IT MANUALLY !!!\n");

    fprintf(fp, "%d\n", num_segment);
    fprintf(fp, "%d\n", proxy_port);
    fprintf(fp, "%d\n", port);
    write_log_string(fp, file_name);
    write_log_string(fp, save_file_name);
    /* user_name is not stored; user_passwd is only set when a proxy asked
     * for authentication */
    write_log_string(fp, user_passwd);
    write_log_string(fp, host_name);
    fprintf(fp, "%d\n", thread_timeout_second);
    write_log_string(fp, protocol);
    /* referrer is only set when -R was given */
    write_log_string(fp, referrer);
    fprintf(fp, "%d\n", rollback);

    /* the boolean flags, written as integers */
    fprintf(fp, "%d %d %d %d %d %d %d\n",
        PROXY, REFERRER, SUPPORT_RESUME, HAS_LENGTH,
        PROTO_HTTP, VERBOSE, USE_HTTP_CONNECT);

    /* the main request */
    fprintf(fp, "%ld\n", main_request.size_file);
    write_log_string(fp, main_request.version);
    fprintf(fp, "%d\n", main_request.status_code);

    /* every segment descriptor; not all fields are needed to resume */
    for (counter = 0, tmp_ppage = ppage; counter < num_segment; ++counter, tmp_ppage++)
        fprintf(fp, "%u %ld %ld %ld %ld %ld %ld\n",
            tmp_ppage->page_id,
            tmp_ppage->size_total,
            tmp_ppage->range_start,
            tmp_ppage->range_end,
            tmp_ppage->range_size,
            tmp_ppage->present_seek_pos,
            tmp_ppage->present_read_pos);
}
/* Read back the parameters stored by write_log_file(): a host of fscanf
 * statements, in the same order in which the records were written.
 *
 * log_file_name - path of the log file; must not be NULL (asserted).
 *
 * Fills the globals num_segment, proxy_port, port, the string values handled
 * by _read_string_value(), thread_timeout_second, rollback, the seven flags,
 * main_request and a freshly allocated ppage array of num_segment entries.
 *
 * Calls exit(1) when the file cannot be opened, when its first line is not
 * exactly the magic header, or when the ppage array cannot be allocated.
 *
 * NOTE(review): check_return_value and VERBOSEPRINT are macros defined
 * elsewhere; their invocations are kept in exactly the original sequence
 * because the statement structure depends on what they expand to.
 */
void read_log_file(char *log_file_name){
    int counter;
    PPAGE *tmp_ppage;
    FILE *fp;
    char magic_string[]="##### This file is autogenerated by mGet-"VERSION" DONOT EDIT IT MANUALLY !!!";
    /* sized from the magic text itself (its length depends on VERSION):
     * room for the text, the newline ending the line and the NUL */
    char header_string[sizeof(magic_string)+1]={0};
    size_t header_len;

    assert(log_file_name!=NULL);
    fp=fopen(log_file_name, "r");
    VERBOSEPRINT(fprintf(stderr, "Reading from log file:%s\n", log_file_name))
    if(fp==NULL)
        exit(1);
    /* bounded read of the first line; a valid header is the magic text
     * immediately followed by a newline, i.e. sizeof(magic_string) chars */
    if(fgets(header_string, sizeof(header_string), fp)==NULL)
        exit(1);
    header_len=strlen(header_string);
    if(header_len!=sizeof(magic_string) || header_string[header_len-1]!='\n')
        exit(1);
    header_string[header_len-1]='\0';
    if(strcmp(header_string, magic_string)!=0)
        exit(1);
    VERBOSEPRINT(fprintf(stderr, "Got header data, seems the file may be correct.\n"))
    /* what all information needs to be read :
     * nearly all variables, lets list them */
    /* num_segment */
    check_return_value(fscanf(fp, "%d", &num_segment), 1)
    VERBOSEPRINT(fprintf(stderr, "num_segment=%d\n", num_segment))
    /* proxy_port */
    check_return_value(fscanf(fp, "%d", &proxy_port), 1)
    VERBOSEPRINT(fprintf(stderr, "proxy port=%d\n", proxy_port))
    /* port */
    check_return_value(fscanf(fp, "%d", &port), 1)
    VERBOSEPRINT(fprintf(stderr, "port=%d\n", port))
    /* file_name */
    _read_string_value(fp, &file_name);
    /* save_file_name */
    _read_string_value(fp, &save_file_name);
    /* user_name is not stored separately */
    /* user_passwd */
    /* keep it as used i.e. the way it is sent in request */
    _read_string_value(fp, &user_passwd);
    /* host_name */
    _read_string_value(fp, &host_name);
    /* thread_timeout_second */
    check_return_value(fscanf(fp, "%d", &thread_timeout_second), 1)
    VERBOSEPRINT(fprintf(stderr, "thread timeout=%d\n", thread_timeout_second))
    /* protocol */
    _read_string_value(fp, &protocol);
    /* referrer */
    _read_string_value(fp, &referrer);
    /* rollback */
    check_return_value(fscanf(fp, "%d", &rollback), 1)
    VERBOSEPRINT(fprintf(stderr, "rollback=%d\n", rollback))
    /* PROXY - REFERRER - SUPPORT_RESUME - HAS_LENGTH - PROTO_HTTP - VERBOSE - USE_HTTP_CONNECT */
    /* read the 7 BOOL values, should have used fread for reading them */
    /* NOTE(review): the (int *) casts are only sound if BOOL has the size
     * and representation of int - confirm against its definition */
    check_return_value(fscanf(fp, "%d %d %d %d %d %d %d",
        (int *)&PROXY, (int *)&REFERRER, (int *)&SUPPORT_RESUME, (int *)&HAS_LENGTH,
        (int *)&PROTO_HTTP, (int *)&VERBOSE, (int *)&USE_HTTP_CONNECT), 7)
    VERBOSEPRINT(fprintf(stderr, "%d %d %d %d %d %d %d\n",
        PROXY, REFERRER, SUPPORT_RESUME, HAS_LENGTH,
        PROTO_HTTP, VERBOSE, USE_HTTP_CONNECT))
    /* req */
    check_return_value(fscanf(fp, "%ld", &(main_request.size_file)), 1)
    VERBOSEPRINT(fprintf(stderr, "main_request.size_file=%ld\n", main_request.size_file))
    _read_string_value(fp, &(main_request.version));
    check_return_value(fscanf(fp, "%d", &(main_request.status_code)), 1)
    VERBOSEPRINT(fprintf(stderr, "main_request.status_code=%d\n", main_request.status_code))
    fflush(stderr);
    /* each ppage entry, stored as a contiguous array;
     * however many of the fields are not needed to resume
     */
    /* first need to do the memory allocation for the ppage array:
     * element count first, element size second */
    ppage = (PPAGE *) calloc ( num_segment, sizeof(PPAGE));
    if(ppage==NULL && num_segment>0)
        exit(1);
    for (counter=0, tmp_ppage=ppage; counter<num_segment; ++counter, tmp_ppage++)
        check_return_value(fscanf(fp, "%u %ld %ld %ld %ld %ld %ld",
            &(tmp_ppage->page_id),
            &(tmp_ppage->size_total),
            &(tmp_ppage->range_start),
            &(tmp_ppage->range_end),
            &(tmp_ppage->range_size),
            &(tmp_ppage->present_seek_pos),
            &(tmp_ppage->present_read_pos)), 7)
    /* NOTE(review): kept exactly as in the original. Unless
     * check_return_value expands to a dangling `if`, this exit(1) is
     * unconditional and the function never returns - confirm the intent. */
    exit(1);
}
/* Adjust a segment's positions after a timeout.
 *
 * page - segment whose present_seek_pos / present_read_pos are rewound.
 *
 * Without resume support the segment is reset to range_start. With resume
 * support both positions are moved back by `rollback`, but only when
 * present_read_pos has reached at least range_start + rollback.
 *
 * Always returns -1.
 */
inline int do_timeout(PPAGE *page){
    if(SUPPORT_RESUME!=FALSE){
        /* resuming: roll back only if more than `rollback` has come in */
        if(page->range_start+rollback<=page->present_read_pos){
            page->present_seek_pos-=rollback;
            page->present_read_pos-=rollback;
        }
        return -1;
    }
    /* cannot resume: better restart this segment from its first byte */
    page->present_seek_pos=page->range_start;
    page->present_read_pos=page->range_start;
    return -1;
}
#if !defined(HAVE_INET_NTOP)
/* Stand-in for inet_ntop() on platforms that lack it, built on inet_ntoa().
 *
 * af  - address family; ignored, the conversion is always done by
 *       inet_ntoa().
 * src - address to convert (assumes INADDR is the project's name for
 *       struct in_addr - TODO confirm).
 * dst - caller buffer receiving the text; may be NULL.
 * cnt - size of dst in bytes.
 *
 * Returns dst when the text, including its NUL, fits into cnt bytes.
 * Otherwise (dst is NULL or too small) returns inet_ntoa()'s own buffer,
 * which is what this fallback always returned before dst was honoured.
 */
char *inet_ntop(int af, INADDR *src, char *dst, size_t cnt){
    char *text;
    size_t needed;

    (void)af;
    text=inet_ntoa(*src);
    if(text==NULL || dst==NULL)
        return text;
    needed=strlen(text)+1;
    if(needed>cnt)
        return text;
    memcpy(dst, text, needed);
    return dst;
}
#endif
/* syntax highlighted by Code2HTML, v. 0.9.1 */