/*____________________ Main file for mGet ________________*/ /* some comments are stale - please bear with me or if you * can find the stale ones please remove them and then send * me the patch file :) */ /* Copyright 2001 2002 Debajyoti Bera */ /* This file is part of mGet. * mGet is free Software; please refer to COPYING for terms and conditions */ #define _REENTRANT #include "mget.h" int main(int argc, char **argv){ #ifdef TRY_SIGNAL VERBOSE=TRUE; read_log_file("mget.log"); #endif get_options(argc, argv); init_socket(); /* start the main socket */ /* now get the head and leangth etc. informations */ get_head(&main_request); /* trying to catch signals */ #ifdef TRY_SIGNAL signal( SIGINT, catch_interrupt_signal); #endif process_thread(&main_request); fprintf(outfile,"\n%sThank you for using mGet/"VERSION"\n%s",sep,sep); fclose(outfile); exit(0); /* return 0; */ } /* print usage and exit */ void print_usage(void){ fprintf( \ stderr, "Usage: mget %s \n\ [-n number_segments{1}] \n\ -f file_name\n" #ifdef HAVE_GETOPT_LONG " [--use_ftp_proxy[=]] \n\ [--use_http_proxy[=]]\n" #elif HAVE_GETOPT_GETOPT_H " [--use_ftp_proxy[=]] \n\ [--use_http_proxy[=]]\n" #endif " [-F] \n\ [-x] \n\ [-H] \n\ [-h] \n\ [-p port{80}] \n\ [-O Output_file_name] \n\ [-C]\n" #ifdef TRY_SIGNAL " [-l {log_file}]\n" #endif " [-t timeout_seconds{60}] \n\ [-R referrer] \n\ [-r rollback{0} bytes] \n\ [-v] \n\ [-V] \n\ ------------------------------------------------------------------------- \n\ proxy option can be given as -H(F) proxyhost:proxyport or \n\ as --use_http(or ftp)_proxy=proxyhost:proxyport. If port is not given, \n\ port is assumed to be 80. [Works differently in Solaris, refer manpage]. \n\ On some machines getopt doesnot allow '-H ' i.e. the space is not allowed between H and hostname. 
\n\ \n", VERSION); exit(1); } /* get the command line options and set up the option variables and other global variables */ void get_options(int argc, char **argv){ extern int optind; extern char *optarg; #if HAVE_GETOPT_LONG int option_index; static struct option long_options[]= { {"use_http_proxy", optional_argument, NULL, 'H'}, {"use_ftp_proxy", optional_argument, NULL, 'F'}, {"use_http_proxy_noauth", optional_argument, NULL, 'h'}, {"use_ftp_proxy_noauth", optional_argument, NULL, 'x'} }; #endif int c; if(argc<2) print_usage(); /* allocate space for the protocol field. Why this different treatment * to protocol whereas others are not allocated likewise : * this is because initially protocol was statically allocated - * as an array; but then while enabling resume support, the easiest * thing to do was make it dynamically allocated */ protocol=(char *)calloc(5, sizeof(char)); //'http' - size max 4 /* a common mistake: donot try to print any information in this while block * if needed, directly print on stdout, using outfile will lead to segmentation fault * as outfile may not have been initialized until late */ #ifdef HAVE_GETOPT_GETOPT_H while((c=getopt(argc, argv, "n:f:p:Cc:o:O:t:H::F::R:r:vl:h::x::V"))>0){ #elif HAVE_GETOPT_STDLIB_H while((c=getopt(argc, argv, "n:f:p:Cc:o:O:t:H::h::F::R:r:vl:h::x::V"))>0){ #elif HAVE_GETOPT_LONG while((c=getopt_long(argc, argv, "n:f:p:Cc:o:O:t:H::h::F::x::R:r:vl:V", long_options, &option_index))>0){ #else error getopt not supported #endif switch (c){ case 'n': num_segment=min(MAX_SEGMENT, atoi(optarg)); break; case 'f': file_name= (char *)calloc(sizeof(char),1+strlen(optarg)); strcpy(file_name,optarg);/* XXX:should USE str*n*cpy */ file_name=_remove_http(file_name, protocol); if(strncmp(protocol, "ftp", 3)==0) PROTO_HTTP=FALSE; /* XXX:SHOULD be using ftp protocol */ break; case 'p': port=atoi(optarg); break; case 'o': outfile=(FILE *)malloc(sizeof(FILE)); outfile=fopen(optarg,"w"); break; case 'c': /* XXX: to add configuration 
file support */ fprintf(stderr,"no configuration file support till now\n"); break; case 'C': USE_HTTP_CONNECT=TRUE; VERBOSEPRINT(fprintf(stderr, "Will use CONNECT to tunnel through the HTTP proxy.\n")) break; case 'O': /* XXX: to add output file support */ save_file_name=(char *)calloc(sizeof(char),1+strlen(optarg)); strcpy(save_file_name,optarg);/* XXX: to USE str*n*cpy */ break; case 't': thread_timeout_second=atoi(optarg); break; case 'r': rollback=atoi(optarg); #ifdef DEBUG VERBOSEPRINT(printf("Using rollback of:%d.\n",rollback)) #endif break; case 'H': #ifdef DEBUG DEBUGPRINT(fprintf(stderr, "Debug:using http_proxy\n")) #endif if(proxy==NULL){ char *proxy_name; if(optarg!=NULL) proxy_name=get_proxy_name_port(optarg, &proxy_port, proxy_name); else proxy_name=get_proxy_from_env(&proxy_port, proxy_name, 'h'); if(proxy_name==NULL){ fprintf(stderr, "Invalid value of proxy. Using no-proxy setting.\n"); break; } proxy=(char *)calloc(sizeof(char),1+strlen(proxy_name)); /* XXX: to use str*n*cpy */ strcpy(proxy,proxy_name); free(proxy_name); PROXY=TRUE; PROXY_AUTH=TRUE; /* user has asked for proxy usage * so ask for user name and password */ get_name_passwd(); } break; case 'h': #ifdef DEBUG DEBUGPRINT(fprintf(stderr, "Debug:using http_proxy but with no authentication\n")) #endif if(proxy==NULL){ char *proxy_name; if(optarg!=NULL) proxy_name=get_proxy_name_port(optarg, &proxy_port, proxy_name); else proxy_name=get_proxy_from_env(&proxy_port, proxy_name, 'h'); if(proxy_name==NULL){ fprintf(stderr, "Invalid value of proxy. 
Using no-proxy setting.\n"); break; } proxy=(char *)calloc(sizeof(char),1+strlen(proxy_name)); /* XXX: to use str*n*cpy */ strcpy(proxy,proxy_name); free(proxy_name); PROXY=TRUE; PROXY_AUTH=FALSE; /* user has asked for proxy usage, but the non-authentication type */ } break; case 'F': #ifdef DEBUG DEBUGPRINT(fprintf(stderr, "Debug:using ftp_proxy\n")) #endif if(proxy==NULL){ char *proxy_name; if(optarg!=NULL) proxy_name=get_proxy_name_port(optarg, &proxy_port, proxy_name); else proxy_name=get_proxy_from_env(&proxy_port, proxy_name, 'f'); if(proxy_name==NULL){ fprintf(stderr, "Invalid value of proxy. Using no-proxy setting.\n"); break; } proxy=(char *)calloc(sizeof(char),1+strlen(proxy_name)); /* XXX: to use str*n*cpy */ strcpy(proxy,proxy_name); free(proxy_name); PROXY=TRUE; PROXY_AUTH=TRUE; /* user has asked for proxy usage * so ask for user name and password */ get_name_passwd(); } break; case 'x': #ifdef DEBUG DEBUGPRINT(fprintf(stderr, "Debug:using ftp_proxy with no proxy settings\n")) #endif if(proxy==NULL){ char *proxy_name; if(optarg!=NULL) proxy_name=get_proxy_name_port(optarg, &proxy_port, proxy_name); else proxy_name=get_proxy_from_env(&proxy_port, proxy_name, 'f'); if(proxy_name==NULL){ fprintf(stderr, "Invalid value of proxy. Using no-proxy setting.\n"); break; } proxy=(char *)calloc(sizeof(char),1+strlen(proxy_name)); /* XXX: to use str*n*cpy */ strcpy(proxy,proxy_name); free(proxy_name); PROXY=TRUE; PROXY_AUTH=FALSE; } break; case 'R': referrer=(char *)calloc(sizeof(char),3+strlen(optarg)); strcpy(referrer,optarg);/* XXX: to USE str*n*cpy */ VERBOSEPRINT(fprintf(stderr, "Using referrer:%s\n", referrer)) referrer[strlen(optarg)]='\r'; referrer[strlen(optarg)+1]='\n'; referrer[strlen(optarg)+2]='\0'; REFERRER=TRUE; break; case 'v': VERBOSE=TRUE; break; case 'V': printf("mGet-"VERSION"\n"); print_usage(); break; default: print_usage(); } } /* first check if arguments etc. 
are all right, then go to other checkings */ if(file_name==NULL||optind > argc) print_usage(); if(proxy==NULL){ /* direct connection * => get the hostname and put it in proxy */ proxy=_get_site_name(file_name,proxy); host_name=proxy; /* no meaning of proxy_port, just kept as it was from before */ proxy_port=port; } else host_name=_get_site_name(file_name,host_name); /* fix the output file */ if(outfile==NULL){ outfile=(FILE *)malloc(sizeof(FILE)); outfile=stdout; } /* no ftp support without proxy, XXX: should try this out */ if(PROXY==FALSE && PROTO_HTTP==FALSE) err_exit("No FTP allowed except through proxy.\n", 0, s); /* check if some output filename is given, if not deduce the file name from site name */ if((save_file_name==NULL) && (save_file_name=_get_file_name(file_name,save_file_name))==NULL) err_exit("No file name given",0,s); /* fprintf(stderr,"\tDebug: file name is:%s\n",save_file_name); */ VERBOSEPRINT(fprintf(outfile, "Output file is:%s\n", save_file_name)) if(PROXY==FALSE && USE_HTTP_CONNECT==TRUE){ fprintf(stderr, "Tunnelling only allowed while using proxy. Will not use HTTP CONNECT.\n"); USE_HTTP_CONNECT=FALSE; } if(PROTO_HTTP==FALSE) port=21; VERBOSEPRINT(fprintf(outfile,"\n\ %sDocument %s requested from %s,with address %s in %d segments,\n,proxy:%s:%d\n%s", sep,save_file_name,host_name,file_name,num_segment,proxy,proxy_port,sep)) } /* Eeks!!! some error. 
QuiT */ /* err_exit: report msg through perror(), close `socket` with closesocket() when option==CLOSE_SOCK, then end the process with exit(1). Does not return. */ void err_exit(char *msg, int option, SOCKET socket){ perror(msg); if(option==CLOSE_SOCK) closesocket(socket); exit(1); } /* init_socket: look up the global `proxy` name with gethostbyname() and fill the global `saddr` with AF_INET, htons(proxy_port) and the first returned address. On lookup failure it reports through herror(), closes the global socket `s` and exits with status 1. With DEBUG defined the host name, aliases and address are also written to outfile. */ void init_socket(){ /* initialize the main socket */ #ifdef DEBUG char str[INET_ADDRSTRLEN]; int i; #endif HOST *host; /* start filling the saddr structure * first the addr * get the hostent structure first */ host=gethostbyname(proxy); if(host==NULL){ herror("Server lookup error"); closesocket(s); exit(1); } /* printf debug info for the host */ #ifdef DEBUG fprintf(outfile,"%s\t:=)Document requested from: %s\n",sep,host->h_name); fprintf(outfile,"Debug Information:\n"); i=0; while(host->h_aliases[i]!=NULL){ fprintf(outfile,"\t:=)Other interfaces: %s\n",(*host).h_aliases[i]); i++; } fprintf(outfile,"\t:=)Proxy(Host) address: %s\n",inet_ntop(AF_INET,((INADDR *)host->h_addr),str,sizeof(str))); #endif /* now fill saddr */ saddr.sin_family=AF_INET; saddr.sin_port=htons(proxy_port); saddr.sin_addr=*((INADDR *)host->h_addr); memset(saddr.sin_zero,0,8);/* bzero is deprecated */ return; /* end initialization */ } /* send the initial request to gather information about the file * like length; ideally this thread also should try to get some data * as well and then suitably start and modify threads to get the rest * of the data, this way the time while user is waiting to get the length * information some data is also downloaded. NOT IMPLEMENTED till now :/ */ /* get_head: open the global socket `s`, connect it to `saddr`, send the request built from HEAD_STR_HTTP or HEAD_STR_FTP and parse the reply into *req (version, status_code, size_file). For status 200 or 206 with a non-zero size, num_segment is set (forced to 1 for status 200, otherwise taken from check_file_segment_num()); a size of 0 switches to a single segment and clears SUPPORT_RESUME and HAS_LENGTH. Any other status ends the process with exit(1), as do socket, connect and receive errors. Returns 1 whenever it returns. */ int get_head(REQUEST *req){ int ret; char *request_string,buffer[LENGTH]; char *HEAD_REQUEST=(PROTO_HTTP==TRUE ? HEAD_STR_HTTP : HEAD_STR_FTP); /* initialize the socket */ s=socket(PF_INET, SOCK_STREAM, 0); if(s==-1) err_exit("Socket create error",NOCLOSE_SOCK,s); DEBUGPRINT(fprintf(outfile,"\t:=)Socket successfully created: descriptor is %d\n",s)) /* now connect the host */ ret=connect(s,(SADDR *)&saddr,sizeof(SADDR)); if(ret!=0) /* it is better to check if return value is 0 or non-0 */ err_exit("Connect error",1,s); /* CONNECTED !!!
* if asked to use http CONNECT try to use that */ if(USE_HTTP_CONNECT==TRUE){ ret=do_http_CONNECT(s); if(ret!=200){ fprintf(outfile, "Disabling use of HTTP_CONNECT:"); _process_http_status(ret); if(ret==206) USE_HTTP_CONNECT=FALSE; else exit(1); } } /* get the head information from the request string */ /* NOTE(review): the size computed below has no term for strlen(protocol) although protocol is the first argument given to sprintf(); whether the buffer is large enough depends on the HEAD_STR_* format text, which is not visible here - confirm. request_string is not freed anywhere in this function. */ request_string= (char *)calloc(sizeof(char), 4+strlen(HEAD_REQUEST) + strlen(file_name) + strlen(host_name) + (PROXY_AUTH==TRUE?strlen(PROXY_AUTH_STR):0) + (PROXY_AUTH==TRUE?strlen(user_passwd):0) + (REFERRER==TRUE?strlen(REFERRER_DECL):0) + (REFERRER==TRUE?strlen(referrer):0)); sprintf(request_string,HEAD_REQUEST, protocol, file_name, host_name, (PROXY_AUTH==TRUE?PROXY_AUTH_STR:""), (PROXY_AUTH==TRUE?user_passwd:""), (REFERRER==TRUE?REFERRER_DECL:""), (REFERRER==TRUE?referrer:"")); /* now send the request */ ret=send(s,request_string,strlen(request_string),0); DEBUGPRINT(fprintf(outfile,"=> %d bytes sent:\n%s%s%sResponse awaited:\n",ret,sep,request_string,sep)) /* get the header request */ memset(buffer,'\0',LENGTH); ret=recv(s,buffer,LENGTH-1,0); if(ret<0) /* error :( */ err_exit("Receive error",1,s); buffer[ret]='\0'; VERBOSEPRINT(fprintf(outfile,"\nReceived =>%d bytes\n%s",ret,sep)) DEBUGPRINT(fprintf(outfile, "%s\n", buffer)) req->version=(char *)calloc(sizeof(char),10); memset(req->version,'\0',10); /* because only the pointer is passed in buffer, * no use of trying to pass only the first line * as that would also pass the pointer, same space */ _process_status_line(buffer, req->version, &(req->status_code)); if (req->status_code==200||req->status_code==206){ /* so status code is correct * close the socket with utmost importance */ closesocket(s); _process_header_length(buffer, &(req->size_file)); if(req->size_file==0){ DEBUGPRINT(fprintf(outfile, "Size of file is 0 !!! Status is:%d, Some problem. 
Trying to get file in single thread.\n", req->status_code)) fprintf(outfile, "Unable to get size of file, using single thread.\n"); /* exit(2); */ /* Now can handle this case also */ num_segment=1; /* size not known... so can't segment */ SUPPORT_RESUME=FALSE; /* but as if disconnected request for continuing from broken download can be given * (need to verify) so donot increase the timeout */ HAS_LENGTH=FALSE; /* exit(1); */ return 1; } fprintf(outfile, "Size of requested file %s : %ld bytes\n", save_file_name, req->size_file); DEBUGPRINT(fprintf(outfile,"\tDebug:Size of file=%ld\n\tDebug:Version=%s\n\tDebug:Status=%d\n", req->size_file, req->version, req->status_code)) if(req->status_code==200){ VERBOSEPRINT(fprintf(outfile,"%sServer does not resume: using single thread, increasing timeout\n%s",sep,sep)) num_segment=1; SUPPORT_RESUME=FALSE; thread_timeout_second=(thread_timeout_second<300?300:thread_timeout_second*2); /* increase timeout time, adhoc policy */ }else num_segment=check_file_segment_num(req->size_file, num_segment); } else { closesocket(s); strncpy(error_buffer, buffer, 1023); /* :0 at least tell user where to be redirected */ if(req->status_code==301 || req->status_code==302) get_redirected_address(buffer); _process_http_status(req->status_code); exit(1); } return 1; } /* get_name_passwd: ask for the proxy user name on stderr (read with scanf), read the password with getpass(), and leave in the global user_passwd the result of _encode() applied to "name:password", followed by CR, LF and NUL written at offsets encode_length, +1 and +2. The plain name is kept in the global user_name. NOTE(review): scanf("%s") is unbounded while tmp_string holds 20 bytes, and "name:password" is formatted with sprintf() into the 40-byte tmp - long input overruns both. The buffer allocated for user_passwd is replaced by the pointer returned from _encode(), so the three trailing bytes land in that returned buffer; confirm it is at least encode_length+3 bytes long. */ void get_name_passwd(void){ char *tmp_string; char *tmp; int name_size, pass_size; /* size of name and password */ int encode_length; tmp_string=(char *)calloc(sizeof(char),20);/* No justification of 20, from experience */ tmp=(char *)calloc(sizeof(char),40);/* No justification of 40,again */ /* username */ fprintf(stderr,"Please give the proxy user-name:"); scanf("%s",tmp_string); name_size=strlen(tmp_string); user_name=(char *)calloc(sizeof(char),1+name_size); strncpy(user_name, tmp_string, name_size+1); DEBUGPRINT(fprintf(stderr,"\tDebug:Username given=%s\n",user_name)) tmp_string=getpass("Give the proxy password:"); 
pass_size=strlen(tmp_string); /* Use encoding technique : TO BE DONE */ encode_length=((name_size+1+pass_size+2)/3)*4; user_passwd=(char *)calloc(sizeof(char),3+encode_length); //ad-hoc /* add user_name:passwd */ sprintf(tmp,"%s:%s",user_name,tmp_string); user_passwd=_encode(tmp); user_passwd[encode_length]='\r'; user_passwd[encode_length+1]='\n'; user_passwd[encode_length+2]='\0'; /* fprintf(outfile,"\tDebug: Password=%s\n",user_passwd); */ } /* process_thread: thread processing starts here. Initialises thread_done_mutex, thread_done_cond and file_mutex, allocates num_segment PPAGE entries and thread ids, creates save_file_name (pre-extended by writing one character near offset size_file-2 when the length is known), fills one PPAGE per segment of `interval` bytes, starts start_thread on each and waits on thread_done_cond until curr_threads is 0. */ void process_thread(REQUEST *req){ int i; int temp, interval; off_t offset; /* mutex initialization: great work Chak */ if(pthread_mutex_init(&thread_done_mutex,NULL)!=0) err_exit("Mutex Init Error.",0,s); if(pthread_cond_init(&thread_done_cond,NULL)!=0) err_exit("Conditional Init Error.",0,s); if(pthread_mutex_init(&file_mutex,NULL)!=0) err_exit("File mutex Init Error.",0,s); temp=-1; /* start byte of each request */ interval=(HAS_LENGTH==TRUE?req->size_file/num_segment:1); /****************************** * ASSUMPTION : accept: bytes * ******************************/ /* allocate memory for number of structures */ ppage=(PPAGE *)calloc(sizeof(PPAGE),num_segment); /* open the file */ file=fopen(save_file_name,"w"); /* ALL the next operations take place only if the size is known */ if(HAS_LENGTH==TRUE){ /* to make sure that during downloading, space doesnot end, * lseek it first to store the necessary size. 
* from practical point, write a char at end of file which will surely be overwritten */ offset=fseek(file,(long)(req->size_file-2), SEEK_SET); if(offset==(off_t)-1) err_exit("File creation error",0,s); /* now write */ fprintf(file,"0"); fclose(file);/* each thread will open separately, close it now */ } /* allocate memory for thread ids */ thread_id=(pthread_t *)calloc(sizeof(pthread_t),num_segment); /* NOTE(review): the loop header below is incomplete in this copy - the text between "i" and "page_id" is missing. Presumably it iterated over every segment except the last and began with an access to (ppage+i); restore it from the original source before building. */ for(i=0;ipage_id=i+1; /* id:1 to num_segment */ (ppage+i)->size_total=req->size_file; (ppage+i)->range_start=temp+1; (ppage+i)->range_end=temp+interval; (ppage+i)->range_size=interval; temp+=interval; if(pthread_create(thread_id+i, NULL, start_thread, (void *)(ppage+i))==0){ #ifdef TRY_SIGNAL fprintf(outfile,"Debug: Created thread %d with tid:%d\n",i+1,(int)*(thread_id+i)); #endif curr_threads++; } else VERBOSEPRINT(fprintf(outfile,"Thread %d create error:%s\n",i+1,strerror(errno))) } /* adjust for the last structure, its range is what is left */ (ppage+i)->page_id=i+1; (ppage+i)->size_total=req->size_file; (ppage+i)->range_start=temp+1; (ppage+i)->range_end=req->size_file-1; /* 1 less than size, as range starts from 0 */ (ppage+i)->range_size= req->size_file-(num_segment-1)*interval; if(pthread_create(thread_id+i, NULL, start_thread, (void *)(ppage+i))==0){ #ifdef TRY_SIGNAL fprintf(outfile,"Debug: Created thread %d with tid:%d\n",i+1,(int)*(thread_id+i)); #endif curr_threads++; } else VERBOSEPRINT(fprintf(outfile,"Thread %d create error:%s\n",i+1,strerror(errno))) pthread_mutex_lock(&thread_done_mutex); while(curr_threads!=0) pthread_cond_wait(&thread_done_cond,&thread_done_mutex); /* NOTE(review): text is missing from here on in this copy: the end of process_thread and the beginning of the thread entry routine (start_thread, the function handed to pthread_create above) are not present, only the tail of a commented-out debug print survives. The statements that follow use a local `page`, presumably the PPAGE passed to the thread - confirm against the original source. */ for(i=0;ipage_id); fprintf(outfile,"\tContent-Range:%d-%d/%d\n", page->range_start, page->range_end, page->size_total);*/ /*not now start_thread_sock(page);*/ /* per_thread socket initializations */ /* in order to handle ctrl-c (resume feature) need to enable ASYNCHRONOUS * cancel type */ pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); /* initialize */ 
page->present_read_pos=page->range_start;/* initial read pointer */ page->present_seek_pos=page->range_start;/* initial write pointer */ do{ if(start_thread_sock(page)!=ERR_M_OK){ VERBOSEPRINT(fprintf(outfile, "Thread %d failed to start. Retrying after some time.\n", page->page_id)) sleep(RETRY_TIME); continue; } if(get_thread_request(page)==-1){ /* get requests for each thread */ VERBOSEPRINT(fprintf(outfile, "\nTimeout in connection. Resending request for thread %d\n",page->page_id)) #ifdef DEBUG DEBUGPRINT(fprintf(outfile, "Rolling back from %ld to %ld bytes.\n",page->present_read_pos-rollback, page->present_read_pos)) #endif } }while(page->present_read_pos < (int)page->range_end); /* do this fast ... other end may be sending data */ closesocket(page->sock); /* decrement the value of curr_threads before exiting */ pthread_mutex_lock(&thread_done_mutex); curr_threads--; pthread_cond_signal(&thread_done_cond); pthread_mutex_unlock(&thread_done_mutex); VERBOSEPRINT(fprintf(outfile,"... Exiting thread:%d\n",page->page_id)) pthread_exit(NULL); return NULL; } /* start_thread_sock: create the per-segment socket page->sock, switch SO_LINGER off (and set SO_REUSEADDR when __linux__ is defined; failures of either are only reported in verbose mode), then connect to the global saddr. Returns ERR_M_SOCKERR when socket() fails, ERR_M_CONNERR when connect() fails (the socket is closed again), ERR_M_OK otherwise. */ int start_thread_sock(PPAGE *page){ int ret; #ifdef __linux__ int reuseaddr_optval=1; #endif struct linger l; socklen_t optlen=sizeof(l); page->sock=socket(PF_INET, SOCK_STREAM, 0); if(page->sock==-1){ perror("Socket create error"); return ERR_M_SOCKERR; /* error */ } /* set the socket options * no-linger options... 
though set by default, explicitly set again */ l.l_onoff=0; ret=setsockopt(page->sock, SOL_SOCKET, SO_LINGER, &l, optlen); if(ret==-1) VERBOSEPRINT(fprintf(outfile, "SO_LINGER set error:%s\n", strerror(errno))) #ifdef __linux__ /* set reuseaddr option */ ret=setsockopt(page->sock, SOL_SOCKET, SO_REUSEADDR, &reuseaddr_optval, sizeof(reuseaddr_optval)); if(ret==-1) VERBOSEPRINT(fprintf(outfile, "SO_REUSEADDR set error:%s\n", strerror(errno))) #endif ret=connect(page->sock,(SADDR *)&saddr, sizeof(SADDR)); if(ret != 0){ perror("Connect error"); closesocket(page->sock); return ERR_M_CONNERR; } return ERR_M_OK; } /* get_thread_request: send the download request for one segment on page->sock (GET_STR with present_read_pos and range_end when HAS_LENGTH is TRUE, GET_STR_NO_RANGE otherwise) and copy the reply into save_file_name at offset present_seek_pos, under file_mutex. Every recv() is preceded by a select() limited to thread_timeout_second seconds. The first data packet goes through _remove_header(); a negative effective_size from it carries the negated status code. Returns the value of do_timeout() when select() times out or the status is 408, 503 or 504, and 1 when the read loop ends; other error statuses end the process with exit(1). NOTE(review): get_request is never freed in this function. */ int get_thread_request(PPAGE *page){ char *get_request, *start_data; int ret, effective_size=0, written=0,is_header=1; long ret_ftell; byte buffer[LENGTH]; FILE * fp; fd_set fds; struct timeval timeout_struct; get_request=(char *)calloc(sizeof(char), 4+(HAS_LENGTH==TRUE?strlen(GET_STR):strlen(GET_STR_NO_RANGE)) + strlen(file_name) + strlen(host_name) + 33 + (PROXY_AUTH==TRUE?strlen(PROXY_AUTH_STR):0) + (PROXY_AUTH==TRUE?strlen(user_passwd):0) + (REFERRER==TRUE?strlen(REFERRER_DECL):0) + (REFERRER==TRUE?strlen(referrer):0)); if(HAS_LENGTH==TRUE) sprintf(get_request,GET_STR, protocol, file_name, host_name, page->present_read_pos, page->range_end, (PROXY_AUTH==TRUE?PROXY_AUTH_STR:""), (PROXY_AUTH==TRUE?user_passwd:""), (REFERRER==TRUE?REFERRER_DECL:""), (REFERRER==TRUE?referrer:"")); else sprintf(get_request,GET_STR_NO_RANGE, protocol, file_name, host_name, (PROXY_AUTH==TRUE?PROXY_AUTH_STR:""), (PROXY_AUTH==TRUE?user_passwd:""), (REFERRER==TRUE?REFERRER_DECL:""), (REFERRER==TRUE?referrer:"")); DEBUGPRINT(fprintf(outfile,"\tDebug:request string for thread:%d\n%s\n%s",page->page_id,get_request,sep)) /* send HTTP CONNECT request */ if(USE_HTTP_CONNECT==TRUE){ ret=do_http_CONNECT(page->sock); if(ret!=200){ fprintf(outfile, "Disabling use of HTTP_CONNECT:"); _process_http_status(ret); if(ret==206) USE_HTTP_CONNECT=FALSE; else exit(1); } } /* send request */ 
ret=send(page->sock, get_request, strlen(get_request), 0); #ifdef DEBUG /* fprintf(outfile,"\tDebug:thread #%d send %d bytes\n",page->page_id, ret); */ #endif /* to read data, quite a long loop :=( */ do{ memset(buffer,0,LENGTH); /* try implementing timeout by select(); never used before :) */ FD_ZERO(&fds); FD_SET(page->sock, &fds); /* small bug!!! see man page of select NOTES section * set the timeout values each time due to a different implementation in linux */ timeout_struct.tv_sec=thread_timeout_second; timeout_struct.tv_usec=0; select(page->sock+1, &fds, NULL, NULL, &timeout_struct); /* select returns when there is data available */ if(!FD_ISSET(page->sock, &fds)){ /* timeout */ return do_timeout(page); } /* printf("select returned "); */ ret=recv(page->sock, buffer, LENGTH, 0); /* length -1 to print on stdout */ DEBUGPRINT(fprintf(outfile,"\nData amount:%d ",ret)) if(ret<=0) continue; /* write to file * remember !!! mutex acquired ... if abnormal termination of loop, unlock it. */ pthread_mutex_lock(&file_mutex); fp=fopen(save_file_name,"r+b");/* open in O_RDWR mode for random write; binary mode makes no difference on POSIX systems and was chosen with a possible Windows port in mind */ fseek(fp,sizeof(byte)*page->present_seek_pos,SEEK_SET); ret_ftell=ftell(fp); /*if(ret_ftell==-1L){ perror("Error in file seek"); ret_ftell=0L; }*/ DEBUGPRINT(fprintf(outfile,"Thread no%d now positioned at %ld, present_seek_pos:%ld \n",page->page_id,ret_ftell, page->present_seek_pos)) effective_size=ret;/* initialize buffer */ start_data=buffer; if(is_header){ start_data=_remove_header(buffer,&effective_size); /* effective_size returns the effective size if the packet is a proper * http packet, else it returns -1*status_code * in general we should be content with status code of 200 or 206 * only (they provide content) but there are some other responses * which signifies that there was some sort of timeout etc. 
so that * we may re-place the request, some of the status code of this type are: * 408 : Timeout request * 503 : Service unavailable (ideally should then find out the * retry time from header) * 504 : Gateway timeout */ if(effective_size<0){ /* status not 200 or 206... */ if(effective_size==-1) /* fprintf(outfile,"Lost packet.\n"); */ ret=1; else if ( effective_size == -408 || effective_size == -503 || effective_size == -504 ) { VERBOSEPRINT(fprintf(stderr, "Some other timeout:%d\n", -(effective_size))) _process_http_status(-1*effective_size); /* NOTE(review): this passes the global ppage (the first segment) rather than this thread's `page`, and returns while file_mutex is still locked and fp is still open - looks unintended, confirm. */ return do_timeout(ppage); } else { ret=0; strncpy(error_buffer, buffer, 1023); VERBOSEPRINT(fprintf(outfile, "Error in connection %d",page->page_id)) fflush(outfile); _process_http_status(-1*effective_size); exit(1); } /* essential */ fclose(fp); pthread_mutex_unlock(&file_mutex); continue; /* move out of this request */ } is_header=0; } /* There is a strange phenomenon !!! For some of the requests, the server doesnot seem * to take into account the end-of-requested range. It starts the data from the asked range * and then sends till the rest of the file. This may be, however a proxy-server * influenced matter. Still, I need to look into it. * However, this can be suitably used if some segment is nearly end and the next segment * hasnot even started receiving data. Will utilise this fact later... for present, * just put a check and close the socket whenever the requested data has arrived. 
*/ /* never consider range end while Length is not known * check the amount of data to be written; subtract -1 as range_end gives offset from 0 * and after having written those 'effective' number of bytes, the pointer should get one past * the range_end */ if(HAS_LENGTH==TRUE && ((page->present_seek_pos + effective_size - 1) > page->range_end)){ #ifdef DEBUG DEBUGPRINT(fprintf(outfile, "\nstrange:%ld+%d>%ld,%ld,%d\n", page->present_seek_pos, effective_size, page->range_end, (page->present_seek_pos + effective_size - 1), ((page->present_seek_pos + effective_size - 1)>page->range_end)?1:0)) #endif effective_size=(page->range_end - page->present_seek_pos)+1;/* inclusive of both ranges, so +1 */ #ifdef DEBUG DEBUGPRINT(fprintf(outfile, " now eff size:%d ",effective_size)) #endif ret=0; /* make ret=0 to say that we end this thread here */ } written=fwrite(start_data,sizeof(byte),effective_size,fp); /* NOTE(review): the statement below is incomplete in this copy - the text between "written" and "present_seek_pos" is missing. Presumably a short-write check preceded the update of page->present_seek_pos; restore it from the original source. */ if(writtenpresent_seek_pos+=written; #ifdef DEBUG /* fprintf(outfile,"Thread no:%d wrote %d/%d incoming bytes, position %ld\n",page->page_id,written,ret,ftell(fp)); */ #endif fclose(fp); pthread_mutex_unlock(&file_mutex); /* fprintf(outfile,"%d",page->page_id-1);*/ /* to show something is coming */ page->present_read_pos=page->present_seek_pos; if(HAS_LENGTH==FALSE) page->range_end=page->present_read_pos;/* we do not know the length, * at least it is of this length */ DEBUGPRINT(if(page->present_read_pos>page->range_end) fprintf(outfile, "\nStrange\n")) print_segments(); fflush(outfile); /* stdout ??cannot?? be flushed, makes !!no!! 
sense */ }while(ret!=0); return 1; } /* print_segments: redraw the auto-updated download statistics line on outfile. Writes "\r", then one column per segment, then "= <total>%" computed as (totdownload*100)/num_segment. One branch prints the fraction of range_size already read as a percentage, the other prints the number of bytes read; the condition choosing between them is not visible in this copy (presumably HAS_LENGTH - confirm). */ inline void print_segments(void){ int i, strange=-1; float totdownload = 0; /* by Joseph: custom addition to know total download size */ /* fprintf(outfile, "\n"); */ fprintf(outfile, "\r"); /* NOTE(review): the loop header below is incomplete in this copy - the text between "i" and "present_read_pos" is missing, including the declaration of `downloaded` and the opening of the if whose else branch follows. Restore it from the original source. */ for(i=0; ipresent_read_pos - (ppage+i)->range_start)/(float)(ppage+i)->range_size; totdownload += downloaded; /* custom addition to know total download size */ fprintf(outfile, "%5.1f%% ", downloaded*100); if(downloaded>1.0) /* strange segment */ strange=i; }else{ downloaded = ((float)((ppage+i)->present_read_pos - (ppage+i)->range_start)); fprintf(outfile, "%d\t", (int)downloaded); } } fprintf(outfile, "= %.1f%%", (totdownload*100)/num_segment); /* custom addition to know total download size */ /* fprintf(outfile, "= %.1f%%/%.1f Kb", (totdownload*100)/num_segment, main_request.size_file/1024.0); custom addition to know total download size */ /* NOTE(review): the debug print below indexes ppage with the loop counter i, not with `strange`; if the loop ran to num_segment this reads one element past the array - confirm. */ #ifdef DEBUG if(strange!=-1) fprintf(outfile, "\nStrange:read pos:%ld,start:%ld,size:%ld\n",(ppage+i)->present_read_pos,(ppage+i)->range_start,(ppage+i)->range_size); #endif } /* check if the number of segments requested are allright wrt the file size * doesnot make sense to download very small files with arbitrary large segments. * Returns `segment` unchanged unless segment*SIZE_PER_SEGMENT + ALLOWANCE exceeds `total`, in which case it returns max(total/SIZE_PER_SEGMENT,1). The verbose "resetting" message is printed on every call, whether or not the value changed. */ int check_file_segment_num(int total, int segment){ /* heuristics... atleast 50 kilobytes per segment, allowance of 5kb */ if(segment*SIZE_PER_SEGMENT + ALLOWANCE > total){ /* so number of segments is too large */ segment = max(total/SIZE_PER_SEGMENT,1); } VERBOSEPRINT(fprintf(outfile, "resetting number of segments to %d.\n", segment)) return segment; } /* What all to do for adding resume feature ? assume that writing to log file is done: * Normally main calls these in succession: * o get_options: no need to call "in full" * o init_socket: no need as we already have the length * o get_head: " " " " " " " " * o process_thread: this is needed to some extent * - the mutex etc. 
has to be initialized * - ppage allocated memory and filled * - the file opening and writing part has to be done with care as it may already exist * - better, just fill the req structure and let process_thread do the rest ... * - reading/writing the structures in log file would have been an idea * - but it was criticized at many places */ /* catch_interrupt_signal: handler installed for SIGINT by main() when TRY_SIGNAL is defined. The visible part announces the exit on outfile together with the id of the calling thread and then starts walking the thread_id array. */ void catch_interrupt_signal(int sig){ FILE *logfile; int count; pthread_t self_thread_id, *tmp_thread; DEBUGPRINT(fprintf(outfile, "Writing log file ... \n")) self_thread_id=pthread_self(); fprintf( outfile, "exiting ... in %ld\n", self_thread_id); /* Funny things all again !!! Linuxthreads donot fully implement the POSIX thread * - thus SIGINT (as ctrl-c) is sent to all the threads; so somehow need to handle this * and a naive way is to use semaphores. whereas in other POSIX platforms only one * random thread gets the signal and so it has to cancel the other threads and write * the log file */ /* NOTE(review): a large span of text is missing from this copy, starting inside the loop header below. The remainder of this handler and the beginning of the routine that writes the log file are not present; what follows is the tail of a comment and of a loop that prints the seven PPAGE fields of every segment. Restore both functions from the original source. */ for( count=0, tmp_thread=thread_id; count... linked list * however many are not needed to resume */ for (counter=0, tmp_ppage=ppage; counterpage_id, tmp_ppage->size_total, tmp_ppage->range_start, tmp_ppage->range_end, tmp_ppage->range_size, tmp_ppage->present_seek_pos, tmp_ppage->present_read_pos); } /* read_log_file: a host of fscanf statements to read the parameters :( * Opens log_file_name, checks that its first line matches magic_string in the first 77 characters and then reads, in this order: num_segment, proxy_port, port, file_name, save_file_name, user_passwd, host_name, thread_timeout_second, protocol, referrer, rollback, the seven flags PROXY REFERRER SUPPORT_RESUME HAS_LENGTH PROTO_HTTP VERBOSE USE_HTTP_CONNECT, main_request (size_file, version, status_code) and finally the per-segment PPAGE entries. Exits with status 1 when the file cannot be opened or the header does not match. NOTE(review): the "%[^\n]" conversion has no field width although header_string holds 78 bytes - a longer first line overruns it. */ void read_log_file(char *log_file_name){ int counter; PPAGE *tmp_ppage; FILE *fp; char magic_string[]="##### This file is autogenerated by mGet-"VERSION" DONOT EDIT IT MANUALLY !!!", header_string[78]={0}; assert(log_file_name!=NULL); fp=fopen(log_file_name, "r"); VERBOSEPRINT(fprintf(stderr, "Reading from log file:%s\n", log_file_name)) if(fp==NULL) exit(1); fscanf(fp, "%[^\n]", header_string); if(strncmp(header_string, magic_string, 77)!=0) exit(1); if (fgetc(fp)!='\n') exit(1); VERBOSEPRINT(fprintf(stderr, "Got header data, seems the file may be correct.\n")) /* what all information needs to be read : * nearly all variables, lets list them */ /* num_segment */ 
check_return_value(fscanf(fp, "%d", &num_segment), 1) VERBOSEPRINT(fprintf(stderr, "num_segment=%d\n", num_segment)) /* proxy_port */ check_return_value(fscanf(fp, "%d", &proxy_port), 1) VERBOSEPRINT(fprintf(stderr, "proxy port=%d\n", proxy_port)) /* port */ check_return_value(fscanf(fp, "%d", &port), 1) VERBOSEPRINT(fprintf(stderr, "port=%d\n", port)) /* file_name */ _read_string_value(fp, &file_name); /* save_file_name */ _read_string_value(fp, &save_file_name); /* user_name */ /* user_passwd */ /* keep it as used i.e. the way it is sent in request */ _read_string_value(fp, &user_passwd); /* host_name */ _read_string_value(fp, &host_name); /* thread_timeout_second */ check_return_value(fscanf(fp, "%d", &thread_timeout_second), 1) VERBOSEPRINT(fprintf(stderr, "thread timeout=%d\n", thread_timeout_second)) /* protocol */ _read_string_value(fp, &protocol); /* referrer */ _read_string_value(fp, &referrer); /* rollback */ check_return_value(fscanf(fp, "%d", &rollback), 1) VERBOSEPRINT(fprintf(stderr, "rollback=%d\n", rollback)) /* PROXY - REFERRER - SUPPORT_RESUME - HAS_LENGTH - PROTO_HTTP - VERBOSE - USE_HTTP_CONNECT */ /* read the 7 BOOL values, should have used fread for reading them */ check_return_value(fscanf(fp, "%d %d %d %d %d %d %d", \ (int *)&PROXY, (int *)&REFERRER, (int *)&SUPPORT_RESUME, (int *)&HAS_LENGTH, \ (int *)&PROTO_HTTP, (int *)&VERBOSE, (int *)&USE_HTTP_CONNECT), 7) VERBOSEPRINT(fprintf(stderr, "%d %d %d %d %d %d %d\n", \ PROXY, REFERRER, SUPPORT_RESUME, HAS_LENGTH, \ PROTO_HTTP, VERBOSE, USE_HTTP_CONNECT)) /* req */ check_return_value(fscanf(fp, "%ld", &(main_request.size_file)), 1) VERBOSEPRINT(fprintf(stderr, "main_request.size_file=%ld\n", main_request.size_file)) _read_string_value(fp, &(main_request.version)); check_return_value(fscanf(fp, "%d", &(main_request.status_code)), 1) VERBOSEPRINT(fprintf(stderr, "main_request.status_code=%d\n", main_request.status_code)) fflush(stderr); /* - each entry in ppage->... 
linked list * however many are not needed to resume */ /* first need to do the memory allocation for ppage linked list */ ppage = (PPAGE *) calloc ( sizeof(PPAGE), num_segment); /* NOTE(review): the loop header below is incomplete in this copy - the text between "counter" and "page_id" is missing, including the start of the seven-field fscanf call. As the text stands the function ends in exit(1); whether that call was conditional in the original cannot be told from here - confirm against the original source. */ for (counter=0, tmp_ppage=ppage; counterpage_id), &(tmp_ppage->size_total), &(tmp_ppage->range_start), &(tmp_ppage->range_end), &(tmp_ppage->range_size), &(tmp_ppage->present_seek_pos), &(tmp_ppage->present_read_pos)), 7) exit(1); } /* do_timeout: reposition a segment after a timeout. Without resume support both present_seek_pos and present_read_pos go back to range_start; with it both move back by `rollback` bytes, but only when at least `rollback` bytes past range_start have been read. Always returns -1. */ inline int do_timeout(PPAGE *page){ /* timeout !!! * if can't resume * better restart from first */ if(SUPPORT_RESUME==FALSE){ page->present_seek_pos=page->range_start; page->present_read_pos=page->range_start; }else{ /* so will resume */ /* therefore check if more than rollback has come * and then perform rollback */ if(page->present_read_pos>=page->range_start+rollback){ page->present_seek_pos-=rollback; page->present_read_pos-=rollback; } } return -1; /* fprintf(outfile, "Timeout !!!\n"); */ } /* Fallback inet_ntop for builds where HAVE_INET_NTOP is not defined: af, dst and cnt are ignored and the pointer returned by inet_ntoa() for *src is handed back, so nothing is written into dst. */ #if !defined(HAVE_INET_NTOP) char *inet_ntop(int af, INADDR *src, char *dst, size_t cnt){ return inet_ntoa((INADDR)(*src)); } #endif