//------------------ mgetutil ---------------------------//
// General utility routines for HTTP programs -----------//
//(All debugging related printf statements have been commented.)
/* Copyright 2001 2002 Debajyoti Bera */
/* This file is part of mGet.
* mGet is free Software; please refer to COPYING for terms and conditions */
#include "mgetutil.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Base64 alphabet; the extra 65th entry (index 64) is the '=' padding character. */
char *alphabet="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
/*
 * BASE-64 encoding routine.
 *
 * data: NUL-terminated string to encode (may be NULL).
 *
 * Returns a freshly malloc'ed, NUL-terminated Base64 string that the caller
 * must free, or NULL when data is NULL or the allocation fails.
 */
char * _encode(char *data){
    char *out;
    size_t i,index,length;

    if(data==NULL)
        return NULL;
    length=strlen(data);
    //every group of up to 3 input bytes becomes 4 output characters, +1 for '\0'
    out=(char *)malloc(((length+2)/3)*4+1);
    if(out==NULL)
        return NULL;
    for(i=0,index=0;i<length;i+=3,index+=4){
        int have_second=((i+1)<length);
        int have_third=((i+2)<length);
        //pack up to three bytes into the low 24 bits; missing bytes stay zero
        unsigned long val=(unsigned char)data[i];
        val<<=8;
        if(have_second)
            val|=(unsigned char)data[i+1];
        val<<=8;
        if(have_third)
            val|=(unsigned char)data[i+2];
        //emit the four 6-bit groups from last to first; absent groups become '='
        out[index+3]=alphabet[have_third?(val&0x3F):64];
        val>>=6;
        out[index+2]=alphabet[have_second?(val&0x3F):64];
        val>>=6;
        out[index+1]=alphabet[val&0x3F];
        val>>=6;
        out[index]=alphabet[val&0x3F];
    }//end for
    out[index]='\0'; //index now equals the encoded length
    return out;
}//end _encode
/*
 * Get the length value from an HTTP response header.
 *
 * header: NUL-terminated header text.
 * length: receives the total size taken from "Content-Range: bytes a-b/total"
 *         when present, otherwise from "Content-Length: n". It is set to 0
 *         when neither field is found or the number cannot be parsed.
 *
 * The search runs on an upper-cased copy of the header because field names
 * are case-insensitive. A Content-Range whose unit is not "bytes" is fatal:
 * a message is printed and the process exits with status 1.
 */
void _process_header_length(char *header, long *length){
    char *pointer, bytes_spec[10]={0};
    size_t header_len=strlen(header);
    size_t i;
    long parsed=0;
    char *caps_buffer=(char *)calloc(header_len+1,sizeof(char));

    *length=0;
    if(caps_buffer==NULL)
        return;
    //caps_buffer holds the header in capital letters, used only to locate fields
    for(i=0;i<header_len;++i)
        caps_buffer[i]=(char)toupper((unsigned char)header[i]);
    caps_buffer[header_len]='\0';
    //#debug
    DEBUGPRINT(printf("Debug:%s\n",caps_buffer))
    pointer=strstr(caps_buffer, "CONTENT-RANGE");
    if(pointer==NULL){
        //no content-range: the server may not support resume, fall back to
        //content-length
        pointer=strstr(caps_buffer, "CONTENT-LENGTH");
        //skip the 14-character field name and the ": " separator
        if(pointer!=NULL && sscanf(pointer,"%*14c%*[: ]%ld",&parsed)==1)
            *length=parsed;
        free(caps_buffer);
        return;
    }
    //content range is in header: skip the 13-character field name and the
    //separator, read the unit (bounded to the buffer) and the total after '/'
    if(sscanf(pointer," %*13c%*[: ]%9s %*ld-%*ld/%ld",bytes_spec,&parsed)==2)
        *length=parsed;
    free(caps_buffer);
    //the unit was read from the upper-cased copy, so only "BYTES" can match
    if(strcmp(bytes_spec,"BYTES")!=0){
        fprintf(stderr,"Only Byte ranges supported till now.\n");
        exit(1);
    }
    return;
}
/*
 * Get the HTTP version and status code from the first line of a response.
 *
 * buffer:      NUL-terminated response text.
 * version:     receives the version token (e.g. "HTTP/1.1"); must have room
 *              for at least 10 bytes, at most 9 characters are stored.
 * status_code: receives the numeric status, or -1 when buffer does not start
 *              with "HTTP/" (a late/lost packet rather than a header), when
 *              the version token is too long, or when no number follows it.
 */
void _process_status_line(char *buffer, char *version, int *status_code){
    int consumed=0;
    int code=-1;

    *status_code=-1;
    //first check if at all this is a header message or a late/lost packet
    if(strncmp(buffer,"HTTP/",5)!=0)
        return;
    //bounded read of the version token; %n reports where it stopped
    if(sscanf(buffer,"%9s%n",version,&consumed)!=1)
        return;
    //the token must be followed by a blank, otherwise it was truncated or
    //the line carries no status code at all
    if(buffer[consumed]!=' ' && buffer[consumed]!='\t')
        return;
    if(sscanf(buffer+consumed,"%d",&code)==1)
        *status_code=code;
    return;
}
//allocate a fresh copy of the default file name "index.html"; NULL if out of memory
static char * mget_default_file_name(void){
    static const char fallback[]="index.html";
    char *name=(char *)calloc(sizeof fallback,sizeof(char));

    if(name!=NULL)
        memcpy(name,fallback,sizeof fallback);
    return name;
}
/*
 * Get the file name from the site name.
 *
 * The file name is whatever follows the last '/' of site_name. When there is
 * no '/', or nothing (or a space) follows the last '/', a warning is printed
 * and "index.html" is assumed.
 *
 * site_name: NUL-terminated path, e.g. "www.abcd.com/dir/file".
 * file_name: ignored on input; kept for interface compatibility.
 *
 * Returns either a pointer INTO site_name (a name was present) or a newly
 * allocated "index.html" (default case; NULL if that allocation fails).
 * NOTE(review): the two cases have different ownership, so a caller cannot
 * tell from the result alone whether it may free it.
 */
char * _get_file_name(char *site_name, char *file_name){
    char *last_slash=strrchr(site_name,'/'); //last location of '/'

    (void)file_name;
    if(last_slash==NULL){//no /, site name of form www.abcd.com
        fprintf(stderr, "Warning:PLEASE SUPPLY A FILE NAME! Presently assuming file name - index.html.\n");
        return mget_default_file_name();
    }
    //there is a last '/', the name starts right after it
    if(last_slash[1]==' ' || last_slash[1]=='\0'){
        fprintf(stderr, "Warning: PLEASE SUPPLY A FILE NAME! Presently assuming file name - index.html.\n");
        return mget_default_file_name();
    }
    return last_slash+1;
}
/*
 * Extract the host part of a path: everything before the first '/'.
 * Of use only under direct connection (a proxy derives its address itself).
 *
 * full_path: NUL-terminated string such as "www.abcd.com/dir/file".
 * site_name: ignored on input; kept for interface compatibility.
 *
 * Returns a newly allocated copy of the host part, or NULL when the
 * allocation fails.
 */
char * _get_site_name(char *full_path, char *site_name){
    //number of characters before the first '/' (whole string if there is none)
    size_t size=strcspn(full_path,"/");

    site_name=(char *)calloc(size+1,sizeof(char)); //1+: room for '\0'
    if(site_name==NULL)
        return NULL;
    memcpy(site_name,full_path,size);
    site_name[size]='\0';
    DEBUGPRINT(printf("Host name:%s\n",site_name))
    return site_name;
}
/*
 * Remove the header part of a response so that only GET data is written.
 *
 * header:   start of the received response.
 * datasize: in: number of valid bytes at header (the buffer need not be
 *           NUL-terminated at that length); out: see below.
 *
 * - Status other than 200/206: *datasize is set to minus the status code
 *   (or to the status itself when it is not positive) and header is returned;
 *   the returned pointer carries no meaning in that case.
 * - Separator "\r\n\r\n" found within the first *datasize bytes: returns a
 *   pointer to the first data byte and reduces *datasize to the data size.
 * - Separator not found: returns header and leaves *datasize unchanged.
 *   NOTE(review): the caller then cannot distinguish this from "no header";
 *   confirm how a header split over several reads should be handled.
 *
 * Presently oversimplified: ideally a non-200/206 status would cause the
 * request to be re-sent.
 */
char * _remove_header(char *header, int *datasize){
    char *start_data=NULL; //start of the "\r\n\r\n" separator, NULL until found
    int position;
    int status=-1; //stays -1 if the status line cannot be parsed
    char version[10];

    //first process the status line and check if status is 200 or 206
    _process_status_line(header, version, &status);
    if(status!=206 && status!=200){
        *datasize=(status>0?(-1*status):status);
        return header; //doesnot matter what is returned
    }
    //as specified in RFC, the header part ends with two CRLF s.
    //search by length, never looking at bytes beyond *datasize
    for(position=0;position+3<*datasize;position++){
        if(memcmp(header+position,"\r\n\r\n",4)==0){
            start_data=header+position;
            break; //found
        }
    }
    //update datasize
    if(start_data!=NULL)
        *datasize=(*datasize)-(position+4);//move past \r\n\r\n
    //#debug
    DEBUGPRINT(fprintf(stdout,"New effective size:%d, Header:[\n%s]\nData:[\n%s]\n",*datasize, header, (start_data==NULL?"null":start_data+4)))
    return ((start_data==NULL)?header:start_data+4); //return header on error, to avoid segmentation fault
}
/*
 * Strip a leading "scheme://" from full_path and report the protocol.
 *
 * full_path: NUL-terminated URL, with or without a scheme.
 * protocol:  output buffer; the code writes up to index 4, so it needs at
 *            least 5 bytes. Receives "http" when there is no "://" at all or
 *            when full_path starts with "http" (any case), and "ftp" when it
 *            starts with "ftp" (any case).
 *
 * Returns full_path itself when no "://" is present, otherwise a pointer to
 * the character after the first "://".
 * NOTE(review): with any other scheme protocol is left untouched - confirm
 * that callers initialise it.
 */
char * _remove_http(char *full_path, char *protocol){
    //RFC allows http or HTTP, so the separator "://" is what is searched for
    char *separator=strstr(full_path,"://");

    if(separator!=NULL){
        //decide between http and ftp from the beginning of the string
        if(strncasecmp(full_path, "http", 4)==0)
            memcpy(protocol, "http", 5);      //'h','t','t','p','\0'
        else if(strncasecmp(full_path, "ftp", 3)==0)
            memcpy(protocol, "ftp\0", 5);     //'f','t','p','\0','\0'
        return separator+3;
    }
    //no scheme given: assume http and hand back the path unchanged
    memcpy(protocol, "http", 5);
    return full_path;
}
//general error handling
void printerror(char *error){
fprintf(stderr, "HTTP Error: %s \nExiting mGet/"VERSION"\n",error);
/* exit(1); */
}
/*
 * Report an HTTP error status; called only on error.
 *
 * Terminates error_buffer at index 1023, dumps it to outfile through
 * VERBOSEPRINT, flushes outfile, prints the numeric code to stderr and then
 * hands the matching description (or "Unknown Error") to printerror().
 * error_buffer and outfile are defined outside this function.
 * NOTE(review): assumes error_buffer holds at least 1024 bytes - confirm.
 */
void _process_http_status(int status_code){
    char *message;

    error_buffer[1023]='\0';
    VERBOSEPRINT(fprintf(outfile, "Last received buffer:\n%s...\n",error_buffer))
    fflush(outfile);
    fprintf(stderr, "Error code:%d\n",status_code);
    switch(status_code){
    case 204: message="No Content"; break;
    case 301: message="Moved Permanently"; break;
    case 302: message="Moved Temporarily"; break;
    case 400: message="Bad Request"; break;
    case 401: message="Unauthorized"; break;
    case 403: message="Forbidden"; break;
    case 404: message="Not Found"; break;
    case 407: message="Proxy Authentication Invalid"; break;
    case 408: message="Request Time-out"; break;
    case 500: message="Internal Server Error"; break;
    case 502: message="Bad Gateway"; break;
    case 503: message="Service Unavailable: Host not found"; break;
    case 504: message="Gateway Timeout"; break;
    case 505: message="HTTP Version not supported"; break;
    default : message="Unknown Error"; break;
    }
    //a void function must not return an expression, so report once here
    printerror(message);
}
//parse a proxy specification given directly by the caller.
//forwards all three arguments unchanged to _get_proxy_name_port and
//returns whatever that function returns.
char * get_proxy_name_port(char *optarg, int *port, char *proxy_name){
    char *parsed_name;

    parsed_name=_get_proxy_name_port(optarg, port, proxy_name);
    return parsed_name;
}
//parse the proxy specification stored in the environment.
//type 'f' reads the variable ftp_proxy, type 'h' reads http_proxy; the value
//(possibly NULL when the variable is unset) is passed to _get_proxy_name_port
//and its result is returned. Any other type yields NULL without a lookup.
char * get_proxy_from_env(int *port, char *proxy_name, char type){
    char *env_value;

    switch(type){
    case 'f':
        env_value=getenv("ftp_proxy");
        break;
    case 'h':
        env_value=getenv("http_proxy");
        break;
    default:
        return NULL;
    }
    return _get_proxy_name_port(env_value, port, proxy_name);
}
char * _get_proxy_name_port(char *optarg, int *port, char *proxy_name){
unsigned int length;
//optarg is of form hostname:port; hostname can have ...:// in front
//but it should not have http:/ etc. in front of it, so remove it
char *pointer1, *pointer2;
//if optarg is null then only possible case is that
//user has asked mget to take the value from environment
//and there is no such environment variable
//so tell this error to user and exit
if(optarg==NULL){
fprintf(stderr, "No such environment variable exits.\n");
return NULL;
}
pointer1=strstr(optarg, "://");
if(pointer1==NULL){
length=(int)strcspn(optarg, ":");
//or hostname has :// at its beginning
pointer1=optarg;
}else{
pointer1+=3;
pointer2=strstr(pointer1, ":");
length=pointer2-pointer1;
}
proxy_name=(char *)calloc(sizeof(char), 1+length);
proxy_name[length]='\0';
strncpy(proxy_name, pointer1, length);
if(length==strlen(pointer1)){ //no port is given, assuming 80
*port=80;
return proxy_name;
}else{ //so port is given
char *end_slash;
//remove at most one last '/' from end of proxy value
if((end_slash=index(pointer1+length+1, '/'))!=NULL)
*end_slash='\0';
*port=atoi(pointer1+length+1);
if(*port==0){
VERBOSEPRINT(fprintf(stderr, "Value of proxy doesnot have a valid port number. Using default value 80.\n"))
*port=80;
}
return proxy_name;
}
}
/*
 * Report the redirection address in case of redirection.
 *
 * buffer: NUL-terminated response header text.
 *
 * Finds the first occurrence of "Location", then the ':' after it, and prints
 * the field value (up to the end of that line) to stderr after the label
 * "Redirected Location:". The ':' itself and the blanks following it are not
 * part of the value. Prints a diagnostic instead when "Location" or the ':'
 * is missing.
 */
void get_redirected_address(char *buffer){
    char *pointer;
    size_t span;

    pointer=strstr(buffer, "Location");
    if(pointer==NULL){
        fprintf(stderr, "Redirected location not found.\n");
        return;
    }
    //redirected location found
    pointer=strchr(pointer,':');
    if(pointer==NULL){
        fprintf(stderr, "Location field in HTTP header is corrupted.\n");
        return;
    }
    pointer++; //step over the ':' so it is not printed twice
    while(*pointer==' ' || *pointer=='\t')
        pointer++;
    span=strcspn(pointer, "\r\n"); //value ends at CR, LF or end of string
    fprintf(stderr, "Redirected Location:");
    fwrite(pointer, sizeof(char), span, stderr);
    fprintf(stderr, "\n");
    return;
}
/* Opens the log file for writing ("w" mode) and returns its stream.
 * The file is named after the file_name argument, or "mget.log" when
 * file_name is NULL.
 * Returns NULL if the file cannot be opened. */
FILE * _get_log_file(char *file_name){
    const char *path="mget.log";

    if(file_name!=NULL)
        path=file_name;
    return fopen(path, "w");
}
/*
 * Read a length-prefixed string from fp.
 *
 * The stream holds an integer length followed by the string itself.
 * - length > 0: *variable is set to a newly allocated buffer of length+1
 *   bytes and at most length characters of the next whitespace-delimited
 *   token are stored in it. A longer token is NOT read past the buffer; its
 *   remainder stays in the stream.
 * - length <= 0, or the length could not be read: the rest of the line is
 *   skipped (null strings are in some cases written as '(null)') and
 *   *variable is left untouched.
 * - allocation failure: *variable is NULL and nothing more is read.
 */
void _read_string_value(FILE *fp, char **variable){
    int length=0;
    char format[32];

    check_return_value(fscanf(fp, "%d", &length), 1)
    if (length<=0){
        fscanf(fp, "%*[^\n]");
        return;
    }
    /* allocate memory for the variable */
    *variable=(char *)calloc((size_t)length+1, sizeof(char));
    if (*variable==NULL)
        return;
    /* build "%<length>s" so the read can never overrun the buffer */
    snprintf(format, sizeof format, "%%%ds", length);
    check_return_value(fscanf(fp, format, *variable), 1)
    VERBOSEPRINT(fprintf(stderr, "read %s\n", *variable))
}
//---------------- end of mgetutil ------------------//
// syntax highlighted by Code2HTML, v. 0.9.1