//------------------ mgetutil ---------------------------//
// General utility routines for HTTP programs -----------//
//(All debugging-related printf statements have been commented out.)
/* Copyright 2001 2002 Debajyoti Bera */
/* This file is part of mGet.
 * mGet is free Software; please refer to COPYING for terms and conditions */

#include "mgetutil.h"

/* Base-64 digit table: indices 0-63 are the encoding digits, index 64 is
 * the '=' padding character. */
char *alphabet="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

/*
 * BASE-64 encoding routine.
 *
 * data: NUL-terminated string to encode (its strlen() bytes are encoded).
 *
 * Returns a freshly malloc()ed, NUL-terminated string holding the padded
 * base-64 form of data; the caller owns it and must free() it.
 * Returns NULL if data is NULL or the allocation fails.
 */
char * _encode(char *data){

	char *out;
	size_t i,index,length,outlen;

	if(data==NULL)
		return NULL;

	length=strlen(data);
	outlen=((length+2)/3)*4; //every (possibly partial) 3-byte group gives 4 digits
	out=(char *)malloc(outlen+1);
	if(out==NULL)
		return NULL;

	for(i=0,index=0;i<length;i+=3,index+=4){
		//a trailing group may hold only one or two input bytes
		int have_second=((i+1)<length);
		int have_third=((i+2)<length);
		unsigned long val=(unsigned char)data[i];

		//pack up to three bytes into the low 24 bits, missing bytes as zero
		val<<=8;
		if(have_second)
			val|=(unsigned char)data[i+1];
		val<<=8;
		if(have_third)
			val|=(unsigned char)data[i+2];

		//peel off four 6-bit digits from the low end; digits that would
		//encode only missing bytes are replaced by the pad (index 64)
		out[index+3]=alphabet[have_third?(val&0x3F):64];
		val>>=6;
		out[index+2]=alphabet[have_second?(val&0x3F):64];
		val>>=6;
		out[index+1]=alphabet[val&0x3F];
		val>>=6;
		out[index]=alphabet[val&0x3F];
	}//end for

	out[outlen]='\0'; //the result is used as a C string, so terminate it
	return out;
}//end _encode



/*
 * Get the length value from an HTTP response header.
 *
 * header: NUL-terminated header text.
 * length: out parameter. Receives the total size taken from the
 *         "Content-Range: bytes a-b/total" field if one is present,
 *         otherwise the value of "Content-Length". Set to 0 when neither
 *         field is present, when the number cannot be parsed (for example
 *         a range total of "*"), or when memory cannot be allocated.
 *
 * Terminates the program with exit(1) if a Content-Range field is present
 * but its unit is not "bytes".
 */
void _process_header_length(char *header, long *length){ //get the length value from the header
	char *pointer, bytes_spec[10]={0}, spacer[4]={0};
	char *caps_buffer;
	size_t i, header_len;

	header_len=strlen(header);
	caps_buffer=(char *)calloc(header_len+1, sizeof(char));
	if(caps_buffer==NULL){
		*length=0;
		return;
	}

	//caps_buffer holds the header in capital letters: field names are
	//case-insensitive, so the search is done on an upper-cased copy.
	//toupper() needs a value in unsigned char range.
	for(i=0;i<header_len;++i)
		caps_buffer[i]=(char)toupper((unsigned char)header[i]);
	caps_buffer[header_len]='\0';
//#debug	
	DEBUGPRINT(printf("Debug:%s\n",caps_buffer))
	
	pointer=strstr(caps_buffer, "CONTENT-RANGE");
	if(pointer==NULL){
		//no content-range: the server may not support resume, so fall
		//back to content-length
		pointer=strstr(caps_buffer, "CONTENT-LENGTH");
		//skip the 14-character field name, then the ": " separator
		//(width-limited to fit spacer), then read the number
		if(pointer==NULL || sscanf(pointer,"%*14c%3[: ]%ld", spacer,length)!=2)
			*length=0;
		free(caps_buffer);
		return;
	}

	//content range is in header; pointer points to the beginning of the substring.
	//Skip the 13-character field name and separator, read the unit (width-limited
	//to fit bytes_spec), discard the first-last offsets and keep the total.
	if(sscanf(pointer,"%*13c%3[: ]%9s %*d-%*d/%ld",spacer,bytes_spec,length)!=3)
		*length=0;
	free(caps_buffer);

	//check if the bytes_spec is bytes (it was read from the upper-cased copy)
	if(strcmp(bytes_spec,"BYTES")!=0){
		fprintf(stderr,"Only Byte ranges supported till now.\n");
		exit(1);
	}
	return;
}

/*
 * Get the protocol version and status code from an HTTP status line.
 *
 * buffer:      NUL-terminated start of a response.
 * version:     out buffer of at least 10 bytes; receives the first token
 *              (e.g. "HTTP/1.1"). Left untouched when buffer does not
 *              start with "HTTP/".
 * status_code: out parameter; receives the numeric status code, or -1 when
 *              buffer does not start with "HTTP/" (a late/lost packet rather
 *              than a header) or no status code can be parsed.
 */
void _process_status_line(char *buffer, char *version, int *status_code){ //get the status code
	int consumed=0;
	int code=0;

	//pessimistic default, so the caller never reads an unset value
	*status_code=-1;

	//first check if at all this is a header message or a late/lost packet
	if(strncmp(buffer,"HTTP/",5)!=0)
		return;

	//the version token is width-limited so that an over-long token sent
	//by the server cannot overrun the caller's buffer
	if(sscanf(buffer,"%9s%n",version,&consumed)!=1)
		return;

	//the token must end at whitespace; otherwise it was truncated (or the
	//line ends here) and what follows is not the status code
	if(buffer[consumed]=='\0' || strchr(" \t\r\n",buffer[consumed])==NULL)
		return;

	if(sscanf(buffer+consumed,"%d",&code)==1)
		*status_code=code;

	return;
}

/* Allocate a fresh copy of the fallback file name "index.html"; NULL if out of memory. */
static char *default_index_name(void){
	static const char fallback[]="index.html";
	char *copy=(char *)calloc(sizeof fallback, sizeof(char));

	if(copy!=NULL)
		memcpy(copy, fallback, sizeof fallback); //sizeof includes the '\0'
	return copy;
}

/*
 * Get the file name from the site name.
 *
 * The file name is whatever follows the last '/' of site_name. If there is
 * no '/', or nothing (or a space) follows the last '/', a warning is printed
 * on stderr and "index.html" is assumed.
 *
 * site_name: NUL-terminated path to inspect.
 * file_name: ignored; the result is delivered through the return value only.
 *
 * Returns either a pointer INTO site_name (when a name was found) or a newly
 * calloc()ed "index.html" (when the fallback was used); NULL if the fallback
 * could not be allocated. Because the two cases differ in ownership, callers
 * cannot unconditionally free() the result.
 */
char * _get_file_name(char *site_name, char *file_name){
	char *last_location; //last location of '/'

	(void)file_name;

	last_location=strrchr(site_name,'/');

	if(last_location==NULL){//no /
		fprintf(stderr, "Warning:PLEASE SUPPLY A FILE NAME! Presently assuming file name - index.html.\n");
		return default_index_name();
	}

	//there is a last '/', so the name starts right after it
	if(last_location[1]==' ' || last_location[1]=='\0'){
		fprintf(stderr, "Warning: PLEASE SUPPLY A FILE NAME! Presently assuming file name - index.html.\n");
		return default_index_name();
	}

	return last_location+1;
}

/*
 * Extract the host part of a path: everything before the first '/'.
 * Of use only under direct connection (a proxy derives its address itself).
 *
 * full_path: NUL-terminated path, expected without a leading "scheme://".
 * site_name: ignored on entry; the result is delivered through the return
 *            value only.
 *
 * Returns a newly calloc()ed, NUL-terminated copy of the host part (the
 * whole of full_path when it contains no '/'), or NULL if the allocation
 * fails.
 */
char * _get_site_name(char *full_path, char *site_name){
	size_t size; //size of site name

	//number of characters before the first '/' (or before the '\0')
	size=strcspn(full_path,"/");

	site_name=(char *)calloc(1+size, sizeof(char)); //1+: room for '\0'
	if(site_name==NULL)
		return NULL;
	memcpy(site_name,full_path,size); //calloc already supplied the terminator
	DEBUGPRINT(printf("Host name:%s\n",site_name))	
	
	return site_name; 
}

/*
 * Remove the header part of an HTTP response so that the GET data can be
 * written out.
 *
 * header:   start of the received buffer. It need not be NUL-terminated for
 *           the header search, which is bounded by *datasize.
 * datasize: in: number of valid bytes in header.
 *           out, on success: number of data bytes that follow the header.
 *           out, when the status is not 200 or 206: the negated status code
 *           (or the status itself when it is not positive).
 *           out, when the status is fine but no blank line ends the header
 *           within the buffer: -4.
 *
 * Returns a pointer to the first data byte on success, and header itself in
 * every failure case (the caller must look at *datasize to tell them apart).
 *
 * This is oversimplified: ideally an error status should lead to the
 * request being re-sent.
 */
char * _remove_header(char *header, int *datasize){
	//value callers have always received in *datasize when the header
	//terminator is missing; kept so their error handling is unaffected
	enum { HEADER_END_MISSING = -4 };
	char * start_data=NULL; //will point to the \r\n\r\n that ends the header
	int position;
	int status=-1; //treated as an error unless the status line says otherwise
	char version[10];
	
	//first process the status line
	//check if status is 200 or 206
	_process_status_line(header, version, &status);
	if(status!=206 && status!=200){
		*datasize=(status>0?(-1*status):status);
		return header; //doesnot matter what is returned
	}

	//as specified in RFC, the header part ends with two CRLF s.
	//header may not have any '\0' at its end, so search by the length and
	//never look at bytes beyond the *datasize that were received
	for(position=0; position+4<=*datasize; position++){
		if(memcmp(header+position,"\r\n\r\n",4)==0){
			start_data=header+position;
			break; //found
		}
	}
	
	//update datasize
	if(start_data!=NULL)
		*datasize=(*datasize)-(position+4);//move past \r\n\r\n
	else
		*datasize=HEADER_END_MISSING;

	DEBUGPRINT(fprintf(stdout,"New effective size:%d, Header:[\n%s]\nData:[\n%s]\n",*datasize, header, (start_data==NULL?"null":start_data+4)))
			
	return ((start_data==NULL)?header:start_data+4); //return header on error, to avoid segmentation fault
}

/*
 * Strip a leading "scheme://" from full_path and report the protocol.
 *
 * full_path: NUL-terminated URL, with or without a scheme.
 * protocol:  out buffer of at least 5 bytes. Receives "http" when there is
 *            no "://" at all or when full_path starts with "http" (any
 *            case); receives "ftp" when full_path starts with "ftp" (any
 *            case). It is left untouched when "://" is present but
 *            full_path starts with neither.
 *
 * Returns full_path itself when it contains no "://", otherwise a pointer
 * to the character following the first "://".
 */
char * _remove_http(char *full_path, char *protocol){
	//RFC allows http or HTTP, so the "://" separator is what is looked for
	char *separator=strstr(full_path,"://");

	if(separator!=NULL){
		//decide between http and ftp from the start of the string
		if(strncasecmp(full_path, "http", 4)==0){
			strcpy(protocol, "http");
		}else if(strncasecmp(full_path, "ftp", 3)==0){
			strcpy(protocol, "ftp");
			protocol[4]='\0'; //clear the byte "http" would occupy as well
		}
		return separator+3;
	}

	//no scheme given: assume http and hand the path back unchanged
	strcpy(protocol, "http");
	return full_path;
}

//general error handling
/*
 * Print an HTTP error message on stderr.
 *
 * error: NUL-terminated description of the error.
 *
 * The output is "HTTP Error: <error> " followed by a line naming mGet and
 * its VERSION. Although the message says "Exiting", this function returns
 * normally: the exit call below is commented out, so ending the program is
 * left to the caller.
 */
void printerror(char *error){
	fprintf(stderr, "HTTP Error: %s \nExiting mGet/"VERSION"\n",error);
	/* exit(1); */
}
/*
 * Report an HTTP error status. Called for error handling only.
 *
 * status_code: the HTTP status code received.
 *
 * Terminates error_buffer at index 1023, prints it to outfile (under
 * VERBOSEPRINT), flushes outfile, prints the numeric code on stderr and then
 * passes the matching reason phrase ("Unknown Error" for codes not listed)
 * to printerror().
 */
void _process_http_status(int status_code){
	char *message;

	error_buffer[1023]='\0';
	VERBOSEPRINT(fprintf(outfile, "Last received buffer:\n%s...\n",error_buffer))
	fflush(outfile);
	fprintf(stderr, "Error code:%d\n",status_code);

	//pick the reason phrase first and report once at the end
	switch(status_code){
		case 204: message="No Content"; break;
		case 301: message="Moved Permanently"; break;
		case 302: message="Moved Temporarily"; break;
		case 400: message="Bad Request"; break;
		case 401: message="Unauthorized"; break;
		case 403: message="Forbidden"; break;
		case 404: message="Not Found"; break;
		case 407: message="Proxy Authentication Invalid"; break;
		case 408: message="Request Time-out"; break;
		case 500: message="Internal Server Error"; break;
		case 502: message="Bad Gateway"; break;
		case 503: message="Service Unavailable: Host not found"; break;
		case 504: message="Gateway Timeout"; break;
		case 505: message="HTTP Version not supported"; break;
		default : message="Unknown Error"; break;
	}
	printerror(message);
}

/*
 * Public entry point for parsing a proxy specification.
 * Forwards all three arguments unchanged to _get_proxy_name_port() and
 * returns whatever that function returns.
 */
char * get_proxy_name_port(char *optarg, int *port, char *proxy_name){
	return _get_proxy_name_port(optarg, port, proxy_name);
}

/*
 * Take the proxy specification from the environment.
 *
 * port, proxy_name: passed through to _get_proxy_name_port().
 * type: 'f' reads the "ftp_proxy" variable, 'h' reads "http_proxy".
 *
 * Returns the result of _get_proxy_name_port() applied to the variable's
 * value (getenv() yields NULL for an unset variable, and that NULL is passed
 * on as well), or NULL without reading anything for any other type.
 */
char * get_proxy_from_env(int *port, char *proxy_name, char type){
	char *env_value;

	switch(type){
		case 'f':
			env_value=getenv("ftp_proxy");
			break;
		case 'h':
			env_value=getenv("http_proxy");
			break;
		default:
			return NULL;
	}
	return _get_proxy_name_port(env_value, port, proxy_name);
}

char * _get_proxy_name_port(char *optarg, int *port, char *proxy_name){
	unsigned int length;
	//optarg is of form hostname:port; hostname can have ...:// in front
	//but it should not have http:/ etc. in front of it, so remove it
	char *pointer1, *pointer2;

	//if optarg is null then only possible case is that
	//user has asked mget to take the value from environment
	//and there is no such environment variable
	//so tell this error to user and exit
	if(optarg==NULL){
		fprintf(stderr, "No such environment variable exits.\n");
		return NULL;
	}
	
	pointer1=strstr(optarg, "://");
	if(pointer1==NULL){
		length=(int)strcspn(optarg, ":");
	//or hostname has :// at its beginning
		pointer1=optarg;
	}else{
		pointer1+=3;
		pointer2=strstr(pointer1, ":");
		length=pointer2-pointer1;
	}
	proxy_name=(char *)calloc(sizeof(char), 1+length);
	proxy_name[length]='\0';
	strncpy(proxy_name, pointer1, length);
	if(length==strlen(pointer1)){ //no port is given, assuming 80
		*port=80;
		return proxy_name;
	}else{ //so port is given
		char *end_slash;
		//remove at most one last '/' from end of proxy value
		if((end_slash=index(pointer1+length+1, '/'))!=NULL)
			*end_slash='\0';
		*port=atoi(pointer1+length+1);
		if(*port==0){
			VERBOSEPRINT(fprintf(stderr, "Value of proxy doesnot have a valid port number. Using default value 80.\n"))
			*port=80;
		}
		return proxy_name;
	}
}

/*
 * Report the redirection address in case of redirection.
 *
 * buffer: NUL-terminated response header text.
 *
 * Looks for the first occurrence of "Location" (case-sensitive) and prints
 * the text between the ':' that follows it on the same line and the end of
 * that line, as "Redirected Location:<value>", on stderr. Prints a
 * diagnostic instead when there is no "Location", or no ':' after it before
 * the line ends.
 */
void get_redirected_address(char *buffer){
	char *pointer;
	
	pointer=strstr(buffer, "Location");
	if(pointer==NULL){
		fprintf(stderr, "Redirected location not found.\n");
		return;
	}
	//redirected location found; the separating ':' must be on the same line
	pointer+=strcspn(pointer, ":\r\n");
	if(*pointer!=':'){
		fprintf(stderr, "Location field in HTTP header is corrupted.\n");
		return;
	}
	pointer++; //step past the ':' so that it is not printed a second time

	//print up to, but not including, the end of the line
	fprintf(stderr, "Redirected Location:%.*s\n", (int)strcspn(pointer, "\r\n"), pointer);
	return;
}

/* Open the log file for writing, truncating any existing content.
 * The file is named after the file_name argument, or "mget.log" when
 * file_name is NULL.
 * Returns the stream from fopen(), which is NULL if the file cannot be
 * opened. */
FILE * _get_log_file(char *file_name){
	char default_name[]="mget.log";
	char *path=file_name;

	if(path==NULL)
		path=default_name;
	return fopen(path, "w");
}

/*
 * Read a length-prefixed string value from fp.
 *
 * fp:       stream positioned at a decimal length followed by the string.
 * variable: out parameter. When the length is positive, *variable receives a
 *           newly calloc()ed buffer of length+1 bytes holding the next
 *           whitespace-delimited token, cut off after at most length
 *           characters. When the length is zero or negative, the rest of the
 *           current line is skipped and *variable is left untouched. If the
 *           allocation fails, *variable is set to NULL and nothing more is
 *           read.
 *
 * Both reads are passed through check_return_value with an expected
 * conversion count of 1.
 */
void _read_string_value(FILE *fp, char **variable){
	int length=0;
	char format[16]; /* "%" + digits of an int + "s" + '\0' */

	check_return_value(fscanf(fp, "%d", &length), 1)
	/* so we need to read a string of length 'length' and store that in *variable */
	/* if length is 0 then need to read all the way till '\n' as in some cases 
	 * null strings are written as '(null)'; a negative length is treated the
	 * same way, since it cannot describe a string */
	if (length<=0){
		fscanf(fp, "%*[^\n]");
		return;
	}
	/* allocate memory for the variable */
	*variable=(char *)calloc((size_t)length+1, sizeof(char));
	if (*variable==NULL)
		return;
	/* bound the read by the declared length, so a longer token in the file
	 * cannot overrun the buffer */
	snprintf(format, sizeof format, "%%%ds", length);
	check_return_value(fscanf(fp, format, *variable), 1)
	VERBOSEPRINT(fprintf(stderr, "read %s\n", *variable))
}

//----------------  end of mgetutil ------------------//


// syntax highlighted by Code2HTML, v. 0.9.1