/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*  GMime
 *  Copyright (C) 2000-2007 Jeffrey Stedfast
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <string.h>
#include <limits.h>
#include <ctype.h>
#include <errno.h>

#include "gmime-param.h"
#include "gmime-common.h"
#include "gmime-table-private.h"
#include "gmime-parse-utils.h"
#include "gmime-iconv-utils.h"
#include "gmime-charset.h"
#include "gmime-utils.h"
#include "gmime-iconv.h"


/* w(x): wraps warning statements; expands to its argument only when
 * ENABLE_WARNINGS is defined and to nothing otherwise. */
#ifdef ENABLE_WARNINGS
#define w(x) x
#else
#define w(x)
#endif /* ENABLE_WARNINGS */

/* d(x): wraps debug statements; defined empty here, so they compile away. */
#define d(x)


/* Upper-case hexadecimal digit for each nibble value 0..15; used when
 * writing %XX escapes for encoded parameter values. */
static unsigned char tohex[16] = {
	'0', '1', '2', '3',
	'4', '5', '6', '7',
	'8', '9', 'A', 'B',
	'C', 'D', 'E', 'F'
};


/**
 * g_mime_param_new: Create a new MIME Param object
 * @name: parameter name
 * @value: parameter value
 *
 * Allocates a single, unlinked GMimeParam node that holds its own
 * copies of @name and @value.
 *
 * Returns a new parameter structure.
 **/
GMimeParam *
g_mime_param_new (const char *name, const char *value)
{
	GMimeParam *node = g_new (GMimeParam, 1);
	
	node->name = g_strdup (name);
	node->value = g_strdup (value);
	node->next = NULL;
	
	return node;
}

/* True when appending decimal digit d to the non-negative int x would exceed INT_MAX. */
#define INT_OVERFLOW(x,d) (((x) > (INT_MAX / 10)) || ((x) == (INT_MAX / 10) && (d) > (INT_MAX % 10)))

/* Skips leading whitespace, then parses a run of decimal digits at *in.
 * If the number would overflow an int, the remaining digits are consumed
 * but ignored and the value accumulated so far is returned.
 * *in is left pointing just past the digits. Returns 0 if there are none. */
static int
decode_int (const char **in)
{
	const unsigned char *cur;
	int value = 0;
	
	decode_lwsp (in);
	
	for (cur = (const unsigned char *) *in; isdigit ((int) *cur); cur++) {
		int digit = *cur - '0';
		
		if (INT_OVERFLOW (value, digit)) {
			/* swallow the rest of the digit run and give up */
			do {
				cur++;
			} while (isdigit ((int) *cur));
			break;
		}
		
		value = (value * 10) + digit;
	}
	
	*in = (const char *) cur;
	
	return value;
}

/* Decodes a quoted-string at *in (after skipping leading whitespace).
 *
 * Returns NULL, with *in advanced past the whitespace, if the next
 * character is not a double quote. Otherwise returns a newly allocated
 * copy of the string with backslash escapes removed and sets *in just
 * past the closing quote. If the closing quote is missing, everything up
 * to the end of input is returned (including the opening quote) and *in
 * is left at the terminating nul.
 *
 * A backslash that is the very last character of the input has nothing
 * to escape; it is kept literally and never causes a read past the nul. */
static char *
decode_quoted_string (const char **in)
{
	const char *start, *inptr = *in;
	char *outptr, *out = NULL;
	gboolean unescape = FALSE;
	
	decode_lwsp (&inptr);
	
	if (*inptr != '"') {
		*in = inptr;
		return NULL;
	}
	
	start = inptr++;
	
	while (*inptr && *inptr != '"') {
		if (*inptr++ == '\\') {
			unescape = TRUE;
			/* skip the escaped character, but never step over
			 * the terminating nul of a truncated header */
			if (*inptr)
				inptr++;
		}
	}
	
	if (*inptr == '"') {
		start++;
		out = g_strndup (start, (unsigned int) (inptr - start));
		inptr++;
	} else {
		/* string wasn't properly quoted */
		out = g_strndup (start, (unsigned int) (inptr - start));
	}
	
	*in = inptr;
	
	if (unescape) {
		/* collapse "\x" into "x" in place; the result never grows */
		inptr = outptr = out;
		while (*inptr) {
			/* a trailing backslash has nothing to escape: keep it
			 * rather than skipping onto (and past) the nul */
			if (*inptr == '\\' && inptr[1] != '\0')
				inptr++;
			*outptr++ = *inptr++;
		}
		
		*outptr = '\0';
	}
	
	return out;
}

/* Decodes a token (a run of is_ttoken() characters) after skipping
 * leading whitespace. On success returns a newly allocated copy and
 * advances *in past the token; returns NULL and leaves *in untouched
 * when no token character is present. */
static char *
decode_token (const char **in)
{
	const char *first, *cur;
	
	first = *in;
	decode_lwsp (&first);
	
	for (cur = first; is_ttoken (*cur); cur++)
		;
	
	if (cur == first)
		return NULL;
	
	*in = cur;
	
	return g_strndup (first, (unsigned int) (cur - first));
}

/* Decodes a parameter value, which is either a quoted-string or a bare
 * token. Peeks past leading whitespace to pick the form, then lets the
 * matching decoder consume from *in. Returns NULL if neither form starts
 * at the next non-whitespace character. */
static char *
decode_value (const char **in)
{
	const char *peek = *in;
	
	decode_lwsp (&peek);
	
	if (*peek == '"')
		return decode_quoted_string (in);
	
	if (is_ttoken (*peek))
		return decode_token (in);
	
	return NULL;
}

/* Same contract as decode_token(), except that the token also ends at
 * a '*', because '*' carries special meaning in rfc2184 parameter
 * names (name*, name*0, name*0*). */
static char *
decode_param_token (const char **in)
{
	const char *first, *cur;
	
	first = *in;
	decode_lwsp (&first);
	
	cur = first;
	while (*cur != '*' && is_ttoken (*cur))
		cur++;
	
	if (cur == first)
		return NULL;
	
	*in = cur;
	
	return g_strndup (first, (unsigned int) (cur - first));
}

/* Parses the name part of a parameter, recognising the rfc2184 forms
 *
 *     name*=value      encoded singleton       (*part = -1, *encoded = TRUE)
 *     name*N=value     plain continuation N    (*part = N,  *encoded = FALSE)
 *     name*N*=value    encoded continuation N  (*part = N,  *encoded = TRUE)
 *
 * The newly allocated name (or NULL) is stored in *paramp when paramp is
 * non-NULL. *in is advanced, up to but not including the '=', only when
 * a name was found. Returns TRUE if a '*' followed the name. */
static gboolean
decode_rfc2184_param (const char **in, char **paramp, int *part, gboolean *encoded)
{
	const char *cur = *in;
	gboolean extended;
	char *name;
	
	*part = -1;
	*encoded = FALSE;
	
	name = decode_param_token (&cur);
	decode_lwsp (&cur);
	
	extended = (*cur == '*');
	if (extended) {
		cur++;
		decode_lwsp (&cur);
		
		if (*cur == '=') {
			/* name*=value */
			*encoded = TRUE;
		} else {
			/* name*N=value or name*N*=value */
			*part = decode_int (&cur);
			decode_lwsp (&cur);
			
			if (*cur == '*') {
				/* name*N*=value */
				*encoded = TRUE;
				cur++;
				decode_lwsp (&cur);
			}
		}
	}
	
	if (paramp != NULL)
		*paramp = name;
	
	if (name != NULL)
		*in = cur;
	
	return extended;
}

/* Decodes one "name=value" parameter starting at *in.
 *
 * On success stores newly allocated strings in *paramp and *valuep,
 * advances *in past the value and returns TRUE; *id and *encoded carry
 * the rfc2184 continuation index (-1 if none) and encoded flag.
 * On failure nothing is returned through paramp/valuep, *in is left
 * untouched and FALSE is returned.
 *
 * Values of ordinary (non-rfc2184) parameters get two repairs for
 * broken mailers: rfc2047 encoded-words are decoded, and text that is
 * not valid UTF-8 is converted from the locale charset. */
static gboolean
decode_param (const char **in, char **paramp, char **valuep, int *id, gboolean *encoded)
{
	const char *cur = *in;
	char *name, *value = NULL;
	gboolean extended;
	char *fixed;
	
	extended = decode_rfc2184_param (&cur, &name, id, encoded);
	
	if (*cur == '=') {
		cur++;
		value = decode_value (&cur);
	}
	
	if (value != NULL && !extended) {
		if (strstr (value, "=?") != NULL) {
			/* Looks like an rfc2047 encoded-word inside a param
			 * value. That is invalid, but Outlook and
			 * Netscape/Mozilla both emit it, so decode it. */
			fixed = g_mime_utils_header_decode_text (value);
			if (fixed != NULL) {
				g_free (value);
				value = fixed;
			}
		}
		
		if (!g_utf8_validate (value, -1, NULL)) {
			/* Raw 8bit text from a broken mailer: assume it is in
			 * the user's locale charset and convert to UTF-8. */
			fixed = g_mime_iconv_locale_to_utf8 (value);
			if (fixed != NULL) {
				g_free (value);
				value = fixed;
			} else {
				d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s",
					     name, value, g_strerror (errno)));
			}
		}
	}
	
	if (name == NULL || value == NULL) {
		g_free (name);
		g_free (value);
		return FALSE;
	}
	
	*paramp = name;
	*valuep = value;
	*in = cur;
	
	return TRUE;
}


/* One segment of a multi-part (continued) rfc2184 parameter value. */
struct _rfc2184_part {
	char *value;   /* segment text, already %XX-decoded if it was encoded; heap-allocated */
	int id;        /* continuation index (the N in name*N); used to order the segments */
};

/* Collects the segments of one multi-part rfc2184 parameter while a
 * parameter list is being parsed. */
struct _rfc2184_param {
	struct _rfc2184_param *next;   /* next collector in the parse-time list */
	const char *charset;           /* charset name from rfc2184_param_charset(), or NULL; never freed via this struct */
	GMimeParam *param;             /* result node; its value is filled in once all segments are joined */
	GPtrArray *parts;              /* array of struct _rfc2184_part pointers */
	char *lang;                    /* language tag, or NULL; freed by decode_param_list() */
};

static int
rfc2184_sort_cb (const void *v0, const void *v1)
{
	const struct _rfc2184_part *p0 = *((struct _rfc2184_part **) v0);
	const struct _rfc2184_part *p1 = *((struct _rfc2184_part **) v1);
	
	return p0->id - p1->id;
}

/* Numeric value of a hexadecimal digit character (caller checks isxdigit). */
#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)

/* Decodes %XX escapes in the first @len bytes of @in into @out.
 *
 * A '%' that is not followed by two hex digits inside the @len-byte
 * window is copied through literally; bytes at or beyond in[len] are
 * never examined. @out must have room for @len + 1 bytes (the output is
 * never longer than the input) and is always nul-terminated.
 *
 * Returns the number of bytes written, excluding the terminating nul.
 * The output may contain embedded nul bytes (from "%00"). */
static size_t
hex_decode (const char *in, size_t len, char *out)
{
	const unsigned char *inptr = (const unsigned char *) in;
	unsigned char *outptr = (unsigned char *) out;
	const unsigned char *inend = inptr + len;
	
	while (inptr < inend) {
		/* need all three bytes of "%XX" within [inptr, inend) */
		if (*inptr == '%' && (inend - inptr) >= 3 &&
		    isxdigit (inptr[1]) && isxdigit (inptr[2])) {
			*outptr++ = (unsigned char) (HEXVAL (inptr[1]) * 16 + HEXVAL (inptr[2]));
			inptr += 3;
		} else {
			*outptr++ = *inptr++;
		}
	}
	
	*outptr = '\0';
	
	return (size_t) (((char *) outptr) - out);
}

static const char *
rfc2184_param_charset (const char **in, char **langp)
{
	const char *lang, *inptr = *in;
	char *charset;
	size_t len;
	
	if (langp)
		*langp = NULL;
	
	while (*inptr != '\0' && *inptr != '\'')
		inptr++;
	
	if (*inptr != '\'')
		return NULL;
	
	len = inptr - *in;
	charset = g_alloca (len + 1);
	memcpy (charset, *in, len);
	charset[len] = '\0';
	
	lang = ++inptr;
	while (*inptr != '\0' && *inptr != '\'')
		inptr++;
	
	if (*inptr == '\'') {
		if (langp)
			*langp = g_strndup (lang, inptr - lang);
		
		inptr++;
	}
	
	*in = inptr;
	
	return g_mime_charset_canon_name (charset);
}

/* Converts the @inlen bytes at @in from @charset to UTF-8.
 *
 * Takes ownership of @in: the return value is either @in itself (no
 * conversion needed, or conversion not possible) or a newly allocated
 * string, in which case @in has been freed.
 *
 * A NULL, "UTF-8" or "us-ascii" charset means the data is expected to
 * be valid UTF-8 already; if it is not, the locale charset is assumed.
 * The locale charset is also the fallback when no converter can be
 * opened for @charset. */
static char *
charset_convert (const char *charset, char *in, size_t inlen)
{
	gboolean using_locale = FALSE;
	char *converted;
	iconv_t cd;
	
	if (charset == NULL ||
	    g_ascii_strcasecmp (charset, "UTF-8") == 0 ||
	    g_ascii_strcasecmp (charset, "us-ascii") == 0) {
		/* nothing to convert as long as the bytes really are UTF-8 */
		if (g_utf8_validate (in, inlen, NULL))
			return in;
		
		using_locale = TRUE;
		charset = g_mime_locale_charset ();
	}
	
	cd = g_mime_iconv_open ("UTF-8", charset);
	if (!using_locale && cd == (iconv_t) -1) {
		/* unknown charset: retry with the locale charset */
		charset = g_mime_locale_charset ();
		cd = g_mime_iconv_open ("UTF-8", charset);
	}
	
	if (cd == (iconv_t) -1)
		return in;
	
	converted = g_mime_iconv_strndup (cd, in, inlen);
	g_mime_iconv_close (cd);
	
	if (converted == NULL)
		return in;
	
	g_free (in);
	
	return converted;
}

/* Decodes a single (non-continued) rfc2184 encoded parameter value of
 * the form charset'language'%XX-escaped-text and converts it to UTF-8.
 *
 * Returns a newly allocated string that the caller must free. */
static char *
rfc2184_decode (const char *value)
{
	const char *inptr = value;
	const char *charset;
	char *decoded;
	size_t len;
	
	charset = rfc2184_param_charset (&inptr, NULL);
	
	/* Decode straight into a heap buffer of the full size. The decoded
	 * length must describe the buffer that is handed on: a "%00" escape
	 * yields an embedded nul, so a strdup-style copy would be shorter
	 * than len and charset_convert() would read past its end. The
	 * input length is message-controlled, so no stack buffer either. */
	len = strlen (inptr);
	decoded = g_malloc (len + 1);
	len = hex_decode (inptr, len, decoded);
	
	/* charset_convert() takes ownership of decoded */
	return charset_convert (charset, decoded, len);
}

/* Appends segment @id with text @value to the multi-part parameter
 * @rfc2184. Takes ownership of @value: when @encoded is set it is
 * %XX-decoded into a fresh buffer and freed, otherwise it is stored
 * as is. */
static void
rfc2184_param_add_part (struct _rfc2184_param *rfc2184, char *value, int id, gboolean encoded)
{
	struct _rfc2184_part *piece = g_new (struct _rfc2184_part, 1);
	
	piece->id = id;
	
	if (!encoded) {
		piece->value = value;
	} else {
		size_t n = strlen (value);
		
		/* decoding never lengthens the text, so n + 1 bytes suffice */
		piece->value = g_malloc (n + 1);
		hex_decode (value, n, piece->value);
		g_free (value);
	}
	
	g_ptr_array_add (rfc2184->parts, piece);
}

/* Creates the collector for a multi-part rfc2184 parameter from the
 * first segment seen for it.
 *
 * Takes ownership of @name (it becomes the name of the result
 * GMimeParam) and of @value. When the segment is encoded, a leading
 * charset'language' prefix is split off and remembered; the remainder
 * becomes segment @id. The result param's value stays NULL until the
 * segments are joined. */
static struct _rfc2184_param *
rfc2184_param_new (char *name, char *value, int id, gboolean encoded)
{
	struct _rfc2184_param *rfc2184;
	const char *inptr = value;
	
	rfc2184 = g_new (struct _rfc2184_param, 1);
	rfc2184->parts = g_ptr_array_new ();
	rfc2184->next = NULL;
	
	if (encoded) {
		rfc2184->charset = rfc2184_param_charset (&inptr, &rfc2184->lang);
	} else {
		/* Only extended (encoded) values carry a charset'lang'
		 * prefix. A plain segment such as name*0="it's here" must
		 * keep its apostrophes instead of having them parsed as
		 * charset delimiters, which would discard part of the text. */
		rfc2184->charset = NULL;
		rfc2184->lang = NULL;
	}
	
	if (inptr == value) {
		rfc2184_param_add_part (rfc2184, value, id, encoded);
	} else {
		/* a prefix was stripped: keep only the remainder */
		rfc2184_param_add_part (rfc2184, g_strdup (inptr), id, encoded);
		g_free (value);
	}
	
	rfc2184->param = g_new (GMimeParam, 1);
	rfc2184->param->next = NULL;
	rfc2184->param->name = name;
	rfc2184->param->value = NULL;
	
	return rfc2184;
}

/* Parses a ';'-separated parameter list into a linked list of GMimeParam
 * nodes, in the order in which parameter names first appear.
 *
 * Ordinary parameters and encoded singletons (name*=...) are finished
 * immediately. Segments of multi-part rfc2184 parameters (name*N=... /
 * name*N*=...) are collected per name while parsing; once the whole
 * input has been read, each collection is sorted by segment id, joined
 * and converted to UTF-8.
 *
 * Parsing stops at the first parameter that cannot be decoded, unless
 * the next character is a ';' (an empty parameter), which is skipped.
 *
 * Returns the head of the list, or NULL if no parameter was decoded. */
static GMimeParam *
decode_param_list (const char *in)
{
	struct _rfc2184_param *rfc2184, *list, *t;
	GMimeParam *param, *params, *tail;
	struct _rfc2184_part *part;
	GHashTable *rfc2184_hash;
	const char *inptr = in;
	char *name, *value;
	gboolean encoded;
	GString *gvalue;
	int id, i;
	
	/* The address of each list head is used as a fake "last node", so
	 * that tail->next / t->next assign to the head itself on the first
	 * append. This works for struct _rfc2184_param because `next' is its
	 * first member; NOTE(review): presumably the same holds for
	 * GMimeParam - confirm in gmime-param.h. */
	params = NULL;
	tail = (GMimeParam *) &params;
	
	list = NULL;
	t = (struct _rfc2184_param *) &list;
	/* maps a parameter name to its rfc2184 collector; the key is the
	 * name string owned by the collector's GMimeParam */
	rfc2184_hash = g_hash_table_new (g_mime_strcase_hash, g_mime_strcase_equal);
	
	decode_lwsp (&inptr);
	
	do {
		/* invalid format? */
		if (!decode_param (&inptr, &name, &value, &id, &encoded)) {
			decode_lwsp (&inptr);
			
			/* `continue' jumps to the loop condition below, which
			 * consumes the ';' and moves on to the next parameter */
			if (*inptr == ';')
				continue;
			
			break;
		}
		
		if (id != -1) {
			/* we have a multipart rfc2184 param */
			if (!(rfc2184 = g_hash_table_lookup (rfc2184_hash, name))) {
				/* first segment for this name: create the collector
				 * (it takes ownership of name and value) and reserve
				 * the parameter's position in the result list now */
				rfc2184 = rfc2184_param_new (name, value, id, encoded);
				param = rfc2184->param;
				t->next = rfc2184;
				t = rfc2184;
				
				g_hash_table_insert (rfc2184_hash, param->name, rfc2184);
				
				tail->next = param;
				tail = param;
			} else {
				/* further segment: value is handed over, the
				 * duplicate name is no longer needed */
				rfc2184_param_add_part (rfc2184, value, id, encoded);
				g_free (name);
			}
		} else {
			param = g_new (GMimeParam, 1);
			param->next = NULL;
			param->name = name;
			
			if (encoded) {
				/* singleton encoded rfc2184 param value */
				param->value = rfc2184_decode (value);
				g_free (value);
			} else {
				/* normal parameter value */
				param->value = value;
			}
			
			tail->next = param;
			tail = param;
		}
		
		decode_lwsp (&inptr);
	} while (*inptr++ == ';');
	
	/* the table owns neither keys nor values; the collectors are
	 * released one by one below */
	g_hash_table_destroy (rfc2184_hash);
	
	/* second pass: assemble the value of every multi-part parameter */
	rfc2184 = list;
	while (rfc2184 != NULL) {
		t = rfc2184->next;
		
		param = rfc2184->param;
		gvalue = g_string_new ("");
		
		/* segments may have arrived in any order */
		g_ptr_array_sort (rfc2184->parts, rfc2184_sort_cb);
		for (i = 0; i < rfc2184->parts->len; i++) {
			part = rfc2184->parts->pdata[i];
			g_string_append (gvalue, part->value);
			g_free (part->value);
			g_free (part);
		}
		
		g_ptr_array_free (rfc2184->parts, TRUE);
		
		/* charset_convert() takes ownership of gvalue->str (it either
		 * returns it or frees it), so only the GString wrapper is
		 * released afterwards */
		param->value = charset_convert (rfc2184->charset, gvalue->str, gvalue->len);
		g_string_free (gvalue, FALSE);
		
		g_free (rfc2184->lang);
		g_free (rfc2184);
		rfc2184 = t;
	}
	
	return params;
}


/**
 * g_mime_param_new_from_string: Create a new MIME Param object
 * @string: input string
 *
 * Creates a parameter list based on the input string by parsing it as
 * a ';'-separated list of name=value parameters.
 *
 * Returns a #GMimeParam structure based on @string, or %NULL if
 * @string is %NULL or no parameter could be decoded from it.
 **/
GMimeParam *
g_mime_param_new_from_string (const char *string)
{
	g_return_val_if_fail (string != NULL, NULL);
	
	return decode_param_list (string);
}


/**
 * g_mime_param_destroy: Destroy the MIME Param
 * @param: Mime param list to destroy
 *
 * Frees every node of the list starting at @param, together with each
 * node's name and value strings. Passing %NULL is a no-op.
 **/
void
g_mime_param_destroy (GMimeParam *param)
{
	GMimeParam *node, *following;
	
	for (node = param; node != NULL; node = following) {
		/* remember the successor before the node goes away */
		following = node->next;
		
		g_free (node->name);
		g_free (node->value);
		g_free (node);
	}
}


/**
 * g_mime_param_append:
 * @params: param list
 * @name: new param name
 * @value: new param value
 *
 * Creates a new parameter from copies of @name and @value and links it
 * to the end of the list @params.
 *
 * Returns the head of the list with the new param appended; this is the
 * new param itself when @params is %NULL. If @name or @value is %NULL,
 * @params is returned unchanged.
 **/
GMimeParam *
g_mime_param_append (GMimeParam *params, const char *name, const char *value)
{
	GMimeParam *node, *last;
	
	g_return_val_if_fail (name != NULL, params);
	g_return_val_if_fail (value != NULL, params);
	
	node = g_mime_param_new (name, value);
	
	if (params == NULL)
		return node;
	
	/* walk to the final node and hook the new one on */
	for (last = params; last->next != NULL; last = last->next)
		;
	
	last->next = node;
	
	return params;
}


/**
 * g_mime_param_append_param:
 * @params: param list
 * @param: param to append
 *
 * Links @param (not a copy of it) to the end of the list @params.
 *
 * Returns the head of the list with @param appended; this is @param
 * itself when @params is %NULL. If @param is %NULL, @params is returned
 * unchanged.
 **/
GMimeParam *
g_mime_param_append_param (GMimeParam *params, GMimeParam *param)
{
	GMimeParam *last;
	
	g_return_val_if_fail (param != NULL, params);
	
	if (params == NULL)
		return param;
	
	for (last = params; last->next != NULL; last = last->next)
		;
	
	last->next = param;
	
	return params;
}

/* FIXME (from the original author): written quick & dirty - it may not
 * be 100% correct.
 *
 * Produces the on-the-wire form of the parameter value @in.
 *
 * A value that is pure 7bit and shorter than GMIME_FOLD_LEN is returned
 * as a plain copy with *encoded set to FALSE. Anything else is returned
 * in rfc2184 extended form, charset''%XX-escaped-text, with *encoded
 * set to TRUE. For 8bit text the charset is whatever
 * g_mime_charset_best() suggests, otherwise iso-8859-1; if the text
 * cannot be converted to that charset it is emitted as UTF-8.
 *
 * Returns a newly allocated string. */
static char *
encode_param (const char *in, gboolean *encoded)
{
	const unsigned char *instart = (const unsigned char *) in;
	const unsigned char *inptr = instart;
	const char *charset = NULL;
	iconv_t cd = (iconv_t) -1;
	char *converted = NULL;
	unsigned char c;
	GString *out;
	
	*encoded = FALSE;
	
	/* stop at the first 8bit byte, or once the value is too long to
	 * go out unencoded */
	while (*inptr != '\0' && (inptr - instart) < GMIME_FOLD_LEN && *inptr <= 127)
		inptr++;
	
	if (*inptr == '\0')
		return g_strdup (in);
	
	if (*inptr > 127)
		charset = g_mime_charset_best (in, strlen (in));
	
	if (charset == NULL)
		charset = "iso-8859-1";
	
	/* the input is UTF-8 already, so only other targets need iconv */
	if (g_ascii_strcasecmp (charset, "UTF-8") != 0)
		cd = g_mime_iconv_open (charset, "UTF-8");
	
	if (cd != (iconv_t) -1) {
		converted = g_mime_iconv_strdup (cd, in);
		g_mime_iconv_close (cd);
	}
	
	if (converted != NULL) {
		inptr = (const unsigned char *) converted;
	} else {
		/* no converter or conversion failed: send the UTF-8 as is */
		charset = "UTF-8";
		inptr = instart;
	}
	
	/* FIXME: set the 'language' as well, assuming we can get that info...? */
	out = g_string_new ("");
	g_string_append_printf (out, "%s''", charset);
	
	for (c = *inptr; c != '\0'; c = *++inptr) {
		if (is_attrchar (c))
			g_string_append_c (out, c);
		else
			g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
	}
	
	g_free (converted);
	
	*encoded = TRUE;
	
	/* hand the character data to the caller, drop only the wrapper */
	return g_string_free (out, FALSE);
}

/* Appends the first @len bytes of @in to @out as a quoted-string: the
 * text is wrapped in double quotes and every '"' and '\' inside it is
 * preceded by a backslash. */
static void
g_string_append_len_quoted (GString *out, const char *in, size_t len)
{
	size_t i;
	
	g_string_append_c (out, '"');
	
	for (i = 0; i < len; i++) {
		char ch = in[i];
		
		if (ch == '"' || ch == '\\')
			g_string_append_c (out, '\\');
		
		g_string_append_c (out, ch);
	}
	
	g_string_append_c (out, '"');
}

/* Appends the parameter list @param to @out, each parameter introduced
 * by ';'.
 *
 * Parameters whose value is NULL are skipped. Each value is passed
 * through encode_param(); unencoded values containing characters that
 * are not attribute characters (or that are whitespace) are written as
 * quoted-strings. When @fold is TRUE, a parameter that would push the
 * current line past the fold limit starts on a new, tab-indented line,
 * and a newline is appended after the last parameter. A parameter too
 * long for one line by itself is split into rfc2184 continuations
 * (name*0=..., name*1=..., with a trailing '*' when encoded). */
static void
param_list_format (GString *out, GMimeParam *param, gboolean fold)
{
	/* number of characters already on the current output line */
	int used = out->len;
	
	while (param) {
		gboolean encoded = FALSE;
		unsigned nlen, vlen;
		int here = out->len;
		int quote = 0;
		char *value;
		
		if (!param->value) {
			param = param->next;
			continue;
		}
		
		if (!(value = encode_param (param->value, &encoded))) {
			w(g_warning ("appending parameter %s=%s violates rfc2184",
				     param->name, param->value));
			value = g_strdup (param->value);
		}
		
		if (!encoded) {
			char *ch;
			
			/* count the characters that force quoting; non-zero
			 * means the value must go out as a quoted-string */
			for (ch = value; *ch; ch++) {
				if (!is_attrchar (*ch) || is_lwsp (*ch))
					quote++;
			}
		}
		
		nlen = strlen (param->name);
		vlen = strlen (value);
		
		if (fold && (used + nlen + vlen + quote > GMIME_FOLD_LEN - 2)) {
			g_string_append (out, ";\n\t");
			here = out->len;
			used = 1;
		} else {
			g_string_append (out, "; ");
			here = out->len;
			used += 2;
		}
		
		if (nlen + vlen + quote > GMIME_FOLD_LEN - 2) {
			/* we need to do special rfc2184 parameter wrapping */
			int maxlen = (nlen < (unsigned) GMIME_FOLD_LEN) ?
				GMIME_FOLD_LEN - (int) (nlen + 6) : 0;
			char *inptr, *inend;
			int i = 0;
			
			/* A very long parameter name leaves no room (or less
			 * than none) for value text. Every segment must still
			 * consume input, otherwise the loop below never ends or
			 * hands a negative length to g_string_append_len(). Three
			 * characters is the smallest size that also guarantees
			 * progress after backing off from a split %XX escape. */
			if (maxlen < 3)
				maxlen = 3;
			
			inptr = value;
			inend = value + vlen;
			
			while (inptr < inend) {
				char *ptr = inptr + MIN (inend - inptr, maxlen);
				
				if (encoded && ptr < inend) {
					/* be careful not to break an encoded char (ie %20) */
					char *q = ptr;
					int j = 2;
					
					for ( ; j > 0 && q > inptr && *q != '%'; j--, q--);
					if (*q == '%')
						ptr = q;
				}
				
				if (i != 0) {
					if (fold)
						g_string_append (out, ";\n\t");
					else
						g_string_append (out, "; ");
					
					here = out->len;
					used = 1;
				}
				
				g_string_append_printf (out, "%s*%d%s=", param->name,
							i++, encoded ? "*" : "");
				
				if (encoded || !quote)
					g_string_append_len (out, inptr, ptr - inptr);
				else
					g_string_append_len_quoted (out, inptr, ptr - inptr);
				
				used += (out->len - here);
				
				inptr = ptr;
			}
		} else {
			g_string_append_printf (out, "%s%s=", param->name, encoded ? "*" : "");
			
			if (encoded || !quote)
				g_string_append_len (out, value, vlen);
			else
				g_string_append_len_quoted (out, value, vlen);
			
			used += (out->len - here);
		}
		
		g_free (value);
		
		param = param->next;
	}
	
	if (fold)
		g_string_append_c (out, '\n');
}


/**
 * g_mime_param_write_to_string:
 * @param: MIME Param list
 * @fold: specifies whether or not to fold headers
 * @string: output string
 *
 * Assumes the output string contains only the Content-* header and
 * its immediate value.
 *
 * Writes the params out to the string @string. Does nothing if
 * @string is %NULL.
 **/
void
g_mime_param_write_to_string (GMimeParam *param, gboolean fold, GString *string)
{
	g_return_if_fail (string != NULL);
	
	param_list_format (string, param, fold);
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */