/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* GMime
* Copyright (C) 2000-2007 Jeffrey Stedfast
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <limits.h>
#include <ctype.h>
#include <errno.h>
#include "gmime-param.h"
#include "gmime-common.h"
#include "gmime-table-private.h"
#include "gmime-parse-utils.h"
#include "gmime-iconv-utils.h"
#include "gmime-charset.h"
#include "gmime-utils.h"
#include "gmime-iconv.h"
#ifdef ENABLE_WARNINGS
#define w(x) x
#else
#define w(x)
#endif /* ENABLE_WARNINGS */
#define d(x)
static unsigned char tohex[16] = {
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
/**
* g_mime_param_new: Create a new MIME Param object
* @name: parameter name
* @value: parameter value
*
* Creates a new GMimeParam node with name @name and value @value.
*
* Returns a new paramter structure.
**/
GMimeParam *
g_mime_param_new (const char *name, const char *value)
{
GMimeParam *param;
param = g_new (GMimeParam, 1);
param->next = NULL;
param->name = g_strdup (name);
param->value = g_strdup (value);
return param;
}
#define INT_OVERFLOW(x,d) (((x) > (INT_MAX / 10)) || ((x) == (INT_MAX / 10) && (d) > (INT_MAX % 10)))
static int
decode_int (const char **in)
{
const unsigned char *inptr;
int digit, n = 0;
decode_lwsp (in);
inptr = (const unsigned char *) *in;
while (isdigit ((int) *inptr)) {
digit = (*inptr - '0');
if (INT_OVERFLOW (n, digit)) {
while (isdigit ((int) *inptr))
inptr++;
break;
}
n = (n * 10) + digit;
inptr++;
}
*in = (const char *) inptr;
return n;
}
static char *
decode_quoted_string (const char **in)
{
const char *start, *inptr = *in;
char *outptr, *out = NULL;
gboolean unescape = FALSE;
decode_lwsp (&inptr);
if (*inptr != '"') {
*in = inptr;
return NULL;
}
start = inptr++;
while (*inptr && *inptr != '"') {
if (*inptr++ == '\\') {
unescape = TRUE;
inptr++;
}
}
if (*inptr == '"') {
start++;
out = g_strndup (start, (unsigned int) (inptr - start));
inptr++;
} else {
/* string wasn't properly quoted */
out = g_strndup (start, (unsigned int) (inptr - start));
}
*in = inptr;
if (unescape) {
inptr = outptr = out;
while (*inptr) {
if (*inptr == '\\')
inptr++;
*outptr++ = *inptr++;
}
*outptr = '\0';
}
return out;
}
static char *
decode_token (const char **in)
{
const char *inptr = *in;
const char *start;
decode_lwsp (&inptr);
start = inptr;
while (is_ttoken (*inptr))
inptr++;
if (inptr > start) {
*in = inptr;
return g_strndup (start, (unsigned int) (inptr - start));
} else {
return NULL;
}
}
static char *
decode_value (const char **in)
{
const char *inptr = *in;
decode_lwsp (&inptr);
if (*inptr == '"') {
return decode_quoted_string (in);
} else if (is_ttoken (*inptr)) {
return decode_token (in);
}
return NULL;
}
/* This function is basically the same as decode_token()
* except that it will not accept *'s which have a special
* meaning for rfc2184 params */
static char *
decode_param_token (const char **in)
{
const char *inptr = *in;
const char *start;
decode_lwsp (&inptr);
start = inptr;
while (is_ttoken (*inptr) && *inptr != '*')
inptr++;
if (inptr > start) {
*in = inptr;
return g_strndup (start, (unsigned int) (inptr - start));
} else {
return NULL;
}
}
static gboolean
decode_rfc2184_param (const char **in, char **paramp, int *part, gboolean *encoded)
{
gboolean is_rfc2184 = FALSE;
const char *inptr = *in;
char *param;
*encoded = FALSE;
*part = -1;
param = decode_param_token (&inptr);
decode_lwsp (&inptr);
if (*inptr == '*') {
is_rfc2184 = TRUE;
inptr++;
decode_lwsp (&inptr);
if (*inptr == '=') {
/* form := param*=value */
*encoded = TRUE;
} else {
/* form := param*#=value or param*#*=value */
*part = decode_int (&inptr);
decode_lwsp (&inptr);
if (*inptr == '*') {
/* form := param*#*=value */
inptr++;
*encoded = TRUE;
decode_lwsp (&inptr);
}
}
}
if (paramp)
*paramp = param;
if (param)
*in = inptr;
return is_rfc2184;
}
static gboolean
decode_param (const char **in, char **paramp, char **valuep, int *id, gboolean *encoded)
{
gboolean is_rfc2184 = FALSE;
const char *inptr = *in;
char *param, *value = NULL;
char *val;
is_rfc2184 = decode_rfc2184_param (&inptr, ¶m, id, encoded);
if (*inptr == '=') {
inptr++;
value = decode_value (&inptr);
if (!is_rfc2184 && value) {
if (strstr (value, "=?") != NULL) {
/* We (may) have a broken param value that is rfc2047
* encoded. Since both Outlook and Netscape/Mozilla do
* this, we should handle this case.
*/
if ((val = g_mime_utils_header_decode_text (value))) {
g_free (value);
value = val;
}
}
if (!g_utf8_validate (value, -1, NULL)) {
/* A (broken) mailer has sent us an unencoded 8bit value.
* Attempt to save it by assuming it's in the user's
* locale and converting to UTF-8 */
if ((val = g_mime_iconv_locale_to_utf8 (value))) {
g_free (value);
value = val;
} else {
d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s",
param, value, g_strerror (errno)));
}
}
}
}
if (param && value) {
*paramp = param;
*valuep = value;
*in = inptr;
return TRUE;
} else {
g_free (param);
g_free (value);
return FALSE;
}
}
struct _rfc2184_part {
char *value;
int id;
};
struct _rfc2184_param {
struct _rfc2184_param *next;
const char *charset;
GMimeParam *param;
GPtrArray *parts;
char *lang;
};
static int
rfc2184_sort_cb (const void *v0, const void *v1)
{
const struct _rfc2184_part *p0 = *((struct _rfc2184_part **) v0);
const struct _rfc2184_part *p1 = *((struct _rfc2184_part **) v1);
return p0->id - p1->id;
}
#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)
static size_t
hex_decode (const char *in, size_t len, char *out)
{
register const unsigned char *inptr = (const unsigned char *) in;
register unsigned char *outptr = (unsigned char *) out;
const unsigned char *inend = inptr + len;
while (inptr < inend) {
if (*inptr == '%') {
if (isxdigit (inptr[1]) && isxdigit (inptr[2])) {
*outptr++ = HEXVAL (inptr[1]) * 16 + HEXVAL (inptr[2]);
inptr += 3;
} else
*outptr++ = *inptr++;
} else
*outptr++ = *inptr++;
}
*outptr = '\0';
return ((char *) outptr) - out;
}
static const char *
rfc2184_param_charset (const char **in, char **langp)
{
const char *lang, *inptr = *in;
char *charset;
size_t len;
if (langp)
*langp = NULL;
while (*inptr != '\0' && *inptr != '\'')
inptr++;
if (*inptr != '\'')
return NULL;
len = inptr - *in;
charset = g_alloca (len + 1);
memcpy (charset, *in, len);
charset[len] = '\0';
lang = ++inptr;
while (*inptr != '\0' && *inptr != '\'')
inptr++;
if (*inptr == '\'') {
if (langp)
*langp = g_strndup (lang, inptr - lang);
inptr++;
}
*in = inptr;
return g_mime_charset_canon_name (charset);
}
static char *
charset_convert (const char *charset, char *in, size_t inlen)
{
gboolean locale = FALSE;
char *result = NULL;
iconv_t cd;
if (!charset || !g_ascii_strcasecmp (charset, "UTF-8") || !g_ascii_strcasecmp (charset, "us-ascii")) {
/* we shouldn't need any charset conversion here... */
if (g_utf8_validate (in, inlen, NULL))
return in;
charset = g_mime_locale_charset ();
locale = TRUE;
}
/* need charset conversion */
cd = g_mime_iconv_open ("UTF-8", charset);
if (cd == (iconv_t) -1 && !locale) {
charset = g_mime_locale_charset ();
cd = g_mime_iconv_open ("UTF-8", charset);
}
if (cd != (iconv_t) -1) {
result = g_mime_iconv_strndup (cd, in, inlen);
g_mime_iconv_close (cd);
}
if (result == NULL)
result = in;
else
g_free (in);
return result;
}
static char *
rfc2184_decode (const char *value)
{
const char *inptr = value;
const char *charset;
char *decoded;
size_t len;
charset = rfc2184_param_charset (&inptr, NULL);
len = strlen (inptr);
decoded = g_alloca (len + 1);
len = hex_decode (inptr, len, decoded);
return charset_convert (charset, g_strdup (decoded), len);
}
static void
rfc2184_param_add_part (struct _rfc2184_param *rfc2184, char *value, int id, gboolean encoded)
{
struct _rfc2184_part *part;
size_t len;
part = g_new (struct _rfc2184_part, 1);
g_ptr_array_add (rfc2184->parts, part);
part->id = id;
if (encoded) {
len = strlen (value);
part->value = g_malloc (len + 1);
hex_decode (value, len, part->value);
g_free (value);
} else {
part->value = value;
}
}
static struct _rfc2184_param *
rfc2184_param_new (char *name, char *value, int id, gboolean encoded)
{
struct _rfc2184_param *rfc2184;
const char *inptr = value;
rfc2184 = g_new (struct _rfc2184_param, 1);
rfc2184->parts = g_ptr_array_new ();
rfc2184->next = NULL;
rfc2184->charset = rfc2184_param_charset (&inptr, &rfc2184->lang);
if (inptr == value) {
rfc2184_param_add_part (rfc2184, value, id, encoded);
} else {
rfc2184_param_add_part (rfc2184, g_strdup (inptr), id, encoded);
g_free (value);
}
rfc2184->param = g_new (GMimeParam, 1);
rfc2184->param->next = NULL;
rfc2184->param->name = name;
rfc2184->param->value = NULL;
return rfc2184;
}
static GMimeParam *
decode_param_list (const char *in)
{
struct _rfc2184_param *rfc2184, *list, *t;
GMimeParam *param, *params, *tail;
struct _rfc2184_part *part;
GHashTable *rfc2184_hash;
const char *inptr = in;
char *name, *value;
gboolean encoded;
GString *gvalue;
int id, i;
params = NULL;
tail = (GMimeParam *) ¶ms;
list = NULL;
t = (struct _rfc2184_param *) &list;
rfc2184_hash = g_hash_table_new (g_mime_strcase_hash, g_mime_strcase_equal);
decode_lwsp (&inptr);
do {
/* invalid format? */
if (!decode_param (&inptr, &name, &value, &id, &encoded)) {
decode_lwsp (&inptr);
if (*inptr == ';')
continue;
break;
}
if (id != -1) {
/* we have a multipart rfc2184 param */
if (!(rfc2184 = g_hash_table_lookup (rfc2184_hash, name))) {
rfc2184 = rfc2184_param_new (name, value, id, encoded);
param = rfc2184->param;
t->next = rfc2184;
t = rfc2184;
g_hash_table_insert (rfc2184_hash, param->name, rfc2184);
tail->next = param;
tail = param;
} else {
rfc2184_param_add_part (rfc2184, value, id, encoded);
g_free (name);
}
} else {
param = g_new (GMimeParam, 1);
param->next = NULL;
param->name = name;
if (encoded) {
/* singleton encoded rfc2184 param value */
param->value = rfc2184_decode (value);
g_free (value);
} else {
/* normal parameter value */
param->value = value;
}
tail->next = param;
tail = param;
}
decode_lwsp (&inptr);
} while (*inptr++ == ';');
g_hash_table_destroy (rfc2184_hash);
rfc2184 = list;
while (rfc2184 != NULL) {
t = rfc2184->next;
param = rfc2184->param;
gvalue = g_string_new ("");
g_ptr_array_sort (rfc2184->parts, rfc2184_sort_cb);
for (i = 0; i < rfc2184->parts->len; i++) {
part = rfc2184->parts->pdata[i];
g_string_append (gvalue, part->value);
g_free (part->value);
g_free (part);
}
g_ptr_array_free (rfc2184->parts, TRUE);
param->value = charset_convert (rfc2184->charset, gvalue->str, gvalue->len);
g_string_free (gvalue, FALSE);
g_free (rfc2184->lang);
g_free (rfc2184);
rfc2184 = t;
}
return params;
}
/**
* g_mime_param_new_from_string: Create a new MIME Param object
* @string: input string
*
* Creates a parameter list based on the input string.
*
* Returns a #GMimeParam structure based on @string.
**/
GMimeParam *
g_mime_param_new_from_string (const char *string)
{
g_return_val_if_fail (string != NULL, NULL);
return decode_param_list (string);
}
/**
* g_mime_param_destroy: Destroy the MIME Param
* @param: Mime param list to destroy
*
* Releases all memory used by this mime param back to the Operating
* System.
**/
void
g_mime_param_destroy (GMimeParam *param)
{
GMimeParam *next;
while (param) {
next = param->next;
g_free (param->name);
g_free (param->value);
g_free (param);
param = next;
}
}
/**
* g_mime_param_append:
* @params: param list
* @name: new param name
* @value: new param value
*
* Appends a new parameter with name @name and value @value to the
* parameter list @params.
*
* Returns a param list with the new param of name @name and value
* @value appended to the list of params @params.
**/
GMimeParam *
g_mime_param_append (GMimeParam *params, const char *name, const char *value)
{
GMimeParam *param, *p;
g_return_val_if_fail (name != NULL, params);
g_return_val_if_fail (value != NULL, params);
param = g_mime_param_new (name, value);
if (params) {
p = params;
while (p->next)
p = p->next;
p->next = param;
} else
params = param;
return params;
}
/**
* g_mime_param_append_param:
* @params: param list
* @param: param to append
*
* Appends @param to the param list @params.
*
* Returns a param list with the new param @param appended to the list
* of params @params.
**/
GMimeParam *
g_mime_param_append_param (GMimeParam *params, GMimeParam *param)
{
GMimeParam *p;
g_return_val_if_fail (param != NULL, params);
if (params) {
p = params;
while (p->next)
p = p->next;
p->next = param;
} else
params = param;
return params;
}
/* FIXME: I wrote this in a quick & dirty fasion - it may not be 100% correct */
static char *
encode_param (const char *in, gboolean *encoded)
{
register const unsigned char *inptr = (const unsigned char *) in;
const unsigned char *instart = inptr;
iconv_t cd = (iconv_t) -1;
const char *charset = NULL;
char *outbuf = NULL;
unsigned char c;
char *outstr;
GString *out;
*encoded = FALSE;
while (*inptr && ((inptr - instart) < GMIME_FOLD_LEN)) {
if (*inptr > 127)
break;
inptr++;
}
if (*inptr == '\0')
return g_strdup (in);
if (*inptr > 127)
charset = g_mime_charset_best (in, strlen (in));
if (!charset)
charset = "iso-8859-1";
if (g_ascii_strcasecmp (charset, "UTF-8") != 0)
cd = g_mime_iconv_open (charset, "UTF-8");
if (cd != (iconv_t) -1) {
outbuf = g_mime_iconv_strdup (cd, in);
g_mime_iconv_close (cd);
if (outbuf == NULL) {
charset = "UTF-8";
inptr = instart;
} else {
inptr = (const unsigned char *) outbuf;
}
} else {
charset = "UTF-8";
inptr = instart;
}
/* FIXME: set the 'language' as well, assuming we can get that info...? */
out = g_string_new ("");
g_string_append_printf (out, "%s''", charset);
while ((c = *inptr++)) {
if (!is_attrchar (c))
g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
else
g_string_append_c (out, c);
}
g_free (outbuf);
outstr = out->str;
g_string_free (out, FALSE);
*encoded = TRUE;
return outstr;
}
static void
g_string_append_len_quoted (GString *out, const char *in, size_t len)
{
register const char *inptr;
const char *inend;
g_string_append_c (out, '"');
inptr = in;
inend = in + len;
while (inptr < inend) {
if ((*inptr == '"') || *inptr == '\\')
g_string_append_c (out, '\\');
g_string_append_c (out, *inptr);
inptr++;
}
g_string_append_c (out, '"');
}
static void
param_list_format (GString *out, GMimeParam *param, gboolean fold)
{
int used = out->len;
while (param) {
gboolean encoded = FALSE;
unsigned nlen, vlen;
int here = out->len;
int quote = 0;
char *value;
if (!param->value) {
param = param->next;
continue;
}
if (!(value = encode_param (param->value, &encoded))) {
w(g_warning ("appending parameter %s=%s violates rfc2184",
param->name, param->value));
value = g_strdup (param->value);
}
if (!encoded) {
char *ch;
for (ch = value; *ch; ch++) {
if (!is_attrchar (*ch) || is_lwsp (*ch))
quote++;
}
}
nlen = strlen (param->name);
vlen = strlen (value);
if (fold && (used + nlen + vlen + quote > GMIME_FOLD_LEN - 2)) {
g_string_append (out, ";\n\t");
here = out->len;
used = 1;
} else {
g_string_append (out, "; ");
here = out->len;
used += 2;
}
if (nlen + vlen + quote > GMIME_FOLD_LEN - 2) {
/* we need to do special rfc2184 parameter wrapping */
int maxlen = GMIME_FOLD_LEN - (nlen + 6);
char *inptr, *inend;
int i = 0;
inptr = value;
inend = value + vlen;
while (inptr < inend) {
char *ptr = inptr + MIN (inend - inptr, maxlen);
if (encoded && ptr < inend) {
/* be careful not to break an encoded char (ie %20) */
char *q = ptr;
int j = 2;
for ( ; j > 0 && q > inptr && *q != '%'; j--, q--);
if (*q == '%')
ptr = q;
}
if (i != 0) {
if (fold)
g_string_append (out, ";\n\t");
else
g_string_append (out, "; ");
here = out->len;
used = 1;
}
g_string_append_printf (out, "%s*%d%s=", param->name,
i++, encoded ? "*" : "");
if (encoded || !quote)
g_string_append_len (out, inptr, ptr - inptr);
else
g_string_append_len_quoted (out, inptr, ptr - inptr);
used += (out->len - here);
inptr = ptr;
}
} else {
g_string_append_printf (out, "%s%s=", param->name, encoded ? "*" : "");
if (encoded || !quote)
g_string_append_len (out, value, vlen);
else
g_string_append_len_quoted (out, value, vlen);
used += (out->len - here);
}
g_free (value);
param = param->next;
}
if (fold)
g_string_append_c (out, '\n');
}
/**
* g_mime_param_write_to_string:
* @param: MIME Param list
* @fold: specifies whether or not to fold headers
* @string: output string
*
* Assumes the output string contains only the Content-* header and
* it's immediate value.
*
* Writes the params out to the string @string.
**/
void
g_mime_param_write_to_string (GMimeParam *param, gboolean fold, GString *string)
{
g_return_if_fail (string != NULL);
param_list_format (string, param, fold);
}
syntax highlighted by Code2HTML, v. 0.9.1