/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* Authors: Jeffrey Stedfast <fejj@helixcode.com>
*
* Copyright 2000 Helix Code, Inc. (www.helixcode.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
*
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <ctype.h>
#include <errno.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include "gmime-param.h"
#include "gmime-table-private.h"
#include "gmime-charset.h"
#include "gmime-utils.h"
#include "gmime-iconv.h"
#include "gmime-iconv-utils.h"
#include "strlib.h"
#define d(x)
#define w(x)
extern int gmime_interfaces_utf8;
static unsigned char tohex[16] = {
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
/**
* g_mime_param_new: Create a new MIME Param object
* @name: parameter name
* @value: parameter value
*
* Creates a new GMimeParam node with name @name and value @value.
*
* Returns a new paramter structure.
**/
GMimeParam *
g_mime_param_new (const char *name, const char *value)
{
GMimeParam *param;
param = g_new (GMimeParam, 1);
param->next = NULL;
param->name = g_strdup (name);
param->value = g_strdup (value);
return param;
}
#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)
static size_t
hex_decode (const unsigned char *in, size_t len, unsigned char *out)
{
register const unsigned char *inptr;
register unsigned char *outptr;
const unsigned char *inend;
inptr = in;
inend = in + len;
outptr = out;
while (inptr < inend) {
if (*inptr == '%') {
if (isxdigit (inptr[1]) && isxdigit (inptr[2])) {
*outptr++ = HEXVAL (inptr[1]) * 16 + HEXVAL (inptr[2]);
inptr += 3;
} else
*outptr++ = *inptr++;
} else
*outptr++ = *inptr++;
}
*outptr = '\0';
return outptr - out;
}
/* an rfc2184 encoded string looks something like:
* us-ascii'en'This%20is%20even%20more%20
*/
static char *
rfc2184_decode (const char *in, size_t len)
{
const char *inptr = in;
const char *inend = in + len;
const char *charset = NULL;
char *decoded = NULL;
char *charenc;
/* skips to the end of the charset / beginning of the locale */
inptr = memchr (inptr, '\'', len);
if (!inptr)
return NULL;
/* save the charset */
len = inptr - in;
charenc = alloca (len + 1);
memcpy (charenc, in, len);
charenc[len] = '\0';
charset = charenc;
/* skip to the end of the locale */
inptr = memchr (inptr + 1, '\'', (unsigned int) (inend - inptr - 1));
if (!inptr)
return NULL;
inptr++;
if (inptr < inend) {
len = inend - inptr;
if (gmime_interfaces_utf8 && strcasecmp (charset, "UTF-8") != 0) {
char *udecoded;
iconv_t cd;
decoded = alloca (len + 1);
len = hex_decode (inptr, len, decoded);
cd = g_mime_iconv_open ("UTF-8", charset);
if (cd == (iconv_t) -1) {
d(g_warning ("Cannot convert from %s to UTF-8, param display may "
"be corrupt: %s", charset, g_strerror (errno)));
charset = g_mime_charset_locale_name ();
cd = g_mime_iconv_open ("UTF-8", charset);
if (cd == (iconv_t) -1)
return NULL;
}
udecoded = g_mime_iconv_strndup (cd, decoded, len);
g_mime_iconv_close (cd);
if (!udecoded) {
d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be "
"corrupt: %s", len, decoded, g_strerror (errno)));
}
decoded = udecoded;
} else {
decoded = g_malloc (len + 1);
hex_decode (inptr, len, decoded);
}
}
return decoded;
}
static void
decode_lwsp (const char **in)
{
const char *inptr = *in;
while (*inptr && (*inptr == '(' || is_lwsp (*inptr))) {
while (*inptr && is_lwsp (*inptr))
inptr++;
/* skip over any comments */
if (*inptr == '(') {
int depth = 1;
inptr++;
while (*inptr && depth) {
if (*inptr == '\\' && *(inptr + 1))
inptr++;
else if (*inptr == '(')
depth++;
else if (*inptr == ')')
depth--;
inptr++;
}
}
}
*in = inptr;
}
static int
decode_int (const char **in)
{
const char *inptr = *in;
int n = 0;
decode_lwsp (&inptr);
while (isdigit ((int) *inptr)) {
n = n * 10 + (*inptr - '0');
inptr++;
}
*in = inptr;
return n;
}
static char *
decode_quoted_string (const char **in)
{
const char *start, *inptr = *in;
char *out = NULL;
decode_lwsp (&inptr);
if (*inptr == '"') {
start = inptr++;
while (*inptr && *inptr != '"') {
if (*inptr++ == '\\')
inptr++;
}
if (*inptr == '"') {
start++;
out = g_strndup (start, (unsigned int) (inptr - start));
inptr++;
} else {
/* string wasn't properly quoted */
out = g_strndup (start, (unsigned int) (inptr - start));
}
}
*in = inptr;
return out;
}
static char *
decode_token (const char **in)
{
const char *inptr = *in;
const char *start;
decode_lwsp (&inptr);
start = inptr;
while (is_ttoken (*inptr))
inptr++;
if (inptr > start) {
*in = inptr;
return g_strndup (start, (unsigned int) (inptr - start));
} else {
return NULL;
}
}
static char *
decode_value (const char **in)
{
const char *inptr = *in;
decode_lwsp (&inptr);
if (*inptr == '"') {
return decode_quoted_string (in);
} else if (is_ttoken (*inptr)) {
return decode_token (in);
}
return NULL;
}
/* This function is basically the same as decode_token()
* except that it will not accept *'s which have a special
* meaning for rfc2184 params */
static char *
decode_param_token (const char **in)
{
const char *inptr = *in;
const char *start;
decode_lwsp (&inptr);
start = inptr;
while (is_ttoken (*inptr) && *inptr != '*')
inptr++;
if (inptr > start) {
*in = inptr;
return g_strndup (start, (unsigned int) (inptr - start));
} else {
return NULL;
}
}
static gboolean
decode_rfc2184_param (const char **in, char **paramp, int *part, gboolean *value_is_encoded)
{
gboolean is_rfc2184 = FALSE;
const char *inptr = *in;
char *param;
*value_is_encoded = FALSE;
*part = -1;
param = decode_param_token (&inptr);
decode_lwsp (&inptr);
if (*inptr == '*') {
is_rfc2184 = TRUE;
inptr++;
decode_lwsp (&inptr);
if (*inptr == '=') {
/* form := param*=value */
if (value_is_encoded)
*value_is_encoded = TRUE;
} else {
/* form := param*#=value or param*#*=value */
*part = decode_int (&inptr);
decode_lwsp (&inptr);
if (*inptr == '*') {
/* form := param*#*=value */
if (value_is_encoded)
*value_is_encoded = TRUE;
inptr++;
decode_lwsp (&inptr);
}
}
}
if (paramp)
*paramp = param;
if (param)
*in = inptr;
return is_rfc2184;
}
static int
decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc2184_param)
{
gboolean is_rfc2184_encoded = FALSE;
gboolean is_rfc2184 = FALSE;
gboolean valid_utf8 = FALSE;
const char *inptr = *in;
char *param, *value = NULL;
int rfc2184_part = -1;
*is_rfc2184_param = FALSE;
is_rfc2184 = decode_rfc2184_param (&inptr, ¶m, &rfc2184_part,
&is_rfc2184_encoded);
if (*inptr == '=') {
inptr++;
if (!(value = decode_value (&inptr)))
goto done;
if (is_rfc2184) {
/* We have ourselves an rfc2184 parameter */
if (rfc2184_part == -1) {
/* rfc2184 allows the value to be broken into
* multiple parts - this isn't one of them so
* it is safe to decode it.
*/
char *val;
val = rfc2184_decode (value, strlen (value));
if (val) {
valid_utf8 = TRUE;
g_free (value);
value = val;
}
} else {
/* Since we are expecting to find the rest of
* this paramter value later, let our caller know.
*/
*is_rfc2184_param = TRUE;
}
} else if (!strncmp (value, "=?", 2)) {
/* We have a broken param value that is rfc2047 encoded.
* Since both Outlook and Netscape/Mozilla do this, we
* should handle this case.
*/
char *val;
val = g_mime_utils_8bit_header_decode (value);
if (val) {
valid_utf8 = TRUE;
g_free (value);
value = val;
}
} else {
if (gmime_interfaces_utf8 && value)
valid_utf8 = !g_mime_utils_text_is_8bit (value, strlen (value));
}
}
if (gmime_interfaces_utf8 && value && !valid_utf8) {
/* A (broken) mailer has sent us an unencoded 8bit value.
* Attempt to save it by assuming it's in the user's
* locale and converting to UTF-8 */
char *buf;
buf = g_mime_iconv_locale_to_utf8 (value);
if (buf) {
g_free (value);
value = buf;
} else {
d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s",
param, value, g_strerror (errno)));
}
}
done:
if (param && value) {
*paramp = param;
*valuep = value;
*in = inptr;
return 0;
} else {
g_free (param);
g_free (value);
return 1;
}
}
static GMimeParam *
decode_param_list (const char **in)
{
const char *inptr = *in;
GMimeParam *head = NULL, *tail = NULL;
gboolean last_was_rfc2184 = FALSE;
gboolean is_rfc2184 = FALSE;
decode_lwsp (&inptr);
do {
GMimeParam *param = NULL;
char *name, *value;
/* invalid format? */
if (decode_param (&inptr, &name, &value, &is_rfc2184) != 0) {
if (*inptr == ';') {
continue;
}
break;
}
if (is_rfc2184 && tail && !strcasecmp (name, tail->name)) {
/* rfc2184 allows a parameter to be broken into multiple parts
* and it looks like we've found one. Append this value to the
* last value.
*/
GString *gvalue;
gvalue = g_string_new (tail->value);
g_string_append (gvalue, value);
g_free (tail->value);
g_free (value);
g_free (name);
tail->value = gvalue->str;
g_string_free (gvalue, FALSE);
} else {
if (last_was_rfc2184) {
/* We've finished gathering the values for the last param
* so it is now safe to decode it.
*/
char *val;
val = rfc2184_decode (tail->value, strlen (tail->value));
if (val) {
g_free (tail->value);
tail->value = val;
}
}
param = g_new (GMimeParam, 1);
param->next = NULL;
param->name = name;
param->value = value;
if (head == NULL)
head = param;
if (tail)
tail->next = param;
tail = param;
}
last_was_rfc2184 = is_rfc2184;
decode_lwsp (&inptr);
} while (*inptr++ == ';');
if (last_was_rfc2184) {
/* We've finished gathering the values for the last param
* so it is now safe to decode it.
*/
char *val;
val = rfc2184_decode (tail->value, strlen (tail->value));
if (val) {
g_free (tail->value);
tail->value = val;
}
}
*in = inptr;
return head;
}
/**
* g_mime_param_new_from_string: Create a new MIME Param object
* @string: input string
*
* Creates a parameter list based on the input string.
*
* Returns a GMimeParam structure based on @string.
**/
GMimeParam *
g_mime_param_new_from_string (const char *string)
{
g_return_val_if_fail (string != NULL, NULL);
return decode_param_list (&string);
}
/**
* g_mime_param_destroy: Destroy the MIME Param
* @param: Mime param list to destroy
*
* Releases all memory used by this mime param back to the Operating
* System.
**/
void
g_mime_param_destroy (GMimeParam *param)
{
GMimeParam *next;
while (param) {
next = param->next;
g_free (param->name);
g_free (param->value);
g_free (param);
param = next;
}
}
/**
* g_mime_param_append:
* @params: param list
* @name: new param name
* @value: new param value
*
* Appends a new parameter with name @name and value @value to the
* parameter list @params.
*
* Returns a param list with the new param of name @name and value
* @value appended to the list of params @params.
**/
GMimeParam *
g_mime_param_append (GMimeParam *params, const char *name, const char *value)
{
GMimeParam *param, *p;
g_return_val_if_fail (name != NULL, params);
g_return_val_if_fail (value != NULL, params);
param = g_mime_param_new (name, value);
if (params) {
p = params;
while (p->next)
p = p->next;
p->next = param;
} else
params = param;
return params;
}
/**
* g_mime_param_append_param:
* @params: param list
* @param: param to append
*
* Appends @param to the param list @params.
*
* Returns a param list with the new param @param appended to the list
* of params @params.
**/
GMimeParam *
g_mime_param_append_param (GMimeParam *params, GMimeParam *param)
{
GMimeParam *p;
g_return_val_if_fail (param != NULL, params);
if (params) {
p = params;
while (p->next)
p = p->next;
p->next = param;
} else
params = param;
return params;
}
/* FIXME: I wrote this in a quick & dirty fasion - it may not be 100% correct */
static char *
encode_param (const unsigned char *in, gboolean *encoded)
{
register const unsigned char *inptr;
unsigned char *outbuf = NULL;
iconv_t cd = (iconv_t) -1;
const char *charset = NULL;
char *outstr;
GString *out;
*encoded = FALSE;
for (inptr = in; *inptr && inptr - in < GMIME_FOLD_LEN; inptr++)
if (*inptr > 127)
break;
if (*inptr == '\0')
return g_strdup (in);
if (*inptr > 127) {
if (gmime_interfaces_utf8)
charset = g_mime_charset_best (in, strlen (in));
else
charset = g_mime_charset_locale_name ();
}
if (!charset)
charset = "iso-8859-1";
if (gmime_interfaces_utf8) {
if (strcasecmp (charset, "UTF-8") != 0)
cd = g_mime_iconv_open (charset, "UTF-8");
if (cd == (iconv_t) -1)
charset = "UTF-8";
}
if (cd != (iconv_t) -1) {
outbuf = g_mime_iconv_strdup (cd, in);
g_mime_iconv_close (cd);
inptr = outbuf;
} else {
inptr = in;
}
/* FIXME: set the 'language' as well, assuming we can get that info...? */
out = g_string_new ("");
g_string_sprintfa (out, "%s''", charset);
while (inptr && *inptr) {
unsigned char c = *inptr++;
/* FIXME: make sure that '\'', '*', and ';' are also encoded */
if (c > 127) {
g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
} else if (is_lwsp (c) || !(gmime_special_table[c] & IS_ESAFE)) {
g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
} else {
g_string_append_c (out, c);
}
}
g_free (outbuf);
outstr = out->str;
g_string_free (out, FALSE);
*encoded = TRUE;
return outstr;
}
static void
g_string_append_len_quoted (GString *out, const char *in, size_t len)
{
register const char *inptr;
const char *inend;
g_string_append_c (out, '"');
inptr = in;
inend = in + len;
while (inptr < inend) {
if ((*inptr == '"') || *inptr == '\\')
g_string_append_c (out, '\\');
g_string_append_c (out, *inptr);
inptr++;
}
g_string_append_c (out, '"');
}
static void
g_string_append_len (GString *out, const char *in, size_t len)
{
char *buf;
buf = alloca (len + 1);
strlcpy (buf, in, len + 1);
g_string_append (out, buf);
}
static void
param_list_format (GString *out, GMimeParam *param, gboolean fold)
{
int used = out->len;
while (param) {
gboolean encoded = FALSE;
gboolean quote = FALSE;
unsigned nlen, vlen;
int here = out->len;
char *value;
if (!param->value) {
param = param->next;
continue;
}
value = encode_param (param->value, &encoded);
if (!value) {
w(g_warning ("appending parameter %s=%s violates rfc2184",
param->name, param->value));
value = g_strdup (param->value);
}
if (!encoded) {
char *ch;
for (ch = value; *ch; ch++) {
if (is_tspecial (*ch) || is_lwsp (*ch))
break;
}
quote = ch && *ch;
}
nlen = strlen (param->name);
vlen = strlen (value);
if (used + nlen + vlen > GMIME_FOLD_LEN - 8) {
if (fold)
g_string_append (out, ";\n\t");
else
g_string_append (out, "; ");
here = out->len;
used = 0;
} else
out = g_string_append (out, "; ");
if (nlen + vlen > GMIME_FOLD_LEN - 10) {
/* we need to do special rfc2184 parameter wrapping */
int maxlen = GMIME_FOLD_LEN - (nlen + 10);
char *inptr, *inend;
int i = 0;
inptr = value;
inend = value + vlen;
while (inptr < inend) {
char *ptr = inptr + MIN (inend - inptr, maxlen);
if (encoded && ptr < inend) {
/* be careful not to break an encoded char (ie %20) */
char *q = ptr;
int j = 2;
for ( ; j > 0 && q > inptr && *q != '%'; j--, q--);
if (*q == '%')
ptr = q;
}
if (i != 0) {
if (fold)
g_string_append (out, ";\n\t");
else
g_string_append (out, "; ");
here = out->len;
used = 0;
}
g_string_sprintfa (out, "%s*%d%s=", param->name, i++, encoded ? "*" : "");
if (encoded || !quote)
g_string_append_len (out, inptr, (unsigned) (ptr - inptr));
else
g_string_append_len_quoted (out, inptr, (unsigned) (ptr - inptr));
d(printf ("wrote: %s\n", out->str + here));
used += (out->len - here);
inptr = ptr;
}
} else {
g_string_sprintfa (out, "%s%s=", param->name, encoded ? "*" : "");
if (encoded || !quote)
g_string_append_len (out, value, vlen);
else
g_string_append_len_quoted (out, value, vlen);
used += (out->len - here);
}
g_free (value);
param = param->next;
}
}
/**
* g_mime_param_write_to_string:
* @param: MIME Param list
* @fold: specifies whether or not to fold headers
* @string: output string
*
* Assumes the output string contains only the Content-* header and
* it's immediate value.
*
* Writes the params out to the string @string.
**/
void
g_mime_param_write_to_string (GMimeParam *param, gboolean fold, GString *string)
{
g_return_if_fail (string != NULL);
param_list_format (string, param, fold);
}
syntax highlighted by Code2HTML, v. 0.9.1