/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* Authors: Michael Zucchi <notzed@helixcode.com>
* Jeffrey Stedfast <fejj@helixcode.com>
*
* Copyright 2000 Helix Code, Inc. (www.helixcode.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
*
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include "gmime-utils.h"
#include "gmime-table-private.h"
#include "gmime-part.h"
#include "gmime-charset.h"
#include "gmime-iconv.h"
#include "gmime-iconv-utils.h"
#include "unicode.h"
#define d(x)
#define w(x) x
#ifndef HAVE_ISBLANK
#define isblank(c) (c == ' ' || c == '\t')
#endif
#define GMIME_UUENCODE_CHAR(c) ((c) ? (c) + ' ' : '`')
#define GMIME_UUDECODE_CHAR(c) (((c) - ' ') & 077)
#define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2)
/* date parser macros */
#define NUMERIC_CHARS "1234567890"
#define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
#define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
#define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
#define TIMEZONE_NUMERIC_CHARS "-+1234567890"
#define TIME_CHARS "1234567890:"
#define DATE_TOKEN_NON_NUMERIC (1 << 0)
#define DATE_TOKEN_NON_WEEKDAY (1 << 1)
#define DATE_TOKEN_NON_MONTH (1 << 2)
#define DATE_TOKEN_NON_TIME (1 << 3)
#define DATE_TOKEN_HAS_COLON (1 << 4)
#define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5)
#define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
#define DATE_TOKEN_HAS_SIGN (1 << 7)
/* from gmime.c */
extern int gmime_interfaces_utf8;
static char *base64_alphabet =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static unsigned char tohex[16] = {
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
static unsigned char gmime_base64_rank[256] = {
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255, 0,255,255,
255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};
static unsigned char gmime_uu_rank[256] = {
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
static unsigned char gmime_datetok_table[256] = {
128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
};
/* hrm, is there a library for this shit? */
static struct {
char *name;
int offset;
} tz_offsets [] = {
{ "UT", 0 },
{ "GMT", 0 },
{ "EST", -500 }, /* these are all US timezones. bloody yanks */
{ "EDT", -400 },
{ "CST", -600 },
{ "CDT", -500 },
{ "MST", -700 },
{ "MDT", -600 },
{ "PST", -800 },
{ "PDT", -700 },
{ "Z", 0 },
{ "A", -100 },
{ "M", -1200 },
{ "N", 100 },
{ "Y", 1200 },
};
static char *tm_months[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
static char *tm_days[] = {
"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
/**
* g_mime_utils_header_format_date:
* @time: time_t date representation
* @offset: Timezone offset
*
* Allocates a string buffer containing the rfc822 formatted date
* string represented by @time and @offset.
*
* Returns a valid string representation of the date.
**/
char *
g_mime_utils_header_format_date (time_t time, int offset)
{
struct tm tm;
time += ((offset / 100) * (60 * 60)) + (offset % 100) * 60;
memcpy (&tm, gmtime (&time), sizeof (tm));
return g_strdup_printf ("%s, %02d %s %04d %02d:%02d:%02d %+05d",
tm_days[tm.tm_wday], tm.tm_mday,
tm_months[tm.tm_mon],
tm.tm_year + 1900,
tm.tm_hour, tm.tm_min, tm.tm_sec,
offset);
}
/* This is where it gets ugly... */
struct _date_token {
struct _date_token *next;
const unsigned char *start;
unsigned int len;
unsigned int mask;
};
static struct _date_token *
datetok (const char *date)
{
struct _date_token *tokens = NULL, *token, *tail = (struct _date_token *) &tokens;
const unsigned char *start, *end;
unsigned int mask;
start = date;
while (*start) {
/* kill leading whitespace */
for ( ; *start && isspace ((int) *start); start++);
mask = 0;
/* find the end of this token */
for (end = start; *end && !strchr ("-/,\t\r\n ", *end); end++) {
mask |= gmime_datetok_table[*end];
}
if (end != start) {
token = g_malloc (sizeof (struct _date_token));
token->next = NULL;
token->start = start;
token->len = end - start;
token->mask = mask;
tail->next = token;
tail = token;
}
if (*end)
start = end + 1;
else
break;
}
return tokens;
}
static int
decode_int (const unsigned char *in, unsigned int inlen)
{
register const unsigned char *inptr;
const unsigned char *inend;
int sign = 1, val = 0;
inptr = in;
inend = in + inlen;
if (*inptr == '-') {
sign = -1;
inptr++;
} else if (*inptr == '+')
inptr++;
for ( ; inptr < inend; inptr++) {
if (!isdigit ((int) *inptr))
return -1;
else
val = (val * 10) + (*inptr - '0');
}
val *= sign;
return val;
}
#if 0
static int
get_days_in_month (int month, int year)
{
switch (month) {
case 1:
case 3:
case 5:
case 7:
case 8:
case 10:
case 12:
return 31;
case 4:
case 6:
case 9:
case 11:
return 30;
case 2:
if (g_date_is_leap_year (year))
return 29;
else
return 28;
default:
return 0;
}
}
#endif
static int
get_wday (const unsigned char *in, unsigned int inlen)
{
int wday;
g_return_val_if_fail (in != NULL, -1);
if (inlen < 3)
return -1;
for (wday = 0; wday < 7; wday++)
if (!strncasecmp (in, tm_days[wday], 3))
return wday;
return -1; /* unknown week day */
}
static int
get_mday (const unsigned char *in, unsigned int inlen)
{
int mday;
g_return_val_if_fail (in != NULL, -1);
mday = decode_int (in, inlen);
if (mday < 0 || mday > 31)
mday = -1;
return mday;
}
static int
get_month (const unsigned char *in, unsigned int inlen)
{
int i;
g_return_val_if_fail (in != NULL, -1);
if (inlen < 3)
return -1;
for (i = 0; i < 12; i++)
if (!strncasecmp (in, tm_months[i], 3))
return i;
return -1; /* unknown month */
}
static int
get_year (const unsigned char *in, unsigned int inlen)
{
int year;
g_return_val_if_fail (in != NULL, -1);
year = decode_int (in, inlen);
if (year == -1)
return -1;
if (year < 100)
year += (year < 70) ? 2000 : 1900;
if (year < 1969)
return -1;
return year;
}
static gboolean
get_time (const unsigned char *in, unsigned int inlen, int *hour, int *min, int *sec)
{
register const unsigned char *inptr;
const unsigned char *inend;
int *val, colons = 0;
*hour = *min = *sec = 0;
inend = in + inlen;
val = hour;
for (inptr = in; inptr < inend; inptr++) {
if (*inptr == ':') {
colons++;
switch (colons) {
case 1:
val = min;
break;
case 2:
val = sec;
break;
default:
return FALSE;
}
} else if (!isdigit ((int) *inptr))
return FALSE;
else
*val = (*val * 10) + (*inptr - '0');
}
return TRUE;
}
static int
get_tzone (struct _date_token **token)
{
const unsigned char *inptr, *inend;
unsigned int inlen;
int i, t;
for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
inptr = (*token)->start;
inlen = (*token)->len;
inend = inptr + inlen;
if (*inptr == '+' || *inptr == '-') {
return decode_int (inptr, inlen);
} else {
if (*inptr == '(') {
inptr++;
if (*(inend - 1) == ')')
inlen -= 2;
else
inlen--;
}
for (t = 0; t < 15; t++) {
unsigned int len = strlen (tz_offsets[t].name);
if (len != inlen)
continue;
if (!strncmp (inptr, tz_offsets[t].name, len))
return tz_offsets[t].offset;
}
}
}
return -1;
}
static time_t
parse_rfc822_date (struct _date_token *tokens, int *tzone)
{
int hour, min, sec, offset, n;
struct _date_token *token;
struct tm tm;
time_t t;
g_return_val_if_fail (tokens != NULL, (time_t) 0);
token = tokens;
memset ((void *) &tm, 0, sizeof (struct tm));
if ((n = get_wday (token->start, token->len)) != -1) {
/* not all dates may have this... */
tm.tm_wday = n;
token = token->next;
}
/* get the mday */
if (!token || (n = get_mday (token->start, token->len)) == -1)
return (time_t) 0;
tm.tm_mday = n;
token = token->next;
/* get the month */
if (!token || (n = get_month (token->start, token->len)) == -1)
return (time_t) 0;
tm.tm_mon = n;
token = token->next;
/* get the year */
if (!token || (n = get_year (token->start, token->len)) == -1)
return (time_t) 0;
tm.tm_year = n - 1900;
token = token->next;
/* get the hour/min/sec */
if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
return (time_t) 0;
tm.tm_hour = hour;
tm.tm_min = min;
tm.tm_sec = sec;
token = token->next;
/* get the timezone */
if (!token || (n = get_tzone (&token)) == -1) {
/* I guess we assume tz is GMT? */
offset = 0;
} else {
offset = n;
}
t = mktime (&tm);
#if defined (HAVE_TIMEZONE)
t -= timezone;
#elif defined (HAVE_TM_GMTOFF)
t += tm.tm_gmtoff;
#else
#error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
#endif
/* t is now GMT of the time we want, but not offset by the timezone ... */
/* this should convert the time to the GMT equiv time */
t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
if (tzone)
*tzone = offset;
return t;
}
#define date_token_mask(t) (((struct _date_token *) t)->mask)
#define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
#define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
#define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
#define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
#define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
#define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
#define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t))
static time_t
parse_broken_date (struct _date_token *tokens, int *tzone)
{
gboolean got_wday, got_month, got_tzone;
int hour, min, sec, offset, n;
struct _date_token *token;
struct tm tm;
time_t t;
memset ((void *) &tm, 0, sizeof (struct tm));
got_wday = got_month = got_tzone = FALSE;
offset = 0;
token = tokens;
while (token) {
if (is_weekday (token) && !got_wday) {
if ((n = get_wday (token->start, token->len)) != -1) {
d(printf ("weekday; "));
got_wday = TRUE;
tm.tm_wday = n;
goto next_token;
}
}
if (is_month (token) && !got_month) {
if ((n = get_month (token->start, token->len)) != -1) {
d(printf ("month; "));
got_month = TRUE;
tm.tm_mon = n;
goto next_token;
}
}
if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
if (get_time (token->start, token->len, &hour, &min, &sec)) {
d(printf ("time; "));
tm.tm_hour = hour;
tm.tm_min = min;
tm.tm_sec = sec;
goto next_token;
}
}
if (is_tzone (token) && !got_tzone) {
struct _date_token *t = token;
if ((n = get_tzone (&t)) != -1) {
d(printf ("tzone; "));
got_tzone = TRUE;
offset = n;
goto next_token;
}
}
if (is_numeric (token)) {
if (token->len == 4 && !tm.tm_year) {
if ((n = get_year (token->start, token->len)) != -1) {
d(printf ("year; "));
tm.tm_year = n - 1900;
goto next_token;
}
} else {
if (!got_month && !got_wday && token->next && is_numeric (token->next)) {
d(printf ("mon; "));
n = decode_int (token->start, token->len);
got_month = TRUE;
tm.tm_mon = n - 1;
goto next_token;
} else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
d(printf ("mday; "));
tm.tm_mday = n;
goto next_token;
} else if (!tm.tm_year) {
d(printf ("2-digit year; "));
n = get_year (token->start, token->len);
tm.tm_year = n - 1900;
goto next_token;
}
}
}
d(printf ("???; "));
next_token:
token = token->next;
}
d(printf ("\n"));
t = mktime (&tm);
#if defined (HAVE_TIMEZONE)
t -= timezone;
#elif defined (HAVE_TM_GMTOFF)
t += tm.tm_gmtoff;
#else
#error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
#endif
/* t is now GMT of the time we want, but not offset by the timezone ... */
/* this should convert the time to the GMT equiv time */
t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
if (tzone)
*tzone = offset;
return t;
}
#if 0
static void
gmime_datetok_table_init ()
{
int i;
memset (gmime_datetok_table, 0, sizeof (gmime_datetok_table));
for (i = 0; i < 256; i++) {
if (!strchr (NUMERIC_CHARS, i))
gmime_datetok_table[i] |= DATE_TOKEN_NON_NUMERIC;
if (!strchr (WEEKDAY_CHARS, i))
gmime_datetok_table[i] |= DATE_TOKEN_NON_WEEKDAY;
if (!strchr (MONTH_CHARS, i))
gmime_datetok_table[i] |= DATE_TOKEN_NON_MONTH;
if (!strchr (TIME_CHARS, i))
gmime_datetok_table[i] |= DATE_TOKEN_NON_TIME;
if (!strchr (TIMEZONE_ALPHA_CHARS, i))
gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_ALPHA;
if (!strchr (TIMEZONE_NUMERIC_CHARS, i))
gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_NUMERIC;
if (((char) i) == ':')
gmime_datetok_table[i] |= DATE_TOKEN_HAS_COLON;
if (strchr ("+-", i))
gmime_datetok_table[i] |= DATE_TOKEN_HAS_SIGN;
}
printf ("static unsigned char gmime_datetok_table[256] = {");
for (i = 0; i < 256; i++) {
if (i % 16 == 0)
printf ("\n\t");
printf ("%3d,", gmime_datetok_table[i]);
}
printf ("\n};\n");
}
#endif
/**
* g_mime_utils_header_decode_date:
* @in: input date string
* @saveoffset:
*
* Decodes the rfc822 date string and saves the GMT offset into
* @saveoffset if non-NULL.
*
* Returns the time_t representation of the date string specified by
* @in. If 'saveoffset' is non-NULL, the value of the timezone offset
* will be stored.
**/
time_t
g_mime_utils_header_decode_date (const char *in, int *saveoffset)
{
struct _date_token *token, *tokens;
time_t date;
tokens = datetok (in);
date = parse_rfc822_date (tokens, saveoffset);
if (!date)
date = parse_broken_date (tokens, saveoffset);
/* cleanup */
while (tokens) {
token = tokens;
tokens = tokens->next;
g_free (token);
}
return date;
}
static void
g_string_append_len (GString *out, const char *in, size_t len)
{
char *buf;
buf = alloca (len + 1);
strlcpy (buf, in, len + 1);
g_string_append (out, buf);
}
/**
* g_mime_utils_header_fold:
* @in: input header string
*
* Folds a header according to the rules in rfc822.
*
* Returns an allocated string containing the folded header.
**/
char *
g_mime_utils_header_fold (const char *in)
{
gboolean last_was_lwsp = FALSE;
register const char *inptr;
size_t len, outlen, i;
GString *out;
char *ret;
inptr = in;
len = strlen (in);
if (len <= GMIME_FOLD_LEN)
return g_strdup (in);
out = g_string_new ("");
outlen = 0;
while (*inptr) {
len = strcspn (inptr, " \t");
if (outlen + len > GMIME_FOLD_LEN) {
if (last_was_lwsp)
g_string_truncate (out, out->len - 1);
g_string_append (out, "\n\t");
outlen = 1;
/* check for very long words, just cut them up */
while (outlen + len > GMIME_FOLD_LEN) {
for (i = 0; i < GMIME_FOLD_LEN - outlen; i++)
g_string_append_c (out, inptr[i]);
inptr += GMIME_FOLD_LEN - outlen;
len -= GMIME_FOLD_LEN - outlen;
g_string_append (out, "\n\t");
outlen = 1;
}
last_was_lwsp = FALSE;
} else if (len > 0) {
outlen += len;
g_string_append_len (out, inptr, len);
inptr += len;
last_was_lwsp = FALSE;
} else {
if (*inptr == '\t') {
/* tabs are a good place to fold, odds
are that this is where the previous
mailer folded it */
g_string_append (out, "\n\t");
outlen = 1;
inptr++;
last_was_lwsp = FALSE;
} else {
g_string_append_c (out, *inptr++);
outlen++;
last_was_lwsp = TRUE;
}
}
}
ret = out->str;
g_string_free (out, FALSE);
return ret;
}
/**
* g_mime_utils_header_printf:
* @format: string format
* @Varargs: arguments
*
* Allocates a buffer containing a formatted header specified by the
* @Varargs.
*
* Returns an allocated string containing the folded header specified
* by @format and the following arguments.
**/
char *
g_mime_utils_header_printf (const char *format, ...)
{
char *buf, *ret;
va_list ap;
va_start (ap, format);
buf = g_strdup_vprintf (format, ap);
va_end (ap);
ret = g_mime_utils_header_fold (buf);
g_free (buf);
return ret;
}
static gboolean
need_quotes (const char *string)
{
gboolean quoted = FALSE;
const char *inptr;
inptr = string;
while (*inptr) {
if (*inptr == '\\')
inptr++;
else if (*inptr == '"')
quoted = !quoted;
else if (!quoted && (is_tspecial (*inptr) || *inptr == '.'))
return TRUE;
if (*inptr)
inptr++;
}
return FALSE;
}
/**
* g_mime_utils_quote_string:
* @string: input string
*
* Quotes @string as needed according to the rules in rfc2045.
*
* Returns an allocated string containing the escaped and quoted (if
* needed to be) input string. The decision to quote the string is
* based on whether or not the input string contains any 'tspecials'
* as defined by rfc2045.
**/
char *
g_mime_utils_quote_string (const char *string)
{
gboolean quote;
const char *c;
char *qstring;
GString *out;
out = g_string_new ("");
quote = need_quotes (string);
for (c = string; *c; c++) {
if ((*c == '"' && quote) || *c == '\\')
g_string_append_c (out, '\\');
g_string_append_c (out, *c);
}
if (quote) {
g_string_prepend_c (out, '"');
g_string_append_c (out, '"');
}
qstring = out->str;
g_string_free (out, FALSE);
return qstring;
}
/**
* g_mime_utils_unquote_string: Unquote a string.
* @string: string
*
* Unquotes and unescapes a string.
**/
void
g_mime_utils_unquote_string (char *string)
{
/* if the string is quoted, unquote it */
char *inptr, *inend;
if (!string)
return;
inptr = string;
inend = string + strlen (string);
/* get rid of the wrapping quotes */
if (*inptr == '"' && *(inend - 1) == '"') {
inend--;
*inend = '\0';
if (*inptr)
memmove (inptr, inptr + 1, inend - inptr);
}
/* un-escape the string */
inend--;
while (inptr < inend) {
if (*inptr == '\\') {
memmove (inptr, inptr + 1, inend - inptr);
inend--;
}
inptr++;
}
}
/**
* g_mime_utils_text_is_8bit:
* @text: text to check for 8bit chars
* @len: text length
*
* Determines if @text contains 8bit characters within the first @len
* bytes.
*
* Returns TRUE if the text contains 8bit characters or FALSE
* otherwise.
**/
gboolean
g_mime_utils_text_is_8bit (const unsigned char *text, size_t len)
{
const unsigned char *c, *inend;
g_return_val_if_fail (text != NULL, FALSE);
inend = text + len;
for (c = text; c < inend; c++)
if (*c > (unsigned char) 127)
return TRUE;
return FALSE;
}
/**
* g_mime_utils_best_encoding:
* @text: text to encode
* @len: text length
*
* Determines the best content encoding for the first @len bytes of
* @text.
*
* Returns a GMimePartEncodingType that is determined to be the best
* encoding type for the specified block of text. ("best" in this
* particular case means best compression)
**/
GMimePartEncodingType
g_mime_utils_best_encoding (const unsigned char *text, size_t len)
{
const unsigned char *ch, *inend;
size_t count = 0;
inend = text + len;
for (ch = text; ch < inend; ch++)
if (*ch > (unsigned char) 127)
count++;
if ((float) count <= len * 0.17)
return GMIME_PART_ENCODING_QUOTEDPRINTABLE;
else
return GMIME_PART_ENCODING_BASE64;
}
/* this decodes rfc2047's version of quoted-printable */
static ssize_t
quoted_decode (const unsigned char *in, size_t len, unsigned char *out)
{
register const unsigned char *inptr;
register unsigned char *outptr;
const unsigned char *inend;
unsigned char c, c1;
inend = in + len;
outptr = out;
inptr = in;
while (inptr < inend) {
c = *inptr++;
if (c == '=') {
if (inend - inptr >= 2) {
c = toupper (*inptr++);
c1 = toupper (*inptr++);
*outptr++ = (((c >= 'A' ? c - 'A' + 10 : c - '0') & 0x0f) << 4)
| ((c1 >= 'A' ? c1 - 'A' + 10 : c1 - '0') & 0x0f);
} else {
/* data was truncated */
return -1;
}
} else if (c == '_') {
/* _'s are an rfc2047 shortcut for encoding spaces */
*outptr++ = ' ';
} else {
*outptr++ = c;
}
}
return (outptr - out);
}
#define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
static unsigned char *
rfc2047_decode_word (const unsigned char *in, size_t inlen)
{
const register unsigned char *inptr;
const unsigned char *inend;
inptr = in + 2;
inend = in + inlen - 2;
inptr = memchr (inptr, '?', inend - inptr);
if (inptr && inptr[2] == '?') {
unsigned char *decoded;
ssize_t declen;
int state = 0;
int save = 0;
inptr++;
switch (*inptr) {
case 'B':
case 'b':
inptr += 2;
decoded = alloca (inend - inptr);
declen = g_mime_utils_base64_decode_step (inptr, inend - inptr, decoded, &state, &save);
break;
case 'Q':
case 'q':
inptr += 2;
decoded = alloca (inend - inptr);
declen = quoted_decode (inptr, inend - inptr, decoded);
if (declen == -1) {
d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
return NULL;
}
break;
default:
d(fprintf (stderr, "unknown encoding\n"));
return NULL;
}
if (gmime_interfaces_utf8) {
const char *charset;
unsigned char *buf;
char *charenc, *p;
size_t len;
iconv_t cd;
len = (inptr - 3) - (in + 2);
charenc = alloca (len + 1);
memcpy (charenc, in + 2, len);
charenc[len] = '\0';
charset = charenc;
/* rfc2231 updates rfc2047 encoded words...
* The ABNF given in RFC 2047 for encoded-words is:
* encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
* This specification changes this ABNF to:
* encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
*/
/* trim off the 'language' part if it's there... */
p = strchr (charset, '*');
if (p)
*p = '\0';
/* slight optimization */
if (!strcasecmp (charset, "UTF-8"))
return g_strndup (decoded, declen);
cd = g_mime_iconv_open ("UTF-8", charset);
if (cd == (iconv_t) -1) {
w(g_warning ("Cannot convert from %s to UTF-8, header display may "
"be corrupt: %s", charset, g_strerror (errno)));
charset = g_mime_charset_locale_name ();
cd = g_mime_iconv_open ("UTF-8", charset);
if (cd == (iconv_t) -1)
return NULL;
}
buf = g_mime_iconv_strndup (cd, decoded, declen);
g_mime_iconv_close (cd);
if (!buf) {
w(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be "
"corrupt: %s", declen, decoded, g_strerror (errno)));
}
return buf;
} else {
return g_strndup (decoded, declen);
}
}
return NULL;
}
/**
* g_mime_utils_8bit_header_decode:
* @in: header to decode
*
* Decodes and rfc2047 encoded header.
*
* Returns the mime encoded header as 8bit text.
**/
char *
g_mime_utils_8bit_header_decode (const unsigned char *in)
{
GString *out, *lwsp, *atom;
const unsigned char *inptr;
unsigned char *decoded;
gboolean last_was_encoded = FALSE;
gboolean last_was_space = FALSE;
out = g_string_sized_new (256);
lwsp = g_string_sized_new (256);
atom = g_string_sized_new (256);
inptr = in;
while (inptr && *inptr) {
unsigned char c = *inptr++;
if (!is_atom (c) && !last_was_space) {
/* we reached the end of an atom */
unsigned char *dword = NULL;
const unsigned char *word;
gboolean was_encoded;
if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len)))
word = dword = rfc2047_decode_word (atom->str, atom->len);
else
word = atom->str;
if (word) {
if (!(last_was_encoded && was_encoded)) {
/* rfc2047 states that you
must ignore all whitespace
between encoded words */
g_string_append (out, lwsp->str);
}
g_string_append (out, word);
g_free (dword);
} else {
was_encoded = FALSE;
g_string_append (out, lwsp->str);
g_string_append (out, atom->str);
}
last_was_encoded = was_encoded;
g_string_truncate (lwsp, 0);
g_string_truncate (atom, 0);
if (is_lwsp (c)) {
g_string_append_c (lwsp, c);
last_was_space = TRUE;
} else {
/* This is mostly here for interoperability with broken
mailers that might do something stupid like:
=?iso-8859-1?Q?blah?=:\t=?iso-8859-1?Q?I_am_broken?= */
g_string_append_c (out, c);
last_was_encoded = FALSE;
last_was_space = FALSE;
}
continue;
}
if (is_atom (c)) {
g_string_append_c (atom, c);
last_was_space = FALSE;
} else {
g_string_append_c (lwsp, c);
last_was_space = TRUE;
}
}
if (atom->len || lwsp->len) {
unsigned char *dword = NULL;
const unsigned char *word;
gboolean was_encoded;
if ((was_encoded = is_rfc2047_encoded_word (atom->str, atom->len)))
word = dword = rfc2047_decode_word (atom->str, atom->len);
else
word = atom->str;
if (word) {
if (!(last_was_encoded && was_encoded)) {
/* rfc2047 states that you
must ignore all whitespace
between encoded words */
g_string_append (out, lwsp->str);
}
g_string_append (out, word);
g_free (dword);
} else {
g_string_append (out, lwsp->str);
g_string_append (out, atom->str);
}
}
g_string_free (lwsp, TRUE);
g_string_free (atom, TRUE);
decoded = out->str;
g_string_free (out, FALSE);
return (char *) decoded;
}
/* rfc2047 version of quoted-printable */
static size_t
quoted_encode (const unsigned char *in, size_t len, unsigned char *out, gushort safemask)
{
register const unsigned char *inptr;
register unsigned char *outptr;
const unsigned char *inend;
unsigned char c;
inptr = in;
inend = in + len;
outptr = out;
while (inptr < inend) {
c = *inptr++;
if (c == ' ') {
*outptr++ = '_';
} else if (gmime_special_table[c] & safemask) {
*outptr++ = c;
} else {
*outptr++ = '=';
*outptr++ = tohex[(c >> 4) & 0xf];
*outptr++ = tohex[c & 0xf];
}
}
return (outptr - out);
}
static void
rfc2047_encode_word (GString *string, const unsigned char *word, size_t len,
const char *charset, gushort safemask)
{
unsigned char *encoded, *ptr;
unsigned char *uword = NULL;
iconv_t cd = (iconv_t) -1;
size_t enclen, pos;
int state = 0;
int save = 0;
char encoding;
if (gmime_interfaces_utf8) {
if (strcasecmp (charset, "UTF-8") != 0)
cd = g_mime_iconv_open (charset, "UTF-8");
if (cd != (iconv_t) -1) {
uword = g_mime_iconv_strndup (cd, word, len);
g_mime_iconv_close (cd);
}
if (uword) {
len = strlen (uword);
word = uword;
} else {
charset = "UTF-8";
}
}
switch (g_mime_utils_best_encoding (word, len)) {
case GMIME_PART_ENCODING_BASE64:
enclen = BASE64_ENCODE_LEN (len);
encoded = alloca (enclen);
encoding = 'b';
pos = g_mime_utils_base64_encode_close (word, len, encoded, &state, &save);
encoded[pos] = '\0';
/* remove \n chars as headers need to be wrapped differently */
ptr = encoded;
while ((ptr = memchr (ptr, '\n', strlen (ptr))))
memmove (ptr, ptr + 1, strlen (ptr));
break;
case GMIME_PART_ENCODING_QUOTEDPRINTABLE:
enclen = QP_ENCODE_LEN (len);
encoded = alloca (enclen);
encoding = 'q';
pos = quoted_encode (word, len, encoded, safemask);
encoded[pos] = '\0';
break;
default:
g_assert_not_reached ();
}
g_free (uword);
g_string_sprintfa (string, "=?%s?%c?%s?=", charset, encoding, encoded);
}
/**
* g_mime_utils_8bit_header_encode_phrase:
* @in: header to encode
*
* Encodes a header phrase according to the rules in rfc2047.
*
* Returns the header phrase as 1 encoded atom. Useful for encoding
* internet addresses.
**/
char *
g_mime_utils_8bit_header_encode_phrase (const unsigned char *in)
{
const char *charset;
GString *string;
size_t len;
char *str;
if (in == NULL)
return NULL;
len = strlen (in);
if (gmime_interfaces_utf8) {
charset = g_mime_charset_best (in, len);
charset = charset ? charset : "iso-8859-1";
} else {
charset = g_mime_charset_locale_name ();
}
string = g_string_new ("");
rfc2047_encode_word (string, in, strlen (in), charset, IS_ESAFE);
str = string->str;
g_string_free (string, FALSE);
return str;
}
enum _phrase_word_t {
WORD_ATOM,
WORD_2047
};
struct _phrase_word {
struct _phrase_word *next;
const unsigned char *start, *end;
enum _phrase_word_t type;
int encoding;
};
static gboolean
word_types_compatable (enum _phrase_word_t type1, enum _phrase_word_t type2)
{
switch (type1) {
case WORD_ATOM:
return FALSE;
case WORD_2047:
return type2 == WORD_2047;
default:
return FALSE;
}
}
static struct _phrase_word *
rfc2047_encode_phrase_get_words (const unsigned char *in)
{
const unsigned char *inptr, *start, *last;
struct _phrase_word *words, *tail, *word;
enum _phrase_word_t type = WORD_ATOM;
int count = 0, encoding = 0;
words = NULL;
tail = (struct _phrase_word *) &words;
last = start = inptr = in;
while (inptr && *inptr) {
gboolean is_space;
unichar c;
if (gmime_interfaces_utf8) {
const char *newinptr;
newinptr = unicode_next_char (inptr);
c = unicode_get_char (inptr);
if (newinptr == NULL || !unichar_validate (c)) {
w(g_warning ("Invalid UTF-8 sequence encountered"));
inptr++;
continue;
}
inptr = newinptr;
is_space = unichar_isspace (c);
} else {
is_space = isspace ((int) *inptr);
c = *inptr++;
}
if (is_space) {
if (count > 0) {
word = g_new (struct _phrase_word, 1);
word->next = NULL;
word->start = start;
word->end = last;
word->type = type;
word->encoding = encoding;
tail->next = word;
tail = word;
count = 0;
}
start = inptr;
type = WORD_ATOM;
encoding = 0;
} else {
count++;
if (c > 127 && c < 256) {
type = WORD_2047;
encoding = MAX (encoding, 2);
} else if (c >= 256) {
type = WORD_2047;
encoding = 2;
}
}
last = inptr;
}
if (count > 0) {
word = g_new (struct _phrase_word, 1);
word->next = NULL;
word->start = start;
word->end = last;
word->type = type;
word->encoding = encoding;
tail->next = word;
tail = word;
}
return words;
}
static gboolean
rfc2047_encode_phrase_merge_words (struct _phrase_word **wordsp)
{
struct _phrase_word *word, *next, *words = *wordsp;
gboolean merged = FALSE;
/* scan the list, checking for words of similar types that can be merged */
word = words;
while (word) {
next = word->next;
while (next) {
/* merge nodes of the same type AND we are not creating too long a string */
if (word_types_compatable (word->type, next->type)) {
if (next->end - word->start < GMIME_FOLD_PREENCODED) {
/* the resulting word type is the MAX of the 2 types */
word->type = MAX (word->type, next->type);
word->end = next->end;
word->next = next->next;
g_free (next);
next = word->next;
merged = TRUE;
} else {
/* if it is going to be too long, make sure we include the
separating whitespace */
word->end = next->start;
break;
}
} else {
break;
}
}
word = word->next;
}
*wordsp = words;
return merged;
}
static char *
rfc2047_encode_phrase (const unsigned char *in)
{
struct _phrase_word *words, *word, *prev = NULL;
GString *out;
char *outstr;
if (in == NULL)
return NULL;
words = rfc2047_encode_phrase_get_words (in);
if (!words)
return NULL;
while (rfc2047_encode_phrase_merge_words (&words))
;
out = g_string_new ("");
/* output words now with spaces between them */
word = words;
while (word) {
const char *start;
size_t len;
/* append correct number of spaces between words */
if (prev && !(prev->type == WORD_2047 && word->type == WORD_2047)) {
/* one or both of the words are not encoded so we write the spaces out untouched */
len = word->start - prev->end;
g_string_append_len (out, prev->end, len);
}
switch (word->type) {
case WORD_ATOM:
g_string_append_len (out, word->start, word->end - word->start);
break;
case WORD_2047:
if (prev && prev->type == WORD_2047) {
/* include the whitespace chars between these 2 words in the
resulting rfc2047 encoded word. */
len = word->end - prev->end;
start = prev->end;
/* encoded words need to be separated by linear whitespace */
g_string_append_c (out, ' ');
} else {
len = word->end - word->start;
start = word->start;
}
if (word->encoding == 1 || !gmime_interfaces_utf8)
rfc2047_encode_word (out, start, len, "iso-8859-1", IS_PSAFE);
else
rfc2047_encode_word (out, start, len,
g_mime_charset_best (start, len), IS_PSAFE);
break;
}
g_free (prev);
prev = word;
word = word->next;
}
g_free (prev);
outstr = out->str;
g_string_free (out, FALSE);
return outstr;
}
/**
* g_mime_utils_8bit_header_encode:
* @in: header to encode
*
* Encodes a header according to the rules in rfc2047.
*
* Returns the header as several encoded atoms. Useful for encoding
* headers like "Subject".
**/
char *
g_mime_utils_8bit_header_encode (const unsigned char *in)
{
return rfc2047_encode_phrase (in);
}
/**
* g_mime_utils_base64_encode_close:
* @in: input stream
* @inlen: length of the input
* @out: output string
* @state: holds the number of bits that are stored in @save
* @save: leftover bits that have not yet been encoded
*
* Base64 encodes the input stream to the output stream. Call this
* when finished encoding data with g_mime_utils_base64_encode_step to
* flush off the last little bit.
*
* Returns the number of bytes encoded.
**/
size_t
g_mime_utils_base64_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
unsigned char *outptr = out;
int c1, c2;
if (inlen > 0)
outptr += g_mime_utils_base64_encode_step (in, inlen, outptr, state, save);
c1 = ((unsigned char *)save)[1];
c2 = ((unsigned char *)save)[2];
switch (((unsigned char *)save)[0]) {
case 2:
outptr[2] = base64_alphabet [(c2 & 0x0f) << 2];
goto skip;
case 1:
outptr[2] = '=';
skip:
outptr[0] = base64_alphabet [c1 >> 2];
outptr[1] = base64_alphabet [c2 >> 4 | ((c1 & 0x3) << 4)];
outptr[3] = '=';
outptr += 4;
break;
}
*outptr++ = '\n';
*save = 0;
*state = 0;
return (outptr - out);
}
/**
* g_mime_utils_base64_encode_step:
* @in: input stream
* @inlen: length of the input
* @out: output string
* @state: holds the number of bits that are stored in @save
* @save: leftover bits that have not yet been encoded
*
* Base64 encodes a chunk of data. Performs an 'encode step', only
* encodes blocks of 3 characters to the output at a time, saves
* left-over state in state and save (initialise to 0 on first
* invocation).
*
* Returns the number of bytes encoded.
**/
size_t
g_mime_utils_base64_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
const register unsigned char *inptr;
register unsigned char *outptr;
if (inlen <= 0)
return 0;
inptr = in;
outptr = out;
if (inlen + ((unsigned char *)save)[0] > 2) {
const unsigned char *inend = in + inlen - 2;
register int c1 = 0, c2 = 0, c3 = 0;
register int already;
already = *state;
switch (((char *)save)[0]) {
case 1: c1 = ((unsigned char *)save)[1]; goto skip1;
case 2: c1 = ((unsigned char *)save)[1];
c2 = ((unsigned char *)save)[2]; goto skip2;
}
/* yes, we jump into the loop, no i'm not going to change it, its beautiful! */
while (inptr < inend) {
c1 = *inptr++;
skip1:
c2 = *inptr++;
skip2:
c3 = *inptr++;
*outptr++ = base64_alphabet [c1 >> 2];
*outptr++ = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)];
*outptr++ = base64_alphabet [((c2 & 0x0f) << 2) | (c3 >> 6)];
*outptr++ = base64_alphabet [c3 & 0x3f];
/* this is a bit ugly ... */
if ((++already) >= 19) {
*outptr++ = '\n';
already = 0;
}
}
((unsigned char *)save)[0] = 0;
inlen = 2 - (inptr - inend);
*state = already;
}
d(printf ("state = %d, inlen = %d\n", (int)((char *)save)[0], inlen));
if (inlen > 0) {
register char *saveout;
/* points to the slot for the next char to save */
saveout = & (((char *)save)[1]) + ((char *)save)[0];
/* inlen can only be 0 1 or 2 */
switch (inlen) {
case 2: *saveout++ = *inptr++;
case 1: *saveout++ = *inptr++;
}
((char *)save)[0] += inlen;
}
d(printf ("mode = %d\nc1 = %c\nc2 = %c\n",
(int)((char *)save)[0],
(int)((char *)save)[1],
(int)((char *)save)[2]));
return (outptr - out);
}
/**
* g_mime_utils_base64_decode_step:
* @in: input stream
* @inlen: max length of data to decode
* @out: output stream
* @state: holds the number of bits that are stored in @save
* @save: leftover bits that have not yet been decoded
*
* Decodes a chunk of base64 encoded data.
*
* Returns the number of bytes decoded (which have been dumped in @out).
**/
size_t
g_mime_utils_base64_decode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
const register unsigned char *inptr;
register unsigned char *outptr;
const unsigned char *inend;
register guint32 saved;
unsigned char c;
int i;
inend = in + inlen;
outptr = out;
/* convert 4 base64 bytes to 3 normal bytes */
saved = *save;
i = *state;
inptr = in;
while (inptr < inend) {
c = gmime_base64_rank[*inptr++];
if (c != 0xff) {
saved = (saved << 6) | c;
i++;
if (i == 4) {
*outptr++ = saved >> 16;
*outptr++ = saved >> 8;
*outptr++ = saved;
i = 0;
}
}
}
*save = saved;
*state = i;
/* quick scan back for '=' on the end somewhere */
/* fortunately we can drop 1 output char for each trailing = (upto 2) */
i = 2;
while (inptr > in && i) {
inptr--;
if (gmime_base64_rank[*inptr] != 0xff) {
if (*inptr == '=' && outptr > out)
outptr--;
i--;
}
}
/* if i != 0 then there is a truncation error! */
return (outptr - out);
}
/**
* g_mime_utils_uuencode_close:
* @in: input stream
* @inlen: input stream length
* @out: output stream
* @uubuf: temporary buffer of 60 bytes
* @state: holds the number of bits that are stored in @save
* @save: leftover bits that have not yet been encoded
*
* Uuencodes a chunk of data. Call this when finished encoding data
* with g_mime_utils_uuencode_step to flush off the last little bit.
*
* Returns the number of bytes encoded.
**/
size_t
g_mime_utils_uuencode_close (const unsigned char *in, size_t inlen, unsigned char *out, unsigned char *uubuf, int *state, guint32 *save)
{
register unsigned char *outptr, *bufptr;
register guint32 saved;
int uulen, uufill, i;
outptr = out;
if (inlen > 0)
outptr += g_mime_utils_uuencode_step (in, inlen, out, uubuf, state, save);
uufill = 0;
saved = *save;
i = *state & 0xff;
uulen = (*state >> 8) & 0xff;
bufptr = uubuf + ((uulen / 3) * 4);
if (i > 0) {
while (i < 3) {
saved <<= 8 | 0;
uufill++;
i++;
}
if (i == 3) {
/* convert 3 normal bytes into 4 uuencoded bytes */
unsigned char b0, b1, b2;
b0 = saved >> 16;
b1 = saved >> 8 & 0xff;
b2 = saved & 0xff;
*bufptr++ = GMIME_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
*bufptr++ = GMIME_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
*bufptr++ = GMIME_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
*bufptr++ = GMIME_UUENCODE_CHAR (b2 & 0x3f);
i = 0;
saved = 0;
uulen += 3;
}
}
if (uulen > 0) {
int cplen = ((uulen / 3) * 4);
*outptr++ = GMIME_UUENCODE_CHAR ((uulen - uufill) & 0xff);
memcpy (outptr, uubuf, cplen);
outptr += cplen;
*outptr++ = '\n';
uulen = 0;
}
*outptr++ = GMIME_UUENCODE_CHAR (uulen & 0xff);
*outptr++ = '\n';
*save = 0;
*state = 0;
return (outptr - out);
}
/**
* g_mime_utils_uuencode_step:
* @in: input stream
* @inlen: input stream length
* @out: output stream
* @uubuf: temporary buffer of 60 bytes
* @state: holds the number of bits that are stored in @save
* @save: leftover bits that have not yet been encoded
*
* Uuencodes a chunk of data. Performs an 'encode step', only encodes
* blocks of 45 characters to the output at a time, saves left-over
* state in @uubuf, @state and @save (initialize to 0 on first
* invocation).
*
* Returns the number of bytes encoded.
**/
size_t
g_mime_utils_uuencode_step (const unsigned char *in, size_t inlen, unsigned char *out, unsigned char *uubuf, int *state, guint32 *save)
{
register unsigned char *outptr, *bufptr;
const register unsigned char *inptr;
const unsigned char *inend;
register guint32 saved;
int uulen, i;
saved = *save;
i = *state & 0xff;
uulen = (*state >> 8) & 0xff;
inptr = in;
inend = in + inlen;
outptr = out;
bufptr = uubuf + ((uulen / 3) * 4);
while (inptr < inend) {
while (uulen < 45 && inptr < inend) {
while (i < 3 && inptr < inend) {
saved = (saved << 8) | *inptr++;
i++;
}
if (i == 3) {
/* convert 3 normal bytes into 4 uuencoded bytes */
unsigned char b0, b1, b2;
b0 = saved >> 16;
b1 = saved >> 8 & 0xff;
b2 = saved & 0xff;
*bufptr++ = GMIME_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
*bufptr++ = GMIME_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
*bufptr++ = GMIME_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
*bufptr++ = GMIME_UUENCODE_CHAR (b2 & 0x3f);
i = 0;
saved = 0;
uulen += 3;
}
}
if (uulen >= 45) {
*outptr++ = GMIME_UUENCODE_CHAR (uulen & 0xff);
memcpy (outptr, uubuf, ((uulen / 3) * 4));
outptr += ((uulen / 3) * 4);
*outptr++ = '\n';
uulen = 0;
bufptr = uubuf;
}
}
*save = saved;
*state = ((uulen & 0xff) << 8) | (i & 0xff);
return (outptr - out);
}
/**
* g_mime_utils_uudecode_step:
* @in: input stream
* @inlen: max length of data to decode (normally strlen (in) ??)
* @out: output stream
* @state: holds the number of bits that are stored in @save
* @save: leftover bits that have not yet been decoded
*
* Uudecodes a chunk of data. Performs a 'decode step' on a chunk of
* uuencoded data. Assumes the "begin <mode> <file name>" line has
* been stripped off.
*
* Returns the number of bytes decoded.
**/
size_t
g_mime_utils_uudecode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
const register unsigned char *inptr;
register unsigned char *outptr;
const unsigned char *inend;
unsigned char ch;
register guint32 saved;
gboolean last_was_eoln;
int uulen, i;
if (*state & GMIME_UUDECODE_STATE_END)
return 0;
saved = *save;
i = *state & 0xff;
uulen = (*state >> 8) & 0xff;
if (uulen == 0)
last_was_eoln = TRUE;
else
last_was_eoln = FALSE;
inend = in + inlen;
outptr = out;
inptr = in;
while (inptr < inend) {
if (*inptr == '\n') {
last_was_eoln = TRUE;
inptr++;
continue;
} else if (!uulen || last_was_eoln) {
/* first octet on a line is the uulen octet */
uulen = gmime_uu_rank[*inptr];
last_was_eoln = FALSE;
if (uulen == 0) {
*state |= GMIME_UUDECODE_STATE_END;
break;
}
inptr++;
continue;
}
ch = *inptr++;
if (uulen > 0) {
/* save the byte */
saved = (saved << 8) | ch;
i++;
if (i == 4) {
/* convert 4 uuencoded bytes to 3 normal bytes */
unsigned char b0, b1, b2, b3;
b0 = saved >> 24;
b1 = saved >> 16 & 0xff;
b2 = saved >> 8 & 0xff;
b3 = saved & 0xff;
if (uulen >= 3) {
*outptr++ = gmime_uu_rank[b0] << 2 | gmime_uu_rank[b1] >> 4;
*outptr++ = gmime_uu_rank[b1] << 4 | gmime_uu_rank[b2] >> 2;
*outptr++ = gmime_uu_rank[b2] << 6 | gmime_uu_rank[b3];
} else {
if (uulen >= 1) {
*outptr++ = gmime_uu_rank[b0] << 2 | gmime_uu_rank[b1] >> 4;
}
if (uulen >= 2) {
*outptr++ = gmime_uu_rank[b1] << 4 | gmime_uu_rank[b2] >> 2;
}
}
i = 0;
saved = 0;
uulen -= 3;
}
} else {
break;
}
}
*save = saved;
*state = (*state & GMIME_UUDECODE_STATE_MASK) | ((uulen & 0xff) << 8) | (i & 0xff);
return (outptr - out);
}
/**
* g_mime_utils_quoted_encode_close:
* @in: input stream
* @inlen: length of the input
* @out: output string
* @state: holds the number of bits that are stored in @save
* @save: leftover bits that have not yet been encoded
*
* Quoted-printable encodes a block of text. Call this when finished
* encoding data with g_mime_utils_quoted_encode_step to flush off the
* last little bit.
*
* Returns the number of bytes encoded.
**/
size_t
g_mime_utils_quoted_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save)
{
register unsigned char *outptr = out;
int last;
if (inlen > 0)
outptr += g_mime_utils_quoted_encode_step (in, inlen, outptr, state, save);
last = *state;
if (last != -1) {
/* space/tab must be encoded if its the last character on
the line */
if (is_qpsafe (last) && !isblank (last)) {
*outptr++ = last;
} else {
*outptr++ = '=';
*outptr++ = tohex[(last >> 4) & 0xf];
*outptr++ = tohex[last & 0xf];
}
}
*outptr++ = '\n';
*save = 0;
*state = -1;
return (outptr - out);
}
/**
* g_mime_utils_quoted_encode_step:
* @in: input stream
* @inlen: length of the input
* @out: output string
* @state: holds the number of bits that are stored in @save
* @save: leftover bits that have not yet been encoded
*
* Quoted-printable encodes a block of text. Performs an 'encode
* step', saves left-over state in state and save (initialise to -1 on
* first invocation).
*
* Returns the number of bytes encoded.
**/
size_t
g_mime_utils_quoted_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save)
{
const register unsigned char *inptr, *inend;
register unsigned char *outptr;
unsigned char c;
register int sofar = *save; /* keeps track of how many chars on a line */
register int last = *state; /* keeps track if last char to end was a space cr etc */
inptr = in;
inend = in + inlen;
outptr = out;
while (inptr < inend) {
c = *inptr++;
if (c == '\r') {
if (last != -1) {
*outptr++ = '=';
*outptr++ = tohex[(last >> 4) & 0xf];
*outptr++ = tohex[last & 0xf];
sofar += 3;
}
last = c;
} else if (c == '\n') {
if (last != -1 && last != '\r') {
*outptr++ = '=';
*outptr++ = tohex[(last >> 4) & 0xf];
*outptr++ = tohex[last & 0xf];
}
*outptr++ = '\n';
sofar = 0;
last = -1;
} else {
if (last != -1) {
if (is_qpsafe (last)) {
*outptr++ = last;
sofar++;
} else {
*outptr++ = '=';
*outptr++ = tohex[(last >> 4) & 0xf];
*outptr++ = tohex[last & 0xf];
sofar += 3;
}
}
if (is_qpsafe (c)) {
if (sofar > 74) {
*outptr++ = '=';
*outptr++ = '\n';
sofar = 0;
}
/* delay output of space char */
if (isblank (c)) {
last = c;
} else {
*outptr++ = c;
sofar++;
last = -1;
}
} else {
if (sofar > 72) {
*outptr++ = '=';
*outptr++ = '\n';
sofar = 3;
} else
sofar += 3;
*outptr++ = '=';
*outptr++ = tohex[(c >> 4) & 0xf];
*outptr++ = tohex[c & 0xf];
last = -1;
}
}
}
*save = sofar;
*state = last;
return (outptr - out);
}
/**
* g_mime_utils_quoted_decode_step: decode a chunk of QP encoded data
* @in: input stream
* @inlen: max length of data to decode
* @out: output stream
* @savestate: holds the number of bits that are stored in @save
* @saved: leftover bits that have not yet been decoded
*
* Decodes a block of quoted-printable encoded data. Performs a
* 'decode step' on a chunk of QP encoded data.
*
* Returns the number of bytes decoded.
**/
size_t
g_mime_utils_quoted_decode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *savestate, int *saved)
{
/* FIXME: this does not strip trailing spaces from lines (as
* it should, rfc 2045, section 6.7) Should it also
* canonicalise the end of line to CR LF??
*
* Note: Trailing rubbish (at the end of input), like = or =x
* or =\r will be lost.
*/
const register unsigned char *inptr;
register unsigned char *outptr;
const unsigned char *inend;
unsigned char c;
int state, save;
inend = in + inlen;
outptr = out;
d(printf ("quoted-printable, decoding text '%.*s'\n", inlen, in));
state = *savestate;
save = *saved;
inptr = in;
while (inptr < inend) {
switch (state) {
case 0:
while (inptr < inend) {
c = *inptr++;
/* FIXME: use a specials table to avoid 3 comparisons for the common case */
if (c == '=') {
state = 1;
break;
}
#ifdef CANONICALISE_EOL
/*else if (c=='\r') {
state = 3;
} else if (c=='\n') {
*outptr++ = '\r';
*outptr++ = c;
} */
#endif
else {
*outptr++ = c;
}
}
break;
case 1:
c = *inptr++;
if (c == '\n') {
/* soft break ... unix end of line */
state = 0;
} else {
save = c;
state = 2;
}
break;
case 2:
c = *inptr++;
if (isxdigit (c) && isxdigit (save)) {
c = toupper (c);
save = toupper (save);
*outptr++ = (((save >= 'A' ? save - 'A' + 10 : save - '0') & 0x0f) << 4)
| ((c >= 'A' ? c - 'A' + 10 : c - '0') & 0x0f);
} else if (c == '\n' && save == '\r') {
/* soft break ... canonical end of line */
} else {
/* just output the data */
*outptr++ = '=';
*outptr++ = save;
*outptr++ = c;
}
state = 0;
break;
#ifdef CANONICALISE_EOL
case 3:
/* convert \n -> to \r\n, leaves \r\n alone */
c = *inptr++;
if (c == '\n') {
*outptr++ = '\r';
*outptr++ = c;
} else {
*outptr++ = '\r';
*outptr++ = '\n';
*outptr++ = c;
}
state = 0;
break;
#endif
}
}
*savestate = state;
*saved = save;
return (outptr - out);
}
syntax highlighted by Code2HTML, v. 0.9.1