/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* GMime * Copyright (C) 2000-2007 Jeffrey Stedfast * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H #include #endif #define _GNU_SOURCE #include #include #include #ifdef HAVE_SYS_PARAM_H #include /* for MAXHOSTNAMELEN */ #else #define MAXHOSTNAMELEN 64 #endif #include #include #ifdef HAVE_NETDB_H #include #endif #include #include #include "gmime-utils.h" #include "gmime-table-private.h" #include "gmime-parse-utils.h" #include "gmime-part.h" #include "gmime-charset.h" #include "gmime-iconv.h" #include "gmime-iconv-utils.h" #ifdef ENABLE_WARNINGS #define w(x) x #else #define w(x) #endif /* ENABLE_WARNINGS */ #define d(x) #define GMIME_UUENCODE_CHAR(c) ((c) ? 
(c) + ' ' : '`') #define GMIME_UUDECODE_CHAR(c) (((c) - ' ') & 077) #define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2) /* date parser macros */ #define NUMERIC_CHARS "1234567890" #define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday" #define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember" #define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()" #define TIMEZONE_NUMERIC_CHARS "-+1234567890" #define TIME_CHARS "1234567890:" #define DATE_TOKEN_NON_NUMERIC (1 << 0) #define DATE_TOKEN_NON_WEEKDAY (1 << 1) #define DATE_TOKEN_NON_MONTH (1 << 2) #define DATE_TOKEN_NON_TIME (1 << 3) #define DATE_TOKEN_HAS_COLON (1 << 4) #define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5) #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6) #define DATE_TOKEN_HAS_SIGN (1 << 7) static char base64_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static unsigned char tohex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; static unsigned char gmime_base64_rank[256] = { 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255, 0,255,255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255, 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, }; static unsigned char gmime_uu_rank[256] = { 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }; static unsigned char gmime_datetok_table[256] = { 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111, 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107, 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111, 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105, 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111, 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 
111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, }; /* hrm, is there a library for this shit? */ static struct { char *name; int offset; } tz_offsets [] = { { "UT", 0 }, { "GMT", 0 }, { "EST", -500 }, /* these are all US timezones. bloody yanks */ { "EDT", -400 }, { "CST", -600 }, { "CDT", -500 }, { "MST", -700 }, { "MDT", -600 }, { "PST", -800 }, { "PDT", -700 }, { "Z", 0 }, { "A", -100 }, { "M", -1200 }, { "N", 100 }, { "Y", 1200 }, }; static char *tm_months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; static char *tm_days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; /** * g_mime_utils_header_format_date: * @date: time_t date representation * @tz_offset: Timezone offset * * Allocates a string buffer containing the rfc822 formatted date * string represented by @time and @offset. * * Returns a valid string representation of the date. **/ char * g_mime_utils_header_format_date (time_t date, int tz_offset) { struct tm tm; date += ((tz_offset / 100) * (60 * 60)) + (tz_offset % 100) * 60; #ifdef HAVE_GMTIME_R gmtime_r (&date, &tm); #else memcpy (&tm, gmtime (&date), sizeof (tm)); #endif return g_strdup_printf ("%s, %02d %s %04d %02d:%02d:%02d %+05d", tm_days[tm.tm_wday], tm.tm_mday, tm_months[tm.tm_mon], tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec, tz_offset); } /* This is where it gets ugly... 
*/ struct _date_token { struct _date_token *next; unsigned char mask; const char *start; size_t len; }; static struct _date_token * datetok (const char *date) { struct _date_token *tokens = NULL, *token, *tail = (struct _date_token *) &tokens; const char *start, *end; unsigned char mask; start = date; while (*start) { /* kill leading whitespace */ while (*start == ' ' || *start == '\t') start++; if (*start == '\0') break; mask = gmime_datetok_table[(unsigned char) *start]; /* find the end of this token */ end = start + 1; while (*end && !strchr ("-/,\t\r\n ", *end)) mask |= gmime_datetok_table[(unsigned char) *end++]; if (end != start) { token = g_malloc (sizeof (struct _date_token)); token->next = NULL; token->start = start; token->len = end - start; token->mask = mask; tail->next = token; tail = token; } if (*end) start = end + 1; else break; } return tokens; } static int decode_int (const char *in, size_t inlen) { register const char *inptr; int sign = 1, val = 0; const char *inend; inptr = in; inend = in + inlen; if (*inptr == '-') { sign = -1; inptr++; } else if (*inptr == '+') inptr++; for ( ; inptr < inend; inptr++) { if (!(*inptr >= '0' && *inptr <= '9')) return -1; else val = (val * 10) + (*inptr - '0'); } val *= sign; return val; } #if 0 static int get_days_in_month (int month, int year) { switch (month) { case 1: case 3: case 5: case 7: case 8: case 10: case 12: return 31; case 4: case 6: case 9: case 11: return 30; case 2: if (g_date_is_leap_year (year)) return 29; else return 28; default: return 0; } } #endif static int get_wday (const char *in, size_t inlen) { int wday; g_return_val_if_fail (in != NULL, -1); if (inlen < 3) return -1; for (wday = 0; wday < 7; wday++) { if (!g_ascii_strncasecmp (in, tm_days[wday], 3)) return wday; } return -1; /* unknown week day */ } static int get_mday (const char *in, size_t inlen) { int mday; g_return_val_if_fail (in != NULL, -1); mday = decode_int (in, inlen); if (mday < 0 || mday > 31) mday = -1; return mday; } 
static int get_month (const char *in, size_t inlen) { int i; g_return_val_if_fail (in != NULL, -1); if (inlen < 3) return -1; for (i = 0; i < 12; i++) { if (!g_ascii_strncasecmp (in, tm_months[i], 3)) return i; } return -1; /* unknown month */ } static int get_year (const char *in, size_t inlen) { int year; g_return_val_if_fail (in != NULL, -1); if ((year = decode_int (in, inlen)) == -1) return -1; if (year < 100) year += (year < 70) ? 2000 : 1900; if (year < 1969) return -1; return year; } static gboolean get_time (const char *in, size_t inlen, int *hour, int *min, int *sec) { register const char *inptr; int *val, colons = 0; const char *inend; *hour = *min = *sec = 0; inend = in + inlen; val = hour; for (inptr = in; inptr < inend; inptr++) { if (*inptr == ':') { colons++; switch (colons) { case 1: val = min; break; case 2: val = sec; break; default: return FALSE; } } else if (!(*inptr >= '0' && *inptr <= '9')) return FALSE; else *val = (*val * 10) + (*inptr - '0'); } return TRUE; } static int get_tzone (struct _date_token **token) { const char *inptr, *inend; size_t inlen; int i, t; for (i = 0; *token && i < 2; *token = (*token)->next, i++) { inptr = (*token)->start; inlen = (*token)->len; inend = inptr + inlen; if (*inptr == '+' || *inptr == '-') { return decode_int (inptr, inlen); } else { if (*inptr == '(') { inptr++; if (*(inend - 1) == ')') inlen -= 2; else inlen--; } for (t = 0; t < 15; t++) { size_t len = strlen (tz_offsets[t].name); if (len != inlen) continue; if (!strncmp (inptr, tz_offsets[t].name, len)) return tz_offsets[t].offset; } } } return -1; } static time_t mktime_utc (struct tm *tm) { time_t tt; tm->tm_isdst = -1; tt = mktime (tm); #if defined (HAVE_TM_GMTOFF) tt += tm->tm_gmtoff; #elif defined (HAVE_TIMEZONE) if (tm->tm_isdst > 0) { #if defined (HAVE_ALTZONE) tt -= altzone; #else /* !defined (HAVE_ALTZONE) */ tt -= (timezone - 3600); #endif } else tt -= timezone; #elif defined (HAVE__TIMEZONE) tt -= _timezone; #else #error Neither 
HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc. #endif return tt; } static time_t parse_rfc822_date (struct _date_token *tokens, int *tzone) { int hour, min, sec, offset, n; struct _date_token *token; struct tm tm; time_t t; g_return_val_if_fail (tokens != NULL, (time_t) 0); token = tokens; memset ((void *) &tm, 0, sizeof (struct tm)); if ((n = get_wday (token->start, token->len)) != -1) { /* not all dates may have this... */ tm.tm_wday = n; token = token->next; } /* get the mday */ if (!token || (n = get_mday (token->start, token->len)) == -1) return (time_t) 0; tm.tm_mday = n; token = token->next; /* get the month */ if (!token || (n = get_month (token->start, token->len)) == -1) return (time_t) 0; tm.tm_mon = n; token = token->next; /* get the year */ if (!token || (n = get_year (token->start, token->len)) == -1) return (time_t) 0; tm.tm_year = n - 1900; token = token->next; /* get the hour/min/sec */ if (!token || !get_time (token->start, token->len, &hour, &min, &sec)) return (time_t) 0; tm.tm_hour = hour; tm.tm_min = min; tm.tm_sec = sec; token = token->next; /* get the timezone */ if (!token || (n = get_tzone (&token)) == -1) { /* I guess we assume tz is GMT? */ offset = 0; } else { offset = n; } t = mktime_utc (&tm); /* t is now GMT of the time we want, but not offset by the timezone ... 
*/ /* this should convert the time to the GMT equiv time */ t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60; if (tzone) *tzone = offset; return t; } #define date_token_mask(t) (((struct _date_token *) t)->mask) #define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0) #define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0) #define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0) #define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON)) #define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0) #define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN)) #define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t)) static time_t parse_broken_date (struct _date_token *tokens, int *tzone) { gboolean got_wday, got_month, got_tzone; int hour, min, sec, offset, n; struct _date_token *token; struct tm tm; time_t t; memset ((void *) &tm, 0, sizeof (struct tm)); got_wday = got_month = got_tzone = FALSE; offset = 0; token = tokens; while (token) { if (is_weekday (token) && !got_wday) { if ((n = get_wday (token->start, token->len)) != -1) { d(printf ("weekday; ")); got_wday = TRUE; tm.tm_wday = n; goto next; } } if (is_month (token) && !got_month) { if ((n = get_month (token->start, token->len)) != -1) { d(printf ("month; ")); got_month = TRUE; tm.tm_mon = n; goto next; } } if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) { if (get_time (token->start, token->len, &hour, &min, &sec)) { d(printf ("time; ")); tm.tm_hour = hour; tm.tm_min = min; tm.tm_sec = sec; goto next; } } if (is_tzone (token) && !got_tzone) { struct _date_token *t = token; if ((n = get_tzone (&t)) != -1) { d(printf ("tzone; ")); got_tzone = TRUE; offset = n; goto next; } } if (is_numeric (token)) { if (token->len == 4 && !tm.tm_year) { if ((n = get_year 
(token->start, token->len)) != -1) { d(printf ("year; ")); tm.tm_year = n - 1900; goto next; } } else { /* Note: assumes MM-DD-YY ordering if '0 < MM < 12' holds true */ if (!got_month && token->next && is_numeric (token->next)) { if ((n = decode_int (token->start, token->len)) > 12) { goto mday; } else if (n > 0) { d(printf ("mon; ")); got_month = TRUE; tm.tm_mon = n - 1; } goto next; } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) { mday: d(printf ("mday; ")); tm.tm_mday = n; goto next; } else if (!tm.tm_year) { if ((n = get_year (token->start, token->len)) != -1) { d(printf ("2-digit year; ")); tm.tm_year = n - 1900; } goto next; } } } d(printf ("???; ")); next: token = token->next; } d(printf ("\n")); t = mktime_utc (&tm); /* t is now GMT of the time we want, but not offset by the timezone ... */ /* this should convert the time to the GMT equiv time */ t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60; if (tzone) *tzone = offset; return t; } #if 0 static void gmime_datetok_table_init (void) { int i; memset (gmime_datetok_table, 0, sizeof (gmime_datetok_table)); for (i = 0; i < 256; i++) { if (!strchr (NUMERIC_CHARS, i)) gmime_datetok_table[i] |= DATE_TOKEN_NON_NUMERIC; if (!strchr (WEEKDAY_CHARS, i)) gmime_datetok_table[i] |= DATE_TOKEN_NON_WEEKDAY; if (!strchr (MONTH_CHARS, i)) gmime_datetok_table[i] |= DATE_TOKEN_NON_MONTH; if (!strchr (TIME_CHARS, i)) gmime_datetok_table[i] |= DATE_TOKEN_NON_TIME; if (!strchr (TIMEZONE_ALPHA_CHARS, i)) gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_ALPHA; if (!strchr (TIMEZONE_NUMERIC_CHARS, i)) gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_NUMERIC; if (((char) i) == ':') gmime_datetok_table[i] |= DATE_TOKEN_HAS_COLON; if (strchr ("+-", i)) gmime_datetok_table[i] |= DATE_TOKEN_HAS_SIGN; } printf ("static unsigned char gmime_datetok_table[256] = {"); for (i = 0; i < 256; i++) { if (i % 16 == 0) printf ("\n\t"); printf ("%3d,", gmime_datetok_table[i]); } printf ("\n};\n"); } #endif /** * 
g_mime_utils_header_decode_date: * @in: input date string * @tz_offset: timezone offset * * Decodes the rfc822 date string and saves the GMT offset into * @saveoffset if non-NULL. * * Returns the time_t representation of the date string specified by * @in. If 'saveoffset' is non-NULL, the value of the timezone offset * will be stored. **/ time_t g_mime_utils_header_decode_date (const char *in, int *tz_offset) { struct _date_token *token, *tokens; time_t date; if (!(tokens = datetok (in))) { if (tz_offset) *tz_offset = 0; return (time_t) 0; } if (!(date = parse_rfc822_date (tokens, tz_offset))) date = parse_broken_date (tokens, tz_offset); /* cleanup */ while (tokens) { token = tokens; tokens = tokens->next; g_free (token); } return date; } /** * g_mime_utils_generate_message_id: * @fqdn: Fully qualified domain name * * Generates a unique Message-Id. * * Returns a unique string in an addr-spec format suitable for use as * a Message-Id. **/ char * g_mime_utils_generate_message_id (const char *fqdn) { #ifdef G_THREADS_ENABLED static GStaticMutex mutex = G_STATIC_MUTEX_INIT; #define MUTEX_LOCK() g_static_mutex_lock (&mutex) #define MUTEX_UNLOCK() g_static_mutex_unlock (&mutex) #else #define MUTEX_LOCK() #define MUTEX_UNLOCK() #endif static unsigned int count = 0; char host[MAXHOSTNAMELEN + 1]; #ifdef HAVE_GETADDRINFO struct addrinfo hints, *res; char *name = NULL; #endif char *msgid; if (!fqdn) { #ifdef HAVE_GETHOSTNAME if (gethostname (host, sizeof (host)) == 0) { #ifdef HAVE_GETADDRINFO memset (&hints, 0, sizeof (hints)); hints.ai_flags = AI_CANONNAME; if (getaddrinfo (host, NULL, &hints, &res) == 0) { name = g_strdup (res->ai_canonname); freeaddrinfo (res); } #endif /* HAVE_GETADDRINFO */ } else { host[0] = '\0'; } #else host[0] = '\0'; #endif /* HAVE_GETHOSTNAME */ #ifdef HAVE_GETADDRINFO fqdn = name != NULL ? name : (*host ? host : "localhost.localdomain"); g_free (name); #else fqdn = *host ? 
host : "localhost.localdomain"; #endif } MUTEX_LOCK (); msgid = g_strdup_printf ("%ul.%ul.%ul@%s", (unsigned int) time (NULL), getpid (), count++, fqdn); MUTEX_UNLOCK (); return msgid; } static char * decode_addrspec (const char **in) { char *domain, *word, *str = NULL; const char *inptr; GString *addrspec; decode_lwsp (in); inptr = *in; if (!(word = decode_word (&inptr))) { w(g_warning ("No local-part in addr-spec: %s", *in)); return NULL; } addrspec = g_string_new (word); g_free (word); /* get the rest of the local-part */ decode_lwsp (&inptr); while (*inptr == '.') { g_string_append_c (addrspec, *inptr++); if ((word = decode_word (&inptr))) { g_string_append (addrspec, word); decode_lwsp (&inptr); g_free (word); } else { w(g_warning ("Invalid local-part in addr-spec: %s", *in)); goto exception; } } /* we should be at the '@' now... */ if (*inptr++ != '@') { w(g_warning ("Invalid addr-spec; missing '@': %s", *in)); goto exception; } if (!(domain = decode_domain (&inptr))) { w(g_warning ("No domain in addr-spec: %s", *in)); goto exception; } g_string_append_c (addrspec, '@'); g_string_append (addrspec, domain); g_free (domain); str = addrspec->str; g_string_free (addrspec, FALSE); *in = inptr; return str; exception: g_string_free (addrspec, TRUE); return NULL; } static char * decode_msgid (const char **in) { const char *inptr = *in; char *msgid = NULL; decode_lwsp (&inptr); if (*inptr != '<') { w(g_warning ("Invalid msg-id; missing '<': %s", *in)); } else { inptr++; } decode_lwsp (&inptr); if ((msgid = decode_addrspec (&inptr))) { decode_lwsp (&inptr); if (*inptr != '>') { w(g_warning ("Invalid msg-id; missing '>': %s", *in)); } else { inptr++; } *in = inptr; } else { w(g_warning ("Invalid msg-id; missing addr-spec: %s", *in)); *in = inptr; while (*inptr && *inptr != '>') inptr++; msgid = g_strndup (*in, inptr - *in); *in = inptr; } return msgid; } /** * g_mime_utils_decode_message_id: * @message_id: string containing a message-id * * Decodes a msg-id as defined 
by rfc822. * * Returns the addr-spec portion of the msg-id. **/ char * g_mime_utils_decode_message_id (const char *message_id) { g_return_val_if_fail (message_id != NULL, NULL); return decode_msgid (&message_id); } /** * g_mime_references_decode: * @text: string containing a list of msg-ids * * Decodes a list of msg-ids as in the References and/or In-Reply-To * headers defined in rfc822. * * Returns a list of referenced msg-ids. **/ GMimeReferences * g_mime_references_decode (const char *text) { GMimeReferences *refs, *tail, *ref; const char *inptr = text; char *word, *msgid; g_return_val_if_fail (text != NULL, NULL); refs = NULL; tail = (GMimeReferences *) &refs; while (*inptr) { decode_lwsp (&inptr); if (*inptr == '<') { /* looks like a msg-id */ if ((msgid = decode_msgid (&inptr))) { ref = g_new (GMimeReferences, 1); ref->next = NULL; ref->msgid = msgid; tail->next = ref; tail = ref; } else { w(g_warning ("Invalid References header: %s", inptr)); break; } } else if (*inptr) { /* looks like part of a phrase */ if ((word = decode_word (&inptr))) { g_free (word); } else { w(g_warning ("Invalid References header: %s", inptr)); break; } } } return refs; } /** * g_mime_references_append: * @refs: the address of a #GMimeReferences list * @msgid: a message-id string * * Appends a reference to msgid to the list of references. **/ void g_mime_references_append (GMimeReferences **refs, const char *msgid) { GMimeReferences *ref; g_return_if_fail (refs != NULL); g_return_if_fail (msgid != NULL); ref = (GMimeReferences *) refs; while (ref->next) ref = ref->next; ref->next = g_new (GMimeReferences, 1); ref->next->msgid = g_strdup (msgid); ref->next->next = NULL; } /** * g_mime_references_clear: * @refs: address of a #GMimeReferences list * * Clears the #GMimeReferences list and resets it to %NULL. 
**/ void g_mime_references_clear (GMimeReferences **refs) { GMimeReferences *ref, *next; g_return_if_fail (refs != NULL); ref = *refs; while (ref) { next = ref->next; g_free (ref->msgid); g_free (ref); ref = next; } *refs = NULL; } /** * g_mime_references_next: * @ref: a #GMimeReferences list * * Advances to the next reference node in the #GMimeReferences list. * * Returns the next reference node in the #GMimeReferences list. **/ GMimeReferences * g_mime_references_next (const GMimeReferences *ref) { return ref ? ref->next : NULL; } static gboolean is_rfc2047_token (const char *inptr, size_t len) { if (len < 8 || strncmp (inptr, "=?", 2) != 0 || strncmp (inptr + len - 2, "?=", 2) != 0) return FALSE; inptr += 3; len -= 3; while (*inptr != '?' && len > 0) { inptr++; len--; } if (*inptr != '?' || len < 4) return FALSE; if (inptr[1] != 'q' && inptr[1] != 'Q' && inptr[1] != 'b' && inptr[1] != 'B') return FALSE; inptr += 2; len -= 2; if (*inptr != '?') return FALSE; return TRUE; } static char * header_fold (const char *in, gboolean structured) { gboolean last_was_lwsp = FALSE; register const char *inptr; size_t len, outlen, i; size_t fieldlen; GString *out; char *ret; inptr = in; len = strlen (in); if (len <= GMIME_FOLD_LEN + 1) return g_strdup (in); out = g_string_new (""); fieldlen = strcspn (inptr, ": \t\n"); g_string_append_len (out, inptr, fieldlen); outlen = fieldlen; inptr += fieldlen; while (*inptr && *inptr != '\n') { len = strcspn (inptr, " \t\n"); if (len > 1 && outlen + len > GMIME_FOLD_LEN) { if (outlen > 1 && out->len > fieldlen + 2) { if (last_was_lwsp) { if (structured) out->str[out->len - 1] = '\t'; g_string_insert_c (out, out->len - 1, '\n'); } else g_string_append (out, "\n\t"); outlen = 1; } if (!structured && !is_rfc2047_token (inptr, len)) { /* check for very long words, just cut them up */ while (outlen + len > GMIME_FOLD_LEN) { for (i = 0; i < GMIME_FOLD_LEN - outlen; i++) g_string_append_c (out, inptr[i]); inptr += GMIME_FOLD_LEN - outlen; len -= 
GMIME_FOLD_LEN - outlen; g_string_append (out, "\n\t"); outlen = 1; } } else { g_string_append_len (out, inptr, len); outlen += len; inptr += len; } last_was_lwsp = FALSE; } else if (len > 0) { g_string_append_len (out, inptr, len); outlen += len; inptr += len; last_was_lwsp = FALSE; } else { last_was_lwsp = TRUE; if (*inptr == '\t') { /* tabs are a good place to fold, odds are that this is where the previous mailer folded it */ g_string_append (out, "\n\t"); outlen = 1; while (is_blank (*inptr)) inptr++; } else { g_string_append_c (out, *inptr++); outlen++; } } } if (*inptr == '\n' && out->str[out->len - 1] != '\n') g_string_append_c (out, '\n'); ret = out->str; g_string_free (out, FALSE); return ret; } /** * g_mime_utils_structured_header_fold: * @in: input header string * * Folds a structured header according to the rules in rfc822. * * Returns an allocated string containing the folded header. **/ char * g_mime_utils_structured_header_fold (const char *in) { return header_fold (in, TRUE); } /** * g_mime_utils_unstructured_header_fold: * @in: input header string * * Folds an unstructured header according to the rules in rfc822. * * Returns an allocated string containing the folded header. **/ char * g_mime_utils_unstructured_header_fold (const char *in) { return header_fold (in, FALSE); } /** * g_mime_utils_header_fold: * @in: input header string * * Folds a structured header according to the rules in rfc822. * * Returns an allocated string containing the folded header. **/ char * g_mime_utils_header_fold (const char *in) { return header_fold (in, TRUE); } /** * g_mime_utils_header_printf: * @format: string format * @Varargs: arguments * * Allocates a buffer containing a formatted header specified by the * @Varargs. * * Returns an allocated string containing the folded header specified * by @format and the following arguments. **/ char * g_mime_utils_header_printf (const char *format, ...) 
{ char *buf, *ret; va_list ap; va_start (ap, format); buf = g_strdup_vprintf (format, ap); va_end (ap); ret = header_fold (buf, TRUE); g_free (buf); return ret; } static gboolean need_quotes (const char *string) { gboolean quoted = FALSE; const char *inptr; inptr = string; while (*inptr) { if (*inptr == '\\') inptr++; else if (*inptr == '"') quoted = !quoted; else if (!quoted && (is_tspecial (*inptr) || *inptr == '.')) return TRUE; if (*inptr) inptr++; } return FALSE; } /** * g_mime_utils_quote_string: * @string: input string * * Quotes @string as needed according to the rules in rfc2045. * * Returns an allocated string containing the escaped and quoted (if * needed to be) input string. The decision to quote the string is * based on whether or not the input string contains any 'tspecials' * as defined by rfc2045. **/ char * g_mime_utils_quote_string (const char *string) { gboolean quote; const char *c; char *qstring; GString *out; out = g_string_new (""); if ((quote = need_quotes (string))) g_string_append_c (out, '"'); for (c = string; *c; c++) { if ((*c == '"' && quote) || *c == '\\') g_string_append_c (out, '\\'); g_string_append_c (out, *c); } if (quote) g_string_append_c (out, '"'); qstring = out->str; g_string_free (out, FALSE); return qstring; } /** * g_mime_utils_unquote_string: Unquote a string. * @string: string * * Unquotes and unescapes a string. 
**/ void g_mime_utils_unquote_string (char *string) { /* if the string is quoted, unquote it */ register char *inptr = string; int escaped = FALSE; int quoted = FALSE; if (!string) return; while (*inptr) { if (*inptr == '\\') { if (escaped) *string++ = *inptr++; else inptr++; escaped = !escaped; } else if (*inptr == '"') { if (escaped) { *string++ = *inptr++; escaped = FALSE; } else { quoted = !quoted; inptr++; } } else { *string++ = *inptr++; escaped = FALSE; } } *string = '\0'; } /** * g_mime_utils_text_is_8bit: * @text: text to check for 8bit chars * @len: text length * * Determines if @text contains 8bit characters within the first @len * bytes. * * Returns %TRUE if the text contains 8bit characters or %FALSE * otherwise. **/ gboolean g_mime_utils_text_is_8bit (const unsigned char *text, size_t len) { register const unsigned char *inptr; const unsigned char *inend; g_return_val_if_fail (text != NULL, FALSE); inend = text + len; for (inptr = text; *inptr && inptr < inend; inptr++) if (*inptr > (unsigned char) 127) return TRUE; return FALSE; } /** * g_mime_utils_best_encoding: * @text: text to encode * @len: text length * * Determines the best content encoding for the first @len bytes of * @text. * * Returns a #GMimePartEncodingType that is determined to be the best * encoding type for the specified block of text. 
("best" in this * particular case means best compression) **/ GMimePartEncodingType g_mime_utils_best_encoding (const unsigned char *text, size_t len) { const unsigned char *ch, *inend; size_t count = 0; inend = text + len; for (ch = text; ch < inend; ch++) if (*ch > (unsigned char) 127) count++; if ((float) count <= len * 0.17) return GMIME_PART_ENCODING_QUOTEDPRINTABLE; else return GMIME_PART_ENCODING_BASE64; } static char * decode_8bit (const char *text, size_t len) { const char *fallback_charsets[3] = { "UTF-8", NULL, NULL }; size_t inleft, outleft, outlen, rc, min, n; const char **charsets, *best; char *out, *outbuf; const char *inbuf; iconv_t cd; int i; if (!(charsets = g_mime_user_charsets ())) { inbuf = g_mime_locale_charset (); if (g_ascii_strcasecmp (inbuf, "UTF-8") != 0) fallback_charsets[1] = inbuf; charsets = fallback_charsets; } min = len; best = charsets[0]; outlen = (len * 2) + 16; out = g_malloc (outlen + 1); for (i = 0; charsets[i]; i++) { if ((cd = g_mime_iconv_open ("UTF-8", charsets[i])) == (iconv_t) -1) continue; outleft = outlen; outbuf = out; inleft = len; inbuf = text; n = 0; do { rc = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft); if (rc == (size_t) -1) { if (errno == EINVAL) { /* incomplete sequence at the end of the input buffer */ n += inleft; break; } if (errno == E2BIG) { outlen += (inleft * 2) + 16; rc = (size_t) (outbuf - out); out = g_realloc (out, outlen + 1); outbuf = out + rc; } else { inleft--; inbuf++; n++; } } } while (inleft > 0); rc = iconv (cd, NULL, NULL, &outbuf, &outleft); *outbuf = '\0'; g_mime_iconv_close (cd); if (rc != (size_t) -1 && n == 0) return out; if (n < min) { best = charsets[i]; min = n; } } /* if we get here, then none of the charsets fit the 8bit text flawlessly... * try to find the one that fit the best and use that to convert what we can, * replacing any byte we can't convert with a '?' */ if ((cd = g_mime_iconv_open ("UTF-8", best)) == (iconv_t) -1) { /* this shouldn't happen... 
but if we are here, then * it did... the only thing we can do at this point * is replace the 8bit garbage and pray */ register const char *inptr = text; const char *inend = inptr + len; outbuf = out; while (inptr < inend) { if (is_ascii (*inptr)) *outbuf++ = *inptr++; else *outbuf++ = '?'; } *outbuf = '\0'; return out; } outleft = outlen; outbuf = out; inleft = len; inbuf = text; do { rc = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft); if (rc == (size_t) -1) { if (errno == EINVAL) { /* incomplete sequence at the end of the input buffer */ break; } if (errno == E2BIG) { rc = outbuf - out; outlen += inleft * 2 + 16; out = g_realloc (out, outlen + 1); outbuf = out + rc; } else { *outbuf++ = '?'; outleft--; inleft--; inbuf++; } } } while (inleft > 0); iconv (cd, NULL, NULL, &outbuf, &outleft); *outbuf = '\0'; g_mime_iconv_close (cd); return out; } /* this decodes rfc2047's version of quoted-printable */ static ssize_t quoted_decode (const unsigned char *in, size_t len, unsigned char *out) { register const unsigned char *inptr; register unsigned char *outptr; const unsigned char *inend; unsigned char c, c1; inend = in + len; outptr = out; inptr = in; while (inptr < inend) { c = *inptr++; if (c == '=') { if (inend - inptr >= 2) { c = toupper (*inptr++); c1 = toupper (*inptr++); *outptr++ = (((c >= 'A' ? c - 'A' + 10 : c - '0') & 0x0f) << 4) | ((c1 >= 'A' ? 
c1 - 'A' + 10 : c1 - '0') & 0x0f); } else { /* data was truncated */ return -1; } } else if (c == '_') { /* _'s are an rfc2047 shortcut for encoding spaces */ *outptr++ = ' '; } else { *outptr++ = c; } } return (outptr - out); } #define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2)) static char * rfc2047_decode_word (const char *in, size_t inlen) { const unsigned char *instart = (const unsigned char *) in; const register unsigned char *inptr = instart + 2; const unsigned char *inend = instart + inlen - 2; unsigned char *decoded; const char *charset; char *charenc, *p; guint32 save = 0; ssize_t declen; int state = 0; size_t len; iconv_t cd; char *buf; if (!(inptr = memchr (inptr, '?', inend - inptr)) || inptr[2] != '?') return NULL; inptr++; switch (*inptr) { case 'B': case 'b': inptr += 2; decoded = g_alloca (inend - inptr); declen = g_mime_utils_base64_decode_step (inptr, inend - inptr, decoded, &state, &save); break; case 'Q': case 'q': inptr += 2; decoded = g_alloca (inend - inptr); declen = quoted_decode (inptr, inend - inptr, decoded); if (declen == -1) { d(fprintf (stderr, "encountered broken 'Q' encoding\n")); return NULL; } break; default: d(fprintf (stderr, "unknown encoding\n")); return NULL; } len = (inptr - 3) - (instart + 2); charenc = g_alloca (len + 1); memcpy (charenc, in + 2, len); charenc[len] = '\0'; charset = charenc; /* rfc2231 updates rfc2047 encoded words... * The ABNF given in RFC 2047 for encoded-words is: * encoded-word := "=?" charset "?" encoding "?" encoded-text "?=" * This specification changes this ABNF to: * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?=" */ /* trim off the 'language' part if it's there... */ if ((p = strchr (charset, '*'))) *p = '\0'; /* slight optimization? 
*/ if (!g_ascii_strcasecmp (charset, "UTF-8")) { p = (char *) decoded; len = declen; while (!g_utf8_validate (p, len, (const char **) &p)) { len = declen - (p - (char *) decoded); *p = '?'; } return g_strndup ((char *) decoded, declen); } if ((cd = g_mime_iconv_open ("UTF-8", charset)) == (iconv_t) -1) { w(g_warning ("Cannot convert from %s to UTF-8, header display may " "be corrupt: %s", charset, g_strerror (errno))); return decode_8bit ((char *) decoded, declen); } buf = g_mime_iconv_strndup (cd, (char *) decoded, declen); g_mime_iconv_close (cd); if (buf != NULL) return buf; w(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be " "corrupt: %s", declen, decoded, g_strerror (errno))); return decode_8bit ((char *) decoded, declen); } /** * g_mime_utils_header_decode_text: * @in: header to decode * * Decodes an rfc2047 encoded 'text' header. * * Note: See g_mime_set_user_charsets() for details on how charset * conversion is handled for unencoded 8bit text and/or wrongly * specified rfc2047 encoded-word tokens. * * Returns a newly allocated UTF-8 string representing the the decoded * header. **/ char * g_mime_utils_header_decode_text (const char *in) { register const char *inptr = in; gboolean encoded = FALSE; const char *lwsp, *text; size_t nlwsp, n; gboolean ascii; char *decoded; GString *out; if (in == NULL) return g_strdup (""); out = g_string_sized_new (strlen (in) + 1); while (*inptr != '\0') { lwsp = inptr; while (is_lwsp (*inptr)) inptr++; nlwsp = (size_t) (inptr - lwsp); if (*inptr != '\0') { text = inptr; ascii = TRUE; #ifdef ENABLE_RFC2047_WORKAROUNDS if (!strncmp (inptr, "=?", 2) && inptr[2] != '?') { inptr += 2; /* skip past the charset */ while (*inptr && *inptr != '?') { ascii = ascii && is_ascii (*inptr); inptr++; } /* sanity check encoding type */ if (inptr[0] != '?' 
|| !strchr ("BbQq", inptr[1]) || inptr[2] != '?') goto non_rfc2047; inptr += 3; /* find the end of the rfc2047 encoded word token */ while (*inptr && strncmp (inptr, "?=", 2) != 0) { ascii = ascii && is_ascii (*inptr); inptr++; } if (!strncmp (inptr, "?=", 2)) inptr += 2; } else { non_rfc2047: /* stop if we encounter a possible rfc2047 encoded * token even if it's inside another word, sigh. */ while (*inptr && !is_lwsp (*inptr) && strncmp (inptr, "=?", 2) != 0) { ascii = ascii && is_ascii (*inptr); inptr++; } } #else while (*inptr && !is_lwsp (*inptr)) { ascii = ascii && is_ascii (*inptr); inptr++; } #endif /* ENABLE_RFC2047_WORKAROUNDS */ n = (size_t) (inptr - text); if (is_rfc2047_encoded_word (text, n)) { if ((decoded = rfc2047_decode_word (text, n))) { /* rfc2047 states that you must ignore all * whitespace between encoded words */ if (!encoded) g_string_append_len (out, lwsp, nlwsp); g_string_append (out, decoded); g_free (decoded); encoded = TRUE; } else { /* append lwsp and invalid rfc2047 encoded-word token */ g_string_append_len (out, lwsp, nlwsp + n); encoded = FALSE; } } else { /* append lwsp */ g_string_append_len (out, lwsp, nlwsp); /* append word token */ if (!ascii) { /* *sigh* I hate broken mailers... */ decoded = decode_8bit (text, n); g_string_append (out, decoded); g_free (decoded); } else { g_string_append_len (out, text, n); } encoded = FALSE; } } else { /* appending trailing lwsp */ g_string_append_len (out, lwsp, nlwsp); break; } } decoded = out->str; g_string_free (out, FALSE); return decoded; } /** * g_mime_utils_header_decode_phrase: * @in: header to decode * * Decodes an rfc2047 encoded 'phrase' header. * * Note: See g_mime_set_user_charsets() for details on how charset * conversion is handled for unencoded 8bit text and/or wrongly * specified rfc2047 encoded-word tokens. * * Returns a newly allocated UTF-8 string representing the the decoded * header. 
**/
char *
g_mime_utils_header_decode_phrase (const char *in)
{
	register const char *inptr = in;
	gboolean encoded = FALSE;
	const char *lwsp, *text;
	size_t nlwsp, n;
	gboolean ascii;
	char *decoded;
	GString *out;
	
	if (in == NULL)
		return g_strdup ("");
	
	out = g_string_sized_new (strlen (in) + 1);
	
	while (*inptr != '\0') {
		/* the run of whitespace in front of the next token */
		lwsp = inptr;
		while (is_lwsp (*inptr))
			inptr++;
		
		nlwsp = (size_t) (inptr - lwsp);
		
		text = inptr;
		if (is_atom (*inptr)) {
			while (is_atom (*inptr))
				inptr++;
			
			n = (size_t) (inptr - text);
			if (is_rfc2047_encoded_word (text, n)) {
				if ((decoded = rfc2047_decode_word (text, n))) {
					/* rfc2047 states that you must ignore all
					 * whitespace between encoded words */
					if (!encoded)
						g_string_append_len (out, lwsp, nlwsp);
					
					g_string_append (out, decoded);
					g_free (decoded);
					
					encoded = TRUE;
				} else {
					/* append lwsp and invalid rfc2047 encoded-word token */
					g_string_append_len (out, lwsp, nlwsp + n);
					encoded = FALSE;
				}
			} else {
				/* append lwsp and atom token */
				g_string_append_len (out, lwsp, nlwsp + n);
				encoded = FALSE;
			}
		} else {
			/* not an atom: copy the whitespace, then take everything
			 * up to the next whitespace as one token */
			g_string_append_len (out, lwsp, nlwsp);
			
			ascii = TRUE;
			while (*inptr && !is_lwsp (*inptr)) {
				ascii = ascii && is_ascii (*inptr);
				inptr++;
			}
			
			n = (size_t) (inptr - text);
			
			if (!ascii) {
				/* *sigh* I hate broken mailers... */
				decoded = decode_8bit (text, n);
				g_string_append (out, decoded);
				g_free (decoded);
			} else {
				g_string_append_len (out, text, n);
			}
			
			encoded = FALSE;
		}
	}
	
	/* hand the character data over to the caller */
	decoded = out->str;
	g_string_free (out, FALSE);
	
	return decoded;
}


/* rfc2047 version of quoted-printable
 *
 * Encodes @len bytes of @in into @out: a space becomes '_', a byte whose
 * gmime_special_table entry has a bit of @safemask set is copied as is,
 * and every other byte becomes "=XX" (uppercase hex).  No terminator is
 * written.  Returns the number of bytes written to @out. */
static size_t
quoted_encode (const char *in, size_t len, unsigned char *out, gushort safemask)
{
	register const unsigned char *inptr = (const unsigned char *) in;
	const unsigned char *inend = inptr + len;
	register unsigned char *outptr = out;
	unsigned char c;
	
	while (inptr < inend) {
		c = *inptr++;
		if (c == ' ') {
			*outptr++ = '_';
		} else if (gmime_special_table[c] & safemask) {
			*outptr++ = c;
		} else {
			*outptr++ = '=';
			*outptr++ = tohex[(c >> 4) & 0xf];
			*outptr++ = tohex[c & 0xf];
		}
	}
	
	return (outptr - out);
}

/* Appends one encoded-word ("=?charset?b-or-q?data?=") for the @len bytes
 * at @word to @string.
 *
 * Unless @charset is UTF-8, the word is first converted from UTF-8 to
 * @charset; if that conversion cannot be done, the original bytes are
 * encoded and labelled "UTF-8" instead.  base64 or quoted-printable is
 * picked by g_mime_utils_best_encoding(). */
static void
rfc2047_encode_word (GString *string, const char *word, size_t len,
		     const char *charset, gushort safemask)
{
	register char *inptr, *outptr;
	iconv_t cd = (iconv_t) -1;
	unsigned char *encoded;
	size_t enclen, pos;
	char *uword = NULL;
	guint32 save = 0;
	int state = 0;
	char encoding;
	
	if (g_ascii_strcasecmp (charset, "UTF-8") != 0)
		cd = g_mime_iconv_open (charset, "UTF-8");
	
	if (cd != (iconv_t) -1) {
		uword = g_mime_iconv_strndup (cd, (char *) word, len);
		g_mime_iconv_close (cd);
	}
	
	if (uword) {
		len = strlen (uword);
		word = uword;
	} else {
		/* no conversion happened, so the bytes are still UTF-8 */
		charset = "UTF-8";
	}
	
	switch (g_mime_utils_best_encoding ((const unsigned char *) word, len)) {
	case GMIME_PART_ENCODING_BASE64:
		enclen = BASE64_ENCODE_LEN (len);
		encoded = g_alloca (enclen + 1);
		
		encoding = 'b';
		
		pos = g_mime_utils_base64_encode_close ((const unsigned char *) word, len, encoded, &state, &save);
		encoded[pos] = '\0';
		
		/* remove \n chars as headers need to be wrapped differently */
		if (G_UNLIKELY (inptr = strchr ((char *) encoded, '\n'))) {
			outptr = inptr++;
			while (G_LIKELY (*inptr)) {
				if (G_LIKELY (*inptr != '\n'))
					*outptr++ = *inptr;
				
				inptr++;
			}
			
			*outptr = '\0';
		}
		
		break;
	case GMIME_PART_ENCODING_QUOTEDPRINTABLE:
		enclen = QP_ENCODE_LEN (len);
		encoded = g_alloca (enclen + 1);
		
		encoding = 'q';
		
		pos = quoted_encode (word, len, encoded, safemask);
		encoded[pos] = '\0';
		
		break;
	default:
		encoded = NULL;
		encoding = '\0';
		g_assert_not_reached ();
	}
	
	g_free (uword);
	
	g_string_append_printf (string, "=?%s?%c?%s?=", charset, encoding, encoded);
}


/* word classes used by the encoder; the numeric order matters because
 * the code below combines types with MAX() */
enum _rfc822_word_t {
	WORD_ATOM,
	WORD_QSTRING,
	WORD_2047
};

/* one node of the singly linked word list; `next' has to stay the first
 * member (see the tail trick in rfc2047_encode_get_rfc822_words()) */
struct _rfc822_word {
	struct _rfc822_word *next;
	enum _rfc822_word_t type;
	const char *start, *end;    /* [start, end) within the input string */
	int encoding;               /* 0, 1 (chars 128..255 seen) or 2 (chars >= 256 seen) */
};

/* Tells whether a word of @type1 may be merged with a following word of
 * @type2: an atom with anything but an atom, a qstring with anything but
 * an encoded word, an encoded word only with another encoded word. */
static gboolean
word_types_compatable (enum _rfc822_word_t type1, enum _rfc822_word_t type2)
{
	switch (type1) {
	case WORD_ATOM:
		return type2 != WORD_ATOM;
	case WORD_QSTRING:
		return type2 != WORD_2047;
	case WORD_2047:
		return type2 == WORD_2047;
	default:
		return FALSE;
	}
}

/* okay, so 'unstructured text' fields don't actually contain 'word'
 * tokens, but we can group stuff similarly...
 *
 * Splits @in into a linked list of words.  A word ends at whitespace or
 * once it has collected GMIME_FOLD_PREENCODED characters.  When @phrase
 * is set, a non-atom character below 128 turns the word into a qstring.
 * Returns the list head, or NULL if no word was found. */
static struct _rfc822_word *
rfc2047_encode_get_rfc822_words (const char *in, gboolean phrase)
{
	struct _rfc822_word *words, *tail, *word;
	enum _rfc822_word_t type = WORD_ATOM;
	const char *inptr, *start, *last;
	int count = 0, encoding = 0;
	
	words = NULL;
	/* `next' is the first struct member, so treating &words as a node
	 * makes the first `tail->next = word' store the list head */
	tail = (struct _rfc822_word *) &words;
	
	last = start = inptr = in;
	while (inptr && *inptr) {
		const char *newinptr;
		gunichar c;
		
		newinptr = g_utf8_next_char (inptr);
		c = g_utf8_get_char (inptr);
		if (newinptr == NULL || !g_unichar_validate (c)) {
			w(g_warning ("Invalid UTF-8 sequence encountered"));
			/* skip a single byte and try again */
			inptr++;
			continue;
		}
		
		inptr = newinptr;
		
		if (c < 256 && is_lwsp (c)) {
			/* whitespace ends the current word, if there is one */
			if (count > 0) {
				word = g_new (struct _rfc822_word, 1);
				word->next = NULL;
				word->start = start;
				word->end = last;
				word->type = type;
				word->encoding = encoding;
				
				tail->next = word;
				tail = word;
				count = 0;
			}
			
			start = inptr;
			type = WORD_ATOM;
			encoding = 0;
		} else {
			count++;
			if (phrase && c < 128) {
				/* phrases can have qstring words */
				if (!is_atom (c))
					type = MAX (type, WORD_QSTRING);
			} else if (c > 127 && c < 256) {
				type = WORD_2047;
				encoding = MAX (encoding, 1);
			} else if (c >= 256) {
				type = WORD_2047;
				encoding = 2;
			}
			
			/* cut the word once it has reached the character limit */
			if (count >= GMIME_FOLD_PREENCODED) {
				word = g_new (struct _rfc822_word, 1);
				word->next = NULL;
				word->start = start;
				word->end = inptr;
				word->type = type;
				word->encoding = encoding;
				
				tail->next = word;
				tail = word;
				count = 0;
				
				start = inptr;
				type = WORD_ATOM;
				encoding = 0;
			}
		}
		
		last = inptr;
	}
	
	/* flush the word that was still being collected */
	if (count > 0) {
		word = g_new (struct _rfc822_word, 1);
		word->next = NULL;
		word->start = start;
		word->end = last;
		word->type = type;
		word->encoding = encoding;
		
		tail->next = word;
		tail = word;
	}
	
	return words;
}

/* length limit for a merged word: GMIME_FOLD_PREENCODED for encoded
 * words, GMIME_FOLD_LEN - 8 for everything else */
#define MERGED_WORD_LT_FOLDLEN(wlen, type) ((type) == WORD_2047 ? (wlen) < GMIME_FOLD_PREENCODED : (wlen) < (GMIME_FOLD_LEN - 8))

/* Makes one pass over the list *@wordsp, folding each word's successors
 * into it for as long as the types are compatible and the result stays
 * below the length limit.  Returns TRUE if anything was merged. */
static gboolean
rfc2047_encode_merge_rfc822_words (struct _rfc822_word **wordsp)
{
	struct _rfc822_word *word, *next, *words = *wordsp;
	gboolean merged = FALSE;
	
	/* scan the list, checking for words of similar types that can be merged */
	word = words;
	while (word) {
		next = word->next;
		
		while (next) {
			/* merge nodes of the same type AND we are not creating too long a string */
			if (word_types_compatable (word->type, next->type)) {
				if (MERGED_WORD_LT_FOLDLEN (next->end - word->start, MAX (word->type, next->type))) {
					/* the resulting word type is the MAX of the 2 types */
					word->type = MAX (word->type, next->type);
					
					word->end = next->end;
					word->next = next->next;
					
					g_free (next);
					
					next = word->next;
					
					merged = TRUE;
				} else {
					/* if it is going to be too long, make sure we include the
					   separating whitespace */
					word->end = next->start;
					break;
				}
			} else {
				break;
			}
		}
		
		word = word->next;
	}
	
	*wordsp = words;
	
	return merged;
}

/* Appends the @len bytes at @in to @out as a double-quoted string,
 * putting a backslash in front of every '"' and '\'. */
static void
g_string_append_len_quoted (GString *out, const char *in, size_t len)
{
	register const char *inptr;
	const char *inend;
	
	g_string_append_c (out, '"');
	
	inptr = in;
	inend = in + len;
	
	while (inptr < inend) {
		if (*inptr == '"' || *inptr == '\\')
			g_string_append_c (out, '\\');
		
		g_string_append_c (out, *inptr);
		
		inptr++;
	}
	
	g_string_append_c (out, '"');
}

/* Encodes @in word by word: atoms are copied, qstrings are quoted and
 * WORD_2047 words become encoded-words.  @safemask is handed on to the
 * quoted-printable encoder; its IS_PSAFE bit also selects phrase mode for
 * the word splitter.  Returns a newly allocated string (a plain copy of
 * @in if it contains no words at all). */
static char *
rfc2047_encode (const char *in, gushort safemask)
{
	struct _rfc822_word *words, *word, *prev = NULL;
	const char **charsets, *charset;
	const char *start;
	GMimeCharset mask;
	GString *out;
	char *outstr;
	size_t len;
	int i;
	
	if (!(words = rfc2047_encode_get_rfc822_words (in, safemask & IS_PSAFE)))
		return g_strdup (in);
	
	/* keep merging until a pass changes nothing */
	while (rfc2047_encode_merge_rfc822_words (&words))
		;
	
	charsets = g_mime_user_charsets ();
	
	out = g_string_new ("");
	
	/* output words now with spaces between them */
	word = words;
	while (word) {
		/* append correct number of spaces between words */
		if (prev && !(prev->type == WORD_2047 && word->type == WORD_2047)) {
			/* one or both of the words are not encoded so we write the spaces out untouched */
			len = word->start - prev->end;
			g_string_append_len (out, prev->end, len);
		}
		
		switch (word->type) {
		case WORD_ATOM:
			g_string_append_len (out, word->start, word->end - word->start);
			break;
		case WORD_QSTRING:
			g_assert (safemask & IS_PSAFE);
			g_string_append_len_quoted (out, word->start, word->end - word->start);
			break;
		case WORD_2047:
			if (prev && prev->type == WORD_2047) {
				/* include the whitespace chars between these 2 words in the
				   resulting rfc2047 encoded word. */
				len = word->end - prev->end;
				start = prev->end;
				
				/* encoded words need to be separated by linear whitespace */
				g_string_append_c (out, ' ');
			} else {
				len = word->end - word->start;
				start = word->start;
			}
			
			if (word->encoding == 1) {
				/* nothing above 255 was seen in this word */
				rfc2047_encode_word (out, start, len, "iso-8859-1", safemask);
			} else {
				/* prefer the first user charset that can encode the
				 * word, else whatever g_mime_charset_best_name() picks */
				charset = NULL;
				g_mime_charset_init (&mask);
				g_mime_charset_step (&mask, start, len);
				
				for (i = 0; charsets && charsets[i]; i++) {
					if (g_mime_charset_can_encode (&mask, charsets[i], start, len)) {
						charset = charsets[i];
						break;
					}
				}
				
				if (!charset)
					charset = g_mime_charset_best_name (&mask);
				
				rfc2047_encode_word (out, start, len, charset, safemask);
			}
			break;
		}
		
		/* the previous node is no longer needed once this word is written */
		g_free (prev);
		
		prev = word;
		word = word->next;
	}
	
	g_free (prev);
	
	/* hand the character data over to the caller */
	outstr = out->str;
	g_string_free (out, FALSE);
	
	return outstr;
}


/**
 * g_mime_utils_header_encode_phrase:
 * @in: header to encode
 *
 * Encodes a 'phrase' header according to the rules in rfc2047.
 *
 * Returns the encoded 'phrase' (%NULL if @in is %NULL). Useful for
 * encoding internet addresses.
 **/
char *
g_mime_utils_header_encode_phrase (const char *in)
{
	if (in == NULL)
		return NULL;
	
	return rfc2047_encode (in, IS_PSAFE);
}


/**
 * g_mime_utils_header_encode_text:
 * @in: header to encode
 *
 * Encodes a 'text' header according to the rules in rfc2047.
 *
 * Returns the encoded header (%NULL if @in is %NULL). Useful for
 * encoding headers like "Subject".
 **/
char *
g_mime_utils_header_encode_text (const char *in)
{
	if (in == NULL)
		return NULL;
	
	return rfc2047_encode (in, IS_ESAFE);
}


/**
 * g_mime_utils_8bit_header_decode:
 * @in: header to decode
 *
 * Decodes an rfc2047 encoded header.
 *
 * WARNING: This function is deprecated. Use
 * g_mime_utils_header_decode_text() instead.
 *
 * Returns the decoded header (which will be in UTF-8 if at all
 * possible).
 **/
char *
g_mime_utils_8bit_header_decode (const unsigned char *in)
{
	return g_mime_utils_header_decode_text ((const char *) in);
}


/**
 * g_mime_utils_8bit_header_encode:
 * @in: header to encode
 *
 * Encodes a 'text' header according to the rules in rfc2047.
*
 * WARNING: This function is deprecated. Use
 * g_mime_utils_header_encode_text() instead.
 *
 * Returns the encoded header. Useful for encoding
 * headers like "Subject".
 **/
char *
g_mime_utils_8bit_header_encode (const unsigned char *in)
{
	/* same bytes, just viewed as plain chars */
	const char *header = (const char *) in;
	
	return g_mime_utils_header_encode_text (header);
}


/**
 * g_mime_utils_8bit_header_encode_phrase:
 * @in: header to encode
 *
 * Legacy entry point for rfc2047 encoding of a 'phrase' header; it
 * simply forwards to g_mime_utils_header_encode_phrase().
 *
 * WARNING: This function is deprecated. Use
 * g_mime_utils_header_encode_phrase() instead.
 *
 * Returns the encoded 'phrase'. Useful for encoding internet
 * addresses.
 **/
char *
g_mime_utils_8bit_header_encode_phrase (const unsigned char *in)
{
	/* same bytes, just viewed as plain chars */
	const char *phrase = (const char *) in;
	
	return g_mime_utils_header_encode_phrase (phrase);
}


/**
 * g_mime_utils_base64_encode_close:
 * @in: input stream
 * @inlen: length of the input
 * @out: output string
 * @state: holds the number of bits that are stored in @save
 * @save: leftover bits that have not yet been encoded
 *
 * Base64 encodes the input stream to the output stream. Call this
 * when finished encoding data with g_mime_utils_base64_encode_step()
 * to flush off the last little bit.
 *
 * Returns the number of bytes encoded.
**/
size_t
g_mime_utils_base64_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
	unsigned char *outptr = out;
	int c1, c2;
	
	if (inlen > 0)
		outptr += g_mime_utils_base64_encode_step (in, inlen, outptr, state, save);
	
	/* byte 0 of @save counts the leftover input bytes (0, 1 or 2),
	 * bytes 1 and 2 hold them */
	c1 = ((unsigned char *)save)[1];
	c2 = ((unsigned char *)save)[2];
	
	switch (((unsigned char *)save)[0]) {
	case 2:
		outptr[2] = base64_alphabet [(c2 & 0x0f) << 2];
		goto skip;
	case 1:
		/* only byte 1 is valid here: byte 2 may still hold a byte that
		 * an earlier step saved and has since consumed, and it must
		 * not leak into the second output character */
		c2 = 0;
		outptr[2] = '=';
	skip:
		outptr[0] = base64_alphabet [c1 >> 2];
		outptr[1] = base64_alphabet [c2 >> 4 | ((c1 & 0x3) << 4)];
		outptr[3] = '=';
		outptr += 4;
		break;
	}
	
	*outptr++ = '\n';
	
	*save = 0;
	*state = 0;
	
	return (outptr - out);
}


/**
 * g_mime_utils_base64_encode_step:
 * @in: input stream
 * @inlen: length of the input
 * @out: output string
 * @state: number of 4 character groups written to the current output line
 * @save: leftover input bytes that have not yet been encoded (byte 0
 * holds their count, bytes 1 and 2 the bytes themselves)
 *
 * Base64 encodes a chunk of data. Performs an 'encode step', only
 * encodes blocks of 3 characters to the output at a time, saves
 * left-over state in state and save (initialise to 0 on first
 * invocation). A newline is written after every 19 groups.
 *
 * Returns the number of bytes encoded.
 **/
size_t
g_mime_utils_base64_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
	const register unsigned char *inptr;
	register unsigned char *outptr;
	
	if (inlen <= 0)
		return 0;
	
	inptr = in;
	outptr = out;
	
	/* is there at least one complete group of 3 bytes, counting the saved ones? */
	if (inlen + ((unsigned char *)save)[0] > 2) {
		const unsigned char *inend = in + inlen - 2;
		register int c1 = 0, c2 = 0, c3 = 0;
		register int already;
		
		already = *state;
		
		/* reload the saved bytes and enter the loop at the point where
		 * the first group still lacks its input */
		switch (((char *)save)[0]) {
		case 1:	c1 = ((unsigned char *)save)[1]; goto skip1;
		case 2:	c1 = ((unsigned char *)save)[1];
			c2 = ((unsigned char *)save)[2]; goto skip2;
		}
		
		/* yes, we jump into the loop, no i'm not going to change it, its beautiful! */
		while (inptr < inend) {
			c1 = *inptr++;
		skip1:
			c2 = *inptr++;
		skip2:
			c3 = *inptr++;
			*outptr++ = base64_alphabet [c1 >> 2];
			*outptr++ = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)];
			*outptr++ = base64_alphabet [((c2 & 0x0f) << 2) | (c3 >> 6)];
			*outptr++ = base64_alphabet [c3 & 0x3f];
			/* this is a bit ugly ... */
			if ((++already) >= 19) {
				*outptr++ = '\n';
				already = 0;
			}
		}
		
		((unsigned char *)save)[0] = 0;
		/* number of input bytes that did not fill a whole group */
		inlen = 2 - (inptr - inend);
		*state = already;
	}
	
	d(printf ("state = %d, inlen = %d\n", (int)((char *)save)[0], inlen));
	
	if (inlen > 0) {
		register char *saveout;
		
		/* points to the slot for the next char to save */
		saveout = & (((char *)save)[1]) + ((char *)save)[0];
		
		/* inlen can only be 0 1 or 2 */
		switch (inlen) {
		case 2:	*saveout++ = *inptr++;
		case 1:	*saveout++ = *inptr++;
		}
		((char *)save)[0] += inlen;
	}
	
	d(printf ("mode = %d\nc1 = %c\nc2 = %c\n",
		  (int)((char *)save)[0],
		  (int)((char *)save)[1],
		  (int)((char *)save)[2]));
	
	return (outptr - out);
}


/**
 * g_mime_utils_base64_decode_step:
 * @in: input stream
 * @inlen: max length of data to decode
 * @out: output stream
 * @state: holds the number of bits that are stored in @save
 * @save: leftover bits that have not yet been decoded
 *
 * Decodes a chunk of base64 encoded data.
 *
 * Returns the number of bytes decoded (which have been dumped in @out).
**/
size_t
g_mime_utils_base64_decode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
	const unsigned char *end = in + inlen;
	const unsigned char *cur;
	unsigned char *dest = out;
	guint32 bits = *save;
	int pending = *state;
	int budget;
	
	/* gather 6 bits per valid input character; every 4th one
	 * completes a group of 3 output bytes */
	for (cur = in; cur < end; cur++) {
		unsigned char rank = gmime_base64_rank[*cur];
		
		/* characters outside the alphabet are skipped */
		if (rank == 0xff)
			continue;
		
		bits = (bits << 6) | rank;
		if (++pending != 4)
			continue;
		
		*dest++ = bits >> 16;
		*dest++ = bits >> 8;
		*dest++ = bits;
		pending = 0;
	}
	
	*save = bits;
	*state = pending;
	
	/* look at the last two alphabet characters of the chunk: each one
	 * that is '=' is padding and takes one output byte back */
	for (budget = 2; cur > in && budget != 0; ) {
		cur--;
		if (gmime_base64_rank[*cur] == 0xff)
			continue;
		
		if (*cur == '=' && dest > out)
			dest--;
		
		budget--;
	}
	
	/* a non-zero budget at this point means the input was truncated */
	
	return (dest - out);
}


/**
 * g_mime_utils_uuencode_close:
 * @in: input stream
 * @inlen: input stream length
 * @out: output stream
 * @uubuf: temporary buffer of 60 bytes
 * @state: holds the number of bits that are stored in @save
 * @save: leftover bits that have not yet been encoded
 *
 * Uuencodes a chunk of data. Call this when finished encoding data
 * with g_mime_utils_uuencode_step() to flush off the last little bit.
 *
 * Returns the number of bytes encoded.
**/
size_t
g_mime_utils_uuencode_close (const unsigned char *in, size_t inlen, unsigned char *out, unsigned char *uubuf, int *state, guint32 *save)
{
	unsigned char *dest = out;
	unsigned char *slot;
	guint32 pending;
	int linelen, have;
	int padding = 0;
	
	if (inlen > 0)
		dest += g_mime_utils_uuencode_step (in, inlen, out, uubuf, state, save);
	
	/* unpack the state: low byte = input bytes held in @save,
	 * next byte = input bytes already encoded into @uubuf */
	pending = *save;
	have = *state & 0xff;
	linelen = (*state >> 8) & 0xff;
	
	slot = uubuf + ((linelen / 3) * 4);
	
	if (have > 0) {
		/* zero-fill the incomplete group, remembering how much was added */
		for (; have < 3; have++) {
			pending <<= 8;
			padding++;
		}
		
		if (have == 3) {
			/* 3 input bytes become 4 uuencoded characters */
			unsigned char hi = pending >> 16;
			unsigned char mid = (pending >> 8) & 0xff;
			unsigned char lo = pending & 0xff;
			
			slot[0] = GMIME_UUENCODE_CHAR ((hi >> 2) & 0x3f);
			slot[1] = GMIME_UUENCODE_CHAR (((hi << 4) | ((mid >> 4) & 0xf)) & 0x3f);
			slot[2] = GMIME_UUENCODE_CHAR (((mid << 2) | ((lo >> 6) & 0x3)) & 0x3f);
			slot[3] = GMIME_UUENCODE_CHAR (lo & 0x3f);
			
			linelen += 3;
		}
	}
	
	if (linelen > 0) {
		int nchars = (linelen / 3) * 4;
		
		/* the length character does not count the fill bytes */
		*dest++ = GMIME_UUENCODE_CHAR ((linelen - padding) & 0xff);
		memcpy (dest, uubuf, nchars);
		dest += nchars;
		*dest++ = '\n';
		linelen = 0;
	}
	
	/* finish with a zero-length line */
	*dest++ = GMIME_UUENCODE_CHAR (linelen & 0xff);
	*dest++ = '\n';
	
	*save = 0;
	*state = 0;
	
	return (dest - out);
}


/**
 * g_mime_utils_uuencode_step:
 * @in: input stream
 * @inlen: input stream length
 * @out: output stream
 * @uubuf: temporary buffer of 60 bytes
 * @state: holds the number of bits that are stored in @save
 * @save: leftover bits that have not yet been encoded
 *
 * Uuencodes a chunk of data. Performs an 'encode step', only encodes
 * blocks of 45 characters to the output at a time, saves left-over
 * state in @uubuf, @state and @save (initialize to 0 on first
 * invocation).
 *
 * Returns the number of bytes encoded.
**/
size_t
g_mime_utils_uuencode_step (const unsigned char *in, size_t inlen, unsigned char *out, unsigned char *uubuf, int *state, guint32 *save)
{
	const unsigned char *cur = in;
	const unsigned char *end = in + inlen;
	unsigned char *dest = out;
	unsigned char *slot;
	guint32 pending = *save;
	int have = *state & 0xff;
	int linelen = (*state >> 8) & 0xff;
	
	/* continue behind the groups that are already sitting in @uubuf */
	slot = uubuf + ((linelen / 3) * 4);
	
	while (cur < end) {
		while (linelen < 45 && cur < end) {
			unsigned char hi, mid, lo;
			
			/* collect up to 3 input bytes */
			for (; have < 3 && cur < end; have++)
				pending = (pending << 8) | *cur++;
			
			if (have != 3)
				continue;
			
			/* 3 input bytes become 4 uuencoded characters */
			hi = pending >> 16;
			mid = (pending >> 8) & 0xff;
			lo = pending & 0xff;
			
			*slot++ = GMIME_UUENCODE_CHAR ((hi >> 2) & 0x3f);
			*slot++ = GMIME_UUENCODE_CHAR (((hi << 4) | ((mid >> 4) & 0xf)) & 0x3f);
			*slot++ = GMIME_UUENCODE_CHAR (((mid << 2) | ((lo >> 6) & 0x3)) & 0x3f);
			*slot++ = GMIME_UUENCODE_CHAR (lo & 0x3f);
			
			have = 0;
			pending = 0;
			linelen += 3;
		}
		
		if (linelen < 45)
			continue;
		
		/* a full line: length character, encoded data, newline */
		*dest++ = GMIME_UUENCODE_CHAR (linelen & 0xff);
		memcpy (dest, uubuf, ((linelen / 3) * 4));
		dest += ((linelen / 3) * 4);
		*dest++ = '\n';
		
		linelen = 0;
		slot = uubuf;
	}
	
	/* pack the counters back: line length above the byte count */
	*save = pending;
	*state = ((linelen & 0xff) << 8) | (have & 0xff);
	
	return (dest - out);
}


/**
 * g_mime_utils_uudecode_step:
 * @in: input stream
 * @inlen: max length of data to decode
 * @out: output stream
 * @state: holds the number of bits that are stored in @save
 * @save: leftover bits that have not yet been decoded
 *
 * Uudecodes a chunk of data. Performs a 'decode step' on a chunk of
 * uuencoded data. Assumes the "begin mode filename" line has
 * been stripped off.
 *
 * Returns the number of bytes decoded.
**/
size_t
g_mime_utils_uudecode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
	const register unsigned char *inptr;
	register unsigned char *outptr;
	const unsigned char *inend;
	unsigned char ch;
	register guint32 saved;
	gboolean last_was_eoln;
	int uulen, i;
	
	/* nothing is decoded once the end flag has been set */
	if (*state & GMIME_UUDECODE_STATE_END)
		return 0;
	
	/* unpack the state: low byte = characters collected in @save,
	 * next byte = decoded bytes still expected on the current line */
	saved = *save;
	i = *state & 0xff;
	uulen = (*state >> 8) & 0xff;
	if (uulen == 0)
		last_was_eoln = TRUE;
	else
		last_was_eoln = FALSE;
	
	inend = in + inlen;
	outptr = out;
	
	inptr = in;
	while (inptr < inend) {
		if (*inptr == '\n') {
			last_was_eoln = TRUE;
			
			inptr++;
			continue;
		} else if (!uulen || last_was_eoln) {
			/* first octet on a line is the uulen octet */
			uulen = gmime_uu_rank[*inptr];
			last_was_eoln = FALSE;
			if (uulen == 0) {
				/* a zero-length line sets the end flag */
				*state |= GMIME_UUDECODE_STATE_END;
				break;
			}
			
			inptr++;
			continue;
		}
		
		ch = *inptr++;
		
		if (uulen > 0) {
			/* save the byte */
			saved = (saved << 8) | ch;
			i++;
			if (i == 4) {
				/* convert 4 uuencoded bytes to 3 normal bytes */
				unsigned char b0, b1, b2, b3;
				
				b0 = saved >> 24;
				b1 = saved >> 16 & 0xff;
				b2 = saved >> 8 & 0xff;
				b3 = saved & 0xff;
				
				if (uulen >= 3) {
					*outptr++ = gmime_uu_rank[b0] << 2 | gmime_uu_rank[b1] >> 4;
					*outptr++ = gmime_uu_rank[b1] << 4 | gmime_uu_rank[b2] >> 2;
					*outptr++ = gmime_uu_rank[b2] << 6 | gmime_uu_rank[b3];
				} else {
					/* last group of the line: only 1 or 2 bytes are real */
					if (uulen >= 1) {
						*outptr++ = gmime_uu_rank[b0] << 2 | gmime_uu_rank[b1] >> 4;
					}
					if (uulen >= 2) {
						*outptr++ = gmime_uu_rank[b1] << 4 | gmime_uu_rank[b2] >> 2;
					}
				}
				
				i = 0;
				saved = 0;
				/* NOTE(review): this goes negative when fewer than 3
				 * bytes were left; the value is stored masked with
				 * 0xff below, so it reads back as 253..255 on the
				 * next call -- confirm that this is intended */
				uulen -= 3;
			}
		} else {
			/* data found although the line is already complete: stop */
			break;
		}
	}
	
	/* keep the flag bits selected by GMIME_UUDECODE_STATE_MASK and pack
	 * the counters back in */
	*save = saved;
	*state = (*state & GMIME_UUDECODE_STATE_MASK) | ((uulen & 0xff) << 8) | (i & 0xff);
	
	return (outptr - out);
}


/**
 * g_mime_utils_quoted_encode_close:
 * @in: input stream
 * @inlen: length of the input
 * @out: output string
 * @state: holds the number of bits that are stored in @save
 * @save: leftover bits that have not yet been encoded
 *
 * Quoted-printable encodes a block of text.
Call this when finished
 * encoding data with g_mime_utils_quoted_encode_step() to flush off
 * the last little bit.
 *
 * Returns the number of bytes encoded.
 **/
size_t
g_mime_utils_quoted_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
	unsigned char *dest = out;
	int held;
	
	if (inlen > 0)
		dest += g_mime_utils_quoted_encode_step (in, inlen, dest, state, save);
	
	/* the step function may have held back one character (-1: none) */
	held = *state;
	if (held != -1) {
		if (!is_qpsafe (held) || is_blank (held)) {
			/* unsafe, or a space/tab ending the line: hex-escape it */
			*dest++ = '=';
			*dest++ = tohex[(held >> 4) & 0xf];
			*dest++ = tohex[held & 0xf];
		} else {
			*dest++ = held;
		}
	}
	
	*dest++ = '\n';
	
	/* leave the state ready for a fresh run */
	*save = 0;
	*state = -1;
	
	return (dest - out);
}


/**
 * g_mime_utils_quoted_encode_step:
 * @in: input stream
 * @inlen: length of the input
 * @out: output string
 * @state: holds the number of bits that are stored in @save
 * @save: leftover bits that have not yet been encoded
 *
 * Quoted-printable encodes a block of text. Performs an 'encode
 * step', saves left-over state in state and save (initialise to -1 on
 * first invocation).
 *
 * Returns the number of bytes encoded.
**/
size_t
g_mime_utils_quoted_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
	const register unsigned char *inptr = in;
	const unsigned char *inend = in + inlen;
	register unsigned char *outptr = out;
	register guint32 sofar = *save;  /* keeps track of how many chars on a line */
	register int last = *state;  /* keeps track if last char to end was a space cr etc */
	unsigned char c;
	
	/* `last' is -1 or one character whose output has been held back until
	 * it is known what follows it */
	while (inptr < inend) {
		c = *inptr++;
		if (c == '\r') {
			/* a held character is written hex-escaped, then the CR is held */
			if (last != -1) {
				*outptr++ = '=';
				*outptr++ = tohex[(last >> 4) & 0xf];
				*outptr++ = tohex[last & 0xf];
				sofar += 3;
			}
			last = c;
		} else if (c == '\n') {
			/* a held CR is dropped here, anything else held is hex-escaped */
			if (last != -1 && last != '\r') {
				*outptr++ = '=';
				*outptr++ = tohex[(last >> 4) & 0xf];
				*outptr++ = tohex[last & 0xf];
			}
			*outptr++ = '\n';
			sofar = 0;
			last = -1;
		} else {
			/* an ordinary character follows, so the held one can go out */
			if (last != -1) {
				if (is_qpsafe (last)) {
					*outptr++ = last;
					sofar++;
				} else {
					*outptr++ = '=';
					*outptr++ = tohex[(last >> 4) & 0xf];
					*outptr++ = tohex[last & 0xf];
					sofar += 3;
				}
			}
			
			if (is_qpsafe (c)) {
				/* soft line break ("=\n") once the line is past 74 chars */
				if (sofar > 74) {
					*outptr++ = '=';
					*outptr++ = '\n';
					sofar = 0;
				}
				
				/* delay output of space char */
				if (is_blank (c)) {
					last = c;
				} else {
					*outptr++ = c;
					sofar++;
					last = -1;
				}
			} else {
				/* the escape needs 3 columns, so break earlier; the
				 * counter then already includes the escape below */
				if (sofar > 72) {
					*outptr++ = '=';
					*outptr++ = '\n';
					sofar = 3;
				} else
					sofar += 3;
				
				*outptr++ = '=';
				*outptr++ = tohex[(c >> 4) & 0xf];
				*outptr++ = tohex[c & 0xf];
				last = -1;
			}
		}
	}
	
	/* carry the line length and the held character over to the next call */
	*save = sofar;
	*state = last;
	
	return (outptr - out);
}


/**
 * g_mime_utils_quoted_decode_step:
 * @in: input stream
 * @inlen: max length of data to decode
 * @out: output stream
 * @state: holds the number of bits that are stored in @save
 * @save: leftover bits that have not yet been decoded
 *
 * Decodes a block of quoted-printable encoded data. Performs a
 * 'decode step' on a chunk of QP encoded data.
 *
 * Returns the number of bytes decoded.
**/
size_t
g_mime_utils_quoted_decode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
	/* FIXME: this does not strip trailing spaces from lines (as
	 * it should, rfc 2045, section 6.7) Should it also
	 * canonicalise the end of line to CR LF??
	 *
	 * Note: Trailing rubbish (at the end of input), like = or =x
	 * or =\r will be lost.
	 */
	const register unsigned char *inptr = in;
	const unsigned char *inend = in + inlen;
	register unsigned char *outptr = out;
	guint32 isave = *save;
	int istate = *state;
	unsigned char c;
	
	d(printf ("quoted-printable, decoding text '%.*s'\n", inlen, in));
	
	/* istate: 0 = copying plain text, 1 = just saw '=', 2 = have the
	 * first character after '=' (kept in isave), 3 = CANONICALISE_EOL only */
	while (inptr < inend) {
		switch (istate) {
		case 0:
			while (inptr < inend) {
				c = *inptr++;
				/* FIXME: use a specials table to avoid 3 comparisons for the common case */
				if (c == '=') {
					istate = 1;
					break;
				}
#ifdef CANONICALISE_EOL
				/*else if (c=='\r') {
					state = 3;
				} else if (c=='\n') {
					*outptr++ = '\r';
					*outptr++ = c;
					} */
#endif
				else {
					*outptr++ = c;
				}
			}
			break;
		case 1:
			c = *inptr++;
			if (c == '\n') {
				/* soft break ... unix end of line */
				istate = 0;
			} else {
				isave = c;
				istate = 2;
			}
			break;
		case 2:
			c = *inptr++;
			if (isxdigit (c) && isxdigit (isave)) {
				/* two hex digits: combine them into one byte */
				c = toupper ((int) c);
				isave = toupper ((int) isave);
				*outptr++ = (((isave >= 'A' ? isave - 'A' + 10 : isave - '0') & 0x0f) << 4)
					| ((c >= 'A' ? c - 'A' + 10 : c - '0') & 0x0f);
			} else if (c == '\n' && isave == '\r') {
				/* soft break ... canonical end of line */
			} else {
				/* just output the data */
				*outptr++ = '=';
				*outptr++ = isave;
				*outptr++ = c;
			}
			istate = 0;
			break;
#ifdef CANONICALISE_EOL
		case 3:
			/* convert \n -> to \r\n, leaves \r\n alone */
			c = *inptr++;
			if (c == '\n') {
				*outptr++ = '\r';
				*outptr++ = c;
			} else {
				*outptr++ = '\r';
				*outptr++ = '\n';
				*outptr++ = c;
			}
			istate = 0;
			break;
#endif
		}
	}
	
	/* an escape cut in half by the end of the chunk continues on the next call */
	*state = istate;
	*save = isave;
	
	return (outptr - out);
}