/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* GMime * Copyright (C) 2000-2007 Jeffrey Stedfast * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include "gmime-parser.h" #include "gmime-stream-mem.h" #include "gmime-message-part.h" #include "gmime-multipart.h" #include "gmime-part.h" #ifdef ENABLE_WARNINGS #define w(x) x #else #define w(x) #endif /* ENABLE_WARNINGS */ #define d(x) static void g_mime_parser_class_init (GMimeParserClass *klass); static void g_mime_parser_init (GMimeParser *parser, GMimeParserClass *klass); static void g_mime_parser_finalize (GObject *object); static void parser_init (GMimeParser *parser, GMimeStream *stream); static void parser_close (GMimeParser *parser); static GMimeObject *parser_construct_leaf_part (GMimeParser *parser, GMimeContentType *content_type, int *found); static GMimeObject *parser_construct_multipart (GMimeParser *parser, GMimeContentType *content_type, int *found); static GObjectClass *parent_class = NULL; #define SCAN_BUF 4096 /* size of read buffer */ #define SCAN_HEAD 128 /* headroom guaranteed to be before each read buffer */ enum { GMIME_PARSER_STATE_ERROR = -1, GMIME_PARSER_STATE_INIT, GMIME_PARSER_STATE_FROM, GMIME_PARSER_STATE_HEADERS, 
GMIME_PARSER_STATE_HEADERS_END, GMIME_PARSER_STATE_CONTENT, }; struct _GMimeParserPrivate { int state; GMimeStream *stream; off_t offset; /* i/o buffers */ char realbuf[SCAN_HEAD + SCAN_BUF + 1]; char *inbuf; char *inptr; char *inend; off_t from_offset; GByteArray *from_line; regex_t header_regex; GMimeParserHeaderRegexFunc header_cb; gpointer user_data; /* header buffer */ char *headerbuf; char *headerptr; guint headerleft; /* raw header buffer */ char *rawbuf; char *rawptr; guint rawleft; off_t headers_start; off_t header_start; unsigned int unstep:26; unsigned int midline:1; unsigned int seekable:1; unsigned int scan_from:1; unsigned int have_regex:1; unsigned int persist_stream:1; unsigned int respect_content_length:1; GMimeContentType *content_type; struct _header_raw *headers; struct _boundary_stack *bounds; }; struct _boundary_stack { struct _boundary_stack *parent; char *boundary; size_t boundarylen; size_t boundarylenfinal; size_t boundarylenmax; off_t content_end; }; static void parser_push_boundary (GMimeParser *parser, const char *boundary) { struct _GMimeParserPrivate *priv = parser->priv; struct _boundary_stack *s; size_t max; max = priv->bounds ? 
priv->bounds->boundarylenmax : 0; s = g_new (struct _boundary_stack, 1); s->parent = priv->bounds; priv->bounds = s; if (!strcmp (boundary, "From ")) { s->boundary = g_strdup ("From "); s->boundarylen = 5; s->boundarylenfinal = 5; } else { s->boundary = g_strdup_printf ("--%s--", boundary); s->boundarylen = strlen (boundary) + 2; s->boundarylenfinal = s->boundarylen + 2; } s->boundarylenmax = MAX (s->boundarylenfinal, max); s->content_end = -1; } static void parser_pop_boundary (GMimeParser *parser) { struct _GMimeParserPrivate *priv = parser->priv; struct _boundary_stack *s; if (!priv->bounds) { d(g_warning ("boundary stack underflow")); return; } s = priv->bounds; priv->bounds = priv->bounds->parent; g_free (s->boundary); g_free (s); } struct _header_raw { struct _header_raw *next; char *name; char *value; off_t offset; }; static const char * header_raw_find (struct _header_raw *headers, const char *name, off_t *offset) { struct _header_raw *h; h = headers; while (h) { if (!g_ascii_strcasecmp (h->name, name)) { if (offset) *offset = h->offset; return h->value; } h = h->next; } return NULL; } static void header_raw_clear (struct _header_raw **headers) { struct _header_raw *h, *n; h = *headers; while (h) { n = h->next; g_free (h->name); g_free (h->value); g_free (h); h = n; } *headers = NULL; } GType g_mime_parser_get_type (void) { static GType type = 0; if (!type) { static const GTypeInfo info = { sizeof (GMimeParserClass), NULL, /* base_class_init */ NULL, /* base_class_finalize */ (GClassInitFunc) g_mime_parser_class_init, NULL, /* class_finalize */ NULL, /* class_data */ sizeof (GMimeParser), 0, /* n_preallocs */ (GInstanceInitFunc) g_mime_parser_init, }; type = g_type_register_static (G_TYPE_OBJECT, "GMimeParser", &info, 0); } return type; } static void g_mime_parser_class_init (GMimeParserClass *klass) { GObjectClass *object_class = G_OBJECT_CLASS (klass); parent_class = g_type_class_ref (G_TYPE_OBJECT); object_class->finalize = g_mime_parser_finalize; } 
static void g_mime_parser_init (GMimeParser *parser, GMimeParserClass *klass) { parser->priv = g_new (struct _GMimeParserPrivate, 1); parser_init (parser, NULL); parser->priv->scan_from = FALSE; parser->priv->have_regex = FALSE; parser->priv->persist_stream = TRUE; parser->priv->respect_content_length = FALSE; } static void g_mime_parser_finalize (GObject *object) { GMimeParser *parser = (GMimeParser *) object; parser_close (parser); if (parser->priv->have_regex) regfree (&parser->priv->header_regex); g_free (parser->priv); G_OBJECT_CLASS (parent_class)->finalize (object); } static void parser_init (GMimeParser *parser, GMimeStream *stream) { struct _GMimeParserPrivate *priv = parser->priv; off_t offset = -1; if (stream) { g_object_ref (stream); offset = g_mime_stream_tell (stream); } priv->state = GMIME_PARSER_STATE_INIT; priv->stream = stream; priv->offset = offset; priv->inbuf = priv->realbuf + SCAN_HEAD; priv->inptr = priv->inbuf; priv->inend = priv->inbuf; priv->from_offset = -1; priv->from_line = g_byte_array_new (); priv->headerbuf = g_malloc (SCAN_HEAD + 1); priv->headerptr = priv->headerbuf; priv->headerleft = SCAN_HEAD; priv->rawbuf = g_malloc (SCAN_HEAD + 1); priv->rawptr = priv->rawbuf; priv->rawleft = SCAN_HEAD; priv->headers_start = -1; priv->header_start = -1; priv->unstep = 0; priv->midline = FALSE; priv->seekable = offset != -1; priv->headers = NULL; priv->bounds = NULL; } static void parser_close (GMimeParser *parser) { struct _GMimeParserPrivate *priv = parser->priv; if (priv->stream) g_object_unref (priv->stream); g_byte_array_free (priv->from_line, TRUE); g_free (priv->headerbuf); g_free (priv->rawbuf); header_raw_clear (&priv->headers); while (priv->bounds) parser_pop_boundary (parser); } static void parser_unstep (GMimeParser *parser) { struct _GMimeParserPrivate *priv = parser->priv; priv->unstep++; } /** * g_mime_parser_new: * * Creates a new parser object. * * Returns a new parser object. 
**/ GMimeParser * g_mime_parser_new (void) { GMimeParser *parser; parser = g_object_new (GMIME_TYPE_PARSER, NULL); return parser; } /** * g_mime_parser_new_with_stream: * @stream: raw message or part stream * * Creates a new parser object preset to parse @stream. * * Returns a new parser object. **/ GMimeParser * g_mime_parser_new_with_stream (GMimeStream *stream) { GMimeParser *parser; parser = g_mime_parser_new (); g_mime_parser_init_with_stream (parser, stream); return parser; } /** * g_mime_parser_init_with_stream: * @parser: MIME parser object * @stream: raw message or part stream * * Initializes @parser to use @stream. * * WARNING: Initializing a parser with a stream is comparable to * selling your soul (@stream) to the devil (@parser). You are * basically giving the parser complete control of the stream, this * means that you had better not touch the stream so long as the * parser is still using it. This means no reading, writing, seeking, * or resetting of the stream. Anything that will/could change the * current stream's offset is PROHIBITED. * * It is also recommended that you not use g_mime_stream_tell() * because it will not necessarily give you the current @parser offset * since @parser handles its own internal read-ahead buffer. Instead, * it is recommended that you use g_mime_parser_tell() if you have a * reason to need the current offset of the @parser. **/ void g_mime_parser_init_with_stream (GMimeParser *parser, GMimeStream *stream) { g_return_if_fail (GMIME_IS_PARSER (parser)); g_return_if_fail (GMIME_IS_STREAM (stream)); parser_close (parser); parser_init (parser, stream); } /** * g_mime_parser_get_persist_stream: * @parser: MIME parser object * * Gets whether or not the underlying stream is persistent. * * Returns %TRUE if the @parser will leave the content on disk or * %FALSE if it will load the content into memory. 
**/ gboolean g_mime_parser_get_persist_stream (GMimeParser *parser) { g_return_val_if_fail (GMIME_IS_PARSER (parser), FALSE); return (parser->priv->persist_stream && parser->priv->seekable); } /** * g_mime_parser_set_persist_stream: * @parser: MIME parser object * @persist: persist attribute * * Sets whether or not the @parser's underlying stream is persistent. * * If @persist is %TRUE, the @parser will attempt to construct * messages/parts whose content will remain on disk rather than being * loaded into memory so as to reduce memory usage. This is the default. * * If @persist is %FALSE, the @parser will always load message content * into memory. * * Note: This attribute only serves as a hint to the @parser. If the * underlying stream does not support seeking, then this attribute * will be ignored. **/ void g_mime_parser_set_persist_stream (GMimeParser *parser, gboolean persist) { g_return_if_fail (GMIME_IS_PARSER (parser)); parser->priv->persist_stream = persist; } /** * g_mime_parser_get_scan_from: * @parser: MIME parser object * * Gets whether or not @parser is set to scan mbox-style From-lines. * * Returns whether or not @parser is set to scan mbox-style * From-lines. **/ gboolean g_mime_parser_get_scan_from (GMimeParser *parser) { g_return_val_if_fail (GMIME_IS_PARSER (parser), FALSE); return parser->priv->scan_from; } /** * g_mime_parser_set_scan_from: * @parser: MIME parser object * @scan_from: %TRUE to scan From-lines or %FALSE otherwise * * Sets whether or not @parser should scan mbox-style From-lines. **/ void g_mime_parser_set_scan_from (GMimeParser *parser, gboolean scan_from) { g_return_if_fail (GMIME_IS_PARSER (parser)); parser->priv->scan_from = scan_from ? 1 : 0; } /** * g_mime_parser_get_respect_content_length: * @parser: MIME parser object * * Gets whether or not @parser is set to use Content-Length for * determining the offset of the end of the message. 
*
 * Returns whether or not @parser is set to use Content-Length for
 * determining the offset of the end of the message.
 **/
gboolean
g_mime_parser_get_respect_content_length (GMimeParser *parser)
{
	struct _GMimeParserPrivate *priv;
	
	g_return_val_if_fail (GMIME_IS_PARSER (parser), FALSE);
	
	priv = parser->priv;
	
	return priv->respect_content_length;
}


/**
 * g_mime_parser_set_respect_content_length:
 * @parser: MIME parser object
 * @respect_content_length: %TRUE if the parser should use Content-Length headers or %FALSE otherwise.
 *
 * Tells @parser whether Content-Length headers should be honoured
 * when working out where the next message starts. The setting only
 * matters when the parser is also scanning for From-lines.
 *
 * Mainly of use for broken Solaris mbox files (see
 * http://www.jwz.org/doc/content-length.html for details).
 **/
void
g_mime_parser_set_respect_content_length (GMimeParser *parser, gboolean respect_content_length)
{
	struct _GMimeParserPrivate *priv;
	
	g_return_if_fail (GMIME_IS_PARSER (parser));
	
	priv = parser->priv;
	
	/* stored in a 1-bit field, so collapse to exactly 0 or 1 */
	if (respect_content_length)
		priv->respect_content_length = 1;
	else
		priv->respect_content_length = 0;
}


/**
 * g_mime_parser_set_header_regex:
 * @parser: MIME parser object
 * @regex: regular expression
 * @header_cb: callback function
 * @user_data: user data
 *
 * Sets the regular expression pattern @regex on @parser. Whenever a
 * header matching the pattern @regex is parsed, @header_cb is called
 * with @user_data as the user_data argument.
**/
void
g_mime_parser_set_header_regex (GMimeParser *parser, const char *regex,
				GMimeParserHeaderRegexFunc header_cb, gpointer user_data)
{
	struct _GMimeParserPrivate *priv;
	
	g_return_if_fail (GMIME_IS_PARSER (parser));
	
	priv = parser->priv;
	
	/* always discard a previously compiled pattern first */
	if (priv->have_regex) {
		regfree (&priv->header_regex);
		priv->have_regex = FALSE;
	}
	
	/* a NULL pattern or callback just leaves matching disabled */
	if (!regex || !header_cb)
		return;
	
	priv->header_cb = header_cb;
	priv->user_data = user_data;
	
	/* regcomp() returns 0 on success, so have_regex is set only
	 * when the pattern compiled */
	priv->have_regex = !regcomp (&priv->header_regex, regex,
				     REG_EXTENDED | REG_ICASE | REG_NOSUB);
}


/* Moves the unread bytes [inptr, inend) towards the front of realbuf
 * and then reads more data from the stream after them.  Updates
 * priv->inptr, priv->inend and priv->offset.
 *
 * Returns the number of unread bytes now in the buffer; a failed or
 * empty read is not reported separately, the count simply does not
 * grow. */
static ssize_t
parser_fill (GMimeParser *parser)
{
	struct _GMimeParserPrivate *priv = parser->priv;
	char *inbuf, *inptr, *inend;
	ssize_t nread;
	size_t inlen;
	
	inbuf = priv->inbuf;
	inptr = priv->inptr;
	inend = priv->inend;
	inlen = inend - inptr;
	
	g_assert (inptr <= inend);
	
	/* attempt to align 'inend' with realbuf + SCAN_HEAD */
	if (inptr >= inbuf) {
		/* unread data starts at or after inbuf: move it down so
		 * that it begins up to SCAN_HEAD bytes before inbuf */
		inbuf -= inlen < SCAN_HEAD ? inlen : SCAN_HEAD;
		memmove (inbuf, inptr, inlen);
		inptr = inbuf;
		inbuf += inlen;
	} else if (inptr > priv->realbuf) {
		/* unread data already starts inside the headroom: shift
		 * it down by whatever room is left, but no further than
		 * the amount by which it extends past inbuf */
		size_t shift;
		
		shift = MIN (inptr - priv->realbuf, inend - inbuf);
		memmove (inptr - shift, inptr, inlen);
		inptr -= shift;
		inbuf = inptr + inlen;
	} else {
		/* we can't shift... */
		inbuf = inend;
	}
	
	/* from here on 'inbuf' is where newly read data will be stored */
	priv->inptr = inptr;
	priv->inend = inbuf;
	/* read limit: one byte short of realbuf + SCAN_HEAD + SCAN_BUF */
	inend = priv->realbuf + SCAN_HEAD + SCAN_BUF - 1;
	
	if ((nread = g_mime_stream_read (priv->stream, inbuf, inend - inbuf)) > 0)
		priv->inend += nread;
	
	priv->offset = g_mime_stream_tell (priv->stream);
	
	return priv->inend - priv->inptr;
}


/* Translates the buffer position @inptr into a stream offset by
 * subtracting the bytes buffered after it from the recorded stream
 * offset.  A NULL @inptr means the current read position.  Returns -1
 * when the recorded stream offset is -1. */
static off_t
parser_offset (struct _GMimeParserPrivate *priv, const char *inptr)
{
	if (priv->offset == -1)
		return -1;
	
	if (!inptr)
		inptr = priv->inptr;
	
	return (priv->offset - (priv->inend - inptr));
}


/**
 * g_mime_parser_tell:
 * @parser: MIME parser object
 *
 * Gets the current stream offset from the parser's internal stream.
 *
 * Returns the current stream offset from the parser's internal stream
 * or -1 on error.
**/ off_t g_mime_parser_tell (GMimeParser *parser) { g_return_val_if_fail (GMIME_IS_PARSER (parser), -1); g_return_val_if_fail (GMIME_IS_STREAM (parser->priv->stream), -1); return parser_offset (parser->priv, NULL); } /** * g_mime_parser_eos: * @parser: MIME parser * * Tests the end-of-stream indicator for @parser's internal stream. * * Returns %TRUE on EOS or %FALSE otherwise. **/ gboolean g_mime_parser_eos (GMimeParser *parser) { struct _GMimeParserPrivate *priv; g_return_val_if_fail (GMIME_IS_STREAM (parser->priv->stream), TRUE); priv = parser->priv; return g_mime_stream_eos (priv->stream) && priv->inptr == priv->inend; } static int parser_step_from (GMimeParser *parser) { struct _GMimeParserPrivate *priv = parser->priv; register char *inptr; char *start, *inend; ssize_t left = 0; size_t len; g_byte_array_set_size (priv->from_line, 0); inptr = priv->inptr; g_assert (inptr <= priv->inend); do { refill: if (parser_fill (parser) <= left) { /* failed to find a From line; EOF reached */ priv->state = GMIME_PARSER_STATE_ERROR; priv->inptr = priv->inend; return -1; } inptr = priv->inptr; inend = priv->inend; *inend = '\n'; while (inptr < inend) { start = inptr; while (*inptr != '\n') inptr++; if (inptr + 1 >= inend) { /* we don't have enough data; if we can't get more we have to bail */ left = inend - start; priv->inptr = start; goto refill; } len = inptr - start; inptr++; if (len >= 5 && !strncmp (start, "From ", 5)) { priv->from_offset = parser_offset (priv, start); g_byte_array_append (priv->from_line, (unsigned char *) start, len); goto got_from; } } priv->inptr = inptr; left = 0; } while (1); got_from: priv->state = GMIME_PARSER_STATE_HEADERS; priv->inptr = inptr; return 0; } #define header_append(priv, start, len) G_STMT_START { \ if (priv->headerleft <= len) { \ size_t hlen, hoff; \ \ hlen = hoff = priv->headerptr - priv->headerbuf; \ hlen = hlen ? 
hlen : 1; \ \ while (hlen < hoff + len) \ hlen <<= 1; \ \ priv->headerbuf = g_realloc (priv->headerbuf, hlen + 1); \ priv->headerptr = priv->headerbuf + hoff; \ priv->headerleft = hlen - hoff; \ } \ \ memcpy (priv->headerptr, start, len); \ priv->headerptr += len; \ priv->headerleft -= len; \ } G_STMT_END #define raw_header_append(priv, start, len) G_STMT_START { \ if (priv->rawleft <= len) { \ size_t hlen, hoff; \ \ hlen = hoff = priv->rawptr - priv->rawbuf; \ hlen = hlen ? hlen : 1; \ \ while (hlen < hoff + len) \ hlen <<= 1; \ \ priv->rawbuf = g_realloc (priv->rawbuf, hlen + 1); \ priv->rawptr = priv->rawbuf + hoff; \ priv->rawleft = hlen - hoff; \ } \ \ memcpy (priv->rawptr, start, len); \ priv->rawptr += len; \ priv->rawleft -= len; \ } G_STMT_END #define raw_header_reset(priv) G_STMT_START { \ priv->rawleft += priv->rawptr - priv->rawbuf; \ priv->rawptr = priv->rawbuf; \ } G_STMT_END #define header_parse(parser, priv, hend) G_STMT_START { \ struct _header_raw *header; \ register char *colon; \ size_t hlen; \ \ header = g_new (struct _header_raw, 1); \ header->next = NULL; \ \ *priv->headerptr = '\0'; \ colon = priv->headerbuf; \ while (*colon && *colon != ':') \ colon++; \ \ hlen = colon - priv->headerbuf; \ \ header->name = g_strndup (priv->headerbuf, hlen); \ g_strstrip (header->name); \ if (*colon != ':') { \ w(g_warning ("Invalid header: %s", header->name)); \ header->value = header->name; \ header->name = g_strdup ("X-Invalid-Header"); \ } else { \ header->value = g_strdup (colon + 1); \ g_strstrip (header->value); \ } \ header->offset = priv->header_start; \ \ hend->next = header; \ hend = header; \ \ priv->headerleft += priv->headerptr - priv->headerbuf; \ priv->headerptr = priv->headerbuf; \ \ if (priv->have_regex && \ !regexec (&priv->header_regex, header->name, 0, NULL, 0)) \ priv->header_cb (parser, header->name, header->value, \ header->offset, priv->user_data); \ } G_STMT_END static int parser_step_headers (GMimeParser *parser) { struct 
_GMimeParserPrivate *priv = parser->priv; struct _header_raw *hend; register char *inptr; char *start, *inend; ssize_t left = 0; size_t len; priv->midline = FALSE; hend = (struct _header_raw *) &priv->headers; priv->headers_start = parser_offset (priv, NULL); priv->header_start = parser_offset (priv, NULL); inptr = priv->inptr; inend = priv->inend; do { refill: if (parser_fill (parser) <= left) break; inptr = priv->inptr; inend = priv->inend; /* Note: see optimization comment [1] */ *inend = '\n'; g_assert (inptr <= inend); while (inptr < inend) { start = inptr; /* Note: see optimization comment [1] */ while (*inptr != '\n') inptr++; len = (size_t) (inptr - start); raw_header_append (priv, start, len); if (inptr == inend) { /* we don't have enough data to tell if we got all of the header or not... */ header_append (priv, start, len); priv->midline = TRUE; left = inend - inptr; priv->inptr = inptr; goto refill; } /* check to see if we've reached the end of the headers */ if (!priv->midline && (inptr == start || (len == 1 && *start == '\r'))) goto headers_end; if (inptr > start && inptr[-1] == '\r') len--; header_append (priv, start, len); /* inptr has to be less than inend - 1 */ raw_header_append (priv, inptr, 1); inptr++; if (*inptr == ' ' || *inptr == '\t') { priv->midline = TRUE; } else { priv->midline = FALSE; header_parse (parser, priv, hend); priv->header_start = parser_offset (priv, inptr); } } left = inend - inptr; priv->inptr = inptr; } while (1); inptr = priv->inptr; inend = priv->inend; len = (size_t) (inend - inptr); header_append (priv, inptr, len); raw_header_append (priv, inptr, len); headers_end: if (priv->headerptr > priv->headerbuf) header_parse (parser, priv, hend); *priv->rawptr = '\0'; priv->state = GMIME_PARSER_STATE_HEADERS_END; g_assert (inptr <= priv->inend); priv->inptr = inptr; return 0; } static GMimeContentType * parser_content_type (GMimeParser *parser) { struct _GMimeParserPrivate *priv = parser->priv; const char *content_type; if 
((content_type = header_raw_find (priv->headers, "Content-Type", NULL))) return g_mime_content_type_new_from_string (content_type); return NULL; } static int parser_step (GMimeParser *parser) { struct _GMimeParserPrivate *priv = parser->priv; if (!priv->unstep) { step: switch (priv->state) { case GMIME_PARSER_STATE_INIT: if (priv->scan_from) priv->state = GMIME_PARSER_STATE_FROM; else priv->state = GMIME_PARSER_STATE_HEADERS; goto step; break; case GMIME_PARSER_STATE_FROM: parser_step_from (parser); break; case GMIME_PARSER_STATE_HEADERS: parser_step_headers (parser); break; case GMIME_PARSER_STATE_ERROR: break; default: g_assert_not_reached (); break; } } else { priv->unstep--; } return priv->state; } static void parser_skip_line (GMimeParser *parser) { struct _GMimeParserPrivate *priv = parser->priv; register char *inptr; char *inend; inptr = priv->inptr; do { if (parser_fill (parser) <= 0) { inptr = priv->inptr; break; } inptr = priv->inptr; inend = priv->inend; *inend = '\n'; while (*inptr != '\n') inptr++; if (inptr < inend) break; priv->inptr = inptr; } while (1); priv->midline = FALSE; priv->inptr = MIN (inptr + 1, priv->inend); } enum { FOUND_EOS = 1, FOUND_BOUNDARY, FOUND_END_BOUNDARY }; #define content_save(content, start, len) G_STMT_START { \ if (content) \ g_byte_array_append (content, (unsigned char *) start, len); \ } G_STMT_END #define possible_boundary(scan_from, start, len) \ ((scan_from && len >= 5 && !strncmp (start, "From ", 5)) || \ (len >= 2 && (start[0] == '-' && start[1] == '-'))) static int check_boundary (struct _GMimeParserPrivate *priv, const char *start, size_t len) { off_t offset = parser_offset (priv, start); if (start[len - 1] == '\r') len--; if (possible_boundary (priv->scan_from, start, len)) { struct _boundary_stack *s; d(printf ("checking boundary '%.*s'\n", len, start)); s = priv->bounds; while (s) { /* we use >= here because From lines are > 5 chars */ if (offset >= s->content_end && len >= s->boundarylenfinal && !strncmp 
(s->boundary, start, s->boundarylenfinal)) { d(printf ("found %s\n", s->content_end != -1 && offset >= s->content_end ? "end of content" : "end boundary")); return FOUND_END_BOUNDARY; } if (len == s->boundarylen && !strncmp (s->boundary, start, s->boundarylen)) { d(printf ("found boundary\n")); return FOUND_BOUNDARY; } s = s->parent; } d(printf ("'%.*s' not a boundary\n", len, start)); } return 0; } /* Optimization Notes: * * 1. By making the priv->realbuf char array 1 extra char longer, we * can safely set '*inend' to '\n' and not fear an ABW. Setting *inend * to '\n' means that we can eliminate having to check that inptr < * inend every trip through our inner while-loop. This cuts the number * of instructions down from ~7 to ~4, assuming the compiler does its * job correctly ;-) **/ static int parser_scan_content (GMimeParser *parser, GByteArray *content, guint *crlf) { struct _GMimeParserPrivate *priv = parser->priv; register char *inptr; char *start, *inend; size_t nleft, len; int found = 0; d(printf ("scan-content\n")); priv->midline = FALSE; g_assert (priv->inptr <= priv->inend); start = inptr = priv->inptr; do { refill: nleft = priv->inend - inptr; if (parser_fill (parser) <= 0) { start = priv->inptr; found = FOUND_EOS; break; } inptr = priv->inptr; inend = priv->inend; /* Note: see optimization comment [1] */ *inend = '\n'; len = (size_t) (inend - inptr); if (priv->midline && len == nleft) found = FOUND_EOS; priv->midline = FALSE; while (inptr < inend) { start = inptr; /* Note: see optimization comment [1] */ while (*inptr != '\n') inptr++; len = (size_t) (inptr - start); if (inptr < inend) { if ((found = check_boundary (priv, start, len))) goto boundary; inptr++; len++; } else { /* didn't find an end-of-line */ priv->midline = TRUE; if (!found) { /* not enough to tell if we found a boundary */ priv->inptr = start; inptr = start; goto refill; } /* check for a boundary not ending in a \n */ if ((found = check_boundary (priv, start, len))) goto boundary; } 
content_save (content, start, len); } priv->inptr = inptr; } while (!found); boundary: /* don't chew up the boundary */ priv->inptr = start; if (found != FOUND_EOS) { if (inptr[-1] == '\r') *crlf = 2; else *crlf = 1; } else { *crlf = 0; } return found; } static void parser_scan_mime_part_content (GMimeParser *parser, GMimePart *mime_part, int *found) { struct _GMimeParserPrivate *priv = parser->priv; GMimePartEncodingType encoding; GByteArray *content = NULL; GMimeDataWrapper *wrapper; GMimeStream *stream; off_t start, end; guint crlf; if (priv->persist_stream && priv->seekable) start = parser_offset (priv, NULL); else content = g_byte_array_new (); *found = parser_scan_content (parser, content, &crlf); if (*found != FOUND_EOS) { /* last '\n' belongs to the boundary */ if (priv->persist_stream && priv->seekable) end = parser_offset (priv, NULL) - crlf; else g_byte_array_set_size (content, MAX (content->len - crlf, 0)); } else if (priv->persist_stream && priv->seekable) { end = parser_offset (priv, NULL); } encoding = g_mime_part_get_encoding (mime_part); if (priv->persist_stream && priv->seekable) { stream = g_mime_stream_substream (priv->stream, start, end); } else { stream = g_mime_stream_mem_new_with_byte_array (content); } wrapper = g_mime_data_wrapper_new_with_stream (stream, encoding); g_mime_part_set_content_object (mime_part, wrapper); g_object_unref (wrapper); g_object_unref (stream); } static void parser_scan_message_part (GMimeParser *parser, GMimeMessagePart *mpart, int *found) { struct _GMimeParserPrivate *priv = parser->priv; GMimeContentType *content_type; struct _header_raw *header; GMimeMessage *message; GMimeObject *object; /* get the headers */ parser->priv->state = GMIME_PARSER_STATE_HEADERS; while (parser_step (parser) != GMIME_PARSER_STATE_HEADERS_END) ; message = g_mime_message_new (FALSE); header = priv->headers; while (header) { g_mime_object_add_header ((GMimeObject *) message, header->name, header->value); header = header->next; } if 
(!(content_type = parser_content_type (parser))) content_type = g_mime_content_type_new ("text", "plain"); parser_unstep (parser); if (g_mime_content_type_is_type (content_type, "multipart", "*")) { object = parser_construct_multipart (parser, content_type, found); } else { object = parser_construct_leaf_part (parser, content_type, found); } message->mime_part = object; g_mime_message_part_set_message (mpart, message); g_object_unref (message); } static GMimeObject * parser_construct_leaf_part (GMimeParser *parser, GMimeContentType *content_type, int *found) { struct _GMimeParserPrivate *priv = parser->priv; struct _header_raw *header; GMimeObject *object; /* get the headers */ while (parser_step (parser) != GMIME_PARSER_STATE_HEADERS_END) ; object = g_mime_object_new_type (content_type->type, content_type->subtype); header = priv->headers; while (header) { g_mime_object_add_header (object, header->name, header->value); header = header->next; } header_raw_clear (&priv->headers); if (object->content_type) g_mime_content_type_destroy (object->content_type); object->content_type = content_type; g_mime_header_set_raw (object->headers, priv->rawbuf); raw_header_reset (priv); /* skip empty line after headers */ parser_skip_line (parser); if (GMIME_IS_MESSAGE_PART (object)) { parser_scan_message_part (parser, (GMimeMessagePart *) object, found); } else { parser_scan_mime_part_content (parser, (GMimePart *) object, found); } return object; } static void crlf2lf (char *in) { register char *inptr = in; register char *outptr; while (*inptr != '\0' && !(inptr[0] == '\r' && inptr[1] == '\n')) inptr++; if (*inptr == '\0') return; outptr = inptr++; while (*inptr != '\0') { while (*inptr != '\0' && !(inptr[0] == '\r' && inptr[1] == '\n')) *outptr++ = *inptr++; if (*inptr == '\r') inptr++; } *outptr = '\0'; } static int parser_scan_multipart_face (GMimeParser *parser, GMimeMultipart *multipart, gboolean preface) { GByteArray *buffer; guint crlf; char *face; int found; buffer = 
g_byte_array_new (); found = parser_scan_content (parser, buffer, &crlf); if (buffer->len >= crlf) { /* last '\n' belongs to the boundary */ g_byte_array_set_size (buffer, buffer->len + 1); buffer->data[buffer->len - crlf - 1] = '\0'; face = (char *) buffer->data; crlf2lf (face); if (preface) g_mime_multipart_set_preface (multipart, face); else g_mime_multipart_set_postface (multipart, face); } g_byte_array_free (buffer, TRUE); return found; } #define parser_scan_multipart_preface(parser, multipart) parser_scan_multipart_face (parser, multipart, TRUE) #define parser_scan_multipart_postface(parser, multipart) parser_scan_multipart_face (parser, multipart, FALSE) static int parser_scan_multipart_subparts (GMimeParser *parser, GMimeMultipart *multipart) { GMimeContentType *content_type; GMimeObject *subpart; int found; do { /* skip over the boundary marker */ parser_skip_line (parser); /* get the headers */ parser_step_headers (parser); if (!(content_type = parser_content_type (parser))) content_type = g_mime_content_type_new ("text", "plain"); parser_unstep (parser); if (g_mime_content_type_is_type (content_type, "multipart", "*")) { subpart = parser_construct_multipart (parser, content_type, &found); } else { subpart = parser_construct_leaf_part (parser, content_type, &found); } g_mime_multipart_add_part (multipart, subpart); g_object_unref (subpart); } while (found == FOUND_BOUNDARY); return found; } static gboolean found_immediate_boundary (struct _GMimeParserPrivate *priv) { struct _boundary_stack *s = priv->bounds; return !strncmp (s->boundary, priv->inptr, s->boundarylenfinal); } static GMimeObject * parser_construct_multipart (GMimeParser *parser, GMimeContentType *content_type, int *found) { struct _GMimeParserPrivate *priv = parser->priv; struct _header_raw *header; GMimeMultipart *multipart; const char *boundary; GMimeObject *object; /* get the headers */ while (parser_step (parser) != GMIME_PARSER_STATE_HEADERS_END) ; object = g_mime_object_new_type 
(content_type->type, content_type->subtype); header = priv->headers; while (header) { g_mime_object_add_header (object, header->name, header->value); header = header->next; } header_raw_clear (&priv->headers); if (object->content_type) g_mime_content_type_destroy (object->content_type); object->content_type = content_type; g_mime_header_set_raw (object->headers, priv->rawbuf); raw_header_reset (priv); multipart = (GMimeMultipart *) object; /* skip empty line after headers */ parser_skip_line (parser); boundary = g_mime_content_type_get_parameter (content_type, "boundary"); if (boundary) { parser_push_boundary (parser, boundary); *found = parser_scan_multipart_preface (parser, multipart); if (*found == FOUND_BOUNDARY) *found = parser_scan_multipart_subparts (parser, multipart); if (*found == FOUND_END_BOUNDARY && found_immediate_boundary (priv)) { /* eat end boundary */ parser_skip_line (parser); parser_pop_boundary (parser); *found = parser_scan_multipart_postface (parser, multipart); } else { parser_pop_boundary (parser); } } else { w(g_warning ("multipart without boundary encountered")); /* this will scan everything into the preface */ *found = parser_scan_multipart_preface (parser, multipart); } return object; } static GMimeObject * parser_construct_part (GMimeParser *parser) { GMimeContentType *content_type; GMimeObject *object; int found; /* get the headers */ while (parser_step (parser) != GMIME_PARSER_STATE_HEADERS_END) ; if (!(content_type = parser_content_type (parser))) content_type = g_mime_content_type_new ("text", "plain"); parser_unstep (parser); if (g_mime_content_type_is_type (content_type, "multipart", "*")) { object = parser_construct_multipart (parser, content_type, &found); } else { object = parser_construct_leaf_part (parser, content_type, &found); } return object; } /** * g_mime_parser_construct_part: * @parser: MIME parser object * * Constructs a MIME part from @parser. * * Returns a MIME part based on @parser or %NULL on fail. 
**/
GMimeObject *
g_mime_parser_construct_part (GMimeParser *parser)
{
	g_return_val_if_fail (GMIME_IS_PARSER (parser), NULL);
	
	return parser_construct_part (parser);
}

/* Reads the next message from the parser: optionally the mbox
 * From-line, then the headers, then the body (multipart or leaf,
 * defaulting to text/plain).  When From-line scanning is on, "From "
 * is pushed as the outermost boundary for the duration of the body
 * and the state is left at GMIME_PARSER_STATE_FROM afterwards.
 *
 * Returns the new message, or NULL if the parser entered the error
 * state before the headers were complete. */
static GMimeMessage *
parser_construct_message (GMimeParser *parser)
{
	struct _GMimeParserPrivate *priv = parser->priv;
	GMimeContentType *content_type;
	struct _header_raw *header;
	/* kept as unsigned long plus a flag: storing strtoul()'s result
	 * in an int with -1 as the "absent" marker misreads large
	 * values */
	unsigned long content_length = 0;
	gboolean have_content_length = FALSE;
	GMimeMessage *message;
	GMimeObject *object;
	int state, found;
	
	/* get the headers (and, optionally, the from-line) */
	while ((state = parser_step (parser)) != GMIME_PARSER_STATE_ERROR && state != GMIME_PARSER_STATE_HEADERS_END)
		;
	
	if (state == GMIME_PARSER_STATE_ERROR)
		return NULL;
	
	message = g_mime_message_new (FALSE);
	header = priv->headers;
	while (header) {
		if (priv->respect_content_length && !g_ascii_strcasecmp (header->name, "Content-Length")) {
			char *end = NULL;
			unsigned long value;
			
			/* only trust the header if it begins with a number */
			value = strtoul (header->value, &end, 10);
			if (end != header->value) {
				content_length = value;
				have_content_length = TRUE;
			}
		}
		
		g_mime_object_add_header ((GMimeObject *) message, header->name, header->value);
		header = header->next;
	}
	
	if (priv->scan_from) {
		parser_push_boundary (parser, "From ");
		/* "From " lines before content_end are not treated as
		 * the end of this message (see check_boundary) */
		if (priv->respect_content_length && have_content_length)
			priv->bounds->content_end = parser_offset (priv, NULL) + (off_t) content_length;
	}
	
	if (!(content_type = parser_content_type (parser)))
		content_type = g_mime_content_type_new ("text", "plain");
	
	/* headers are already read; make the constructor's
	 * parser_step() call a no-op */
	parser_unstep (parser);
	if (content_type && g_mime_content_type_is_type (content_type, "multipart", "*")) {
		object = parser_construct_multipart (parser, content_type, &found);
	} else {
		object = parser_construct_leaf_part (parser, content_type, &found);
	}
	
	message->mime_part = object;
	
	if (priv->scan_from) {
		/* get ready to look for the next message's From-line */
		priv->state = GMIME_PARSER_STATE_FROM;
		parser_pop_boundary (parser);
	}
	
	return message;
}


/**
 * g_mime_parser_construct_message:
 * @parser: MIME parser object
 *
 * Constructs a MIME message from @parser.
 *
 * Returns a MIME message or %NULL on fail.
**/
GMimeMessage *
g_mime_parser_construct_message (GMimeParser *parser)
{
	g_return_val_if_fail (GMIME_IS_PARSER (parser), NULL);
	
	return parser_construct_message (parser);
}


/**
 * g_mime_parser_get_from:
 * @parser: MIME parser object
 *
 * Retrieves a copy of the mbox-style From-line belonging to the
 * message most recently produced by
 * g_mime_parser_construct_message().
 *
 * Returns a newly allocated copy of that From-line, or %NULL if
 * @parser is not scanning From-lines or no From-line has been
 * recorded.
 **/
char *
g_mime_parser_get_from (GMimeParser *parser)
{
	GByteArray *line;
	
	g_return_val_if_fail (GMIME_IS_PARSER (parser), NULL);
	
	if (!parser->priv->scan_from)
		return NULL;
	
	line = parser->priv->from_line;
	if (line->len == 0)
		return NULL;
	
	return g_strndup ((char *) line->data, line->len);
}


/**
 * g_mime_parser_get_from_offset:
 * @parser: MIME parser object
 *
 * Retrieves the stream offset of the mbox-style From-line belonging
 * to the message most recently produced by
 * g_mime_parser_construct_message().
 *
 * Returns that offset, or -1 if @parser is not scanning From-lines
 * or no From-line offset has been recorded.
 **/
off_t
g_mime_parser_get_from_offset (GMimeParser *parser)
{
	struct _GMimeParserPrivate *state;
	
	g_return_val_if_fail (GMIME_IS_PARSER (parser), -1);
	
	state = parser->priv;
	
	return state->scan_from ? state->from_offset : -1;
}