/* * libEtPan! -- a mail stuff library * * Copyright (C) 2001, 2005 - DINH Viet Hoa * Copyright (C) 2006 Andrej Kacian * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the libEtPan! project nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "parser.h" #include #include #include #include #ifdef HAVE_ICONV #include #endif #include "newsfeed.h" #include "newsfeed_private.h" #include "parser_rss20.h" #include "parser_rdf.h" #include "parser_atom10.h" #include "parser_atom03.h" enum { FEED_TYPE_NONE, FEED_TYPE_RDF, FEED_TYPE_RSS_20, FEED_TYPE_ATOM_03, FEED_TYPE_ATOM_10 }; #ifdef HAVE_EXPAT static void handler_set(XML_Parser parser, unsigned int type) { if (parser == NULL) return; switch(type) { case FEED_TYPE_RSS_20: XML_SetElementHandler(parser, newsfeed_parser_rss20_start, newsfeed_parser_rss20_end); break; case FEED_TYPE_RDF: XML_SetElementHandler(parser, newsfeed_parser_rdf_start, newsfeed_parser_rdf_end); break; case FEED_TYPE_ATOM_10: XML_SetElementHandler(parser, newsfeed_parser_atom10_start, newsfeed_parser_atom10_end); break; case FEED_TYPE_ATOM_03: XML_SetElementHandler(parser, newsfeed_parser_atom03_start, newsfeed_parser_atom03_end); break; } } static void elparse_start_chooser(void * data, const char * el, const char ** attr) { struct newsfeed_parser_context * ctx; unsigned int feedtype; XML_Parser parser; ctx = (struct newsfeed_parser_context *) data; feedtype = FEED_TYPE_NONE; if (ctx->depth == 0) { /* RSS 2.0 detected */ if (strcasecmp(el, "rss") == 0) { feedtype = FEED_TYPE_RSS_20; } else if (strcasecmp(el, "rdf:RDF") == 0) { feedtype = FEED_TYPE_RDF; } else if (strcasecmp(el, "feed") == 0) { const char * version; /* ATOM feed detected, let's check version */ version = newsfeed_parser_get_attribute_value(attr, "xmlns"); if (version != NULL) { if (strcmp(version, "http://www.w3.org/2005/Atom") == 0) feedtype = FEED_TYPE_ATOM_10; else feedtype = FEED_TYPE_ATOM_03; } } } parser = ctx->parser; handler_set(parser, feedtype); ctx->depth ++; } static void elparse_end_dummy(void * data, const char * el) { struct newsfeed_parser_context * ctx; ctx = (struct newsfeed_parser_context *) data; mmap_string_truncate(ctx->str, 0); ctx->depth --; } static void chparse(void * data, const char * s, int len) { struct newsfeed_parser_context * ctx; char * pt; int i; int blank; blank = 1; ctx = (struct newsfeed_parser_context *) data; /* check if the string is blank, ... */ for(i = 0, pt = (XML_Char *) s ; i < len ; i ++) { if ((* pt != ' ') && (* pt != '\t')) blank = 0; pt ++; } /* ... because we do not want to deal with blank strings */ if (blank) return; for(i = 0, pt = (XML_Char *) s ; i < len ; i ++) { /* do not append newline as first char of our string */ if ((* pt != '\n') || (ctx->str->len != 0)) { if (mmap_string_append_c(ctx->str, * pt) == NULL) { ctx->error = NEWSFEED_ERROR_MEMORY; return; } pt ++; } } } #define CHARSIZEUTF32 4 enum { LEP_ICONV_OK, LEP_ICONV_FAILED, LEP_ICONV_ILSEQ, LEP_ICONV_INVAL, LEP_ICONV_UNKNOWN, }; static int iconv_utf32_char(iconv_t cd, const char * inbuf, size_t insize, uint32_t * p_value) { #ifdef HAVE_ICONV size_t outsize; unsigned char outbuf[CHARSIZEUTF32]; char * outbufp; int r; outsize = sizeof(outbuf); outbufp = (char *) outbuf; #ifdef HAVE_ICONV_PROTO_CONST r = iconv(cd, (const char **) &inbuf, &insize, &outbufp, &outsize); #else r = iconv(cd, (char **) &inbuf, &insize, &outbufp, &outsize); #endif if (r == -1) { iconv (cd, 0, 0, 0, 0); switch (errno) { case EILSEQ: return LEP_ICONV_ILSEQ; case EINVAL: return LEP_ICONV_INVAL; default: return LEP_ICONV_UNKNOWN; } } else { uint32_t value; unsigned int i; if ((insize > 0) || (outsize > 0)) return LEP_ICONV_FAILED; value = 0; for(i = 0 ; i < sizeof(outbuf) ; i ++) { value = (value << 8) + outbuf[i]; } * p_value = value; return LEP_ICONV_OK; } #else return LEP_ICONV_FAIL; #endif } /* return 1 if conversion function is needed */ static int setup_unknown_encoding(const char * charset, XML_Encoding * info) { iconv_t cd; int flag; char buf[4]; unsigned int i; int r; cd = iconv_open("UTF-32BE", charset); if (cd == (iconv_t) (-1)) return -1; flag = 0; for (i = 0; i < 256; i++) { /* *** first char *** */ uint32_t value; buf[0] = i; info->map[i] = 0; r = iconv_utf32_char(cd, buf, 1, &value); if (r == LEP_ICONV_OK) { info->map[i] = value; } else if (r != LEP_ICONV_INVAL) { /* do nothing */ } else /* r == LEP_ICONV_INVAL */ { unsigned int j; for (j = 0; j < 256; j++) { /* *** second char *** */ buf[1] = j; r = iconv_utf32_char(cd, buf, 2, &value); if (r == LEP_ICONV_OK) { flag = 1; info->map[i] = -2; } else if (r != LEP_ICONV_INVAL) { /* do nothing */ } else /* r == LEP_ICONV_INVAL */ { unsigned int k; for (k = 0; k < 256; k++) { /* *** third char *** */ buf[2] = k; r = iconv_utf32_char(cd, buf, 3, &value); if (r == LEP_ICONV_OK) { info->map[i] = -3; } } } } } } iconv_close(cd); return flag; } struct unknown_encoding_data { char * charset; iconv_t cd; }; static int unknown_encoding_convert(void * data, const char * s) { int r; struct unknown_encoding_data * enc_data; size_t insize; uint32_t value; enc_data = data; insize = 4; if (s == NULL) goto err; r = iconv_utf32_char(enc_data->cd, s, insize, &value); if (r != LEP_ICONV_OK) return -1; return 0; err: return -1; } static void unknown_encoding_data_free(void * data) { struct unknown_encoding_data * enc_data; enc_data = data; free(enc_data->charset); iconv_close(enc_data->cd); free(enc_data); } static int unknown_encoding_handler(void * encdata, const XML_Char * name, XML_Encoding * info) { iconv_t cd; struct unknown_encoding_data * data; int result; result = setup_unknown_encoding(name, info); if (result == 0) { info->data = NULL; info->convert = NULL; info->release = NULL; return XML_STATUS_OK; } cd = iconv_open("UTF-32BE", name); if (cd == (iconv_t) -1) { goto err; } data = malloc(sizeof(* data)); if (data == NULL) goto close_iconv; data->charset = strdup(name); if (data->charset == NULL) goto free_data; data->cd = cd; info->data = data; info->convert = unknown_encoding_convert; info->release = unknown_encoding_data_free; return XML_STATUS_OK; free_data: free(data); close_iconv: iconv_close(cd); err: return XML_STATUS_ERROR; } #endif void newsfeed_parser_set_expat_handlers(struct newsfeed_parser_context * ctx) { #ifdef HAVE_EXPAT XML_Parser parser; parser = ctx->parser; XML_SetUserData(parser, (void *) ctx); XML_SetElementHandler(parser, elparse_start_chooser, elparse_end_dummy); XML_SetCharacterDataHandler(parser, chparse); XML_SetUnknownEncodingHandler(parser, unknown_encoding_handler, NULL); #endif } size_t newsfeed_writefunc(void * ptr, size_t size, size_t nmemb, void * data) { #ifdef HAVE_EXPAT unsigned int len; struct newsfeed_parser_context * ctx; XML_Parser parser; ctx = data; len = size * nmemb; if (ctx->error != NEWSFEED_NO_ERROR) { return 0; } parser = ctx->parser; XML_Parse(parser, ptr, len, 0); if (ctx->error != NEWSFEED_NO_ERROR) { return 0; } return len; #endif return 0; } const char * newsfeed_parser_get_attribute_value(const char ** attr, const char * name) { unsigned int i; if ((attr == NULL) && (name == NULL)) return NULL; for(i = 0 ; attr[i] != NULL && attr[i + 1] != NULL ; i += 2 ) { if (strcmp(attr[i], name) == 0) return attr[i + 1]; } /* We haven't found anything. */ return NULL; }