/*
* libEtPan! -- a mail stuff library
*
* Copyright (C) 2001, 2005 - DINH Viet Hoa
* Copyright (C) 2006 Andrej Kacian <andrej@kacian.sk>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the libEtPan! project nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "parser.h"
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#ifdef HAVE_ICONV
#include <iconv.h>
#endif
#include "newsfeed.h"
#include "newsfeed_private.h"
#include "parser_rss20.h"
#include "parser_rdf.h"
#include "parser_atom10.h"
#include "parser_atom03.h"
/* Feed flavours that the root-element chooser can select a parser for. */
enum {
  FEED_TYPE_NONE    = 0, /* no dedicated parser selected */
  FEED_TYPE_RDF     = 1, /* root element "rdf:RDF" */
  FEED_TYPE_RSS_20  = 2, /* root element "rss" */
  FEED_TYPE_ATOM_03 = 3, /* root element "feed", xmlns other than Atom 1.0 */
  FEED_TYPE_ATOM_10 = 4  /* root element "feed", Atom 1.0 xmlns */
};
#ifdef HAVE_EXPAT
/*
 * Install the element start/end callbacks that match a detected feed type.
 *
 * parser: expat parser to configure; a NULL parser is ignored.
 * type:   one of the FEED_TYPE_* constants. Any value without a dedicated
 *         parser (FEED_TYPE_NONE included) leaves the current handlers
 *         untouched.
 */
static void handler_set(XML_Parser parser, unsigned int type)
{
  if (parser == NULL) {
    return;
  }

  if (type == FEED_TYPE_RSS_20) {
    XML_SetElementHandler(parser,
        newsfeed_parser_rss20_start,
        newsfeed_parser_rss20_end);
  }
  else if (type == FEED_TYPE_RDF) {
    XML_SetElementHandler(parser,
        newsfeed_parser_rdf_start,
        newsfeed_parser_rdf_end);
  }
  else if (type == FEED_TYPE_ATOM_10) {
    XML_SetElementHandler(parser,
        newsfeed_parser_atom10_start,
        newsfeed_parser_atom10_end);
  }
  else if (type == FEED_TYPE_ATOM_03) {
    XML_SetElementHandler(parser,
        newsfeed_parser_atom03_start,
        newsfeed_parser_atom03_end);
  }
}
/*
 * Start-element callback used until the feed format is known.
 *
 * At depth 0 the element name (compared case-insensitively) decides which
 * format-specific handlers get installed:
 *   "rss"     -> RSS 2.0
 *   "rdf:RDF" -> RDF
 *   "feed"    -> Atom 1.0 when xmlns is "http://www.w3.org/2005/Atom",
 *                Atom 0.3 for any other xmlns value; without an xmlns
 *                attribute nothing is selected.
 * The depth counter is incremented for every element seen.
 *
 * data: the struct newsfeed_parser_context registered as user data.
 * el:   element name.
 * attr: NULL-terminated list of attribute name/value pairs.
 */
static void elparse_start_chooser(void * data,
    const char * el, const char ** attr)
{
  struct newsfeed_parser_context * ctx = data;
  unsigned int detected = FEED_TYPE_NONE;

  if (ctx->depth == 0) {
    if (strcasecmp(el, "rss") == 0) {
      detected = FEED_TYPE_RSS_20;
    }
    else if (strcasecmp(el, "rdf:RDF") == 0) {
      detected = FEED_TYPE_RDF;
    }
    else if (strcasecmp(el, "feed") == 0) {
      /* Atom: the namespace URI tells the two supported versions apart */
      const char * ns = newsfeed_parser_get_attribute_value(attr, "xmlns");

      if (ns != NULL) {
        detected = (strcmp(ns, "http://www.w3.org/2005/Atom") == 0)
          ? FEED_TYPE_ATOM_10
          : FEED_TYPE_ATOM_03;
      }
    }
  }

  handler_set(ctx->parser, detected);
  ctx->depth ++;
}
/*
 * End-element callback used while no format-specific parser is installed:
 * throws away the text collected so far and leaves one nesting level.
 *
 * data: the struct newsfeed_parser_context registered as user data.
 * el:   element name (not used).
 */
static void elparse_end_dummy(void * data, const char * el)
{
  struct newsfeed_parser_context * context = data;

  (void) el;

  mmap_string_truncate(context->str, 0);
  context->depth -= 1;
}
/*
 * Character-data callback: appends element text to ctx->str.
 *
 * data: the struct newsfeed_parser_context registered as user data.
 * s:    character data; only the first len bytes are read.
 * len:  number of bytes available at s.
 *
 * A chunk consisting only of spaces and tabs is ignored. A newline is never
 * stored as the first character of ctx->str; such newlines are skipped and
 * the remainder of the chunk is still appended. If the buffer cannot grow,
 * ctx->error is set to NEWSFEED_ERROR_MEMORY and the rest of the chunk is
 * dropped.
 */
static void chparse(void * data, const char * s, int len)
{
  struct newsfeed_parser_context * ctx = data;
  int blank = 1;
  int i;

  /* check if the chunk is blank, ... */
  for (i = 0 ; i < len ; i ++) {
    if ((s[i] != ' ') && (s[i] != '\t')) {
      blank = 0;
      break;
    }
  }

  /* ... because we do not want to deal with blank chunks */
  if (blank)
    return;

  for (i = 0 ; i < len ; i ++) {
    /*
     * Do not append a newline as first char of our string. The input
     * position advances on every iteration, so skipping a newline no longer
     * stalls on that byte and loses everything after it.
     */
    if ((s[i] == '\n') && (ctx->str->len == 0))
      continue;

    if (mmap_string_append_c(ctx->str, s[i]) == NULL) {
      ctx->error = NEWSFEED_ERROR_MEMORY;
      return;
    }
  }
}
/* Number of bytes in one UTF-32 code unit. */
#define CHARSIZEUTF32 4

/* Result codes of iconv_utf32_char(). */
enum {
  LEP_ICONV_OK,       /* exactly one character was converted */
  LEP_ICONV_FAILED,   /* iconv not available, or input/output not fully used */
  LEP_ICONV_ILSEQ,    /* iconv failed with errno EILSEQ */
  LEP_ICONV_INVAL,    /* iconv failed with errno EINVAL (incomplete input) */
  LEP_ICONV_UNKNOWN   /* iconv failed with any other errno */
};

/*
 * Convert the byte sequence inbuf[0..insize-1] to a single UTF-32 value.
 *
 * cd:      conversion descriptor; its output bytes are assembled most
 *          significant byte first, so it is expected to produce UTF-32BE.
 * inbuf:   bytes to convert.
 * insize:  number of bytes at inbuf.
 * p_value: receives the code point; written only on LEP_ICONV_OK.
 *
 * Returns LEP_ICONV_OK when the whole input was consumed and exactly
 * CHARSIZEUTF32 output bytes were produced, otherwise one of the other
 * LEP_ICONV_* codes. After an iconv() failure the descriptor is reset to its
 * initial state. Without HAVE_ICONV the function always returns
 * LEP_ICONV_FAILED.
 */
static int iconv_utf32_char(iconv_t cd, const char * inbuf, size_t insize,
    uint32_t * p_value)
{
#ifdef HAVE_ICONV
  unsigned char outbuf[CHARSIZEUTF32];
  char * outbufp = (char *) outbuf;
  size_t outsize = sizeof(outbuf);
  size_t r;
  uint32_t value;
  unsigned int i;

#ifdef HAVE_ICONV_PROTO_CONST
  r = iconv(cd, (const char **) &inbuf, &insize,
      &outbufp, &outsize);
#else
  r = iconv(cd, (char **) &inbuf, &insize, &outbufp, &outsize);
#endif
  if (r == (size_t) -1) {
    /* keep the failure reason: the reset call below may overwrite errno */
    int saved_errno = errno;

    iconv(cd, NULL, NULL, NULL, NULL);

    switch (saved_errno) {
    case EILSEQ:
      return LEP_ICONV_ILSEQ;
    case EINVAL:
      return LEP_ICONV_INVAL;
    default:
      return LEP_ICONV_UNKNOWN;
    }
  }

  /* leftover input or a partially filled output is not "one character" */
  if ((insize > 0) || (outsize > 0))
    return LEP_ICONV_FAILED;

  value = 0;
  for (i = 0 ; i < sizeof(outbuf) ; i ++) {
    value = (value << 8) + outbuf[i];
  }
  * p_value = value;

  return LEP_ICONV_OK;
#else
  (void) cd;
  (void) inbuf;
  (void) insize;
  (void) p_value;

  return LEP_ICONV_FAILED;
#endif
}
/*
 * Fill info->map for charset by probing every possible first byte with
 * iconv.
 *
 * For each byte value b:
 *   - map[b] is the converted code point when b alone is a character;
 *   - map[b] is -2 or -3 when iconv reports b as incomplete and some 2- or
 *     3-byte sequence starting with b converts;
 *   - map[b] stays 0 otherwise.
 *
 * Returns 1 if a conversion function is needed (a negative entry was
 * stored), 0 if the map alone is sufficient, -1 if iconv_open() failed.
 */
static int setup_unknown_encoding(const char * charset, XML_Encoding * info)
{
  iconv_t cd;
  int need_convert;
  char buf[4];
  unsigned int i;

  cd = iconv_open("UTF-32BE", charset);
  if (cd == (iconv_t) (-1))
    return -1;

  need_convert = 0;
  for (i = 0; i < 256; i++) {
    /* *** first char *** */
    uint32_t value;
    unsigned int j;
    int r;

    buf[0] = i;
    info->map[i] = 0;
    r = iconv_utf32_char(cd, buf, 1, &value);
    if (r == LEP_ICONV_OK) {
      info->map[i] = value;
      continue;
    }
    if (r != LEP_ICONV_INVAL) {
      /* not a valid lead byte: nothing more to probe */
      continue;
    }

    for (j = 0; j < 256; j++) {
      /* *** second char *** */
      unsigned int k;

      buf[1] = j;
      r = iconv_utf32_char(cd, buf, 2, &value);
      if (r == LEP_ICONV_OK) {
        need_convert = 1;
        info->map[i] = -2;
        continue;
      }
      if (r != LEP_ICONV_INVAL) {
        continue;
      }

      for (k = 0; k < 256; k++) {
        /* *** third char *** */
        buf[2] = k;
        r = iconv_utf32_char(cd, buf, 3, &value);
        if (r == LEP_ICONV_OK) {
          /*
           * A negative map entry is only usable together with a convert
           * callback, so 3-byte sequences must raise the flag as well.
           */
          need_convert = 1;
          info->map[i] = -3;
        }
      }
    }
  }

  iconv_close(cd);

  return need_convert;
}
/*
 * Per-encoding state stored in XML_Encoding.data by
 * unknown_encoding_handler() and released by unknown_encoding_data_free().
 */
struct unknown_encoding_data {
/* heap-allocated copy of the encoding name */
char * charset;
/* iconv descriptor converting from charset to "UTF-32BE" */
iconv_t cd;
};
/*
 * Convert callback for multi-byte sequences of an unknown encoding.
 *
 * data: the struct unknown_encoding_data installed by
 *       unknown_encoding_handler().
 * s:    start of the multi-byte sequence.
 *
 * Returns the Unicode code point of the sequence, or -1 when s is NULL or
 * no prefix of 2 to 4 bytes converts to exactly one character.
 */
static int unknown_encoding_convert(void * data, const char * s)
{
  struct unknown_encoding_data * enc_data = data;
  size_t insize;

  if (s == NULL)
    return -1;

  /*
   * The callback is not told how long the sequence is. Handing iconv a fixed
   * 4 bytes makes shorter sequences fail (the following bytes do not fit in
   * the one-character output buffer), so try each possible length and keep
   * the first that converts to exactly one character.
   */
  for (insize = 2 ; insize <= 4 ; insize ++) {
    uint32_t value;

    if (iconv_utf32_char(enc_data->cd, s, insize, &value) == LEP_ICONV_OK) {
      /* the caller expects the decoded value, not a status code */
      return (int) value;
    }
  }

  return -1;
}
/*
 * Release callback: closes the iconv descriptor and frees the charset name
 * and the struct unknown_encoding_data itself.
 *
 * data: the struct unknown_encoding_data to destroy; must not be NULL.
 */
static void unknown_encoding_data_free(void * data)
{
  struct unknown_encoding_data * state = data;

  iconv_close(state->cd);
  free(state->charset);
  free(state);
}
/*
 * Unknown-encoding callback: describes the encoding `name` to expat.
 *
 * encdata: handler user data (not used).
 * name:    encoding name taken from the document.
 * info:    structure to fill in.
 *
 * The byte map is built by setup_unknown_encoding(). When that reports that
 * the map alone is enough, no converter is installed. Otherwise an iconv
 * descriptor (name -> "UTF-32BE") plus a copy of the name are stored in
 * info->data, with unknown_encoding_convert / unknown_encoding_data_free as
 * callbacks.
 *
 * Returns XML_STATUS_OK on success, XML_STATUS_ERROR if iconv_open() or an
 * allocation fails.
 */
static int unknown_encoding_handler(void * encdata, const XML_Char * name,
    XML_Encoding * info)
{
  struct unknown_encoding_data * state;
  iconv_t cd;

  if (setup_unknown_encoding(name, info) == 0) {
    /* single-byte encoding: the map is all the parser needs */
    info->data = NULL;
    info->convert = NULL;
    info->release = NULL;
    return XML_STATUS_OK;
  }

  cd = iconv_open("UTF-32BE", name);
  if (cd == (iconv_t) -1) {
    return XML_STATUS_ERROR;
  }

  state = malloc(sizeof(* state));
  if (state == NULL) {
    iconv_close(cd);
    return XML_STATUS_ERROR;
  }

  state->charset = strdup(name);
  if (state->charset == NULL) {
    free(state);
    iconv_close(cd);
    return XML_STATUS_ERROR;
  }
  state->cd = cd;

  info->data = state;
  info->convert = unknown_encoding_convert;
  info->release = unknown_encoding_data_free;

  return XML_STATUS_OK;
}
#endif
/*
 * Register the initial callbacks on ctx->parser: ctx itself as user data,
 * the feed-type chooser / dummy end handler for elements, chparse for
 * character data and unknown_encoding_handler for unknown encodings.
 * Does nothing when built without HAVE_EXPAT.
 */
void newsfeed_parser_set_expat_handlers(struct newsfeed_parser_context * ctx)
{
#ifdef HAVE_EXPAT
  XML_Parser xp = ctx->parser;

  XML_SetUserData(xp, (void *) ctx);
  XML_SetElementHandler(xp, elparse_start_chooser, elparse_end_dummy);
  XML_SetCharacterDataHandler(xp, chparse);
  XML_SetUnknownEncodingHandler(xp, unknown_encoding_handler, NULL);
#endif
}
/*
 * Feed a chunk of downloaded data to the XML parser.
 *
 * ptr:   start of the chunk.
 * size:  size of one item.
 * nmemb: number of items; size * nmemb bytes are parsed.
 * data:  the struct newsfeed_parser_context owning the parser.
 *
 * Returns the number of bytes handed to the parser, or 0 when ctx->error is
 * set before or after parsing. Always returns 0 when built without
 * HAVE_EXPAT.
 * NOTE(review): the signature looks like a libcurl write callback -- confirm
 * against the caller.
 */
size_t newsfeed_writefunc(void * ptr, size_t size, size_t nmemb, void * data)
{
#ifdef HAVE_EXPAT
  struct newsfeed_parser_context * ctx = data;
  unsigned int nbytes = size * nmemb;

  /* a previous chunk already failed: refuse further input */
  if (ctx->error != NEWSFEED_NO_ERROR) {
    return 0;
  }

  XML_Parse(ctx->parser, ptr, nbytes, 0);

  /* the handlers report problems through ctx->error */
  if (ctx->error != NEWSFEED_NO_ERROR) {
    return 0;
  }

  return nbytes;
#else
  return 0;
#endif
}
/*
 * Look up an attribute in a name/value attribute list.
 *
 * attr: NULL-terminated array laid out as name0, value0, name1, value1, ...
 * name: attribute name to search for (exact, case-sensitive match).
 *
 * Returns the value paired with the first matching name, or NULL when the
 * attribute is absent or when either argument is NULL. The returned pointer
 * is an element of attr and is not copied.
 */
const char * newsfeed_parser_get_attribute_value(const char ** attr,
    const char * name)
{
  unsigned int i;

  /* either argument missing makes the lookup impossible */
  if ((attr == NULL) || (name == NULL))
    return NULL;

  for (i = 0 ; attr[i] != NULL && attr[i + 1] != NULL ; i += 2) {
    if (strcmp(attr[i], name) == 0)
      return attr[i + 1];
  }

  /* We haven't found anything. */
  return NULL;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */