/* ====================================================================
* The Kannel Software License, Version 1.0
*
* Copyright (c) 2001-2005 Kannel Group
* Copyright (c) 1998-2001 WapIT Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Kannel Group (http://www.kannel.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Kannel" and "Kannel Group" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please
* contact org@kannel.org.
*
* 5. Products derived from this software may not be called "Kannel",
* nor may "Kannel" appear in their name, without prior written
* permission of the Kannel Group.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Kannel Group. For more information on
* the Kannel Group, please see <http://www.kannel.org/>.
*
* Portions of this software are based upon software originally written at
* WapIT Ltd., Helsinki, Finland for the Kannel project.
*/
/*
* wap_push_sl_compiler.c: Tokenizes a SL document. SL DTD is defined in
* Wapforum specification WAP-168-ServiceLoad-20010731-a (hereafter called sl),
* chapter 9.2.
*
* By Aarno Syvänen for Wiral Ltd
*/
#include
#include
#include
#include
#include
#include "xml_shared.h"
#include "wap_push_sl_compiler.h"
/******************************************************************************
*
* Following global variables are unique to SL compiler. See sl, chapter 10.3.
*
* Two token table types, with one and two token fields.
*/
/*
 * Token table entry with one key field: a name and the single-byte token
 * that stands for it.
 */
typedef struct sl_2table_t {
    char *name;             /* text the token stands for */
    unsigned char token;    /* token value */
} sl_2table_t;
/*
 * Token table entry with two key fields. The value part may be the whole
 * value or only its beginning. It may also be NULL, in which case no part
 * of the value is tokenised; see sl, chapter 10.3.2.
 */
typedef struct sl_3table_t {
    char *name;             /* attribute name */
    char *value_part;       /* tokenised (start of the) value, or NULL */
    unsigned char token;    /* token value */
} sl_3table_t;
/*
 * Element from tag code page zero. It is defined in sl, chapter 10.3.1.
 * The table is only ever read, hence const.
 */
static const sl_2table_t sl_elements[] = {
    { "sl", 0x05 }
};

/*
 * Number of entries in sl_elements. The expansion is parenthesised so that
 * it behaves as a single operand in whatever expression it is used.
 */
#define NUMBER_OF_ELEMENTS (sizeof(sl_elements) / sizeof(sl_elements[0]))
/*
 * Attributes (and sometimes start or whole of their value) from code page
 * zero. These are defined in sl, chapter 10.3.2.
 *
 * Order matters: the attribute parser walks this table from the top and
 * takes the first entry whose name matches and whose value part is a prefix
 * of the attribute value. Longer prefixes must therefore come before
 * shorter ones that they start with ("http://www." before "http://"),
 * otherwise the longer tokens can never be selected. The entry with a NULL
 * value part matches any value and must stay last for its attribute.
 */
static const sl_3table_t sl_attributes[] = {
    { "action", "execute-low", 0x05 },
    { "action", "execute-high", 0x06 },
    { "action", "cache", 0x07 },
    { "href", "http://www.", 0x0a },
    { "href", "http://", 0x09 },
    { "href", "https://www.", 0x0c },
    { "href", "https://", 0x0b },
    { "href", NULL, 0x08 }
};

/*
 * Number of entries in sl_attributes. The expansion is parenthesised so that
 * it behaves as a single operand in whatever expression it is used.
 */
#define NUMBER_OF_ATTRIBUTES (sizeof(sl_attributes) / sizeof(sl_attributes[0]))
/*
 * URL value codes from code page zero. These are defined in sl, chapter
 * 10.3.3. The table is only ever read, hence const.
 */
static const sl_2table_t sl_url_values[] = {
    { ".com/", 0x85 },
    { ".edu/", 0x86 },
    { ".net/", 0x87 },
    { ".org/", 0x88 },
};

/*
 * Number of entries in sl_url_values. The expansion is parenthesised so that
 * it behaves as a single operand in whatever expression it is used.
 */
#define NUMBER_OF_URL_VALUES (sizeof(sl_url_values) / sizeof(sl_url_values[0]))
#include "xml_definitions.h"
/****************************************************************************
*
* Prototypes of internal functions. Note that 'Ptr' means here '*'.
*/
/* Tree walk: document, then nodes, then the element found in a node. */
static int parse_document(xmlDocPtr document, Octstr *charset,
                          simple_binary_t **slbxml);
static int parse_node(xmlNodePtr node, simple_binary_t **slbxml);
static int parse_element(xmlNodePtr node, simple_binary_t **slbxml);

/* Attribute handling: one attribute, and the url part of a href value. */
static int parse_attribute(xmlAttrPtr attr, simple_binary_t **slbxml);
static void parse_url_value(Octstr *value, simple_binary_t **slbxml);

/* Classification of attribute tokens. */
static int action(int hex);
static int url(int hex);
/****************************************************************************
*
* Implementation of the external function
*/
/*
 * Compile a textual SL document into its binary form.
 *
 * sl_doc     the SL source; it is modified in place (blanks are stripped and
 *            it is passed to set_charset together with charset)
 * charset    character set handed on to set_charset and parse_document
 * sl_binary  out parameter; receives a newly created octet string holding
 *            the compiled document
 *
 * Returns -1 when the source cannot be parsed as XML; in that case
 * *sl_binary is destroyed and set to NULL, so that the caller is not left
 * with a dangling pointer. Otherwise returns whatever parse_document
 * returned (0 or -1); *sl_binary is filled in either way and, presumably,
 * owned by the caller from then on.
 */
int sl_compile(Octstr *sl_doc, Octstr *charset, Octstr **sl_binary)
{
    simple_binary_t *slbxml;
    xmlDocPtr pDoc;
    int ret;

    *sl_binary = octstr_create("");
    slbxml = simple_binary_create();

    octstr_strip_blanks(sl_doc);
    set_charset(sl_doc, charset);
    pDoc = xmlParseMemory(octstr_get_cstr(sl_doc), (int) octstr_len(sl_doc));

    if (pDoc == NULL) {
        /* Nothing was parsed, so there is no document to free. */
        octstr_destroy(*sl_binary);
        *sl_binary = NULL;
        simple_binary_destroy(slbxml);
        error(0, "SL: No document to parse. Probably an error in SL source");
        return -1;
    }

    ret = parse_document(pDoc, charset, &slbxml);
    simple_binary_output(*sl_binary, slbxml);
    xmlFreeDoc(pDoc);
    simple_binary_destroy(slbxml);

    return ret;
}
/****************************************************************************
 *
 * Implementation of internal functions
 *
 * Parse document node. Store sl version number, public identifier and
 * character set at the start of the document, then parse the tree starting
 * from the root element.
 *
 * The charset argument is not consulted: the character set field is always
 * derived from the string "UTF-8".
 * NOTE(review): presumably because the parsed tree is handled as UTF-8
 * regardless of the source encoding - confirm.
 *
 * Returns -1 if the document has no root element, otherwise the result of
 * parse_node.
 */
static int parse_document(xmlDocPtr document, Octstr *charset,
                          simple_binary_t **slbxml)
{
    xmlNodePtr root;
    Octstr *header_charset;

    (void) charset;

    (**slbxml).wbxml_version = 0x02; /* WBXML Version number 1.2 */
    (**slbxml).public_id = 0x06; /* SL 1.0 Public ID */

    header_charset = octstr_create("UTF-8");
    (**slbxml).charset = parse_charset(header_charset);
    octstr_destroy(header_charset);

    /* parse_node dereferences its argument, so a missing root must stop us. */
    root = xmlDocGetRootElement(document);
    if (root == NULL) {
        error(0, "SL COMPILER: No root element in the SL source.");
        return -1;
    }

    return parse_node(root, slbxml);
}
/*
 * Walk the parsing tree. For the given node and then for each of its
 * following siblings: dispatch on the node type, descend into the children
 * (if any), and emit an end marker when the element parser asked for one.
 * The whole tree is walked, even though SL DTD defines only one node (see
 * sl, chapter 9.2); this lets us report unknown elements.
 *
 * Text nodes, comments and processing instructions are skipped (their
 * children, if any, are still visited). Every other non-element node type
 * is an error.
 *
 * Returns 0 when the node, its subtree and its following siblings were
 * handled, -1 on the first error.
 */
static int parse_node(xmlNodePtr node, simple_binary_t **slbxml)
{
    xmlNodePtr current;
    int status;

    current = node;
    do {
        if (current->type == XML_ELEMENT_NODE) {
            status = parse_element(current, slbxml);
        } else if (current->type == XML_TEXT_NODE ||
                   current->type == XML_COMMENT_NODE ||
                   current->type == XML_PI_NODE) {
            status = 0;
        } else {
            error(0, "SL COMPILER: Unknown XML node in the SL source.");
            return -1;
        }

        /* Only 0 (no end tag) and 1 (end tag after children) are valid. */
        if (status == -1)
            return -1;
        if (status != 0 && status != 1) {
            warning(0,"SL compiler: undefined return value in a parse function.");
            return -1;
        }

        if (current->children != NULL &&
            parse_node(current->children, slbxml) == -1)
            return -1;

        if (status == 1)
            parse_end(slbxml);

        current = current->next;
    } while (current != NULL);

    return 0;
}
/*
 * Parse an element node. If the tag is the one known SL element, output its
 * token; otherwise warn and output the tag as a literal followed by its
 * name. In both cases the bits returned by element_check_content are or'ed
 * into the tag byte. After that, parse the attributes and close the
 * attribute list. Note that we take advantage of the fact that sl documents
 * have only one element (see sl, chapter 6.2).
 *
 * The tag byte is always written, also when the element has neither
 * attributes nor content.
 *
 * Returns: 1, the content bit is set: the caller must add an end tag after
 *             the children of this element
 *          0, no end tag is to be added by the caller
 *         -1, the element has an empty name
 */
static int parse_element(xmlNodePtr node, simple_binary_t **slbxml)
{
    Octstr *name,
           *nameos;
    unsigned char status_bits,
                  sl_hex;
    int add_end_tag;
    xmlAttrPtr attribute;

    name = octstr_create(node->name);
    if (octstr_len(name) == 0) {
        octstr_destroy(name);
        return -1;
    }

    status_bits = element_check_content(node);
    add_end_tag = (status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT;

    if (octstr_compare(name, octstr_imm(sl_elements[0].name)) == 0) {
        sl_hex = sl_elements[0].token | status_bits;
        output_char(sl_hex, slbxml);
    } else {
        warning(0, "unknown tag %s in SL source", octstr_get_cstr(name));
        sl_hex = WBXML_LITERAL | status_bits;
        output_char(sl_hex, slbxml);
        nameos = octstr_duplicate(name);
        output_octet_string(nameos, slbxml);
        octstr_destroy(nameos);
    }

    if (node->properties != NULL) {
        /*
         * NOTE(review): the result of parse_attribute is not checked, so an
         * attribute that fails to parse is skipped instead of aborting the
         * compilation - confirm this is intended.
         */
        for (attribute = node->properties; attribute != NULL;
             attribute = attribute->next)
            parse_attribute(attribute, slbxml);
        parse_end(slbxml);
    }

    octstr_destroy(name);
    return add_end_tag;
}
/*
 * Parse one attribute. Look the attribute up in sl_attributes: the first
 * entry wins whose name equals the attribute name and whose value part is
 * either NULL or a prefix of the attribute value. Output the token of that
 * entry, then
 *   - for action tokens nothing more (the token stands for the value),
 *   - for href tokens the rest of the value, after removing the prefix the
 *     token stands for, handled by parse_url_value,
 *   - for any other token the value as an inline string.
 *
 * Only the prefix that actually matched is removed from the value. When the
 * entry with a NULL value part is chosen nothing is removed, so an url with
 * an untokenised scheme is output in full.
 *
 * Returns 0 on success, -1 when the attribute has no value or no table
 * entry matches.
 */
static int parse_attribute(xmlAttrPtr attr, simple_binary_t **slbxml)
{
    Octstr *name,
           *value,
           *prefix;
    unsigned char sl_hex;
    size_t i;
    long prefix_len;    /* length of the start of value covered by the token */

    name = octstr_create(attr->name);
    value = NULL;
    if (attr->children != NULL)
        value = create_octstr_from_node(attr->children);
    if (value == NULL)
        goto error;

    prefix_len = 0;
    for (i = 0; i < NUMBER_OF_ATTRIBUTES; ++i) {
        if (octstr_compare(name, octstr_imm(sl_attributes[i].name)) != 0)
            continue;

        if (sl_attributes[i].value_part == NULL) {
            debug("wap.push.sl.compiler", 0, "value part was NULL");
            /* Forget the lengths of prefixes tried and rejected earlier. */
            prefix_len = 0;
            break;
        }

        prefix = octstr_imm(sl_attributes[i].value_part);
        if (octstr_ncompare(value, prefix, octstr_len(prefix)) == 0) {
            prefix_len = octstr_len(prefix);
            break;
        }
    }

    if (i == NUMBER_OF_ATTRIBUTES) {
        warning(0, "unknown attribute in SL source");
        goto error;
    }

    sl_hex = sl_attributes[i].token;
    output_char(sl_hex, slbxml);
    if (action(sl_hex)) {
        /* The token carries the value, nothing more to output. */
    } else if (url(sl_hex)) {
        octstr_delete(value, 0, prefix_len);
        parse_url_value(value, slbxml);
    } else {
        parse_inline_string(value, slbxml);
    }

    octstr_destroy(name);
    octstr_destroy(value);
    return 0;

error:
    octstr_destroy(name);
    octstr_destroy(value);
    return -1;
}
/*
 * Tells whether an sl attribute token is one of the href tokens, i.e.
 * whether the attribute value is an URL. Only the low eight bits of hex
 * are looked at. The href tokens are 0x08 (href), 0x09 (href http://),
 * 0x0a (href http://www.), 0x0b (href https://) and 0x0c
 * (href https://www.).
 * Returns 1 for an URL and 0 otherwise.
 */
static int url(int hex)
{
    unsigned char token = (unsigned char) hex;

    if (token >= 0x08 && token <= 0x0c)
        return 1;
    return 0;
}
/*
 * Tells whether an sl attribute token is one of the action tokens. Only the
 * low eight bits of hex are looked at. The action tokens are 0x05 (action
 * execute-low), 0x06 (action execute-high) and 0x07 (action cache).
 * Returns 1 for an action attribute and 0 otherwise.
 */
static int action(int hex)
{
    unsigned char token = (unsigned char) hex;

    if (token >= 0x05 && token <= 0x07)
        return 1;
    return 0;
}
/*
 * In the case of SL document, only attribute values to be tokenised are parts
 * of urls. See sl, chapter 10.3.3. The caller removes the start of the url.
 *
 * The entries of sl_url_values are tried in table order. For the first one
 * found anywhere in value, the text before it is output as an inline
 * string, then the token, then the text after it as another inline string.
 * If no entry is found, the whole value is output as one inline string.
 *
 * The parts are output from copies of value. Only in the no-match case is
 * value itself handed to parse_inline_string.
 */
static void parse_url_value(Octstr *value, simple_binary_t **slbxml)
{
    size_t i;
    long pos;
    Octstr *urlos,
           *first_part,
           *last_part;

    for (i = 0; i < NUMBER_OF_URL_VALUES; ++i) {
        /*
         * The string from octstr_imm is not owned by this function and is
         * therefore not destroyed here (gwlib documents immutable strings
         * as not needing destruction).
         */
        urlos = octstr_imm(sl_url_values[i].name);
        pos = octstr_search(value, urlos, 0);
        if (pos < 0)
            continue;

        /* Everything before the tokenisable part. */
        first_part = octstr_duplicate(value);
        octstr_delete(first_part, pos, octstr_len(first_part) - pos);
        parse_inline_string(first_part, slbxml);
        octstr_destroy(first_part);

        output_char(sl_url_values[i].token, slbxml);

        /* Everything after the tokenisable part. */
        last_part = octstr_duplicate(value);
        octstr_delete(last_part, 0, pos + octstr_len(urlos));
        parse_inline_string(last_part, slbxml);
        octstr_destroy(last_part);
        return;
    }

    parse_inline_string(value, slbxml);
}