#include <stdio.h>
#include <string.h>
#include "gskxml.h"
typedef struct _ParseData ParseData;
struct _ParseData
{
GskXmlNode *doc;
gboolean multiple_documents;
};
#define PARSE_DATA_INIT { NULL, FALSE }
static GskXmlNode *
finish_parse_impl (ParseData *parse_data,
GskXmlParser *parser,
GError **error)
{
gsk_xml_parser_free (parser);
if (parse_data->multiple_documents)
{
if (parse_data->doc)
gsk_xml_node_unref (parse_data->doc);
g_set_error (error, GSK_G_ERROR_DOMAIN,
GSK_ERROR_MULTIPLE_ROOTS,
"multiple documents in xml file");
return NULL;
}
if (parse_data->doc == NULL)
{
g_set_error (error, GSK_G_ERROR_DOMAIN,
GSK_ERROR_NO_DOCUMENT,
"no documents in xml file");
return NULL;
}
return parse_data->doc;
}
static void
handle_root_node (GskXmlNode *node,
gpointer data)
{
ParseData *parse_data = data;
if (parse_data->doc == NULL)
parse_data->doc = gsk_xml_node_ref (node);
else
parse_data->multiple_documents = TRUE;
}
GskXmlNode *gsk_xml_parse_file (const char *filename,
GskXmlParseFlags flags,
GError **error)
{
ParseData parse_data = PARSE_DATA_INIT;
GskXmlParser *parser = gsk_xml_parser_new (flags,
handle_root_node,
&parse_data,
NULL);
if (!gsk_xml_parser_feed_file (parser, filename, error)
|| !gsk_xml_parser_finish (parser, error))
{
if (parse_data.doc)
gsk_xml_node_unref (parse_data.doc);
gsk_xml_parser_free (parser);
return NULL;
}
return finish_parse_impl (&parse_data, parser, error);
}
GskXmlNode *gsk_xml_parse_data (char *buf,
gssize buf_len,
GskXmlParseFlags flags,
GError **error)
{
ParseData parse_data = PARSE_DATA_INIT;
GskXmlParser *parser = gsk_xml_parser_new (flags,
handle_root_node,
&parse_data,
NULL);
if (!gsk_xml_parser_feed (parser, buf, buf_len, error)
|| !gsk_xml_parser_finish (parser, error))
{
if (parse_data.doc)
gsk_xml_node_unref (parse_data.doc);
gsk_xml_parser_free (parser);
return NULL;
}
return finish_parse_impl (&parse_data, parser, error);
}
typedef struct _Sink Sink;
struct _Sink
{
void (*printf) (gpointer data, const char *format, va_list args);
gpointer data;
};
static void
sink_printf (Sink *sink,
const char *format,
...) G_GNUC_PRINTF(2,3);
static void
sink_printf (Sink *sink,
const char *format,
...)
{
va_list args;
va_start (args, format);
sink->printf (sink->data, format, args);
va_end (args);
}
static void
sink_print_spaces (Sink *sink, guint n_spaces)
{
sink_printf (sink, "%.*s", n_spaces, "");
}
static void
write_sink_raw (GskXmlNode *doc,
Sink *sink)
{
if (doc->type == GSK_XML_NODE_TYPE_TEXT)
{
char *fmt;
GskXmlNodeText *text = GSK_XML_NODE_TEXT (doc);
fmt = g_markup_escape_text ((const char*)(text->content), -1);
sink_printf (sink, "%s", fmt);
g_free (fmt);
}
else
{
guint i;
GskXmlNodeElement *element = GSK_XML_NODE_ELEMENT (doc);
sink_printf (sink, "<%s", (char*)(element->name));
for (i = 0; i < element->n_attributes; i++)
{
GskXmlAttribute *attr = element->attributes + i;
if (attr->ns)
sink_printf (sink, " %s=\"%s\"", (char*)(attr->name), (char*)(attr->value));
else
sink_printf (sink, " %s:%s=\"%s\"", (char*)(attr->ns->abbrev), (char*)(attr->name), (char*)(attr->value));
}
if (element->n_children == 0)
sink_printf (sink, " />");
else
{
sink_printf (sink, ">");
for (i = 0; i < element->n_children; i++)
write_sink_raw (element->children[i], sink);
sink_printf (sink, "</%s>", (char*)(element->name));
}
}
}
static guint
get_length_with_max (GskXmlNode *node,
guint max_len)
{
switch (node->type)
{
case GSK_XML_NODE_TYPE_TEXT:
/* HACK */
return strlen (GSK_XML_STR (GSK_XML_NODE_TEXT (node)->content)) * 3 / 2;
case GSK_XML_NODE_TYPE_ELEMENT:
{
GskXmlNodeElement *element = GSK_XML_NODE_ELEMENT (node);
guint elt_name_len = strlen (GSK_XML_STR (element->name));
guint rv = 2 + elt_name_len;
guint i;
if (element->n_children)
rv += 4 + elt_name_len;
for (i = 0; rv < max_len && i < element->n_children; i++)
rv += get_length_with_max (element->children[i], max_len - rv);
return rv;
}
}
g_return_val_if_reached (0);
}
static gboolean
fits_in (GskXmlNode *node, guint max_len)
{
return get_length_with_max (node, max_len) < max_len;
}
static void
write_sink_formatted (GskXmlNode *doc,
Sink *sink,
guint indent,
guint line_length)
{
if (doc->type == GSK_XML_NODE_TYPE_TEXT
|| indent >= line_length
|| fits_in (doc, line_length - indent))
{
sink_print_spaces (sink, indent);
write_sink_raw (doc, sink);
sink_printf (sink, "\n");
}
else /* we have a structured tag that doesn't fit on one line */
{
/* emit open tag on one line */
guint subindent;
guint i;
GskXmlNodeElement *element = GSK_XML_NODE_ELEMENT (doc);
sink_print_spaces (sink, indent);
sink_printf (sink, "<%s>\n", (const char *) element->name);
/* emit contents indented */
subindent = indent + 2;
if (subindent > line_length * 2 / 3)
subindent = 0;
for (i = 0; i < element->n_children; i++)
{
/* TODO: should trim whitespace? */
write_sink_formatted (doc, sink, subindent, line_length);
}
/* emit close tag on one line */
sink_print_spaces (sink, indent);
sink_printf (sink, "</%s>\n", (const char *) element->name);
}
}
static inline void
xml_write_sink (GskXmlNode *doc,
Sink *sink,
gboolean format)
{
if (format)
write_sink_formatted (doc, sink, 0, 80);
else
{
write_sink_raw (doc, sink);
sink_printf (sink, "\n");
}
}
static void
my_vfprintf (void *data, const char *format, va_list args)
{
vfprintf (data, format, args);
}
gboolean gsk_xml_write_file (GskXmlNode *doc,
const char *filename,
gboolean format,
GError **error)
{
FILE *fp = fopen (filename, "w");
Sink sink;
int err;
sink.printf = my_vfprintf;
sink.data = fp;
xml_write_sink (doc, &sink, format);
err = ferror (fp);
fclose (fp);
return err == 0;
}
static void
my_string_append_vprintf (gpointer data,
const char *format,
va_list args)
{
guint bound = g_printf_string_upper_bound (format, args);
char *to_free = NULL;
char *buf;
GString *rv = data;
if (bound < 1024)
buf = g_alloca (bound + 1);
else
buf = to_free = g_malloc (bound + 1);
g_vsnprintf (buf, bound, format, args);
g_string_append (rv, buf);
}
char *
gsk_xml_to_string (GskXmlNode *doc,
gboolean format)
{
GString *rv = g_string_new ("");
Sink sink;
sink.printf = my_string_append_vprintf;
sink.data = rv;
xml_write_sink (doc, &sink, format);
return g_string_free (rv, FALSE);
}
static GskXmlString *
concat_text_nodes_to_string (guint n_text_nodes,
GskXmlNode **text_nodes)
{
guint i;
if (n_text_nodes > 64)
{
GskXmlString **pieces = g_new (GskXmlString *, n_text_nodes);
GskXmlString *rv;
for (i = 0; i < n_text_nodes; i++)
pieces[i] = GSK_XML_NODE_TEXT (text_nodes[i])->content;
rv = gsk_xml_strings_concat (n_text_nodes, pieces);
g_free (pieces);
return rv;
}
else
{
GskXmlString **pieces = g_newa (GskXmlString *, n_text_nodes);
for (i = 0; i < n_text_nodes; i++)
pieces[i] = GSK_XML_NODE_TEXT (text_nodes[i])->content;
return gsk_xml_strings_concat (n_text_nodes, pieces);
}
}
GskXmlNode *
gsk_xml_trim_whitespace (GskXmlNode *node)
{
switch (node->type)
{
case GSK_XML_NODE_TYPE_TEXT:
{
const char *str = GSK_XML_STR (GSK_XML_NODE_TEXT (node)->content);
gboolean has_leading_whitespace, has_trailing_whitespace;
gunichar u;
const char *end;
if (*str == 0)
return gsk_xml_node_ref (node);
u = g_utf8_get_char (str);
has_leading_whitespace = g_unichar_isspace (u);
end = strchr (str, 0);
end = g_utf8_find_prev_char (str, end);
u = g_utf8_get_char (end);
has_trailing_whitespace = g_unichar_isspace (u);
if (has_leading_whitespace || has_trailing_whitespace)
{
GskXmlString *trimmed;
const char *trimmed_start = str;
const char *trimmed_end = end;
if (has_leading_whitespace)
{
trimmed_start += g_utf8_skip[*(guchar*)trimmed_start];
do
{
u = g_utf8_get_char (trimmed_start);
if (!g_unichar_isspace (u))
break;
trimmed_start += g_utf8_skip[*(guchar*)trimmed_start];
}
while (trimmed_start < trimmed_end);
}
if (has_trailing_whitespace && trimmed_start < trimmed_end)
{
trimmed_end = g_utf8_find_prev_char (trimmed_start,
trimmed_end);
while (trimmed_start < trimmed_end)
{
const char *prev;
prev = g_utf8_find_prev_char (trimmed_start, trimmed_end);
if (!g_unichar_isspace (g_utf8_get_char (prev)))
break;
trimmed_end = prev;
}
}
trimmed = gsk_xml_string_new_len (trimmed_start,
trimmed_end - trimmed_start);
node = gsk_xml_node_new_text (trimmed);
gsk_xml_string_unref (trimmed);
return node;
}
else
return gsk_xml_node_ref (node);
}
case GSK_XML_NODE_TYPE_ELEMENT:
{
GskXmlNode **new_subnodes;
GskXmlNode **to_free;
GskXmlNodeElement *element = GSK_XML_NODE_ELEMENT (node);
guint n_children = element->n_children;
GskXmlNode **old_subnodes = element->children;
GskXmlNode *rv;
guint i, o;
gboolean did_something = FALSE;
if (element->n_children > 16)
{
new_subnodes = g_new (GskXmlNode *, n_children);
to_free = new_subnodes;
}
else
{
new_subnodes = g_newa (GskXmlNode *, n_children);
to_free = NULL;
}
for (i = 0; i < n_children; i++)
{
new_subnodes[i] = gsk_xml_trim_whitespace (old_subnodes[i]);
did_something = did_something
|| (new_subnodes[i] != old_subnodes[i]);
}
o = 0;
for (i = 0; i < n_children; )
if (new_subnodes[i]->type == GSK_XML_NODE_TYPE_ELEMENT)
{
new_subnodes[o++] = new_subnodes[i++];
}
else
{
guint past_text;
for (past_text = i + 1; i < n_children; past_text++)
if (new_subnodes[i]->type != GSK_XML_NODE_TYPE_TEXT)
break;
if (past_text == i + 1)
{
if (GSK_XML_STR (GSK_XML_NODE_TEXT (new_subnodes[i])->content)[0] == '\0')
{
gsk_xml_node_unref (new_subnodes[i++]);
did_something = TRUE;
}
else
new_subnodes[o++] = new_subnodes[i++];
}
else
{
/* concatenate text nodes */
GskXmlString *concat;
concat = concat_text_nodes_to_string (past_text - i, new_subnodes + i);
/* free old nodes */
while (i < past_text)
gsk_xml_node_unref (new_subnodes[i++]);
/* only store it if it is nonempty */
if (GSK_XML_STR (concat)[0] != '\0')
new_subnodes[o++] = gsk_xml_node_new_text (concat);
gsk_xml_string_unref (concat);
did_something = TRUE;
}
}
if (did_something)
/* create new xmlnode */
rv = gsk_xml_node_new_from_element_with_new_children (node, o, new_subnodes);
else
rv = gsk_xml_node_ref (node);
for (i = 0; i < o; i++)
gsk_xml_node_unref (new_subnodes[i]);
if (to_free)
g_free (to_free);
return rv;
}
default:
g_assert_not_reached ();
}
}
syntax highlighted by Code2HTML, v. 0.9.1