#include <string.h>
#include <stdlib.h>
#include "gskxml.h"
#include "../gskghelpers.h"

typedef struct _BuilderNode BuilderNode;
struct _BuilderNode
{
  GskXmlNamespace *ns;
  GskXmlString *name;
  guint n_attrs;
  GskXmlAttribute *attrs;
  BuilderNode *to_root;
  GPtrArray   *children;

  GskXmlNamespace *default_ns;
  guint n_ns;
  GskXmlNamespace **ns_by_abbrev;
};

struct _GskXmlBuilder
{
  BuilderNode *root;         /* of BuilderNode */
  BuilderNode *cur;          /* points to head of 'root' list */
  GSList      *first_doc;
  GSList      *last_doc;
  gboolean     namespace_support;
};

GskXmlBuilder *gsk_xml_builder_new (GskXmlParseFlags flags)
{
  GskXmlBuilder *builder = g_new (GskXmlBuilder, 1);
  builder->root = NULL;
  builder->cur = NULL;
  builder->first_doc = builder->last_doc = NULL;
  builder->namespace_support = ((flags & GSK_XML_PARSE_WITHOUT_NAMESPACE_SUPPORT) == 0);
  return builder;
}

static int
compare_nspointer_by_abbrev (gconstpointer a,
                             gconstpointer b)
{
  GskXmlNamespace *ns_a = * (GskXmlNamespace **) a;
  GskXmlNamespace *ns_b = * (GskXmlNamespace **) b;
  GskXmlString *abbrev_a = ns_a->abbrev;
  GskXmlString *abbrev_b = ns_b->abbrev;
  return (abbrev_a < abbrev_b) ? -1
       : (abbrev_a > abbrev_b) ? +1
       : 0;
}

/* merge parent namespace abbreviates with 'namespaces'.

   do this by sorting namespaces by 'abbrev' pointer,
   then iterating parent_ns_by_abbrev,
   using namespaces[i] */
static GskXmlNamespace **
merge_namespace_tables (guint parent_n_ns,
                        GskXmlNamespace **parent_ns,
                        guint             n_namespaces,
                        GskXmlNamespace **namespaces,
                        guint            *n_rv_out)
{
  guint i;
  GskXmlNamespace **rv;
  guint n_rv;
  guint i_parent, i_namespaces;

  qsort (namespaces, n_namespaces, sizeof (GskXmlNamespace *),
         compare_nspointer_by_abbrev);

  /* ensure there are no duplicate namespaces,
     optimizing for the case where there are not. */
  for (i = 0; i + 1 < n_namespaces; i++)
    if (G_UNLIKELY (namespaces[i]->abbrev == namespaces[i+1]->abbrev))
      {
        /* ok, slow case: uniq namespaces */
        guint o = i;

        while (i < n_namespaces)
          {
            guint end_run;
            GskXmlNamespace *ns;

            /* find the last similarly abbreviated ns */
            for (end_run = i;
                 end_run + 1 < n_namespaces
                 && namespaces[end_run]->abbrev == namespaces[end_run+1]->abbrev;
                 end_run++)
              ;

            /* replace the run with a single namespace */
            ns = gsk_xml_namespace_ref (namespaces[i]);
            for (   ; i <= end_run; i++)
              gsk_xml_namespace_unref (namespaces[i]);
            namespaces[o++] = ns;
          }
        n_namespaces = o;
      }

  /* collate parent_ns_by_abbrev with n_namespaces */
  rv = g_new (GskXmlNamespace *, n_namespaces + parent_n_ns);
  n_rv = 0;
  for (i_parent = i_namespaces = 0;
       i_parent < parent_n_ns || i_namespaces < n_namespaces;
      )
    {
      guint op;
      if (i_parent == parent_n_ns)
        op = 0;         /* use namespaces */
      else if (i_namespaces == n_namespaces)
        op = 1;         /* use parent_ns */
      else if (parent_ns[i_parent]->abbrev < namespaces[i_namespaces]->abbrev)
        op = 1;         /* use parent_ns */
      else if (parent_ns[i_parent]->abbrev > namespaces[i_namespaces]->abbrev)
        op = 0;         /* use namespaces */
      else
        op = 2;

      switch (op)
        {
        case 0:
          rv[n_rv++] = namespaces[i_namespaces];
          gsk_xml_namespace_ref (namespaces[i_namespaces]);
          i_namespaces++;
          break;
        case 1:
          rv[n_rv++] = parent_ns[i_parent];
          gsk_xml_namespace_ref (parent_ns[i_parent]);
          i_parent++;
          break;
        case 2:
          /* XXX: what to do if the namespace abbreviations conflict!?!??! */
          rv[n_rv++] = parent_ns[i_parent];
          gsk_xml_namespace_ref (parent_ns[i_parent]);
          i_parent++;
          i_namespaces++;
          break;
        }
    }
  *n_rv_out = n_rv;
  return rv;
}

static GskXmlNamespace *
bsearch_ns_array (guint n, GskXmlNamespace **ns,
                  GskXmlString *abbrev)
{
  guint lo = 0, count = n;
  while (count)
    {
      guint mid = lo + count / 2;
      if (ns[mid]->abbrev < abbrev)
        {
          guint new_lo = mid + 1;
          count -= (new_lo - lo);
          lo = new_lo;
        }
      else if (ns[mid]->abbrev > abbrev)
        {
          count /= 2;
        }
      else
        return ns[mid];
    }
  return NULL;
}

static void
try_ns_support (guint n_ns,
                GskXmlNamespace **ns_by_abbrev,
                GskXmlNamespace *default_ns,
                GskXmlString **name_inout,
                GskXmlNamespace **ns_inout)
{
  GskXmlString *old_name = *name_inout;
  const char *name_str = (char*) old_name;
  const char *colon = strchr (name_str, ':');
  g_assert (*ns_inout == NULL);
  if (colon != NULL)
    {
      GskXmlString *ns_name = gsk_xml_string_new_len (name_str, colon - name_str);
      GskXmlNamespace *ns = bsearch_ns_array (n_ns, ns_by_abbrev, ns_name);
      if (ns)
        {
          *ns_inout = ns;
          *name_inout = gsk_xml_string_new (colon + 1);
          gsk_xml_string_unref (old_name);
        }
      else
        {
          /* XXX: error handling??? */
        }
      gsk_xml_string_unref (ns_name);
    }
  else
    {
      if (default_ns)
        *ns_inout = default_ns;
    }
}

void           gsk_xml_builder_start (GskXmlBuilder *builder,
                                      GskXmlString  *name,
                                      guint          n_attrs,
                                      GskXmlString **attrs)
{
  BuilderNode *bn = g_new (BuilderNode, 1);
  guint i;
  bn->name = gsk_xml_string_ref (name);
  bn->n_attrs = n_attrs;
  bn->attrs = g_new (GskXmlAttribute, n_attrs);
  bn->ns_by_abbrev = NULL;
  bn->default_ns = NULL;
  bn->ns = NULL;
  bn->to_root = NULL;
  if (builder->namespace_support)
    {
      /* look for 'xmlns' and 'xmlns:' entries. */

      /* the number of attributes found,
         disclusing xmlns attributes. */
      guint n_real_attrs = 0;

      /* array of namespace attributes found,
         optimized for small numbers of namespaces. */
      guint n_ns_entries = 0;
      guint n_ns_entries_alloced = 16;
      GskXmlNamespace **ns_entries = g_newa (GskXmlNamespace *, n_ns_entries_alloced);
      gboolean must_free_ns_entries = FALSE;

      for (i = 0; i < n_attrs * 2; i+=2)
        {
          if (memcmp ((char*)(attrs[i]), "xmlns", 5) == 0)
            {
              const char *suffix = (char*)(attrs[i]) + 5;
              if (suffix[0] == '\0')
                {
                  /* xmlns=... */
                  if (bn->default_ns != NULL)
                    {
                      /* um, shouldn't happen in wellformed invocation
                         (since behavior with duplicate attributes is undefined) */
                      gsk_xml_namespace_unref (bn->default_ns);
                    }
                  bn->default_ns = gsk_xml_namespace_new (NULL, attrs[i+1]);
                  continue;
                }
              else if (suffix[0] == ':' && suffix[1] != '\0')
                {
                  /* xmlns:something=... */
                  GskXmlString *abbrev = gsk_xml_string_new (suffix+1);
                  if (G_UNLIKELY (n_ns_entries == n_ns_entries_alloced))
                    {
                      /* too many namespaces.  must reallocate */
                      guint new_alloced = n_ns_entries_alloced * 2;
                      GskXmlNamespace **new = g_new (GskXmlNamespace *, new_alloced);
                      memcpy (new, ns_entries, sizeof (GskXmlNamespace*) * n_ns_entries_alloced);
                      if (must_free_ns_entries)
                        g_free (ns_entries);
                      ns_entries = new;
                      must_free_ns_entries = TRUE;
                      n_ns_entries_alloced = new_alloced;
                    }
                  ns_entries[n_ns_entries++] = gsk_xml_namespace_new (abbrev, attrs[i+1]);
                  gsk_xml_string_unref (abbrev);
                  continue;
                }
              else
                {
                  /* fall through to normal attribute code. */
                }
            }

          /* ok, it's a real attribute */
          bn->attrs[n_real_attrs].name = gsk_xml_string_ref (attrs[i]);
          bn->attrs[n_real_attrs].value = gsk_xml_string_ref (attrs[i+1]);
          bn->attrs[n_real_attrs].ns = NULL;
          n_real_attrs++;
        }

      bn->n_attrs = n_real_attrs;

      /* if no "xmlns" attribute found,
         inherit default namespace from parent. */
      if (bn->default_ns == NULL
       && builder->cur != NULL
       && builder->cur->default_ns != NULL)
        bn->default_ns = gsk_xml_namespace_ref (builder->cur->default_ns);

      /* merge parent's namespace table
         with the new namespaces */
      {
        guint cur_n;
        GskXmlNamespace **cur_ns;
        if (builder->cur)
          {
            cur_n = builder->cur->n_ns;
            cur_ns = builder->cur->ns_by_abbrev;
          }
        else
          {
            cur_n = 0;
            cur_ns = NULL;
          }
        bn->ns_by_abbrev = merge_namespace_tables (cur_n, cur_ns,
                                                   n_ns_entries, ns_entries,
                                                   &bn->n_ns);
      }
    }
  else
    {
      bn->n_attrs = n_attrs;
      for (i = 0; i < n_attrs; i++)
        {
          bn->attrs[i].name = gsk_xml_string_ref (attrs[2*i+0]);
          bn->attrs[i].value = gsk_xml_string_ref (attrs[2*i+1]);
          bn->attrs[i].ns = NULL;
        }
      bn->n_ns = 0;
      bn->ns_by_abbrev = NULL;
    }

  bn->children = NULL;
  if (builder->root == NULL)
    {
      builder->root = builder->cur = bn;
    }
  else
    {
      bn->to_root = builder->cur;
      builder->cur = bn;
    }


  if (builder->namespace_support)
    {
      /* resolve attribute namespaces */
      for (i = 0; i < bn->n_attrs; i++)
        {
          try_ns_support (bn->n_ns, bn->ns_by_abbrev, bn->default_ns,
                          &bn->attrs[i].name,
                          &bn->attrs[i].ns);

        }
      
      /* resolve 'name' namespace */
      try_ns_support (bn->n_ns, bn->ns_by_abbrev, bn->default_ns,
                      &bn->name,
                      &bn->ns);
    }
  else
    {
      bn->ns = NULL;
    }
}

/* XXX: optimize this!!! */
void           gsk_xml_builder_start_c (GskXmlBuilder *builder,
                                        const char    *name,
                                        guint          n_attrs,
                                        char         **attrs)
{
  GskXmlString **x_attrs = g_newa (GskXmlString *, n_attrs * 2);
  GskXmlString *x_name = gsk_xml_string_new (name);
  guint i;
  for (i = 0; i < n_attrs * 2; i++)
    x_attrs[i] = gsk_xml_string_new (attrs[i]);
  gsk_xml_builder_start (builder, x_name, n_attrs, x_attrs);
  for (i = 0; i < n_attrs * 2; i++)
    gsk_xml_string_unref (x_attrs[i]);
  gsk_xml_string_unref (x_name);
}

void           gsk_xml_builder_start_ns (GskXmlBuilder *builder,
                                         GskXmlString  *name_ns,
                                         GskXmlString  *name,
                                         guint          n_attrs,
                                         GskXmlRawAttribute *attrs)
{
  BuilderNode *bn = g_new (BuilderNode, 1);
  guint i;
  bn->name = gsk_xml_string_ref (name);
  bn->n_attrs = n_attrs;
  bn->attrs = g_new (GskXmlAttribute, n_attrs);
  bn->ns_by_abbrev = NULL;
  bn->default_ns = NULL;

  if (builder->namespace_support)
    {
      /* look for 'xmlns' and 'xmlns:' entries. */

      /* the number of attributes found,
         disclusing xmlns attributes. */
      guint n_real_attrs = 0;

      /* array of namespace attributes found,
         optimized for small numbers of namespaces. */
      guint n_ns_entries = 0;
      guint n_ns_entries_alloced = 16;
      GskXmlNamespace **ns_entries = g_newa (GskXmlNamespace *, n_ns_entries_alloced);
      gboolean must_free_ns_entries = FALSE;

      for (i = 0; i < n_attrs; i++)
        {
          if (gsk_xml_string__xmlns == attrs[i].ns_abbrev)
            {
              /* xmlns:something=... */
              if (G_UNLIKELY (n_ns_entries == n_ns_entries_alloced))
                {
                  /* too many namespaces.  must reallocate */
                  guint new_alloced = n_ns_entries_alloced * 2;
                  GskXmlNamespace **new = g_new (GskXmlNamespace *, new_alloced);
                  memcpy (new, ns_entries, sizeof (GskXmlNamespace*) * n_ns_entries_alloced);
                  if (must_free_ns_entries)
                    g_free (ns_entries);
                  ns_entries = new;
                  must_free_ns_entries = TRUE;
                  n_ns_entries_alloced = new_alloced;
                }
              ns_entries[n_ns_entries++] = gsk_xml_namespace_new (attrs[i].name, attrs[i].value);
            }
          else if (attrs[i].ns_abbrev == NULL && attrs[i].name == gsk_xml_string__xmlns)
            {
              /* xmlns=... */
              if (bn->default_ns != NULL)
                {
                  /* um, shouldn't happen in wellformed invocation
                     (since behavior with duplicate attributes is undefined) */
                  gsk_xml_namespace_unref (bn->default_ns);
                }
              bn->default_ns = gsk_xml_namespace_new (NULL, attrs[i].value);
            }
          else
            {
              /* ok, it's a real attribute */
              bn->attrs[n_real_attrs].name = gsk_xml_string_ref (attrs[i].name);
              bn->attrs[n_real_attrs].value = gsk_xml_string_ref (attrs[i].value);
              bn->attrs[n_real_attrs].ns = (GskXmlNamespace *) attrs[i].ns_abbrev;
              n_real_attrs++;
            }
        }

      bn->n_attrs = n_real_attrs;

      /* if no "xmlns" attribute found,
         inherit default namespace from parent. */
      if (bn->default_ns == NULL
       && builder->cur != NULL
       && builder->cur->default_ns != NULL)
        bn->default_ns = gsk_xml_namespace_ref (builder->cur->default_ns);

      /* merge parent's namespace table
         with the new namespaces */
      bn->ns_by_abbrev = merge_namespace_tables (builder->cur->n_ns,
                                                 builder->cur->ns_by_abbrev,
                                                 n_ns_entries, ns_entries,
                                                 &bn->n_ns);
    }

  bn->children = NULL;
  if (builder->root == NULL)
    {
      builder->root = builder->cur = bn;
    }
  else
    {
      bn->to_root = builder->cur;
      builder->cur = bn;
    }


  /* resolve attribute namespaces */
  for (i = 0; i < bn->n_attrs; i++)
    {
      GskXmlString *ns_abbrev = (GskXmlString *) bn->attrs[i].ns;
      if (ns_abbrev == NULL)
        bn->attrs[i].ns = bn->default_ns;
      else
        bn->attrs[i].ns = bsearch_ns_array (bn->n_ns, bn->ns_by_abbrev, ns_abbrev);
    }
      
  /* resolve 'name' namespace */
  if (name_ns == NULL)
    bn->ns = bn->default_ns;
  else
    bn->ns = bsearch_ns_array (bn->n_ns, bn->ns_by_abbrev, name_ns);
}

void           gsk_xml_builder_text  (GskXmlBuilder *builder,
                                      GskXmlString  *str)
{
  g_return_if_fail (builder->root != NULL);
  if (builder->cur->children == NULL)
    builder->cur->children = g_ptr_array_new ();
  g_ptr_array_add (builder->cur->children,
                   gsk_xml_node_new_text (str));
}

void           gsk_xml_builder_add_node  (GskXmlBuilder *builder,
                                          GskXmlNode    *node)
{
  g_return_if_fail (builder->root != NULL);
  if (builder->cur->children == NULL)
    builder->cur->children = g_ptr_array_new ();
  g_ptr_array_add (builder->cur->children, gsk_xml_node_ref (node));
}

GskXmlNode    *gsk_xml_builder_end   (GskXmlBuilder *builder,
                                      GskXmlString  *name)
{
  BuilderNode *to_finish;
  GskXmlNode *node;
  guint i;
  g_return_val_if_fail (builder->cur != NULL, NULL);
  if (name)
    g_return_val_if_fail (builder->cur->name == name, NULL);

  /* make cur into an xml node */
  to_finish = builder->cur;
  builder->cur = builder->cur->to_root;
  node = gsk_xml_node_new_element (to_finish->ns,
                                   to_finish->name,
                                   to_finish->n_attrs,
                                   to_finish->attrs,
                                   to_finish->children->len,
                                   (GskXmlNode **) to_finish->children->pdata);

  /* free to_finish */
  gsk_xml_string_unref (to_finish->name);
  for (i = 0; i < to_finish->n_attrs; i++)
    {
      /* NOTE: ns is NOT referenced by the attributes! */
      gsk_xml_string_unref (to_finish->attrs[i].name);
      gsk_xml_string_unref (to_finish->attrs[i].value);
    }
  g_free (to_finish->attrs);
  if (to_finish->children != NULL)
    {
      for (i = 0; i < to_finish->children->len; i++)
        gsk_xml_node_unref (to_finish->children->pdata[i]);
      g_ptr_array_free (to_finish->children, FALSE);
    }
  g_free (to_finish);

  /* add new node to parent */
  if (builder->cur == NULL)
    {
      /* this is a new doc */
      if (builder->first_doc == NULL)
        builder->first_doc = builder->last_doc = g_slist_prepend (NULL, node);
      else
        {
          builder->last_doc = g_slist_append (builder->last_doc, node)->next;
        }
    }
  else
    {
      if (builder->cur->children == NULL)
        builder->cur->children = g_ptr_array_new ();
      g_ptr_array_add (builder->cur->children, node);
    }
  return node;
}
GskXmlNode    *gsk_xml_builder_get_doc(GskXmlBuilder *builder)
{
  GskXmlNode *rv;
  if (builder->first_doc == NULL)
    return NULL;
  rv = builder->first_doc->data;
  builder->first_doc = g_slist_remove (builder->first_doc, rv);
  if (builder->first_doc == NULL)
    builder->last_doc = NULL;
  return rv;
}

void
gsk_xml_builder_free (GskXmlBuilder *builder)
{
  BuilderNode *node;
  g_slist_foreach (builder->first_doc, (GFunc) gsk_xml_node_unref, NULL);
  g_slist_free (builder->first_doc);
  for (node = builder->cur; node; )
    {
      BuilderNode *next = node->to_root;
      guint i;
      gsk_xml_string_unref (node->name);
      for (i = 0; i < node->n_attrs; i++)
        {
          gsk_xml_string_unref (node->attrs[i].name);
          gsk_xml_string_unref (node->attrs[i].value);
        }
      gsk_g_ptr_array_foreach (node->children, (GFunc) gsk_xml_node_unref, NULL);
      g_ptr_array_free (node->children, TRUE);
      if (node->default_ns)
        gsk_xml_namespace_unref (node->default_ns);
      for (i = 0; i < node->n_ns; i++)
        gsk_xml_namespace_unref (node->ns_by_abbrev[i]);
      g_free (node->ns_by_abbrev);
      g_free (node);
      node = next;
    }

  g_free (builder);
}


syntax highlighted by Code2HTML, v. 0.9.1