/* stringprep.c		Core stringprep implementation.
 * Copyright (C) 2002, 2003  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include "internal.h"

static ssize_t
stringprep_find_character_in_table (my_uint32_t ucs4,
				    Stringprep_table_element * table)
{
  ssize_t i;

  for (i = 0; table[i].start; i++)
    if (ucs4 >= table[i].start &&
	ucs4 <= (table[i].end ? table[i].end : table[i].start))
      return i;

  return -1;
}

static ssize_t
stringprep_find_string_in_table (my_uint32_t * ucs4,
				 size_t ucs4len,
				 size_t * tablepos,
				 Stringprep_table_element * table)
{
  size_t j;
  ssize_t pos;

  for (j = 0; j < ucs4len; j++)
    if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
      {
	if (tablepos)
	  *tablepos = pos;
	return j;
      }

  return -1;
}

static int
stringprep_apply_table_to_string (my_uint32_t * ucs4,
				  size_t * ucs4len,
				  size_t maxucs4len,
				  Stringprep_table_element * table,
				  const char *tablename)
{
  ssize_t pos;
  size_t i, maplen;

  while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
						 &i, table)) != -1)
    {
      for (maplen = STRINGPREP_MAX_MAP_CHARS;
	   maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
	;

      if (*ucs4len - 1 + maplen >= maxucs4len)
	return STRINGPREP_TOO_SMALL_BUFFER;

      memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
	       *ucs4len * sizeof (my_uint32_t) - (&ucs4[pos + 1] - ucs4));
      memcpy (&ucs4[pos], table[i].map, sizeof (my_uint32_t) * maplen);
      *ucs4len = *ucs4len - 1 + maplen;
    }

  return STRINGPREP_OK;
}

#define INVERTED(x) ((x) & ((~0UL) >> 1))
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED(profileflags) && !(profileflags & flags) && profileflags) || \
   ( INVERTED(profileflags) && (profileflags & flags)))

/**
 * stringprep:
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 * @flags: optional stringprep profile flags.
 * @profile: pointer to stringprep profile to use.
 *
 * Prepare the input UTF-8 string according to the stringprep profile.
 * Normally application programmers use stringprep profile macros such
 * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
 * calling this function directly.
 *
 * Since the stringprep operation can expand the string, @maxlen
 * indicate how large the buffer holding the string is.  The @flags
 * are one of Stringprep_profile_flags, or 0.  The profile indicates
 * processing details specific to that profile.  Your application can
 * define new profiles, possibly re-using the generic stringprep
 * tables that always will be part of the library.
 *
 * Note that you must convert strings entered in the systems locale
 * into UTF-8 before using this function.
 *
 * Return value: Returns 0 iff successful, or an error code.
 **/
int
stringprep (char *in,
	    size_t maxlen,
	    Stringprep_profile_flags flags, Stringprep_profile * profile)
{
  size_t i, j;
  ssize_t k;
  int rc;
  char *p = 0;
  my_uint32_t *q = 0;
  my_uint32_t *ucs4;
  size_t ucs4len, maxucs4len;

  ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
  maxucs4len = 4 * ucs4len + 10;	/* XXX */
  ucs4 = realloc (ucs4, 1 + maxucs4len * sizeof (my_uint32_t));
  if (!ucs4)
    {
      rc = STRINGPREP_MALLOC_ERROR;
      goto done;
    }

  for (i = 0; profile[i].operation; i++)
    {
      switch (profile[i].operation)
	{
	case STRINGPREP_NFKC:
	  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
	    {
	      break;
	    }

	  if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
	    {
	      /* Profile requires NFKC, but callee asked for no NFKC. */
	      rc = STRINGPREP_FLAG_ERROR;
	      goto done;
	    }

	  q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);

	  if (!q)
	    {
	      rc = STRINGPREP_NFKC_FAILED;
	      goto done;
	    }

	  for (j = 0; q[j]; j++)
	    ;

	  free (ucs4);
	  ucs4 = q;
	  ucs4len = j;
	  q = 0;
	  break;

	case STRINGPREP_PROHIBIT_TABLE:
	  k = stringprep_find_string_in_table (ucs4, ucs4len,
					       NULL, profile[i].table);
	  if (k != -1)
	    {
	      rc = STRINGPREP_CONTAINS_PROHIBITED;
	      goto done;
	    }
	  break;

	case STRINGPREP_UNASSIGNED_TABLE:
	  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
	    break;
	  if (flags & STRINGPREP_NO_UNASSIGNED)
	    {
	      k = stringprep_find_string_in_table
		(ucs4, ucs4len, NULL, profile[i].table);
	      if (k != -1)
		{
		  rc = STRINGPREP_CONTAINS_UNASSIGNED;
		  goto done;
		}
	    }
	  break;

	case STRINGPREP_MAP_TABLE:
	  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
	    break;
	  rc = stringprep_apply_table_to_string
	    (ucs4, &ucs4len, maxucs4len, profile[i].table, profile[i].name);
	  if (rc != STRINGPREP_OK)
	    goto done;
	  break;

	case STRINGPREP_BIDI_PROHIBIT_TABLE:
	case STRINGPREP_BIDI_RAL_TABLE:
	case STRINGPREP_BIDI_L_TABLE:
	  break;

	case STRINGPREP_BIDI:
	  {
	    int done_prohibited = 0;
	    int done_ral = 0;
	    int done_l = 0;
	    int contains_ral = -1;
	    int contains_l = -1;

	    for (j = 0; profile[j].operation; j++)
	      if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
		{
		  done_prohibited = 1;
		  k = stringprep_find_string_in_table (ucs4, ucs4len,
						       NULL,
						       profile[j].table);
		  if (k != -1)
		    {
		      rc = STRINGPREP_BIDI_CONTAINS_PROHIBITED;
		      goto done;
		    }
		}
	      else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
		{
		  done_ral = 1;
		  if (stringprep_find_string_in_table
		      (ucs4, ucs4len, NULL, profile[j].table) != -1)
		    contains_ral = j;
		}
	      else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
		{
		  done_l = 1;
		  if (stringprep_find_string_in_table
		      (ucs4, ucs4len, NULL, profile[j].table) != -1)
		    contains_l = j;
		}

	    if (!done_prohibited || !done_ral || !done_l)
	      {
		rc = STRINGPREP_PROFILE_ERROR;
		goto done;
	      }

	    if (contains_ral != -1 && contains_l != -1)
	      {
		rc = STRINGPREP_BIDI_BOTH_L_AND_RAL;
		goto done;
	      }

	    if (contains_ral != -1)
	      {
		if (!(stringprep_find_character_in_table
		      (ucs4[0], profile[contains_ral].table) != -1 &&
		      stringprep_find_character_in_table
		      (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
		  {
		    rc = STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
		    goto done;
		  }
	      }
	  }
	  break;

	default:
	  rc = STRINGPREP_PROFILE_ERROR;
	  goto done;
	  break;
	}
    }

  p = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);

  if (strlen (p) >= maxlen)
    {
      rc = STRINGPREP_TOO_SMALL_BUFFER;
      goto done;
    }

  strcpy (in, p);		/* flawfinder: ignore */

  rc = STRINGPREP_OK;

done:
  if (p)
    free (p);
  if (q)
    free (q);
  if (ucs4)
    free (ucs4);
  return rc;
}

/**
 * stringprep_profile:
 * @in: input/ouput array with string to prepare.
 * @out: output variable with newly allocate string.
 * @flags: optional stringprep profile flags.
 * @profile: name of stringprep profile to use.
 *
 * Prepare the input UTF-8 string according to the stringprep profile.
 * Normally application programmers use stringprep profile macros such
 * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
 * calling this function directly.
 *
 * Note that you must convert strings entered in the systems locale
 * into UTF-8 before using this function.
 *
 * The output @out variable must be deallocated by the caller.
 *
 * Return value: Returns 0 iff successful, or an error code.
 **/
int
stringprep_profile (char *in,
		    char **out, char *profile, Stringprep_profile_flags flags)
{
  Stringprep_profiles *p;
  char *str;
  size_t len;
  int rc;

  for (p = &stringprep_profiles[0]; p->name; p++)
    if (strcmp (p->name, profile) == 0)
      break;

  if (!p || !p->name || !p->tables)
    return STRINGPREP_UNKNOWN_PROFILE;

  len = strlen (in) + BUFSIZ;
  str = (char *) malloc (len);
  if (str == NULL)
    return STRINGPREP_MALLOC_ERROR;

  strcpy (str, in);

  rc = stringprep (str, len, flags, p->tables);

  if (rc == STRINGPREP_OK)
    *out = str;
  else
    free (str);

  return rc;
}

/**
 * STRINGPREP_VERSION
 *
 * String defined via CPP denoting the header file version number.
 * Used together with stringprep_check_version() to verify header file
 * and run-time library consistency.
 */

/**
 * STRINGPREP_MAX_MAP_CHARS
 *
 * Maximum number of code points that can replace a single code point,
 * during stringprep mapping.
 */

/**
 * Stringprep_rc
 *
 * Enumerated return codes of stringprep(), stringprep_profile()
 * functions (and macros using those functions).  The value 0 is
 * guaranteed to always correspond to success.
 */

/**
 * Stringprep_profile_flags:
 * @STRINGPREP_NO_NFKC: Disable the NFKC normalization, as well as
 *   selecting the non-NFKC case folding tables.  Usually the profile
 *   specifies BIDI and NFKC settings, and applications should not
 *   override it unless in special situations.
 * @STRINGPREP_NO_BIDI: Disable the BIDI step.  Usually the profile
 *   specifies BIDI and NFKC settings, and applications should not
 *   override it unless in special situations.
 * @STRINGPREP_NO_UNASSIGNED: Make the library return with an error if
 *   string contains unassigned characters according to profile.
 *
 * Stringprep profile flags.
 */

/**
 * Stringprep_profile_steps:
 *
 * Various steps in the stringprep algorithm.  You really want to
 * study the source code to understand this one.  Only useful if you
 * want to add another profile.
 */

/**
 * stringprep_nameprep:
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the nameprep profile.
 * The AllowUnassigned flag is true, use
 * stringprep_nameprep_no_unassigned() for false AllowUnassigned.
 * Returns 0 iff successful, or an error code.
 **/

/**
 * stringprep_nameprep_no_unassigned:
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the nameprep profile.
 * The AllowUnassigned flag is false, use stringprep_nameprep() for
 * true AllowUnassigned.  Returns 0 iff successful, or an error code.
 **/

/**
 * stringprep_iscsi:
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft iSCSI
 * stringprep profile.  Returns 0 iff successful, or an error code.
 **/

/**
 * stringprep_kerberos5:
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft Kerberos5
 * stringprep profile.  Returns 0 iff successful, or an error code.
 **/

/**
 * stringprep_plain:
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft SASL
 * ANONYMOUS profile.  Returns 0 iff successful, or an error code.
 **/

/**
 * stringprep_xmpp_nodeprep:
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft XMPP node
 * identifier profile.  Returns 0 iff successful, or an error code.
 **/

/**
 * stringprep_xmpp_resourceprep:
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft XMPP resource
 * identifier profile.  Returns 0 iff successful, or an error code.
 **/

/**
 * stringprep_generic:
 * @in: input/ouput array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to a hypotetical "generic"
 * stringprep profile. This is mostly used for debugging or when
 * constructing new stringprep profiles. Returns 0 iff successful, or
 * an error code.
 **/


syntax highlighted by Code2HTML, v. 0.9.1