// ---------------------------------------------------------------------------
// - cucd.cxx                                                                -
// - unicode database (ucd) library functions implementation                 -
// ---------------------------------------------------------------------------
// - This program is free software;  you can redistribute it  and/or  modify -
// - it provided that this copyright notice is kept intact.                  -
// -                                                                         -
// - This program  is  distributed in  the hope  that it will be useful, but -
// - without  any  warranty;  without  even   the   implied    warranty   of -
// - merchantability or fitness for a particular purpose.  In no event shall -
// - the copyright holder be liable for any  direct, indirect, incidental or -
// - special damages arising in any way out of the use of this software.     -
// ---------------------------------------------------------------------------
// - copyright (c) 1999-2007 amaury darsch                                   -
// ---------------------------------------------------------------------------

#include "cucd.hxx"
#include "cthr.hpp"
#include "csys.hpp"

namespace afnix {

  // -------------------------------------------------------------------------
  // - private section                                                       -
  // -------------------------------------------------------------------------

  // the direct access plane size 0x10000 = 65536 - one slot per code point
  // of a plane, addressed with the low 16 bits of the code
  static const long UCD_DPA_MAX = 65536;
  // the indirect plane size 0x8000 = 32768 - one slot per plane index
  static const long UCD_IPA_MAX = 32768;

  // the indirect plane access array - each non nil entry is a direct
  // access plane of UCD_DPA_MAX record pointers
  static const ucd_s*** p_ipa = nilp;

  // static mutex creation function
  static void* mtx_create (void);
  // mutex locked while the plane access arrays are built
  static void* mtx = mtx_create ();

  // exit handler - release every direct access plane, then the indirect
  // array itself and finally the mutex
  static void mtx_destroy (void) {
    // release the direct access planes when the indirect array exists
    if (p_ipa != nilp) {
      long pidx = 0;
      while (pidx < UCD_IPA_MAX) {
        delete [] p_ipa[pidx];
        pidx++;
      }
    }
    // release the indirect array
    delete [] p_ipa;
    // release the mutex
    c_mtxdestroy (mtx);
  }

  // create the module mutex and register mtx_destroy as an exit handler
  // the new mutex is returned to the caller
  static void* mtx_create (void) {
    void* result = c_mtxcreate ();
    // the cleanup is registered once the mutex has been created
    c_atexit (mtx_destroy);
    return result;
  }

  // this function loads a plane by index
  static void ucd_load_plane (const long index) {
    // check the plane index
    if ((index < 0) || (index >= UCD_IPA_MAX)) return;
    // lock the mutex
    c_mtxlock (mtx);
    // preload the indirect array
    if (p_ipa == nilp) {
      p_ipa = new const ucd_s**[UCD_IPA_MAX];
      for (long i = 0; i < UCD_IPA_MAX; i++) p_ipa[i] = nilp;
    }
    if (p_ipa[index] == nilp) {
      // initialize the array
      const ucd_s** dpa = new const ucd_s*[UCD_DPA_MAX];
      for (long i = 0; i < UCD_DPA_MAX; i++) dpa[i] = nilp;
      // get the plane size and data
      const long   psize = c_ucdpsize (index);
      const ucd_s* plane = c_ucdplane (index);
      // initialize the direct access plane
      for (long i = 0; i < psize; i++) {
        long pidx = (long) (plane[i].d_code & 0x0000FFFF);
	dpa[pidx] = &plane[i];
      }
      p_ipa[index] = dpa;
    }
    c_mtxunlock (mtx);
  }

  // fill an array with a canonical decomposition and an index
  // dst   : the destination array of UCD_CDV_MAX elements
  // index : the next free position in dst, advanced for each code written
  // code  : the code point to decompose
  // the function returns false as soon as the destination is full, even
  // when this happens deep in the recursion, and true otherwise

  static bool ucd_fill_nfd (t_quad dst[UCD_CDV_MAX], long& index,
                            const t_quad code) {
    // check the index
    if (index >= UCD_CDV_MAX) return false;
    // get the ucd record
    const ucd_s* ucd = c_getucd (code);
    // the code is mapped in place when there is no record, when the
    // decomposition mapping value is not nil (the decomposition is not a
    // canonical one) or when the first decomposition value is null
    if ((ucd == nilp) || (ucd->d_pdmv != UCD_DMV_NIL) ||
        (ucd->d_cdmv[0] == nilq)) {
      dst[index++] = code;
      return true;
    }
    // here a decomposition exists - the decomposition mapping value is nil
    // so it means that it is a canonical decomposition - let's go with it!
    for (long i = 0; i < UCD_CDV_MAX; i++) {
      // get the decomposition value
      t_quad c = ucd->d_cdmv[i];
      if (c == nilq) break;
      // recursively remap it and report a full destination to the caller
      if (ucd_fill_nfd (dst, index, c) == false) return false;
    }
    return true;
  }

  // this procedure find the ccc value for a code point
  static inline long ucd_find_ccc (const t_quad code) {
    // get the ucd record
    const ucd_s* ucd = c_getucd (code);
    // extract ccc value
    return (ucd == nilp) ? 0 : ucd->d_pccc;
  }


  // -------------------------------------------------------------------------
  // - public section                                                        -
  // -------------------------------------------------------------------------

  // return the ucd plane size by index - planes 0x0000 to 0x000E are
  // dispatched to their size accessor, any other index yields 0

  const long c_ucdpsize (const long index) {
    long psize = 0;
    switch (index) {
    case 0x0000: psize = ucd_get_psize_0000 (); break;
    case 0x0001: psize = ucd_get_psize_0001 (); break;
    case 0x0002: psize = ucd_get_psize_0002 (); break;
    case 0x0003: psize = ucd_get_psize_0003 (); break;
    case 0x0004: psize = ucd_get_psize_0004 (); break;
    case 0x0005: psize = ucd_get_psize_0005 (); break;
    case 0x0006: psize = ucd_get_psize_0006 (); break;
    case 0x0007: psize = ucd_get_psize_0007 (); break;
    case 0x0008: psize = ucd_get_psize_0008 (); break;
    case 0x0009: psize = ucd_get_psize_0009 (); break;
    case 0x000A: psize = ucd_get_psize_000A (); break;
    case 0x000B: psize = ucd_get_psize_000B (); break;
    case 0x000C: psize = ucd_get_psize_000C (); break;
    case 0x000D: psize = ucd_get_psize_000D (); break;
    case 0x000E: psize = ucd_get_psize_000E (); break;
    default:     break;
    }
    return psize;
  }

  // return the ucd plane array by index - planes 0x0000 to 0x000E are
  // dispatched to their array accessor, any other index yields nil

  const ucd_s* c_ucdplane (const long index) {
    const ucd_s* plane = nilp;
    switch (index) {
    case 0x0000: plane = ucd_get_plane_0000 (); break;
    case 0x0001: plane = ucd_get_plane_0001 (); break;
    case 0x0002: plane = ucd_get_plane_0002 (); break;
    case 0x0003: plane = ucd_get_plane_0003 (); break;
    case 0x0004: plane = ucd_get_plane_0004 (); break;
    case 0x0005: plane = ucd_get_plane_0005 (); break;
    case 0x0006: plane = ucd_get_plane_0006 (); break;
    case 0x0007: plane = ucd_get_plane_0007 (); break;
    case 0x0008: plane = ucd_get_plane_0008 (); break;
    case 0x0009: plane = ucd_get_plane_0009 (); break;
    case 0x000A: plane = ucd_get_plane_000A (); break;
    case 0x000B: plane = ucd_get_plane_000B (); break;
    case 0x000C: plane = ucd_get_plane_000C (); break;
    case 0x000D: plane = ucd_get_plane_000D (); break;
    case 0x000E: plane = ucd_get_plane_000E (); break;
    default:     break;
    }
    return plane;
  }

  // return a ucd structure by code point
  // the high bits of the code (code >> 16) select the plane and the low
  // 16 bits select the record inside the plane - nil is returned when the
  // plane index is outside the indirect array, when the plane is not
  // available or when the code point has no record

  const ucd_s* c_getucd (const t_quad code) {
    // get the plane index
    long plane = code >> 16;
    // reject a plane outside the indirect array - the loader ignores such
    // an index, so the array must not be accessed with it either
    if ((plane < 0) || (plane >= UCD_IPA_MAX)) return nilp;
    // check if we are loaded
    ucd_load_plane (plane);
    // get the indirect plane
    if (p_ipa == nilp) return nilp;
    const ucd_s** dpa = p_ipa[plane];
    if (dpa == nilp) return nilp;
    // get the ucd structure
    long index = (long) (code & 0x0000FFFF);
    return dpa[index];
  }

  // fill an array with the canonical decomposition of a code point
  // the destination is reset with null values, filled with the
  // decomposition and then put in canonical order - false is returned
  // when the decomposition cannot be stored

  bool c_ucdnfd (t_quad dst[UCD_CDV_MAX], const t_quad code) {
    // reset the destination
    long k = 0;
    while (k < UCD_CDV_MAX) dst[k++] = nilq;
    // decompose from the first position
    long pos = 0;
    if (!ucd_fill_nfd (dst, pos, code)) return false;
    // reorder the result with the ccc coding
    c_ucdcof (dst, UCD_CDV_MAX);
    return true;
  }

  // fill an array with the canonical decomposition of a bounded source
  // array - this one is for test purpose only since the array is bounded
  // the source is read up to its first null value or UCD_CDV_MAX elements

  bool c_ucdnfd (t_quad dst[UCD_CDV_MAX], const t_quad src[UCD_CDV_MAX]) {
    // reset the destination
    long k = 0;
    while (k < UCD_CDV_MAX) dst[k++] = nilq;
    // decompose each source code in turn - a failure is recorded but the
    // remaining codes are still submitted
    long pos = 0;
    bool ok  = true;
    for (long sidx = 0; sidx < UCD_CDV_MAX; sidx++) {
      const t_quad code = src[sidx];
      if (code == nilq) break;
      if (ucd_fill_nfd (dst, pos, code) == false) ok = false;
    }
    if (!ok) return false;
    // reorder the result with the ccc coding
    c_ucdcof (dst, UCD_CDV_MAX);
    return true;
  }

  // return a newly allocated string made of a single null value
  // the array is allocated with new[]

  t_quad* c_ucdnil (void) {
    t_quad* nstr = new t_quad[1];
    *nstr = nilq;
    return nstr;
  }

  // normalize a character buffer into a canonical form
  // s    : the character buffer, each character is widened to a quad
  //        after masking with 0xFF
  // size : the number of characters in the buffer
  // a nil buffer or a null or negative size yields a nil allocated string
  // the result is allocated with new[]

  t_quad* c_ucdnrm (const char* s, const long size) {
    // check for nil first - a negative size is rejected like the quad
    // version does, so it never reaches the allocation
    if ((s == nilp) || (size <= 0)) return c_ucdnil ();
    // create a temporary buffer that holds the quad representation
    t_quad* buf = new t_quad[size];
    for (long i = 0; i < size; i++) buf[i] = ((t_quad) s[i]) & 0x000000FF;
    try {
      // convert the buffer
      t_quad* result = c_ucdnrm (buf, size);
      // clean and return
      delete [] buf;
      return result;
    } catch (...) {
      // the temporary buffer must not leak when the conversion fails
      delete [] buf;
      throw;
    }
  }

  // normalize a string into a canonical form
  // s    : the quad string, read up to its first null value or size
  // size : the maximum number of elements to read
  // a nil string or a null or negative size yields a nil allocated
  // string, as does a code point whose decomposition cannot be stored
  // the result is allocated with new[] and is released here if an
  // exception is raised while it is built

  t_quad* c_ucdnrm (const t_quad* s, const long size){
    if ((s == nilp) || (size <= 0)) return c_ucdnil ();
    // allocate a buffer of sufficient size - each source element can
    // produce at most UCD_CDV_MAX values, plus one terminating null
    long    len = size * UCD_CDV_MAX + 1;
    t_quad* buf = new t_quad[len];
    try {
      // initialize the buffer
      for (long i = 0; i < len; i++) buf[i] = nilq;
      // loop in the string and update the buffer
      long pos = 0;
      for (long i = 0; i < size; i++) {
        // get the code and check for nil
        t_quad code = s[i];
        if (code == nilq) {
          buf[pos++] = nilq;
          break;
        }
        // get the character mapping
        t_quad dst[UCD_CDV_MAX];
        if (c_ucdnfd (dst, code) == false) {
          // reset the pointer after the release so that the handler below
          // cannot release the buffer a second time
          delete [] buf;
          buf = nilp;
          return c_ucdnil ();
        }
        // update the buffer with the mapping
        for (long j = 0; j < UCD_CDV_MAX; j++) {
          t_quad c = dst[j];
          if (c == nilq) break;
          buf[pos++] = c;
        }
      }
      // put the buffer in canonical order
      c_ucdcof (buf, len);
      return buf;
    } catch (...) {
      // the buffer must not leak when a lookup or an allocation fails
      delete [] buf;
      throw;
    }
  }

  // put a character array in a canonical order form
  // the array is reordered in place - it is scanned up to its first null
  // value or size elements, and each element with a non null ccc is moved
  // before the first element of its run with a strictly higher ccc

  void c_ucdcof (t_quad* buf, const long size) {
    // nothing to order
    if ((buf == nilp) || (size == 0)) return;
    // order in place, starting with the second element
    long cur = 1;
    while (cur < size) {
      // get the code point and stop at the first null
      const t_quad mark = buf[cur];
      if (mark == nilq) break;
      // only a code point with a non null ccc can be moved
      const long mccc = ucd_find_ccc (mark);
      if (mccc != 0) {
        // walk back to the start of the run - the start is position 0 or
        // the position following the nearest element with a ccc of 0
        long start = cur;
        for (long scan = cur; scan >= 0; scan--) {
          if (ucd_find_ccc (buf[scan]) == 0) break;
          start = scan;
        }
        // look for the first element of the run with a higher ccc and
        // rotate the code point to that slot - elements with an equal ccc
        // keep their relative order
        for (long slot = start; slot < cur; slot++) {
          if (ucd_find_ccc (buf[slot]) <= mccc) continue;
          for (long k = cur; k > slot; k--) buf[k] = buf[k-1];
          buf[slot] = mark;
          // restart the scan right after the insertion slot
          cur = slot;
          break;
        }
      }
      cur++;
    }
  }

  // convert a unicode character to lower case
  // dst  : the destination array of UCD_LCM_MAX elements
  // code : the code point to convert
  // the function returns the number of code points written in dst - the
  // code point is mapped to itself when it has no ucd record or when its
  // lower map is empty, so the result is always at least 1

  long c_ucdtol (t_quad dst[UCD_LCM_MAX], const t_quad code) {
    // get the ucd record and map in place if it does not exist
    const ucd_s* ucd = c_getucd (code);
    if (ucd == nilp) {
      dst[0] = code;
      return 1;
    }
    // loop in the lower map
    long result = 0;
    for (long i = 0; i < UCD_LCM_MAX; i++) {
      t_quad c = ucd->d_lmap[i];
      if (c == nilq) break;
      dst[i] = c;
      result++;
    }
    // if the result is null just map the existing character
    if (result == 0) dst[result++] = code;
    // report the real mapping length - a map can hold several code points
    return result;
  }
  
  // convert a unicode character to upper case
  // dst  : the destination array of UCD_UCM_MAX elements
  // code : the code point to convert
  // the function returns the number of code points written in dst - the
  // code point is mapped to itself when it has no ucd record or when its
  // upper map is empty, so the result is always at least 1
  
  long c_ucdtou (t_quad dst[UCD_UCM_MAX], const t_quad code) {
    // get the ucd record and map in place if it does not exist
    const ucd_s* ucd = c_getucd (code);
    if (ucd == nilp) {
      dst[0] = code;
      return 1;
    }
    // loop in the upper map
    long result = 0;
    for (long i = 0; i < UCD_UCM_MAX; i++) {
      t_quad c = ucd->d_umap[i];
      if (c == nilq) break;
      dst[i] = c;
      result++;
    }
    // if the result is null just map the existing character
    if (result == 0) dst[result++] = code;
    // report the real mapping length - a map can hold several code points
    return result;
  }

  // return true if the code point is not combining

  bool c_ucdncc (const t_quad code) {
    // get the ucd record and do nothing if it does not exist
    const ucd_s* ucd = c_getucd (code);
    if (ucd == nilp) return false;
    // check for ccc 0
    return (ucd->d_pccc == 0);
  }
};


// syntax highlighted by Code2HTML, v. 0.9.1