// ---------------------------------------------------------------------------
// - cucd.cxx -
// - unicode database (ucd) library functions implementation -
// ---------------------------------------------------------------------------
// - This program is free software; you can redistribute it and/or modify -
// - it provided that this copyright notice is kept intact. -
// - -
// - This program is distributed in the hope that it will be useful, but -
// - without any warranty; without even the implied warranty of -
// - merchantability or fitness for a particular purpose. In no event shall -
// - the copyright holder be liable for any direct, indirect, incidental or -
// - special damages arising in any way out of the use of this software. -
// ---------------------------------------------------------------------------
// - copyright (c) 1999-2007 amaury darsch -
// ---------------------------------------------------------------------------
#include "cucd.hxx"
#include "cthr.hpp"
#include "csys.hpp"
namespace afnix {
// -------------------------------------------------------------------------
// - private section -
// -------------------------------------------------------------------------
// the direct access plane size 0x10000 = 65536 - one slot per value of
// the low 16 bits of a code point
static const long UCD_DPA_MAX = 65536;
// the indirect plane array size 0x8000 = 32768 - one slot per plane index
static const long UCD_IPA_MAX = 32768;
// the indirect plane access array - indexed by plane, each non nil entry
// is a direct access array of ucd record pointers
static const ucd_s*** p_ipa = nilp;
// static mutex creation function
static void* mtx_create (void);
// mutex protecting the lazy loading of the plane access arrays
static void* mtx = mtx_create ();
// this function releases the plane access arrays and then the mutex
static void mtx_destroy (void) {
  // release every direct access array held by the indirect array
  if (p_ipa != nilp) {
    long pidx = 0;
    while (pidx < UCD_IPA_MAX) {
      delete [] p_ipa[pidx];
      pidx++;
    }
  }
  // release the indirect array itself (deleting a nil pointer is harmless)
  delete [] p_ipa;
  // release the mutex last
  c_mtxdestroy (mtx);
}
// this function creates the mutex used during static initialization and
// registers mtx_destroy so that the cleanup is done at exit
static void* mtx_create (void) {
  // create the mutex first, then register the cleanup
  void* result = c_mtxcreate ();
  c_atexit (mtx_destroy);
  return result;
}
// this function loads a plane by index
static void ucd_load_plane (const long index) {
// check the plane index
if ((index < 0) || (index >= UCD_IPA_MAX)) return;
// lock the mutex
c_mtxlock (mtx);
// preload the indirect array
if (p_ipa == nilp) {
p_ipa = new const ucd_s**[UCD_IPA_MAX];
for (long i = 0; i < UCD_IPA_MAX; i++) p_ipa[i] = nilp;
}
if (p_ipa[index] == nilp) {
// initialize the array
const ucd_s** dpa = new const ucd_s*[UCD_DPA_MAX];
for (long i = 0; i < UCD_DPA_MAX; i++) dpa[i] = nilp;
// get the plane size and data
const long psize = c_ucdpsize (index);
const ucd_s* plane = c_ucdplane (index);
// initialize the direct access plane
for (long i = 0; i < psize; i++) {
long pidx = (long) (plane[i].d_code & 0x0000FFFF);
dpa[pidx] = &plane[i];
}
p_ipa[index] = dpa;
}
c_mtxunlock (mtx);
}
// fill an array with a canonical decomposition and an index
// @param dst   the destination array of UCD_CDV_MAX code points
// @param index the next free position in dst, advanced on each write
// @param code  the code point to decompose
// @return false if dst is already full, including when this happens
//         while expanding a nested decomposition, true otherwise
static bool ucd_fill_nfd (t_quad dst[UCD_CDV_MAX], long& index,
                          const t_quad code) {
  // check the index
  if (index >= UCD_CDV_MAX) return false;
  // get the ucd record
  const ucd_s* ucd = c_getucd (code);
  // the code point is copied unchanged when it has no record, when its
  // decomposition mapping value is not nil (the decomposition is then not
  // treated as canonical here) or when its decomposition is empty
  if ((ucd == nilp) || (ucd->d_pdmv != UCD_DMV_NIL) ||
      (ucd->d_cdmv[0] == nilq)) {
    dst[index++] = code;
    return true;
  }
  // here a decomposition exists and its mapping value is nil - expand
  // each element recursively
  for (long i = 0; i < UCD_CDV_MAX; i++) {
    // get the decomposition value
    t_quad c = ucd->d_cdmv[i];
    if (c == nilq) break;
    // recursively remap it and report a destination overflow
    if (ucd_fill_nfd (dst, index, c) == false) return false;
  }
  return true;
}
// this procedure find the ccc value for a code point
static inline long ucd_find_ccc (const t_quad code) {
// get the ucd record
const ucd_s* ucd = c_getucd (code);
// extract ccc value
return (ucd == nilp) ? 0 : ucd->d_pccc;
}
// -------------------------------------------------------------------------
// - public section -
// -------------------------------------------------------------------------
// return the ucd plane size by index - a plane index without a generated
// table (anything other than 0x0000 to 0x000E) has a size of 0
const long c_ucdpsize (const long index) {
  long psize = 0;
  switch (index) {
  case 0x0000: psize = ucd_get_psize_0000 (); break;
  case 0x0001: psize = ucd_get_psize_0001 (); break;
  case 0x0002: psize = ucd_get_psize_0002 (); break;
  case 0x0003: psize = ucd_get_psize_0003 (); break;
  case 0x0004: psize = ucd_get_psize_0004 (); break;
  case 0x0005: psize = ucd_get_psize_0005 (); break;
  case 0x0006: psize = ucd_get_psize_0006 (); break;
  case 0x0007: psize = ucd_get_psize_0007 (); break;
  case 0x0008: psize = ucd_get_psize_0008 (); break;
  case 0x0009: psize = ucd_get_psize_0009 (); break;
  case 0x000A: psize = ucd_get_psize_000A (); break;
  case 0x000B: psize = ucd_get_psize_000B (); break;
  case 0x000C: psize = ucd_get_psize_000C (); break;
  case 0x000D: psize = ucd_get_psize_000D (); break;
  case 0x000E: psize = ucd_get_psize_000E (); break;
  default:
    break;
  }
  return psize;
}
// return the ucd plane array by index - a plane index without a generated
// table (anything other than 0x0000 to 0x000E) yields a nil pointer
const ucd_s* c_ucdplane (const long index) {
  const ucd_s* plane = nilp;
  switch (index) {
  case 0x0000: plane = ucd_get_plane_0000 (); break;
  case 0x0001: plane = ucd_get_plane_0001 (); break;
  case 0x0002: plane = ucd_get_plane_0002 (); break;
  case 0x0003: plane = ucd_get_plane_0003 (); break;
  case 0x0004: plane = ucd_get_plane_0004 (); break;
  case 0x0005: plane = ucd_get_plane_0005 (); break;
  case 0x0006: plane = ucd_get_plane_0006 (); break;
  case 0x0007: plane = ucd_get_plane_0007 (); break;
  case 0x0008: plane = ucd_get_plane_0008 (); break;
  case 0x0009: plane = ucd_get_plane_0009 (); break;
  case 0x000A: plane = ucd_get_plane_000A (); break;
  case 0x000B: plane = ucd_get_plane_000B (); break;
  case 0x000C: plane = ucd_get_plane_000C (); break;
  case 0x000D: plane = ucd_get_plane_000D (); break;
  case 0x000E: plane = ucd_get_plane_000E (); break;
  default:
    break;
  }
  return plane;
}
// return a ucd structure by code point
// @param code the code point to look up
// @return the ucd record, or nil if the code point has no record or if
//         its plane index is outside the indirect plane array
const ucd_s* c_getucd (const t_quad code) {
  // get the plane index from the bits above the low 16
  const long plane = (long) (code >> 16);
  // reject a plane outside the indirect array - the loader ignores such
  // an index, so reading p_ipa[plane] would be out of bounds
  if ((plane < 0) || (plane >= UCD_IPA_MAX)) return nilp;
  // make sure the plane is loaded
  ucd_load_plane (plane);
  // get the direct access array of the plane
  if (p_ipa == nilp) return nilp;
  const ucd_s** dpa = p_ipa[plane];
  if (dpa == nilp) return nilp;
  // get the ucd structure from the low 16 bits
  const long index = (long) (code & 0x0000FFFF);
  return dpa[index];
}
// fill an array with the canonical decomposition of one code point
// the destination is reset to nil, filled, then put in canonical order
// returns false, without ordering, when the decomposition cannot be filled
bool c_ucdnfd (t_quad dst[UCD_CDV_MAX], const t_quad code) {
  // reset the destination
  for (long k = 0; k < UCD_CDV_MAX; k++) dst[k] = nilq;
  // decompose from the first position
  long pos = 0;
  if (!ucd_fill_nfd (dst, pos, code)) return false;
  // reorder according to the ccc values
  c_ucdcof (dst, UCD_CDV_MAX);
  return true;
}
// fill an array with the canonical decomposition of a nil terminated
// source array - this one is for test purpose only since both arrays
// are bounded by UCD_CDV_MAX
bool c_ucdnfd (t_quad dst[UCD_CDV_MAX], const t_quad src[UCD_CDV_MAX]) {
  // reset the destination
  for (long k = 0; k < UCD_CDV_MAX; k++) dst[k] = nilq;
  // decompose each source code point up to the first nil
  long pos = 0;
  bool ok  = true;
  for (long k = 0; (k < UCD_CDV_MAX) && (src[k] != nilq); k++) {
    // every element is attempted, a single failure fails the whole call
    if (ucd_fill_nfd (dst, pos, src[k]) == false) ok = false;
  }
  if (!ok) return false;
  // reorder according to the ccc values
  c_ucdcof (dst, UCD_CDV_MAX);
  return true;
}
// return a newly allocated string that only holds the nil terminator
// the array is allocated with new[]
t_quad* c_ucdnil (void) {
  t_quad* empty = new t_quad[1];
  *empty = nilq;
  return empty;
}
// normalize a character buffer into a canonical form
// @param s    the character buffer, each char is taken as one code point
//             in the range 0x00 to 0xFF
// @param size the number of characters to read from s
// @return a newly allocated (new[]) nil terminated array - it holds only
//         the terminator when s is nil or size is not positive
t_quad* c_ucdnrm (const char* s, const long size) {
  // check for nil first - a negative size is rejected as well so that
  // it never reaches the array allocation below
  if ((s == nilp) || (size <= 0)) return c_ucdnil ();
  // create a temporary buffer that holds the quad representation
  t_quad* buf = new t_quad[size];
  // mask to the low byte so that a signed char is not sign extended
  for (long i = 0; i < size; i++) buf[i] = ((t_quad) s[i]) & 0x000000FF;
  try {
    // convert the buffer
    t_quad* result = c_ucdnrm (buf, size);
    // clean and return
    delete [] buf;
    return result;
  } catch (...) {
    // release the temporary buffer before propagating
    delete [] buf;
    throw;
  }
}
// normalize a string into a canonical form
// the result is a newly allocated (new[]) nil terminated array - it holds
// only the terminator when the source is nil, when the size is not
// positive or when one code point cannot be decomposed
t_quad* c_ucdnrm (const t_quad* s, const long size){
  if ((s == nilp) || (size <= 0)) return c_ucdnil ();
  // each source code point can expand to UCD_CDV_MAX code points, the
  // extra slot is for the terminator
  const long blen = size * UCD_CDV_MAX + 1;
  t_quad* result = new t_quad[blen];
  for (long k = 0; k < blen; k++) result[k] = nilq;
  // decompose the source one code point at a time
  long wpos = 0;
  for (long i = 0; i < size; i++) {
    const t_quad code = s[i];
    // a nil in the source ends the string early
    if (code == nilq) {
      result[wpos++] = nilq;
      break;
    }
    // decompose the code point in a local array
    t_quad map[UCD_CDV_MAX];
    if (c_ucdnfd (map, code) == false) {
      delete [] result;
      return c_ucdnil ();
    }
    // append the decomposition up to its first nil
    for (long j = 0; (j < UCD_CDV_MAX) && (map[j] != nilq); j++) {
      result[wpos++] = map[j];
    }
  }
  // put the whole result in canonical order
  c_ucdcof (result, blen);
  return result;
}
// put a character array in a canonical order form
// the array is reordered in place: inside each run of code points with a
// non zero ccc, a code point is moved in front of the first code point of
// the run that has a strictly greater ccc
// @param buf  the code point array, processing stops at the first nil
//             code point found from index 1 onward
// @param size the maximum number of elements to look at
void c_ucdcof (t_quad* buf, const long size) {
// check for 0 order
if ((buf == nilp) || (size == 0)) return;
// order in place - the scan starts at index 1 since the first element
// has nothing before it to be compared with
for (long i = 1; i < size; i++) {
// get the code point and exit if null
t_quad code = buf[i];
if (code == nilq) break;
// get the code point ccc - if the ccc is 0 continue
long ccci = ucd_find_ccc (code);
if (ccci == 0) continue;
// find the initial position for swapping - the scan goes backward from
// i and stops at index 0 or at the first position with a ccc of 0, so
// pos ends as the start of the non zero ccc run that contains i
long pos = i;
for (long j = pos; j >= 0; j--) {
long cccj = ucd_find_ccc (buf[j]);
if (cccj == 0) break;
pos = j;
}
// loop from position and eventually swap if a lower condition is found
// as a matter of fact we do not swap but rather rotate from left to right
for (long j = pos; j < i; j++) {
long cccj = ucd_find_ccc (buf[j]);
if (ccci < cccj) {
// shift buf[j..i-1] one slot to the right and insert the code at j
for (long k = i; k > j; k--) buf[k] = buf[k-1];
buf[j] = code;
// restart the outer scan right after the insertion point
i = j;
break;
}
}
}
}
// convert a unicode character to lower case
// @param dst  the destination array of UCD_LCM_MAX code points
// @param code the code point to convert
// @return the number of code points written in dst (at least 1)
long c_ucdtol (t_quad dst[UCD_LCM_MAX], const t_quad code) {
  // get the ucd record and map the character to itself if it does not exist
  const ucd_s* ucd = c_getucd (code);
  if (ucd == nilp) {
    dst[0] = code;
    return 1;
  }
  // copy the lower map up to its first nil
  long result = 0;
  for (long i = 0; i < UCD_LCM_MAX; i++) {
    t_quad c = ucd->d_lmap[i];
    if (c == nilq) break;
    dst[i] = c;
    result++;
  }
  // if the map is empty just map the existing character
  if (result == 0) dst[result++] = code;
  // report the real length so that a multi code point mapping is not cut
  return result;
}
// convert a unicode character to upper case
// @param dst  the destination array of UCD_UCM_MAX code points
// @param code the code point to convert
// @return the number of code points written in dst (at least 1)
long c_ucdtou (t_quad dst[UCD_UCM_MAX], const t_quad code) {
  // get the ucd record and map the character to itself if it does not exist
  const ucd_s* ucd = c_getucd (code);
  if (ucd == nilp) {
    dst[0] = code;
    return 1;
  }
  // copy the upper map up to its first nil
  long result = 0;
  for (long i = 0; i < UCD_UCM_MAX; i++) {
    t_quad c = ucd->d_umap[i];
    if (c == nilq) break;
    dst[i] = c;
    result++;
  }
  // if the map is empty just map the existing character
  if (result == 0) dst[result++] = code;
  // report the real length so that a multi code point mapping is not cut
  return result;
}
// return true if the code point is not combining
bool c_ucdncc (const t_quad code) {
// get the ucd record and do nothing if it does not exist
const ucd_s* ucd = c_getucd (code);
if (ucd == nilp) return false;
// check for ccc 0
return (ucd->d_pccc == 0);
}
};
// syntax highlighted by Code2HTML, v. 0.9.1