ports//mail/gmime/work/gmime-1.0.8/unicode.c

/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 *  Authors: Jeffrey Stedfast <fejj@ximian.com>
 *
 *  Copyright 2002 Ximian, Inc. (www.ximian.com)
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "unicode.h"

const char unicode_skip[256] = {
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};

#define UTF8_COMPUTE(ch, mask, len)					      \
  if (ch < 128) 							      \
    {									      \
      len = 1;								      \
      mask = 0x7f;							      \
    }									      \
  else if ((ch & 0xe0) == 0xc0) 					      \
    {									      \
      len = 2;								      \
      mask = 0x1f;							      \
    }									      \
  else if ((ch & 0xf0) == 0xe0) 					      \
    {									      \
      len = 3;								      \
      mask = 0x0f;							      \
    }									      \
  else if ((ch & 0xf8) == 0xf0) 	 				      \
    {									      \
      len = 4;								      \
      mask = 0x07;							      \
    }									      \
  else if ((ch & 0xfc) == 0xf8) 					      \
    {									      \
      len = 5;								      \
      mask = 0x03;							      \
    }									      \
  else if ((ch & 0xfe) == 0xfc) 					      \
    {									      \
      len = 6;								      \
      mask = 0x01;							      \
    }									      \
  else									      \
    len = -1;

#define UTF8_GET(result, chars, count, mask, len)			      \
  (result) = (chars)[0] & (mask);					      \
  for ((count) = 1; (count) < (len); ++(count))				      \
    {									      \
      if (((chars)[(count)] & 0xc0) != 0x80)				      \
	{								      \
	  (result) = -1;						      \
	  break;							      \
	}								      \
      (result) <<= 6;							      \
      (result) |= ((chars)[(count)] & 0x3f);				      \
    }

#define UNICODE_VALID(ch)                   \
    ((ch) < 0x110000 &&                     \
     ((ch) < 0xD800 || (ch) >= 0xE000) &&   \
     (ch) != 0xFFFE && (ch) != 0xFFFF)



#define unicode_next_char(p) (char *)((p) + unicode_skip[*(unsigned char *)(p)])

unichar
unicode_get_char (const char  *p)
{
	unsigned char c = (unsigned char) *p;
	int i, mask = 0, len;
	unichar result;
	
	UTF8_COMPUTE (c, mask, len);
	if (len == -1)
		return (unichar) -1;
	UTF8_GET (result, p, i, mask, len);
	
	return result;
}

gboolean
unichar_validate (unichar ch)
{
	return UNICODE_VALID (ch);
}
syntax highlighted by Code2HTML, v. 0.9.1