/*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 2005 The R Development Core Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* A copy of the GNU General Public License is available via WWW at
* http://www.gnu.org/copyleft/gpl.html. You can also obtain it by
* writing to the Free Software Foundation, Inc., 51 Franklin Street
* Fifth Floor, Boston, MA 02110-1301 USA.
*/
/* This file was contributed by Ei-ji Nakama.
* See also the comments in R_ext/rlocale.h.
*
* It provides replacements for the wctype functions on
* Windows (where they are not correct in e.g. Japanese)
* AIX (missing)
* MacOS X in CJK (where these just call the ctype functions)
*
* It also provides wc[s]width, where widths of CJK fonts are often
* wrong in vendor-supplied versions and in Marcus Kuhn's version
* used for Windows in R 2.[12].x.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_GLIBC2
# define _GNU_SOURCE /* iswblank is a GNU extension */
#endif
#include <string.h>
#include <stdlib.h>
#ifndef SUPPORT_MBCS
void Ri18n_wcswidth(void)
{
return;
}
void Ri18n_wcwidth(void)
{
return;
}
void Ri18n_wctype(void)
{
return;
}
void Ri18n_iswctype(void)
{
return;
}
#else /* SUPPORT_MBCS */
#include "rlocale_data.h"
#include <wctype.h>
#include <wchar.h>
#include <ctype.h>
#include <locale.h>
#include <limits.h>
#include <R_ext/Riconv.h>
static int wcwidthsearch(int wint, const struct interval_wcwidth *table,
int max, int locale)
{
int min = 0;
int mid;
max--;
if (wint < table[0].first || wint > table[max].last) return 0;
while (max >= min) {
mid = (min + max) / 2;
if (wint > table[mid].last)
min = mid + 1;
else if (wint < table[mid].first)
max = mid - 1;
else{
return(table[mid].mb[locale]);
}
}
return -1;
}
typedef struct {
char *name;
int locale;
} cjk_locale_name_t;
static cjk_locale_name_t cjk_locale_name[] = {
{"CHINESE(SINGAPORE)_SIGNAPORE", MB_zh_SG},
{"CHINESE_SIGNAPORE", MB_zh_SG},
{"CHINESE(PRC)_PEOPLE'S REPUBLIC OF CHINA", MB_zh_CN},
{"CHINESE_PEOPLE'S REPUBLIC OF CHINA", MB_zh_CN},
{"CHINESE_MACAU S.A.R.", MB_zh_HK},
{"CHINESE(PRC)_HONG KONG", MB_zh_HK},
{"CHINESE_HONG KONG S.A.R.", MB_zh_HK},
{"CHINESE(TAIWAN)_TAIWAN", MB_zh_TW},
{"CHINESE_TAIWAN", MB_zh_TW},
{"CHINESE-S", MB_zh_CN},
{"CHINESE-T", MB_zh_TW},
{"JAPANESE_JAPAN", MB_ja_JP},
{"JAPANESE", MB_ja_JP},
{"KOREAN_KOREA", MB_ko_KR},
{"KOREAN", MB_ko_KR},
{"ZH_TW", MB_zh_TW},
{"ZH_CN", MB_zh_CN},
{"ZH_CN.BIG5", MB_zh_TW},
{"ZH_HK", MB_zh_HK},
{"ZH_SG", MB_zh_SG},
{"JA_JP", MB_ja_JP},
{"KO_KR", MB_ko_KR},
{"ZH", MB_zh_CN},
{"JA", MB_ja_JP},
{"KO", MB_ko_KR},
{"", MB_UTF8},
};
int Ri18n_wcwidth(wchar_t c)
{
char lc_str[128];
int i;
static char *lc_cache = "";
static int lc = 0;
if (0 != strcmp(setlocale(LC_CTYPE, NULL), lc_cache)) {
strncpy(lc_str, setlocale(LC_CTYPE, NULL), sizeof(lc_str));
for (i = 0; i < strlen(lc_str) && i < sizeof(lc_str); i++)
lc_str[i] = toupper(lc_str[i]);
for (i = 0; i < (sizeof(cjk_locale_name)/sizeof(cjk_locale_name_t));
i++) {
if (0 == strncmp(cjk_locale_name[i].name, lc_str,
strlen(cjk_locale_name[i].name))) {
lc = cjk_locale_name[i].locale;
break;
}
}
}
return(wcwidthsearch(c, table_wcwidth,
(sizeof(table_wcwidth)/sizeof(struct interval_wcwidth)),
lc));
}
int Ri18n_wcswidth (const wchar_t *s, size_t n)
{
int rs = 0;
while ((n-- > 0) && (*s != L'\0'))
{
int now = Ri18n_wcwidth (*s);
if (now == -1) return -1;
rs += now;
s++;
}
return rs;
}
#if defined(Win32) || defined(_AIX) || defined(__APPLE_CC__)
static int wcsearch(int wint, const struct interval *table, int max)
{
int min = 0;
int mid;
max--;
if (wint < table[0].first || wint > table[max].last)
return 0;
while (max >= min) {
mid = (min + max) / 2;
if (wint > table[mid].last)
min = mid + 1;
else if (wint < table[mid].first)
max = mid - 1;
else
return 1;
}
return 0;
}
#endif
/* what was this for ?
#ifdef Win32
# include <windows.h>
# include <winnls.h>
#else
# include <langinfo.h>
#endif
*/
/*********************************************************************
* There is MacOS with a CSI(CodeSet Independence) system.
* (wchar_t != unicode)
* However, it is Unicode at the time of UTF-8.
********************************************************************/
#if defined(HAVE_ICONV) && defined(ICONV_LATIN1) && defined(__APPLE_CC__)
/* allow for MacIntel platforms */
#ifdef WORDS_BIGENDIAN
static const char UNICODE[] = "UCS-4BE";
#else
static const char UNICODE[] = "UCS-4LE";
#endif
char *locale2charset(const char *locale);
#define ISWFUNC(ISWNAME) static int Ri18n_isw ## ISWNAME (wint_t wc) \
{ \
char mb_buf[MB_LEN_MAX+1]; \
size_t mb_len; \
int ucs4_buf[2]; \
size_t wc_len; \
void *cd; \
char fromcode[128]; \
char *_mb_buf; \
char *_wc_buf; \
size_t rc ; \
\
strncpy(fromcode, locale2charset(NULL), sizeof(fromcode)); \
if(0 == strcmp(fromcode, "UTF-8")) \
return wcsearch(wc,table_w ## ISWNAME , table_w ## ISWNAME ## _count);\
memset(mb_buf, 0, sizeof(mb_buf)); \
memset(ucs4_buf, 0, sizeof(ucs4_buf)); \
wcrtomb( mb_buf, wc, NULL); \
if((void *)(-1) != (cd = Riconv_open((char*)UNICODE, fromcode))) { \
wc_len = sizeof(ucs4_buf); \
_wc_buf = (char *)ucs4_buf; \
mb_len = strlen(mb_buf); \
_mb_buf = (char *)mb_buf; \
rc = Riconv(cd, (char **)&_mb_buf, (size_t *)&mb_len, \
(char **)&_wc_buf, (size_t *)&wc_len); \
Riconv_close(cd); \
wc = ucs4_buf[0]; \
return wcsearch(wc,table_w ## ISWNAME , table_w ## ISWNAME ## _count); \
} \
return(-1); \
}
#endif
/*********************************************************************
* iswalpha etc. does not function correctly for Windows
* iswalpha etc. does not function at all in AIX.
* all locale wchar_t == UNICODE
********************************************************************/
#if defined(Win32) || defined(_AIX)
#define ISWFUNC(ISWNAME) static int Ri18n_isw ## ISWNAME (wint_t wc) \
{ \
return wcsearch(wc,table_w ## ISWNAME , table_w ## ISWNAME ## _count); \
}
#endif
/*********************************************************************
* iswalpha etc. does function normally for Linux
********************************************************************/
#ifndef ISWFUNC
#define ISWFUNC(ISWNAME) static int Ri18n_isw ## ISWNAME (wint_t wc) \
{ \
return isw ## ISWNAME (wc); \
}
/* Solaris 8 is missing iswblank. Its man page is missing iswcntrl,
but the function is there. Windows also does not have iswblank. */
#ifndef HAVE_ISWBLANK
#define iswblank(wc) iswctype(wc, wctype("blank"))
#endif
#endif
/* These are the functions which C99 and POSIX define. However,
not all are used elsewhere in R, but they are used in Ri18n_iswctype. */
ISWFUNC(upper)
ISWFUNC(lower)
ISWFUNC(alpha)
ISWFUNC(digit)
ISWFUNC(xdigit)
ISWFUNC(space)
ISWFUNC(print)
ISWFUNC(graph)
ISWFUNC(blank)
ISWFUNC(cntrl)
ISWFUNC(punct)
/* defined below in terms of digit and alpha
ISWFUNC(alnum)
*/
wctype_t Ri18n_wctype(const char *);
int Ri18n_iswctype(wint_t, wctype_t);
static int Ri18n_iswalnum (wint_t wc)
{
return (Ri18n_iswctype(wc, Ri18n_wctype("digit")) ||
Ri18n_iswctype(wc, Ri18n_wctype("alpha")) );
}
/*
* iswctype
*/
typedef struct {
char * name;
wctype_t wctype;
int(*func)(wint_t);
} Ri18n_wctype_func_l ;
static const Ri18n_wctype_func_l Ri18n_wctype_func[] = {
{"upper", 1<<0, Ri18n_iswupper},
{"lower", 1<<1, Ri18n_iswlower},
{"alpha", 1<<2, Ri18n_iswalpha},
{"digit", 1<<3, Ri18n_iswdigit},
{"xdigit", 1<<4, Ri18n_iswxdigit},
{"space", 1<<5, Ri18n_iswspace},
{"print", 1<<6, Ri18n_iswprint},
{"graph", 1<<7, Ri18n_iswgraph},
{"blank", 1<<8, Ri18n_iswblank},
{"cntrl", 1<<9, Ri18n_iswcntrl},
{"punct", 1<<10, Ri18n_iswpunct},
{"alnum", 1<<11, Ri18n_iswalnum},
{NULL, 0, NULL}
};
wctype_t Ri18n_wctype(const char *name)
{
int i;
for (i = 0 ; Ri18n_wctype_func[i].name != NULL &&
0 != strcmp(Ri18n_wctype_func[i].name, name) ; i++ );
return Ri18n_wctype_func[i].wctype;
}
int Ri18n_iswctype(wint_t wc, wctype_t desc)
{
int i;
for (i = 0 ; Ri18n_wctype_func[i].wctype != 0 &&
Ri18n_wctype_func[i].wctype != desc ; i++ );
return (*Ri18n_wctype_func[i].func)(wc);
}
#endif /* SUPPORT_MBCS */
syntax highlighted by Code2HTML, v. 0.9.1