/* $Header: /home/agc/src/libutf-2.10/RCS/urelang.c,v 1.9 1997/10/20 12:37:33 agc Exp $ */ /* * Copyright © 1996-1997 Alistair G. Crooks. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Alistair G. Crooks. * 4. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #include #include #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_STDARG_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_LIMITS_H #include #endif #include "utf.h" #include "ure.h" /*************************************************************************/ /* basic unicode routines */ /* define EBCDIC_CHAR_SET if you use EBCDIC - ASCII is the default */ #ifdef EBCDIC_CHAR_SET static unsigned char cv[256] = { /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* c */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* f */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0 }; #else static unsigned char cv[256] = { /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 4 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #endif /* !EBCDIC_CHAR_SET */ /* convert `cc' chars of `s' into a `base' number */ int AsciiToNumber(char *s, int cc, int base) { long n; for (n = 0 ; *s && cc-- > 0 ; s++) { n = (n * base) + cv[(unsigned char)(*s)]; } return n; } /* the character constant in cp is converted to an integer */ int CharToNum(char *cp) { char *s; if (*cp == '\\') { switch(*++cp) { case '\a': return '\a'; case '\b': return '\b'; case '\f': return '\f'; case '\r': return '\r'; case '\n': return '\n'; case '\t': return '\t'; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': s = strchr(cp, '\''); return AsciiToNumber(cp, s - cp, 8); case 'x': s = strchr(++cp, '\''); return AsciiToNumber(cp, s - cp, 16); } } return *cp; } /* return the decimal value of `ch' */ int CharToDec(unsigned char ch) { return (int) cv[ch]; } /*************************************************************************/ /* language specific functions */ /* a dumb-string structure - no ref counting, just the gubbins */ typedef struct dumbstr { int ds_c; /* length of string */ char ds_v[1]; /* the string itself */ } ds_t; /* this struct describes a language's collation sequences */ typedef struct langstruct { ds_t *l_lang; /* language name */ ds_t *l_lower; /* lower case */ ds_t *l_upper; /* upper case */ ds_t *l_digits; /* digits */ ds_t *l_imports; /* imported runes */ ds_t *l_values; /* their values */ } Lang_t; static Lang_t *language; /* current language being used */ #ifndef HAVE_MEMMOVE /* overlapping-safe memory move function */ static char * memmove(char *dst, char *src, int nbytes) { char *ret; if ((ret = dst) >= src && dst <= &src[nbytes]) { for (dst += nbytes, src += nbytes ; nbytes-- > 0 ; ) { *--dst = *--src; } } else { while (nbytes-- > 0) { *dst++ = *src++; } } return ret; } #endif /* make a dumb string from `n' chars of `s' */ static ds_t * ds_save(char *s, int n) { ds_t *dp; if ((dp = (ds_t *) malloc(sizeof(ds_t) + (sizeof(char) * (n + 1)))) == (ds_t *) NULL) { (void) fprintf(stderr, "Memory problems in strnsave\n"); exit(1); } (void) memmove(dp->ds_v, s, dp->ds_c = n); dp->ds_v[n] = 0; return dp; } /* free storage allocated to dumb string */ void ds_free(ds_t *dp) { free(dp); } /* set the language sequence to that given */ static int LangSetSequence(char *lang, char *lower, char *upper, char *digits, char *imports, char *values) { if (language != (Lang_t *) NULL) { ds_free(language->l_lang); ds_free(language->l_lower); ds_free(language->l_upper); ds_free(language->l_digits); ds_free(language->l_imports); ds_free(language->l_values); } if ((language = (Lang_t *) malloc(sizeof(Lang_t))) == (Lang_t *) NULL) { return 0; } language->l_lang = ds_save(lang, utfbytes(lang)); language->l_lower = ds_save(lower, utfbytes(lower)); language->l_upper = ds_save(upper, utfbytes(upper)); language->l_digits = ds_save(digits, utfbytes(digits)); language->l_imports = ds_save(imports, utfbytes(imports)); language->l_values = ds_save(values, utfbytes(values)); return 1; } #ifndef LANG_COLL_FILE #define LANG_COLL_FILE "langcoll.utf" #endif #ifndef ETCDIR #define ETCDIR "/usr/local/etc" #endif #ifndef DEFLANG #define DEFLANG "English" #endif /* open the language collation sequence file, and return its handle */ /* the order for searching is described in the code below */ static FILE * opencollfile(void) { FILE *fp; char buf[BUFSIZ]; char *cp; /* then look for file in cwd */ if ((fp = fopen(LANG_COLL_FILE, "r")) != (FILE *) NULL) { return fp; } /* look for LANG_COLL_FILE in $HOME */ if ((cp = getenv("HOME")) != (char *) NULL) { if ((cp = utfrune(cp, '=')) != (char *) NULL) { (void) utf_snprintf(buf, sizeof(buf), "%s/%s", cp + 1, LANG_COLL_FILE); if ((fp = fopen(buf, "r")) != (FILE *) NULL) { return fp; } } } /* then look for system wide file */ (void) utf_snprintf(buf, sizeof(buf), "%s/%s", ETCDIR, LANG_COLL_FILE); if ((fp = fopen(buf, "r")) != (FILE *) NULL) { return fp; } /* give up */ return (FILE *) NULL; } /* return the contents of the language collation sequence file */ static char * getcollfile(void) { struct stat s; FILE *fp; char *cp; int cc; if ((fp = opencollfile()) == (FILE *) NULL) { return (char *) NULL; } (void) fstat(fileno(fp), &s); if ((cp = (char *) malloc((size_t)s.st_size + 1)) == (char *) NULL) { return (char *) NULL; } cc = read(fileno(fp), cp, (size_t) s.st_size); (void) fclose(fp); if (cc != s.st_size) { free(cp); return (char *) NULL; } cp[cc] = 0; return cp; } /* initialise the language collation sequences */ int urecollseq(char *collseq) { Rune r; char *imports; char *values; char *digits; char *lower; char *upper; char *buf; char *seq; char *nl; char *cp; int seplen; int rc; if (language != (Lang_t *) NULL && collseq == (char *) NULL) { return 1; } if (collseq == (char *) NULL && (collseq = getenv("UTFCOLLSEQ")) == (char *) NULL) { collseq = DEFLANG; } if (language != (Lang_t *) NULL && utfcmp(language->l_lang->ds_v, collseq) == 0) { return 1; } seplen = utfbytes("\t"); if (collseq != (char *) NULL && (cp = buf = getcollfile()) != (char *) NULL) { for (;;) { if ((nl = utfrune(cp, '\n')) == (char *) NULL) { break; } cp += utfspan(cp, " \t", &rc); (void) chartorune(&r, cp); if (r == '#' || r == '\n') { /* comment */ cp = nl + utfbytes("\n"); continue; } /* got a seq name */ cp = utfrune(seq = cp, '\t'); *cp = 0; if (utfcmp(collseq, seq) == 0) { lower = cp + seplen; cp = utfrune(lower, '\t'); *cp = 0; upper = cp + seplen; cp = utfrune(upper, '\t'); *cp = 0; digits = cp + seplen; if ((cp = utfrune(digits, '\t')) == (char *) NULL) { imports = values = ""; } else { *cp = 0; imports = cp + seplen; cp = utfrune(imports, '\t'); *cp = 0; values = cp + seplen; } *nl = 0; LangSetSequence(seq, lower, upper, digits, imports, values); free(buf); return 1; } cp = nl + utfbytes("\n"); } free(buf); } (void) fprintf(stderr, "%s not found - using %s\n", collseq, DEFLANG); LangSetSequence("English", "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "0123456789", "", ""); return 0; } /* return the number of UTF chars from the start of UTF string */ int runesubscript(char *s, Rune r) { Rune ch; char *cp; int count; int len; int i; cp = s; count = 1; i = 0; for (;;) { len = chartorune(&ch, cp); if (ch == r) { return i; } if (ch == 0) { return -1; } cp += len; if (ch == '[') { count = 0; } if (ch == ']') { count = 1; } if (count) { i++; } } return -1; } /* return the `n'th rune in s */ Rune runeutfget(char *s, int n) { Rune r; for (;;) { s += chartorune(&r, s); if (n-- <= 0) { break; } } return r; } enum { LowercaseRune = 1, UppercaseRune, NocaseRune }; /* return the ordinal number of the rune `r', in current language */ int runeord(Rune r, int *runecase) { int i; if (r == '[' || r == ']') { return -1; } if ((i = runesubscript(language->l_imports->ds_v, r)) >= 0) { r = runeutfget(language->l_values->ds_v, i); } if ((i = runesubscript(language->l_digits->ds_v, r)) >= 0) { return i; } if ((i = runesubscript(language->l_upper->ds_v, r)) >= 0) { *runecase = UppercaseRune; return i; } if ((i = runesubscript(language->l_lower->ds_v, r)) >= 0) { *runecase = LowercaseRune; return i; } *runecase = NocaseRune; return r; } /* return 1 if the two strings are the same in `lang' */ /* ignore the case of the letters if icase is non-zero */ int utflangcmp(char *s1, char *s2, char *lang, int icase) { Rune r1, r2; int rc1, rc2; int i1, i2; urecollseq(lang); #if 0 do { s1 += chartorune(&r1, s1); s2 += chartorune(&r2, s2); i1 = runeord(r1, icase); i2 = runeord(r2, icase); } while (i1 == i2 && r1 != 0 && r2 != 0); return i2 - i1; #else for (;;) { s1 += chartorune(&r1, s1); s2 += chartorune(&r2, s2); i1 = runeord(r1, &rc1); i2 = runeord(r2, &rc2); if (i1 != i2 || r1 == 0 || r2 == 0) { return i2 - i1; } if (!icase && rc1 != rc2) { return rc2 - rc1; } } #endif } /* structure to describe a range of runes */ typedef struct rangestruct { Rune r_lower; /* lower bound of range */ Rune r_upper; /* upper bound of range */ } Range_t; static Range_t unicode_digits[15] = { { 0x0030, 0x0039 }, /* ISO-Latin-1, and ASCII, digits */ { 0x0660, 0x0669 }, /* Arabic-Indic digits */ { 0x06f0, 0x06f9 }, /* Eastern Arabic-Indic digits */ { 0x0966, 0x096f }, /* Devenagari digits */ { 0x09e6, 0x09ef }, /* Bengali digits */ { 0x0a66, 0x0a6f }, /* Germukhi digits */ { 0x0ae6, 0x0aef }, /* Gujurati digits */ { 0x0b66, 0x0b6f }, /* Oriya digits */ { 0x0be7, 0x0bef }, /* Tamil digits (only nine, no zero) */ { 0x0c66, 0x0c6f }, /* Telegu digits */ { 0x0ce6, 0x0cef }, /* Kannada digits */ { 0x0d66, 0x0d6f }, /* Malayalam digits */ { 0x0e50, 0x0e59 }, /* Thai digits */ { 0x0ed0, 0x0ed9 }, /* Lao digits */ { 0xff10, 0xff19 } /* Fullwidth digits */ }; static Range_t unicode_letters[13] = { { 0x0041, 0x005a }, /* ISO-Latin-1, ASCII, uppercase */ { 0x0061, 0x007a }, /* ISO-Latin-1, ASCII, lowercase */ { 0x00c0, 0x00d6 }, /* ISO-Latin-1 supplementary letters */ { 0x00d8, 0x00f6 }, /* ISO-Latin-1 supplementary letters */ { 0x00f8, 0x00ff }, /* ISO-Latin-1 supplementary letters */ { 0x0100, 0x1fff }, /* Latin-extended-A... */ { 0x3040, 0x9fff }, /* Hiragana... */ { 0xf900, 0xfdff }, /* CJK compatibility... */ { 0xfe70, 0xfefe }, /* arabic */ { 0xff10, 0xff19 }, /* Fullwidth digits */ { 0xff21, 0xff3a }, /* Fullwidth Latin uppercase */ { 0xff41, 0xff5a }, /* Fullwidth Latin lowercase */ { 0xff66, 0xffdc } /* Halfwidth katakana/Hangul */ }; /* returns non-zero if ch is a Unicode digit */ int UNICODE_isdigit(Rune ch) { Range_t *rp; int i; for (i = 0, rp = unicode_digits ; i < 15 ; i++, rp++) { if (ch >= rp->r_lower && ch <= rp->r_upper) { return 1; } if (ch < rp->r_lower) { break; } } return 0; } /* returns non-zero if ch is a Unicode letter */ int UNICODE_isletter(Rune ch) { Range_t *rp; int i; for (i = 0, rp = unicode_letters ; i < 13 ; i++, rp++) { if (ch >= rp->r_lower && ch <= rp->r_upper) { /* some digits overlap one of the letters ranges */ return !UNICODE_isdigit(ch); } if (ch < rp->r_lower) { break; } } return 0; } /* this isn't quick, but we're after functionality here */ int UNICODE_IsIdent(Rune ch) { return (UNICODE_isdigit(ch) || UNICODE_isletter(ch) || ch == '_'); } /* return ch as lower case (if it's upper case), or as ch otherwise */ Rune UNICODE_tolower(Rune ch) { Rune r; char *cp; if (language == (Lang_t *) NULL) { urecollseq(NULL); } if (ch == '[' || ch == ']' || (cp = utfrune(language->l_upper->ds_v, ch)) == (char *) NULL || (cp - language->l_upper->ds_v) > language->l_upper->ds_c) { return ch; } (void) chartorune(&r, &language->l_lower->ds_v[cp - language->l_upper->ds_v]); return r; } /* return ch as upper case (if it's lower case), or as ch otherwise */ Rune UNICODE_toupper(Rune ch) { Rune r; char *cp; if (language == (Lang_t *) NULL) { urecollseq(NULL); } if (ch == '[' || ch == ']' || (cp = utfrune(language->l_lower->ds_v, ch)) == (char *) NULL || (cp - language->l_lower->ds_v) > language->l_lower->ds_c) { return ch; } (void) chartorune(&r, &language->l_upper->ds_v[cp - language->l_lower->ds_v]); return r; } /* is upper case */ int UNICODE_isupper(Rune ch) { if (language == (Lang_t *) NULL) { urecollseq(NULL); } return ch != '[' && ch != ']' && utfrune(language->l_upper->ds_v, ch) != (char *) NULL; } /* is lower case */ int UNICODE_islower(Rune ch) { if (language == (Lang_t *) NULL) { urecollseq(NULL); } return ch != '[' && ch != ']' && utfrune(language->l_lower->ds_v, ch) != (char *) NULL; } /* returns 1 if c is a letter in the current `alphabet' */ int UNICODE_isalpha(Rune c) { return UNICODE_isupper(c) || UNICODE_islower(c); } /* returns 1 if c is a number in the current `alphabet' */ int UNICODE_isnumber(Rune c) { if (language == (Lang_t *) NULL) { urecollseq(NULL); } return utfrune(language->l_digits->ds_v, c) != (char *) NULL; } /* is a hexadecimal digit */ int UNICODE_isxdigit(Rune c) { if (UNICODE_isnumber(c)) { return 1; } return utfrune("abcdefABCDEF", c) != (char *) NULL; } /* is alphabetic or numeric */ int UNICODE_isalnum(Rune c) { return UNICODE_isdigit(c) || UNICODE_isletter(c); } /* is a space character */ int UNICODE_isspace(Rune c) { switch(c) { case ' ': case '\n': case '\r': case '\t': case '\f': return 1; } return 0; } /* is a blank character */ int UNICODE_isblank(Rune c) { return (c == ' ' || c == '\t'); } /* is a punctuation character */ int UNICODE_ispunct(Rune c) { return UNICODE_isprint(c) && (c != ' ' || !UNICODE_isalnum(c)); } /* is a control character */ int UNICODE_iscntrl(Rune c) { return (c == 0x7f || c < 0x20); } /* is a printable character */ int UNICODE_isprint(Rune c) { return !UNICODE_iscntrl(c); } /* is an ASCII character */ int UNICODE_isascii(Rune c) { return c < 0x7f; } /* is a graphics character */ int UNICODE_isgraph(Rune c) { return UNICODE_isprint(c) && c != ' '; } /* return 1 if ch is >= first && <= last */ int UNICODE_InRange(Rune first, Rune last, Rune ch) { char *alphabet; char *ind; char *cp2; char *cp; if (language == (Lang_t *) NULL) { urecollseq(NULL); } /* determine the case of the first character */ if (ch == '[' || ch == ']') { /* not in any alphabet, and could clash with homo-rune metachars */ return 0; } if ((cp = utfrune(language->l_lower->ds_v, first)) != (char *) NULL) { alphabet = language->l_lower->ds_v; } else if ((cp = utfrune(language->l_upper->ds_v, first)) != (char *) NULL) { alphabet = language->l_upper->ds_v; } else if ((cp = utfrune(language->l_digits->ds_v, first)) != (char *) NULL) { alphabet = language->l_digits->ds_v; } else { return 0; } if ((cp2 = utfrune(alphabet, last)) == (char *) NULL) { /* check first and last are in same alphabet */ return 0; } if ((cp - alphabet) > (cp2 - alphabet) + 1) { /* check first appears before last in alphabet */ return 0; } return (ind = utfrune(alphabet, ch)) != (char *) NULL && ind >= cp && ind <= cp2; } /*************************************************************************/ /* unicode string routines */ /* these routines manipulate arrays of Runes */ /* compare an array of Runes against a UTF-string */ int UNICODE_mixed_strncmp(Rune *s1, char *s2, int n) { Rune r; int i; int c; while (n-- > 0 && *s1) { i = chartorune(&r, s2); s2 += i; if ((c = *s1 - r) != 0) { return c; } } return 0; } int UNICODE_strcasecmp(Rune *s1, Rune *s2) { int c; while (*s1) { if ((c = UNICODE_tolower(*s1) - UNICODE_tolower(*s2)) != 0) { return c; } } return 0; } int UNICODE_strlen(Rune *s) { Rune *cp; for (cp = s ; *s ; s++) { } return s - cp; } Rune * UNICODE_strcat(Rune *s1, Rune *s2) { Rune *cp; for (cp = s1, s1 += UNICODE_strlen(s1) ; (*s1++ = *s2++) != 0 ; ) { } return cp; } Rune * UNICODE_strchr(Rune *s, Rune ch) { for ( ; *s && *s != ch ; s++) { } return (*s == 0) ? (Rune *) NULL : s; } int UNICODE_strcmp(Rune *s1, Rune *s2) { int c; while (*s1) { if ((c = *s1 - *s2) != 0) { return c; } } return 0; } Rune * UNICODE_strcpy(Rune *to, Rune *from) { Rune *ret; for (ret = to; (*to++ = *from++) != 0 ; ) { } return ret; } int UNICODE_strcspn(Rune *s1, Rune *s2) { int c; for (c = 0 ; *s1 ; s1++, c++) { if (UNICODE_strchr(s2, *s1) != (Rune *) NULL) { break; } } return c; } Rune * UNICODE_strdup(Rune *s) { Rune *cp; int n; n = UNICODE_strlen(s); cp = (Rune *) calloc(sizeof(Rune), n + 1); if (cp != (Rune *) NULL) { (void) memcpy(cp, s, n * sizeof(Rune)); } cp[n] = 0; return cp; } int UNICODE_strncasecmp(Rune *s1, Rune *s2, int n) { int c; for ( ; n-- > 0 && *s1 ; s1++, s2++) { if ((c = UNICODE_tolower(*s1) - UNICODE_tolower(*s2)) != 0) { return c; } } return 0; } Rune * UNICODE_strncat(Rune *s1, Rune *s2, int n) { Rune *cp; for (cp = s1, s1 += UNICODE_strlen(s1) ; n-- > 0 && (*s1++ = *s2++) != 0 ; ) { } return cp; } int UNICODE_strncmp(Rune *s1, Rune *s2, int n) { int c; for ( ; n-- > 0 && *s1 ; s1++, s2++) { if ((c = *s1 - *s2) != 0) { return c; } } return 0; } Rune * UNICODE_strncpy(Rune *to, Rune *from, int n) { Rune *ret; for (ret = to; n-- > 0 && (*to++ = *from++) != 0 ; ) { } return ret; } Rune * UNICODE_strpbrk(Rune *s1, Rune *s2) { for ( ; *s1 ; s1++) { if (UNICODE_strchr(s2, *s1) != (Rune *) NULL) { return s1; } } return (Rune *) NULL; } Rune * UNICODE_strrchr(Rune *s, Rune ch) { Rune *cp; for (cp = &s[UNICODE_strlen(s) - 1] ; cp >= s && *s != ch ; --s) { } return (s < cp) ? (Rune *) NULL : s; } int UNICODE_strspn(Rune *s1, Rune *s2) { int c; for (c = 0 ; *s1 ; s1++, c++) { if (UNICODE_strchr(s2, *s1) == (Rune *) NULL) { break; } } return c; } Rune * UNICODE_strstr(Rune *s, Rune *find) { Rune first; Rune *cp; int n; for (first = *find, n = UNICODE_strlen(find), cp = s ; (cp = UNICODE_strchr(cp, first)) != (Rune *) NULL ; ) { if (UNICODE_strncmp(cp + 1, find + 1, n - 1) == 0) { return cp; } } return (Rune *) NULL; }