/* * Copyright (C) 2004, 2005 Jean-Yves Lefort * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Jean-Yves Lefort nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include #include #include #include #include #include #include #include "translate.h" #include "translate-sgml-entities-private.h" static gunichar translate_sgml_ref_get_unichar (const char *ref); /** * translate_ascii_strcase_equal: * @s1: a nul-terminated string. * @s2: a nul-terminated string. * * Compares two strings, ignoring the case of ASCII characters of both * strings, and returns %TRUE if they are equal. 
It can be passed to * g_hash_table_new() as the @key_equal_func parameter, when using * strings as case-insensitive keys in a #GHashTable. * * Return value: %TRUE if the two strings match. **/ gboolean translate_ascii_strcase_equal (gconstpointer s1, gconstpointer s2) { return g_ascii_strcasecmp(s1, s2) == 0; } /** * translate_ascii_strcase_hash: * @key: a string key. * * Converts a string to a hash value, ignoring the case of ASCII * characters of the string. It can be passed to g_hash_table_new() as * the @hash_func parameter, when using strings as case-insensitive * keys in a #GHashTable. * * Return value: a hash value corresponding to the key. **/ unsigned int translate_ascii_strcase_hash (gconstpointer key) { const char *p = key; unsigned int h = g_ascii_tolower(*p); if (h) for (p++; *p; p++) h = (h << 5) - h + g_ascii_tolower(*p); return h; } /** * translate_ascii_strcasestr: * @big: a nul-terminated string, which may not be encoded in UTF-8. * @little: the nul-terminated string to search for, which may not be * encoded in UTF-8. * * Locates the first occurrence of @little in @big, ignoring the case * of ASCII characters of both strings. * * Return value: if @little is an empty string, @big is returned; if * @little occurs nowhere in @big, %NULL is returned; otherwise a * pointer to the first character of the first occurrence of @little * in @big is returned. **/ char * translate_ascii_strcasestr (const char *big, const char *little) { g_return_val_if_fail(big != NULL, NULL); g_return_val_if_fail(little != NULL, NULL); return translate_ascii_strcasestr_len(big, -1, little); } /** * translate_ascii_strcasestr_len: * @big: a nul-terminated string, which may not be encoded in UTF-8. * @big_len: length of @big in bytes, or -1 if @big is nul-terminated. * @little: the nul-terminated string to search for, which may not be * encoded in UTF-8. 
* * Locates the first occurrence of @little in @big, ignoring the case * of ASCII characters of both strings, and limiting the length of the * search to @big_len. * * Return value: if @little is an empty string, @big is returned; if * @little occurs nowhere in @big, %NULL is returned; otherwise a * pointer to the first character of the first occurrence of @little * in @big is returned. **/ char * translate_ascii_strcasestr_len (const char *big, unsigned int big_len, const char *little) { char *lower_big; char *lower_little; char *s; g_return_val_if_fail(big != NULL, NULL); g_return_val_if_fail(little != NULL, NULL); lower_big = g_ascii_strdown(big, (int) big_len); lower_little = g_ascii_strdown(little, -1); s = strstr(lower_big, lower_little); if (s) s = (char *) big + (s - lower_big); g_free(lower_big); g_free(lower_little); return s; } /** * translate_ascii_strcasecoll: * @s1: a nul-terminated string, which may not be encoded in UTF-8. * @s2: a nul-terminated string, which may not be encoded in UTF-8. * * Compares two strings for ordering using the linguistically correct * rules for the current locale, ignoring the case of ASCII characters * of both strings. * * Return value: an integer greater than, equal to, or less than 0, * according as @s1 is greater than, equal to, or less than @s2. **/ int translate_ascii_strcasecoll (const char *s1, const char *s2) { char *lower_s1; char *lower_s2; int coll; lower_s1 = g_ascii_strdown(s1, -1); lower_s2 = g_ascii_strdown(s2, -1); coll = strcoll(lower_s1, lower_s2); g_free(lower_s1); g_free(lower_s2); return coll; } /** * translate_utf8_strcasecoll: * @s1: a nul-terminated string. * @s2: a nul-terminated string. * * Compares two UTF-8 strings for ordering using the linguistically * correct rules for the current locale, ignoring the case of both * strings. * * Return value: an integer greater than, equal to, or less than 0, * according as @s1 is greater than, equal to, or less than @s2. 
**/ int translate_utf8_strcasecoll (const char *s1, const char *s2) { char *folded_s1; char *folded_s2; int coll; g_return_val_if_fail(s1 != NULL, 0); g_return_val_if_fail(s2 != NULL, 0); folded_s1 = g_utf8_casefold(s1, -1); folded_s2 = g_utf8_casefold(s2, -1); coll = g_utf8_collate(folded_s1, folded_s2); g_free(folded_s1); g_free(folded_s2); return coll; } /** * translate_utf8_strcmp: * @s1: a nul-terminated string. * @s2: a nul-terminated string. * * Compares two UTF-8 strings for ordering. * * Return value: an integer greater than, equal to, or less than 0, * according as @s1 is greater than, equal to, or less than @s2. **/ int translate_utf8_strcmp (const char *s1, const char *s2) { char *normalized_s1; char *normalized_s2; int cmp; g_return_val_if_fail(s1 != NULL, 0); g_return_val_if_fail(s2 != NULL, 0); normalized_s1 = g_utf8_normalize(s1, -1, G_NORMALIZE_ALL); normalized_s2 = g_utf8_normalize(s2, -1, G_NORMALIZE_ALL); cmp = strcmp(normalized_s1, normalized_s2); g_free(normalized_s1); g_free(normalized_s2); return cmp; } /** * translate_utf8_strcasecmp: * @s1: a nul-terminated string. * @s2: a nul-terminated string. * * Compares two UTF-8 strings for ordering, ignoring the case of both * strings. * * Return value: an integer greater than, equal to, or less than 0, * according as @s1 is greater than, equal to, or less than @s2. 
**/ int translate_utf8_strcasecmp (const char *s1, const char *s2) { char *normalized_s1; char *normalized_s2; char *case_normalized_s1; char *case_normalized_s2; int cmp; g_return_val_if_fail(s1 != NULL, 0); g_return_val_if_fail(s2 != NULL, 0); normalized_s1 = g_utf8_normalize(s1, -1, G_NORMALIZE_ALL); normalized_s2 = g_utf8_normalize(s2, -1, G_NORMALIZE_ALL); case_normalized_s1 = g_utf8_casefold(normalized_s1, -1); case_normalized_s2 = g_utf8_casefold(normalized_s2, -1); cmp = strcmp(case_normalized_s1, case_normalized_s2); g_free(normalized_s1); g_free(normalized_s2); g_free(case_normalized_s1); g_free(case_normalized_s2); return cmp; } /** * translate_time: * * Returns the current time, issuing a warning if an error occurs. * * Return value: the number of seconds since 0 hours, 0 minutes, 0 * seconds, January 1, 1970, Coordinated Universal Time, or 0 if an * error has occurred. **/ time_t translate_time (void) { time_t now; now = time(NULL); if (now < 0) { g_warning(_("cannot get current time: %s"), g_strerror(errno)); now = 0; } return now; } static gunichar translate_sgml_ref_get_unichar (const char *ref) { g_return_val_if_fail(ref != NULL, 0); if (*ref == '#') { /* numeric reference */ const char *nptr; int base; if (*(ref + 1) == 'x' || *(ref + 1) == 'X') { /* hexadecimal number */ nptr = ref + 2; base = 16; } else { /* decimal number */ nptr = ref + 1; base = 10; } if (*nptr) { char *end; unsigned long code; code = strtoul(nptr, &end, base); if (*end == 0) /* could convert */ return code; } } else { /* entity reference */ int i; for (i = 0; i < G_N_ELEMENTS(entities); i++) if (! strcmp(ref, entities[i].name)) return entities[i].character; } return 0; /* invalid reference */ } /** * translate_sgml_ref_expand: * @str: a nul-terminated string. * * Parses @str, expanding its SGML character references and XHTML * character entities into their Unicode character value. * * Numerical SGML character references as well as XHTML entities are * supported. 
Unsupported entities will be inserted verbatim into the
 * result.
 *
 * Return value: the expansion of str. The returned string should be
 * freed when no longer needed.
 **/
char *
translate_sgml_ref_expand (const char *str)
{
  GString *unescaped;
  const char *start;

  g_return_val_if_fail(str != NULL, NULL);

  unescaped = g_string_new(NULL);

  while ((start = strchr(str, '&')))
    {
      const char *end;
      char *ref;
      gunichar c;

      /* the reference stops at ';', at whitespace or at the next '&' */
      end = strpbrk(start + 1, "; &\t\n");
      if (! end)
        end = strchr(start + 1, 0);     /* runs to the end of the string */

      /*
       * Copy the reference body to the heap: its length is dictated
       * by the input, so a stack array could overflow the stack.
       */
      ref = g_strndup(start + 1, end - start - 1);
      c = translate_sgml_ref_get_unichar(ref);
      g_free(ref);

      if (*end == ';')          /* semicolon is part of entity, skip it */
        end++;

      /* flush the literal text preceding the reference */
      g_string_append_len(unescaped, str, start - str);
      if (c)
        g_string_append_unichar(unescaped, c);
      else                      /* invalid reference, append it raw */
        g_string_append_len(unescaped, start, end - start);

      str = end;
    }

  /* whatever follows the last reference */
  g_string_append(unescaped, str);

  return g_string_free(unescaped, FALSE);
}

/**
 * translate_utf8_strpbrk:
 * @p: a nul-terminated string.
 * @len: length of @p in bytes, or -1 if @p is nul-terminated.
 * @charset: the set of characters to search for.
 *
 * Locates in @p the first occurrence of any character in the string
 * @charset.
 *
 * Return value: the first occurrence of any character of @charset in
 * @p, or %NULL if no characters from @charset occur anywhere in @p.
 **/
char *
translate_utf8_strpbrk (const char *p, gssize len, const char *charset)
{
  char *first = NULL;

  g_return_val_if_fail(p != NULL, NULL);
  g_return_val_if_fail(charset != NULL, NULL);

  /*
   * Every character of @charset must be tried: the order of the
   * characters in @charset says nothing about which one appears
   * first in @p, so keep the match with the lowest address.
   */
  for (; *charset; charset = g_utf8_next_char(charset))
    {
      char *match;

      match = g_utf8_strchr(p, len, g_utf8_get_char(charset));
      if (match && (first == NULL || match < first))
        first = match;
    }

  return first;
}

/**
 * translate_utf8_strrpbrk:
 * @p: a nul-terminated string.
 * @len: length of @p in bytes, or -1 if @p is nul-terminated.
 * @charset: the set of characters to search for.
 *
 * Locates in @p the last occurrence of any character in the string
 * @charset.
*
 * Return value: the last occurrence of any character of @charset in
 * @p, or %NULL if no characters from @charset occur anywhere in @p.
 **/
char *
translate_utf8_strrpbrk (const char *p, gssize len, const char *charset)
{
  char *last = NULL;

  g_return_val_if_fail(p != NULL, NULL);
  g_return_val_if_fail(charset != NULL, NULL);

  /*
   * Every character of @charset must be tried: the order of the
   * characters in @charset says nothing about which one appears
   * last in @p, so keep the match with the highest address.
   */
  for (; *charset; charset = g_utf8_next_char(charset))
    {
      char *match;

      match = g_utf8_strrchr(p, len, g_utf8_get_char(charset));
      if (match && (last == NULL || match > last))
        last = match;
    }

  return last;
}