/* ports//net/diradmin/work/directory_administrator-1.5.1/src/charset.c */

/*
 * Main part of code, written by:
 *
 * Copyright (C) 1999-2001  Håvard Kvålen <havardk@xmms.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 *
 */

#include <config.h>
#include <stdlib.h>
#include <glib.h>
#include <string.h>
#include <errno.h>

#define HAVE_ICONV_OPEN
#ifdef HAVE_ICONV_OPEN
#include <iconv.h>
#endif

#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif

#include "charset.h"
#include "mylocale.h"

char *_latinchars[] = {
  "á", "é", "í", "ó","ú","Á","É","Í","Ó","Ú","ä","ë","ï","ö","ü","Ä","Ë","Ï","Ö","Ü", NULL
};
char *_replacements[] = {
  "a", "e", "i", "o","u","A","E","I","O","U","a","e","i","o","u","A","E","I","O","U", NULL
};


/****************
 * Declarations *
 ****************/

#define CHARSET_TRANS_ARRAY_LEN ( sizeof(charset_trans_array) / sizeof((charset_trans_array)[0]) )
const CharsetInfo charset_trans_array[] = {
    {N_("Arabic (IBM-864)"),                  "IBM864"        },
    {N_("Arabic (ISO-8859-6)"),               "ISO-8859-6"    },
    {N_("Arabic (Windows-1256)"),             "windows-1256"  },
    {N_("Baltic (ISO-8859-13)"),              "ISO-8859-13"   },
    {N_("Baltic (ISO-8859-4)"),               "ISO-8859-4"    },
    {N_("Baltic (Windows-1257)"),             "windows-1257"  },
    {N_("Celtic (ISO-8859-14)"),              "ISO-8859-14"   },
    {N_("Central European (IBM-852)"),        "IBM852"        },
    {N_("Central European (ISO-8859-2)"),     "ISO-8859-2"    },
    {N_("Central European (Windows-1250)"),   "windows-1250"  },
    {N_("Chinese Simplified (GB18030)"),      "gb18030"       },
    {N_("Chinese Simplified (GB2312)"),       "GB2312"        },
    {N_("Chinese Traditional (Big5)"),        "Big5"          },
    {N_("Chinese Traditional (Big5-HKSCS)"),  "Big5-HKSCS"    },
    {N_("Cyrillic (IBM-855)"),                "IBM855"        },
    {N_("Cyrillic (ISO-8859-5)"),             "ISO-8859-5"    },
    {N_("Cyrillic (ISO-IR-111)"),             "ISO-IR-111"    },
    {N_("Cyrillic (KOI8-R)"),                 "KOI8-R"        },
    {N_("Cyrillic (Windows-1251)"),           "windows-1251"  },
    {N_("Cyrillic/Russian (CP-866)"),         "IBM866"        },
    {N_("Cyrillic/Ukrainian (KOI8-U)"),       "KOI8-U"        },
    {N_("English (US-ASCII)"),                "us-ascii"      },
    {N_("Greek (ISO-8859-7)"),                "ISO-8859-7"    },
    {N_("Greek (Windows-1253)"),              "windows-1253"  },
    {N_("Hebrew (IBM-862)"),                  "IBM862"        },
    {N_("Hebrew (Windows-1255)"),             "windows-1255"  },
    {N_("Japanese (EUC-JP)"),                 "EUC-JP"        },
    {N_("Japanese (ISO-2022-JP)"),            "ISO-2022-JP"   },
    {N_("Japanese (Shift_JIS)"),              "Shift_JIS"     },
    {N_("Korean (EUC-KR)"),                   "EUC-KR"        },
    {N_("Nordic (ISO-8859-10)"),              "ISO-8859-10"   },
    {N_("South European (ISO-8859-3)"),       "ISO-8859-3"    },
    {N_("Thai (TIS-620)"),                    "TIS-620"       },
    {N_("Turkish (IBM-857)"),                 "IBM857"        },
    {N_("Turkish (ISO-8859-9)"),              "ISO-8859-9"    },
    {N_("Turkish (Windows-1254)"),            "windows-1254"  },
    {N_("Unicode (UTF-7)"),                   "UTF-7"         },
    {N_("Unicode (UTF-8)"),                   "UTF-8"         },
    {N_("Unicode (UTF-16BE)"),                "UTF-16BE"      },
    {N_("Unicode (UTF-16LE)"),                "UTF-16LE"      },
    {N_("Unicode (UTF-32BE)"),                "UTF-32BE"      },
    {N_("Unicode (UTF-32LE)"),                "UTF-32LE"      },
    {N_("Vietnamese (VISCII)"),               "VISCII"        },
    {N_("Vietnamese (Windows-1258)"),         "windows-1258"  },
    {N_("Visual Hebrew (ISO-8859-8)"),        "ISO-8859-8"    },
    {N_("Western (IBM-850)"),                 "IBM850"        },
    {N_("Western (ISO-8859-1)"),              "ISO-8859-1"    },
    {N_("Western (ISO-8859-15)"),             "ISO-8859-15"   },
    {N_("Western (Windows-1252)"),            "windows-1252"  }

    /*
     * From this point, character sets aren't supported by iconv
     */
/*    {N_("Arabic (IBM-864-I)"),                "IBM864i"              },
    {N_("Arabic (ISO-8859-6-E)"),             "ISO-8859-6-E"         },
    {N_("Arabic (ISO-8859-6-I)"),             "ISO-8859-6-I"         },
    {N_("Arabic (MacArabic)"),                "x-mac-arabic"         },
    {N_("Armenian (ARMSCII-8)"),              "armscii-8"            },
    {N_("Central European (MacCE)"),          "x-mac-ce"             },
    {N_("Chinese Simplified (GBK)"),          "x-gbk"                },
    {N_("Chinese Simplified (HZ)"),           "HZ-GB-2312"           },
    {N_("Chinese Traditional (EUC-TW)"),      "x-euc-tw"             },
    {N_("Croatian (MacCroatian)"),            "x-mac-croatian"       },
    {N_("Cyrillic (MacCyrillic)"),            "x-mac-cyrillic"       },
    {N_("Cyrillic/Ukrainian (MacUkrainian)"), "x-mac-ukrainian"      },
    {N_("Farsi (MacFarsi)"),                  "x-mac-farsi"},
    {N_("Greek (MacGreek)"),                  "x-mac-greek"          },
    {N_("Gujarati (MacGujarati)"),            "x-mac-gujarati"       },
    {N_("Gurmukhi (MacGurmukhi)"),            "x-mac-gurmukhi"       },
    {N_("Hebrew (ISO-8859-8-E)"),             "ISO-8859-8-E"         },
    {N_("Hebrew (ISO-8859-8-I)"),             "ISO-8859-8-I"         },
    {N_("Hebrew (MacHebrew)"),                "x-mac-hebrew"         },
    {N_("Hindi (MacDevanagari)"),             "x-mac-devanagari"     },
    {N_("Icelandic (MacIcelandic)"),          "x-mac-icelandic"      },
    {N_("Korean (JOHAB)"),                    "x-johab"              },
    {N_("Korean (UHC)"),                      "x-windows-949"        },
    {N_("Romanian (MacRomanian)"),            "x-mac-romanian"       },
    {N_("Turkish (MacTurkish)"),              "x-mac-turkish"        },
    {N_("User Defined"),                      "x-user-defined"       },
    {N_("Vietnamese (TCVN)"),                 "x-viet-tcvn5712"      },
    {N_("Vietnamese (VPS)"),                  "x-viet-vps"           },
    {N_("Western (MacRoman)"),                "x-mac-roman"          },
    // charsets whithout posibly translatable names
    {"T61.8bit",                              "T61.8bit"             },
    {"x-imap4-modified-utf7",                 "x-imap4-modified-utf7"},
    {"x-u-escaped",                           "x-u-escaped"          },
    {"windows-936",                           "windows-936"          }
*/
};




/*************
 * Functions *
 *************/

static char* get_current_charset (void)
{
    char *charset = getenv("CHARSET");

#ifdef HAVE_LANGINFO_CODESET
    if (!charset)
        charset = nl_langinfo(CODESET);
#endif
    if (!charset)
        charset = "ISO-8859-1";

    return charset;
}


char* replacement_table(char* string) {
	char**a,b;
	char*position;
	char*tempstring,tempstring2;
	//int replacementlength;
	int current;
return NULL;
	tempstring = g_strdup(string);
	for (current=0;_latinchars[current];current++) {
		g_print("To replace: %s         Replacement: %s\n",_latinchars[current],_replacements[current]);
//		replacementlength =strlen(b[0]);
		while (position = strstr(tempstring,_latinchars[current]) ) {
			g_print("Found %s from this part on: %s\n",_latinchars[0],position);
			//tempstring = g_strdup(tempstring);
//			position[0] == NULL;
//			g_print("%d ",tempstring);g_print("%d ",replacements[0]);g_print("%d ",position+replacementlength);
//			g_print("done");
			tempstring2 = g_strconcat(g_strdup("Perruno gatuno"),NULL,NULL);
			tempstring = g_strdup(tempstring2);
//			g_print("Before: %d      After: %d\n",string,tempstring);
		}
	}
	return tempstring;
}

#ifdef HAVE_ICONV_OPEN
static char* convert_string (const char *string, char *from, char *to)
{
    size_t outleft, outsize, length;
    iconv_t cd;
    char *out, *outptr;
    const char *input = string;

    if (!string)
        return NULL;

    length = strlen(string);

//      g_message("converting %s from %s to %s", string, from, to);
    if ((cd = iconv_open(to, from)) == (iconv_t)-1)
    {
        g_warning("convert_string(): Conversion not supported. Charsets: %s -> %s", from, to);
        return g_strdup(string);
    }

    /* Due to a GLIBC bug, round outbuf_size up to a multiple of 4 */
    /* + 1 for nul in case len == 1 */
    outsize = ((length + 3) & ~3) + 1;
    out = g_malloc(outsize);
    outleft = outsize - 1;
    outptr = out;

 retry:
    if (iconv(cd, &input, &length, &outptr, &outleft) == -1)
    {
        int used;
        switch (errno)
        {
            case E2BIG:
 g_message("Too big; retrying");
                 used = outptr - out;
                outsize = (outsize - 1) * 2 + 1;
                out = g_realloc(out, outsize);
                outptr = out + used;
                outleft = outsize - 1 - used;
                goto retry;
            case EINVAL:
 g_message("OK; going ahead");
                break;
            case EILSEQ:
 g_message("Invalid sequence; retrying");
                /* Invalid sequence, try to get the
                                   rest of the string */
                input++;
                length = strlen(input);
                goto retry;
            default:
                g_warning("convert_string(): Conversion failed. Inputstring: %s; Error: %s", string, strerror(errno));
                break;
        }
    }
    *outptr = '\0';

    iconv_close(cd);
//    g_message("\nconvert_string: About to return %s",out);
    return out;
}
#else
static char* convert_string (const char *string, char *from, char *to)
{
    if (!string)
        return NULL;
    return g_strdup(string);
}
#endif


/*
 * Conversion with UTF-8 for ogg tags
 */
char* convert_to_utf8 (const char *string)
{
    char *charset = get_current_charset();

    return convert_string(string, charset, "UTF-8");
}

char* convert_from_utf8 (const char *string)
{
    char * d;
    char *charset = get_current_charset();

    d= convert_string(string, "UTF-8", charset);
//    g_message("\nconvert_from_utf8: About to return %s",d);
    return d;
}

char* convert_to_ascii (const char *string)
{
    char *charset = get_current_charset();
	//char * blah = replacement_table(string);

    return convert_string(string, charset, "US-ASCII");
}

char* convert_from_ascii (const char *string)
{
    char * d;
    char *charset = get_current_charset();

    d= convert_string(string, "US-ASCII", charset);
//    g_message("\nconvert_from_utf8: About to return %s",d);
    return d;
}


/*
 * Common conversion functions
 */
char* convert_from_file_to_user (const char *string)
{
/*    char *file_charset = FILE_CHARACTER_SET;
    char *user_charset = USER_CHARACTER_SET;*/

//    return convert_string(string,file_charset,user_charset);
return NULL;
}

char* convert_from_user_to_file (const char *string)
{
/*    char *file_charset = FILE_CHARACTER_SET;
     char *user_charset = USER_CHARACTER_SET;*/

//   return convert_string(string,user_charset,file_charset);
return NULL;
}


GList *Charset_Create_List (void)
{
    GList *list = NULL;
    gint i;

    for (i=0; i<CHARSET_TRANS_ARRAY_LEN; i++)
        list = g_list_append(list,_(charset_trans_array[i].charset_title));
    return list;
}


/*
 * Return charset_name from charset_title
 */
gchar *Charset_Get_Name_From_Title (gchar *charset_title)
{
    gint i;

    if (charset_title)
        for (i=0; i<CHARSET_TRANS_ARRAY_LEN; i++)
            if ( strcasecmp(_(charset_title),_(charset_trans_array[i].charset_title)) == 0 )
                return charset_trans_array[i].charset_name;
    return "";
}


/*
 * Return charset_title from charset_name
 */
gchar *Charset_Get_Title_From_Name (gchar *charset_name)
{
    gint i;

    if (charset_name)
        for (i=0; i<CHARSET_TRANS_ARRAY_LEN; i++)
            if ( strcasecmp(charset_name,charset_trans_array[i].charset_name) == 0 )
                return _(charset_trans_array[i].charset_title);
    return "";
}



/*
 * Test if the conversion is supported between two character sets ('from' and 'to')
 */
#ifdef HAVE_ICONV_OPEN
gboolean test_conversion_charset (char *from, char *to)
{
    iconv_t cd;

    if ((cd=iconv_open(to,from)) == (iconv_t)-1)
    {
        /* Conversion not supported */
        return FALSE;
    }
    iconv_close(cd);
    return TRUE;
}
#else
gboolean test_conversion_charset (char *from, char *to)
{
    return TRUE;
}
#endif
/* syntax highlighted by Code2HTML, v. 0.9.1 */