# LANGUAGE.awk -- awk script to generate LANGUAGE.tbl	-*- coding: utf-8; -*-
# Copyright (C) 2007
#   National Institute of Advanced Industrial Science and Technology (AIST)
#   Registration Number H15PRO112

# This file is part of the m17n database; a sub-part of the m17n
# library.

# The m17n library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public License
# as published by the Free Software Foundation; either version 2.1 of
# the License, or (at your option) any later version.

# The m17n library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with the m17n library; if not, write to the Free
# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.

# BEGIN: print the fixed header of LANGUAGE.tbl, then load the lookup
# tables consulted by the main rule:
#   NATIVE[code]  -- field 2 of the lines of native.txt and native.ext
#                    that start with a lowercase letter (an entry in
#                    native.ext replaces one from native.txt)
#   CHARS[code]   -- field 3 of those native.txt lines, when non-empty
#   ENGLISH[code] -- from en.lnm: the key is the second piece of the
#                    line when split on "(" or space, the value is the
#                    text between the first pair of double quotes
# Lines of native.txt that start with ";;" are copied to the output, so
# they become the tail of the generated header.
BEGIN {
    print ";; LANGUAGE.tbl -- ISO639 Language Code		-*- mode:lisp; coding:utf-8; -*-";
    print ";; Copyright (C) 2007";
    print ";;   National Institute of Advanced Industrial Science and Technology (AIST)";
    print ";;   Registration Number H15PRO112";
    print "";
    print ";; This file is part of the m17n database; a sub-part of the m17n";
    print ";; library.";
    print "";
    print ";; The m17n library is free software; you can redistribute it and/or";
    print ";; modify it under the terms of the GNU Lesser General Public License";
    print ";; as published by the Free Software Foundation; either version 2.1 of";
    print ";; the License, or (at your option) any later version.";
    print "";
    print ";; The m17n library is distributed in the hope that it will be useful,";
    print ";; but WITHOUT ANY WARRANTY; without even the implied warranty of";
    print ";; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU";
    print ";; Lesser General Public License for more details.";
    print "";
    print ";; You should have received a copy of the GNU Lesser General Public";
    print ";; License along with the m17n library; if not, write to the Free";
    print ";; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,";
    print ";; Boston, MA 02110-1301, USA.";
    print ";;";
    print ";; The file format is this:";
    print ";;    (ISO639-2 ISO639-1 \"ENGLISH-NAME\" [ \"NATIVE-NAME\" [ \"EXTRA-CHARS\" ]]) ...";
    print ";; ISO639-2: 3-letter language code of ISO639-2.";
    print ";; ISO639-1: 2-letter language code of ISO639-1, or nil if it doesn't exist.";
    print ";; ENGLISH-NAME: English name of the language.";
    print ";; NATIVE-NAME: Native name of the language.";
    print ";; EXTRA-CHARS: Extra characters that uniquifies the language.";
    print ";;";
    print ";; ISO639-2 and ISO639-1 are extracted from ISO-639-2.txt.";
    print ";; ENGLISH-NAME and NATIVE-NAME are mainly extracted from CLDR,";
    print ";;   but are also supplemented from these sites:";

    # getline returns 1 for a record, 0 at end of file and -1 when the
    # file cannot be read.  The result must be compared with 0: a bare
    # "while (getline < file)" treats -1 as true and loops forever when
    # the file is missing.
    while ((getline < "native.txt") > 0) {
	if ($0 ~ /^[a-z]/) {
	    NATIVE[$1] = $2;
	    if ($3 != "")
		CHARS[$1] = $3;
	} else if ($0 ~ /^;;/) {
	    print;
	}
    }
    close("native.txt");

    while ((getline < "native.ext") > 0) {
	if ($0 ~ /^[a-z]/) {
	    NATIVE[$1] = $2;
	}
    }
    close("native.ext");

    while ((getline < "en.lnm") > 0) {
	split($0, array, "[( ]");
	code = array[2];
	split($0, array, "\"");
	ENGLISH[code] = array[2];
    }
    close("en.lnm");
}

# Main rule: for every input line that starts with three lowercase
# letters followed by "|", print one LANGUAGE.tbl entry of the form
#   (CODE3 CODE2 "ENGLISH-NAME" [ "NATIVE-NAME" | nil ] [ "EXTRA-CHARS" ])
# Field 1 is taken as the 3-letter code and field 3 as the 2-letter
# code.  NOTE(review): this presumes FS is "|", set by the caller --
# confirm against the command line that runs this script.
/^[a-z][a-z][a-z]\|/ {
    code3 = $1;
    code2 = $3;

    # Use the 2-letter code as a table key only when the line really has
    # one.  The "nil" placeholder that is printed in its place must never
    # be looked up, or an unrelated table entry keyed "nil" would be
    # picked up by every language without a 2-letter code.
    has_code2 = (code2 != "" && code2 != "NULL");
    if (has_code2) {
	native = NATIVE[code2];
	name = ENGLISH[code2];
    } else {
	code2 = "nil";
	native = "";
	name = "";
    }

    # Fall back to the 3-letter code when the 2-letter code gave nothing.
    if (native == "")
	native = NATIVE[code3];
    if (name == "")
	name = ENGLISH[code3];
    chars = CHARS[code3];

    # Field 4 of the input is not used: the English name comes only from
    # ENGLISH[].  The disabled code below is a normalisation of name that
    # is kept for reference.
#     if (name == "Greek, Modern (1453-)")
# 	name = "Greek";
#     else {
# 	gsub("; .*", "", name);
# 	gsub(" \\(Other\\)$", "", name);
# 	gsub(" languages$", "", name);
# 	gsub(" Languages$", "", name);
# 	if (name ~ /\(.*[0-9].*\)$/)
# 	    gsub(" \\([^)]*\\)$", "", name);
# 	if (name ~ ", ") {
# 	    split(name, array, ", ");
# 	    name = array[2] " " array[1];
# 	}
#     }

    printf "(%s %-3s \"%s\"", code3, code2, name;
    # NATIVE-NAME is positional: when only EXTRA-CHARS is known, a nil
    # placeholder keeps EXTRA-CHARS in the third optional slot.
    if (native != "")
	printf " \"%s\"", native;
    else if (chars != "")
	printf " nil";
    if (chars != "")
	printf " \"%s\"", chars;
    printf ")\n";
}


# syntax highlighted by Code2HTML, v. 0.9.1