### File: latin4.pl
### Version 0.1,  December 19, 1997
### includes lower --> uppercase conversion tables
###
### ISO_8859-4 encoding information
###

## Copyright (C) 1995 by Ross Moore
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# NOTE: the variables below are deliberately package globals (no `my`);
# they are presumably read by the program that loads this file.

$CHARSET  = "iso-8859-4";
$INPUTENC = 'latin4';    # empty implies 'latin1'

# Character ranges for lower --> upper-case conversion.
# The two strings are positional (tr///-style) lists of octal escapes and
# must stay the same length: the n-th character of $sclower is converted to
# the n-th character of $scupper.
#
# The pair \277 -> \275 is small eng -> capital Eng, matching the
# '191' -> '189' entry of %low_entities below.  (It used to read
# \255 -> \257, i.e. soft hyphen -> macron, which is not a case pair.)
$sclower = "\\261\\263\\265\\266\\271-\\274\\276\\277\\340-\\366\\370-\\376";
$scupper = "\\241\\243\\245\\246\\251-\\254\\256\\275\\300-\\326\\330-\\336";

# Extra pattern match preceding lower --> upper-case conversion:
# sharp s (octal 337) has no single upper-case character.
$scextra = "s/\\337/ss/g";

# Decimal code 223 (sharp s) expands to two letters in small caps.
%extra_small_caps = ( '223' => 'ss' );

# Decimal code of each lower-case letter => decimal code of its upper-case
# partner.  Commented-out pairs are kept from the original table to show
# which codes in the range were deliberately left out.
%low_entities = (
    '177' => '161',
#   '178' => '162',
    '179' => '163',
#   '180' => '164',
    '181' => '165',
    '182' => '166',
#   '183' => '167',
#   '184' => '168',
    '185' => '169',
    '186' => '170',
    '187' => '171',
    '188' => '172',
#   '189' => '173',
    '190' => '174',
    '191' => '189',
    '224' => '192',
    '225' => '193',
    '226' => '194',
    '227' => '195',
    '228' => '196',
    '229' => '197',
    '230' => '198',
    '231' => '199',
    '232' => '200',
    '233' => '201',
    '234' => '202',
    '235' => '203',
    '236' => '204',
    '237' => '205',
    '238' => '206',
    '239' => '207',
    '240' => '208',
    '241' => '209',
    '242' => '210',
    '243' => '211',
    '244' => '212',
    '245' => '213',
    '246' => '214',
#   '247' => '215',
    '248' => '216',
    '249' => '217',
    '250' => '218',
    '251' => '219',
    '252' => '220',
    '253' => '221',
    '254' => '222',
#   '255' => '223',
);

# Handlers for argument-less LaTeX commands.  Each one returns the result of
# iso_map(<base>, <kind>) -- iso_map is defined outside this file -- followed
# by the handler's first argument ($_[0]).  The two strings passed to iso_map
# appear to be concatenated into a key of the character map below, e.g.
# "t" . "strok" => 'tstrok' (TODO confirm against iso_map).
sub do_cmd_tl           { join('', &iso_map("t", "strok"), $_[0]); }
sub do_cmd_TL           { join('', &iso_map("T", "strok"), $_[0]); }
sub do_cmd_dh           { join('', &iso_map("d", "strok"), $_[0]); }
sub do_cmd_DH           { join('', &iso_map("D", "strok"), $_[0]); }
sub do_cmd_ss           { join('', &iso_map("sz", "lig"), $_[0]); }
sub do_cmd_ng           { join('', &iso_map("eng", ""), $_[0]); }
sub do_cmd_NG           { join('', &iso_map("ENG", ""), $_[0]); }
sub do_cmd_kra          { join('', &iso_map("k", "green"), $_[0]); }
sub do_cmd_textcurrency { join('', &iso_map("curren", ""), $_[0]); }
sub do_cmd_mathdegree   { join('', &iso_map("deg", ""), $_[0]); }
sub do_cmd_S            { join('', &iso_map("sect", ""), $_[0]); }
sub do_cmd_div          { join('', &iso_map("divide", ""), $_[0]); }
sub do_cmd_times        { join('', &iso_map("times", ""), $_[0]); }
#sub do_cmd_minus { join('', &iso_map("shy", ""), $_[0]);}

# Entity-style name => character of the ISO-8859-4 code page.
#
# Changes relative to the earlier table:
#  * 'Racute' / 'racute' removed: they pointed at the code points of
#    'Amacr' / 'amacr'; ISO-8859-4 has no R with acute.
#  * 'dblac' removed: it pointed at the code point of 'Eng'; ISO-8859-4 has
#    no double-acute accent.
#  * 'ENG' added as an alias of 'Eng', because do_cmd_NG looks up "ENG".
%iso_8859_4_character_map = (
    'Aacute' => 'Á',    # capital A, acute accent
    'Acirc'  => 'Â',    # capital A, circumflex accent
    'AElig'  => 'Æ',    # capital AE diphthong (ligature)
    'Aogon'  => '¡',    # capital A, ogonek accent
    'Amacr'  => 'À',    # capital A, macron accent
    'Aring'  => 'Å',    # capital A, ring
    'Atilde' => 'Ã',    # capital A, tilde
    'Auml'   => 'Ä',    # capital A, dieresis or umlaut mark
    'Ccaron' => 'È',    # capital C, caron accent
    'Dstrok' => 'Ð',    # capital D, stroke
    'ETH'    => 'Ð',    # capital Eth, Icelandic
    'Eacute' => 'É',    # capital E, acute accent
    'Edot'   => 'Ì',    # capital E, dot above
    'Eng'    => '½',    # capital Eng (Sami)
    'ENG'    => '½',    # capital Eng (Sami); spelling used by do_cmd_NG
    'Eogon'  => 'Ê',    # capital E, ogonek accent
    'Emacr'  => 'ª',    # capital E, macron accent
    'Euml'   => 'Ë',    # capital E, dieresis or umlaut mark
    'Gcedil' => '«',    # capital G, cedilla accent
    'Iacute' => 'Í',    # capital I, acute accent
    'Icirc'  => 'Î',    # capital I, circumflex accent
    'Imacr'  => 'Ï',    # capital I, macron accent
    'Iogon'  => 'Ç',    # capital I, ogonek accent
    'Itilde' => '¥',    # capital I, tilde accent
    'Lcedil' => '¦',    # capital L, cedilla accent
    'Kcedil' => 'Ó',    # capital K, cedilla accent
    'Ncedil' => 'Ñ',    # capital N, cedilla accent
    'Ocirc'  => 'Ô',    # capital O, circumflex accent
    'Omacr'  => 'Ò',    # capital O, macron accent
    'Oslash' => 'Ø',    # capital O, slash
    'Otilde' => 'Õ',    # capital O, tilde
    'Ouml'   => 'Ö',    # capital O, dieresis or umlaut mark
    'Rcedil' => '£',    # capital R, cedilla accent
    'Scaron' => '©',    # capital S, caron accent
    'Tstrok' => '¬',    # capital T, stroke
    'Uacute' => 'Ú',    # capital U, acute accent
    'Ucirc'  => 'Û',    # capital U, circumflex accent
    'Umacr'  => 'Þ',    # capital U, macron accent
    'Uogon'  => 'Ù',    # capital U, ogonek accent
    'Utilde' => 'Ý',    # capital U, tilde accent
    'Uuml'   => 'Ü',    # capital U, dieresis or umlaut mark
    'Zcaron' => '®',    # capital Z, caron accent
    'aacute' => 'á',    # small a, acute accent
    'acirc'  => 'â',    # small a, circumflex accent
    'aelig'  => 'æ',    # small ae diphthong (ligature)
    'amacr'  => 'à',    # small a, macron accent
    'amp'    => '&',    # ampersand
    'aogon'  => '±',    # small a, ogonek accent
    'aring'  => 'å',    # small a, ring
    'atilde' => 'ã',    # small a, tilde
    'auml'   => 'ä',    # small a, dieresis or umlaut mark
    'ccaron' => 'è',    # small c, caron accent
    'dstrok' => 'ð',    # small d, stroke
    'eacute' => 'é',    # small e, acute accent
    'edot'   => 'ì',    # small e, dot above
    'emacr'  => 'º',    # small e, macron accent
    'eogon'  => 'ê',    # small e, ogonek accent
    'eng'    => '¿',    # small eng (Sami)
    'eth'    => 'ð',    # small eth, Icelandic
    'euml'   => 'ë',    # small e, dieresis or umlaut mark
    'gcedil' => '»',    # small g, cedilla accent
    'gt'     => '>',    # greater than
    'iacute' => 'í',    # small i, acute accent
    'icirc'  => 'î',    # small i, circumflex accent
    'imacr'  => 'ï',    # small i, macron accent
    'iogon'  => 'ç',    # small i, ogonek accent
    'itilde' => 'µ',    # small i, tilde accent
    'kcedil' => 'ó',    # small k, cedilla accent
    'kgreen' => '¢',    # small kra (Greenlandic)
    'lcedil' => '¶',    # small l, cedilla accent
    'lt'     => '<',    # less than
    'ncedil' => 'ñ',    # small n, cedilla accent
    'ocirc'  => 'ô',    # small o, circumflex accent
    'omacr'  => 'ò',    # small o, macron accent
    'oslash' => 'ø',    # small o, slash
    'otilde' => 'õ',    # small o, tilde
    'ouml'   => 'ö',    # small o, dieresis or umlaut mark
    'quot'   => '"',    # double quote
    'rcedil' => '³',    # small r, cedilla accent
    'scaron' => '¹',    # small s, caron accent
    'szlig'  => 'ß',    # small sharp s, German (sz ligature)
    'tstrok' => '¼',    # small t, stroke
    'uacute' => 'ú',    # small u, acute accent
    'ucirc'  => 'û',    # small u, circumflex accent
    'umacr'  => 'þ',    # small u, macron accent
    'uogon'  => 'ù',    # small u, ogonek accent
    'utilde' => 'ý',    # small u, tilde accent
    'uuml'   => 'ü',    # small u, dieresis or umlaut mark
    'zcaron' => '¾',    # small z, caron accent

    # These do not have HTML mnemonic names ...
    'nbsp'   => ' ',    # non-breaking space
    'curren' => '¤',    # currency sign
    'sect'   => '§',    # section mark
    'times'  => '×',
    'divide' => '÷',

    # These are character types without arguments ...
    'grave'  => "`",
    'circ'   => '^',
    'tilde'  => '~',
    'uml'    => '¨',
    'macron' => '¯',
    'deg'    => '°',
    'ogon'   => '²',
    'acute'  => "´",
    'caron'  => '·',
    'cedil'  => "¸",
    'dot'    => 'ÿ',
);

# Character of the code page => LaTeX source that produces it.
# All accent commands are the text-mode forms; dot-above is therefore
# written \.{E} / \.{e} (\dot is a math-mode accent), in line with the
# bare dot accent \.{} used for the last entry.
%iso_8859_4_character_map_inv = (
    '>' => '\\ensuremath{>}',
    '<' => '\\ensuremath{<}',
    '&' => '&',
    '^' => '\\^{}',
    '~' => '\\~{}',
    '"' => '"',
    ' ' => '\\nobreakspace{}',
    '¡' => '\\k{A}',
    '¢' => '\\kra{}',    # k-greenland
    '£' => '\\c{R}',
    '¤' => '\\textcurrency{}',
    '¥' => '\\~{I}',
    '¦' => '\\c{L}',
    '§' => '\\S{}',
    '¨' => '\\"{}',
    '©' => '\\v{S}',
    'ª' => '\\={E}',
    '«' => '\\c{G}',
    '¬' => '\\TL{}',     # Tstrok
    '­' => '\\-',
    '®' => '\\v{Z}',
    '¯' => '\\={}',
    '°' => '\\r{}',
    '±' => '\\k{a}',
    '²' => '\\k{ }',
    '³' => '\\c{r}',
    '´' => '\\\'{}',
    'µ' => '\\~{\\i}',
    '¶' => '\\c{l}',
    '·' => '\\v{}',
    '¸' => '\\c{ }',
    '¹' => '\\v{s}',
    'º' => '\\={e}',
    # NOTE(review): the forward map calls this character 'gcedil', yet the
    # LaTeX form is an acute accent -- possibly deliberate; left unchanged.
    '»' => '\\\'{g}',
    '¼' => '\\tl{}',     # tstrok
    '½' => '\\NG{}',
    '¾' => '\\v{z}',
    '¿' => '\\ng{}',
    'À' => '\\={A}',
    'Á' => '\\\'{A}',
    'Â' => '\\^{A}',
    'Ã' => '\\~{A}',
    'Ä' => '\\"{A}',
    'Å' => '\\r{A}',
    'Æ' => '\\AE{}',
    'Ç' => '\\k{I}',
    'È' => '\\v{C}',
    'É' => '\\\'{E}',
    'Ê' => '\\k{E}',
    'Ë' => '\\"{E}',
    'Ì' => '\\.{E}',
    'Í' => '\\\'{I}',
    'Î' => '\\^{I}',
    'Ï' => '\\={I}',
    'Ð' => '\\DH{}',
    'Ñ' => '\\c{N}',
    'Ò' => '\\={O}',
    'Ó' => '\\c{K}',
    'Ô' => '\\^{O}',
    'Õ' => '\\~{O}',
    'Ö' => '\\"{O}',
    '×' => '\\ensuremath{\\times}',
    'Ø' => '\\O{}',
    'Ù' => '\\k{U}',
    'Ú' => '\\\'{U}',
    'Û' => '\\^{U}',
    'Ü' => '\\"{U}',
    'Ý' => '\\~{U}',
    'Þ' => '\\={U}',
    'ß' => '\\ss{}',
    'à' => '\\={a}',
    'á' => '\\\'{a}',
    'â' => '\\^{a}',
    'ã' => '\\~{a}',
    'ä' => '\\"{a}',
    'å' => '\\r{a}',
    'æ' => '\\ae{}',
    'ç' => '\\k{\\i}',
    'è' => '\\v{c}',
    'é' => '\\\'{e}',
    'ê' => '\\k{e}',
    'ë' => '\\"{e}',
    'ì' => '\\.{e}',
    'í' => '\\\'{\\i}',
    'î' => '\\^{\\i}',
    'ï' => '\\={\\i}',
    'ð' => '\\dh{}',
    'ñ' => '\\c{n}',
    'ò' => '\\={o}',
    'ó' => '\\c{k}',
    'ô' => '\\^{o}',
    'õ' => '\\~{o}',
    'ö' => '\\"{o}',
    '÷' => '\\ensuremath{\\div}',
    'ø' => '\\o{}',
    'ù' => '\\k{u}',
    'ú' => '\\\'{u}',
    'û' => '\\^{u}',
    'ü' => '\\"{u}',
    'ý' => '\\~{u}',
    'þ' => '\\={u}',
    'ÿ' => '\\.{}',
);

1;