### File: latin7.pl
### Version 0.1,  September 10, 1999
### Written by Ross Moore
###
### ISO_8859-13 encoding information
###

## Copyright (C) 1999 by Ross Moore
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# NOTE: this file deliberately sets package globals (no `my`, no strictures);
# they are meant to be read by the program that loads this file.

$CHARSET = 'iso-8859-13';
$INPUTENC = 'latin7';

#Character ranges for lower --> upper-case conversion
# Written as octal byte escapes; position N in $sclower pairs with
# position N in $scupper (e.g. \270 <-> \250, \340-\366 <-> \300-\326).

$sclower = "\\270\\272\\277\\340-\\366\\370-\\376";
$scupper = "\\250\\252\\257\\300-\\326\\330-\\336";

#extra pattern match preceding lower --> upper-case conversion
# (\337 is the sharp s, which has no single upper-case byte)
$scextra = "s/\\337/ss/g";

# decimal code 223 (sharp s) => replacement text
%extra_small_caps = ( '223' , 'ss' );

# Decimal code of a lower-case letter => decimal code of its upper-case form.
# Codes 224-254 sit exactly 32 above their capitals; 247 (division sign) and
# 255 (right single quote) are not letters and stay disabled.
%low_entities = ( '224', '192'
		 ,'225', '193'
		 ,'226', '194'
		 ,'227', '195'
		 ,'228', '196'
		 ,'229', '197'
		 ,'230', '198'
		 ,'231', '199'
		 ,'232', '200'
		 ,'233', '201'
		 ,'234', '202'
		 ,'235', '203'
		 ,'236', '204'
		 ,'237', '205'
		 ,'238', '206'
		 ,'239', '207'
		 ,'240', '208'
		 ,'241', '209'
		 ,'242', '210'
		 ,'243', '211'
		 ,'244', '212'
		 ,'245', '213'
		 ,'246', '214'
#		 ,'247', '215'
		 ,'248', '216'
		 ,'249', '217'
		 ,'250', '218'
		 ,'251', '219'
		 ,'252', '220'
		 ,'253', '221'
		 ,'254', '222'
#		 ,'255', '223'
# The three letters below live outside the 224-254 run; their capitals are
# 16 positions *lower* (octal 270/272/277 -> 250/252/257, as in $scupper).
# The previous values 200/202/207 collided with the capitals of 232/234/239.
		 ,'184', '168'
		 ,'186', '170'
		 ,'191', '175'
);

# Each do_cmd_* handler below returns the result of iso_map(NAME, KIND)
# followed by the text it was handed in $_[0].  iso_map itself is defined
# outside this file; NAME.KIND presumably selects a key of
# %iso_8859_13_character_map (e.g. "l"."stroke" -> 'lstrok' is NOT a plain
# concatenation, so the exact lookup rule should be confirmed there).

#sub do_cmd_oe { join('', &iso_map("oe", "lig"), $_[0]);}
#sub do_cmd_OE { join('', &iso_map("OE", "lig"), $_[0]);}
sub do_cmd_l { join('', &iso_map("l", "stroke"), $_[0]);}
sub do_cmd_L { join('', &iso_map("L", "stroke"), $_[0]);}
#sub do_cmd_ng { join('', &iso_map("eng", ""), $_[0]);}
sub do_cmd_ss { join('', &iso_map("sz", "lig"), $_[0]);}
sub do_cmd_i { join('', &iso_map("i", "nodot"), $_[0]);}

sub do_cmd_textonequarter { join('', &iso_map("frac14", ""), $_[0]);}
sub do_cmd_textonehalf { join('', &iso_map("frac12", ""), $_[0]);}
sub do_cmd_textthreequarters { join('', &iso_map("frac34", ""), $_[0]);}
sub do_cmd_textcent { join('', &iso_map("cent", ""), $_[0]);}
sub do_cmd_textyen { join('', &iso_map("yen", ""), $_[0]);}
sub do_cmd_textcurrency { join('', &iso_map("curren", ""), $_[0]);}
sub do_cmd_textbrokenbar { join('', &iso_map("brvbar", ""), $_[0]);}
sub do_cmd_textregistered { join('', &iso_map("reg", ""), $_[0]);}
sub do_cmd_textperiodcentered { join('', &iso_map("middot", ""), $_[0]);}

sub do_cmd_guillemotleft { join('', &iso_map("laquo", ""), $_[0]);}
sub do_cmd_guillemotright { join('', &iso_map("raquo", ""), $_[0]);}
sub do_cmd_quotedblbase { join('', &iso_map("dbquo", ""), $_[0]);}
sub do_cmd_quotesinglbase { join('', &iso_map("sbquo", ""), $_[0]);}

sub do_cmd_mathdegree { join('', &iso_map("deg", ""), $_[0]);}
sub do_cmd_mathonesuperior { join('', &iso_map("sup1", ""), $_[0]);}
sub do_cmd_mathtwosuperior { join('', &iso_map("sup2", ""), $_[0]);}
sub do_cmd_maththreesuperior { join('', &iso_map("sup3", ""), $_[0]);}
sub do_cmd_mathordmasculine { join('', &iso_map("ordm", ""), $_[0]);}
sub do_cmd_mathordfeminine { join('', &iso_map("ordf", ""), $_[0]);}

sub do_cmd_P { join('', &iso_map("para", ""), $_[0]);}
sub do_cmd_S { join('', &iso_map("sect", ""), $_[0]);}
sub do_cmd_pm { join('', &iso_map("plusmn", ""), $_[0]);}
sub do_cmd_div { join('', &iso_map("divide", ""), $_[0]);}
sub do_cmd_times { join('', &iso_map("times", ""), $_[0]);}
sub do_cmd_minus { join('', &iso_map("shy", ""), $_[0]);}
sub do_cmd_copyright { join('', &iso_map("copy", ""), $_[0]);}
sub do_cmd_pounds { join('', &iso_map("pound", ""), $_[0]);}
sub do_cmd_cents { join('', &iso_map("cent", ""), $_[0]);}
sub do_cmd_lnot { join('', &iso_map("not", ""), $_[0]);}
sub do_cmd_cdot { join('', &iso_map("middot", ""), $_[0]);}
sub do_cmd_micron { join('', &iso_map("micro", ""), $_[0]);}

# Entity-style name => the 8-bit character occupying that letter's slot in
# ISO-8859-13.  The literals appear as whatever glyph a Latin-1 viewer shows
# for the byte (e.g. the byte for A-ogonek displays as A-grave).
%iso_8859_13_character_map
     = (
       'AElig', '¯', 	# capital AE diphthong (ligature); was wrongly the small-ae byte
       'Amacr', 'Â', 	# capital A, with macron
       'Aogon', 'À', 	# capital A, with ogonek
       'Aring', 'Å', 	# capital A, with ring
       'Auml' , 'Ä', 	# capital A, dieresis or umlaut mark
       'Cacute','Ã', 	# capital C, acute accent
       'Ccaron','È', 	# capital C, with caron
       'Eacute','É', 	# capital E, acute accent
       'Edot' , 'Ë', 	# capital E, with dot
       'Emacr', 'Ç', 	# capital E, with macron
       'Eogon', 'Æ', 	# capital E, with ogonek
#      'Euml' , 'Ë', 	# disabled: ISO-8859-13 has no E-dieresis; this byte is E-dot
       'Gcedil','Ì', 	# capital G, cedilla
       'Imacr', 'Î', 	# capital I, with macron
       'Iogon', 'Á', 	# capital I, with ogonek
       'Kcedil','Í', 	# capital K, cedilla
       'Lcedil','Ï', 	# capital L, cedilla
       'Lstrok','Ù', 	# capital L, with stroke
       'Nacute','Ñ', 	# capital N, acute accent
       'Ncedil','Ò', 	# capital N, cedilla
       'Oacute','Ó', 	# capital O, acute accent
       'Omacr', 'Ô', 	# capital O, with macron
       'Oslash','¨', 	# capital O, slash
       'Otilde','Õ', 	# capital O, tilde
       'Ouml',  'Ö', 	# capital O, dieresis or umlaut mark
       'Rcedil','ª', 	# capital R, cedil accent
       'Sacute','Ú', 	# capital S, acute accent
       'Scaron','Ð', 	# capital S, caron accent
       'Umacr', 'Û', 	# capital U, with macron
       'Uogon', 'Ø', 	# capital U, with ogonek
       'Uuml' , 'Ü', 	# capital U, dieresis or umlaut mark
       'Zacute','Ê', 	# capital Z, acute accent
       'Zcaron','Þ', 	# capital Z, caron accent
       'Zdot' , 'Ý', 	# capital Z, with dot

       'aelig', '¿', 	# small ae diphthong (ligature)
       'amacr', 'â', 	# small a, with macron
       'amp',   '&', 	# ampersand
       'aogon', 'à', 	# small a, with ogonek
       'aring', 'å', 	# small a, ring
       'auml' , 'ä', 	# small a, dieresis or umlaut mark
       'cacute','ã', 	# small c, acute accent
       'ccaron','è', 	# small c, with caron (was mis-keyed as 'cmacr')
       'eacute','é', 	# small e, acute accent
       'edot'  ,'ë', 	# small e, with dot
       'emacr', 'ç', 	# small e, with macron
       'eogon', 'æ', 	# small e, with ogonek
       'gcedil','ì', 	# small g, cedilla
       'gt',    '>',	# greater than
       'imacr', 'î', 	# small i, with macron
       'iogon', 'á', 	# small i, with ogonek
       'kcedil','í', 	# small k, cedilla
       'lcedil','ï', 	# small l, cedilla
       'lstrok','ù', 	# small l, with stroke (was wrongly the n-acute byte)
       'lt',    '<',	# less than
       'nacute','ñ', 	# small n, acute accent
       'ncedil','ò', 	# small n, cedilla
       'oacute','ó', 	# small o, acute accent
       'omacr', 'ô', 	# small o, with macron
       'oslash','¸', 	# small o, slash
       'otilde','õ', 	# small o, tilde
       'ouml' , 'ö', 	# small o, dieresis or umlaut mark
       'rcedil','º', 	# small r, cedil accent
       'sacute','ú', 	# small s, acute accent
       'scaron','ð', 	# small s, with caron
       'szlig', 'ß', 	# small sharp s, German (sz ligature)
       'umacr', 'û', 	# small u, with macron
       'uogon', 'ø', 	# small u, with ogonek
       'uuml' , 'ü', 	# small u, dieresis or umlaut mark
       'zacute','ê', 	# small z, acute accent
       'zdot'  ,'ý', 	# small z, with dot
       'zcaron','þ', 	# small z, with caron
       'quot',  '"',	# double quote

# These have HTML mnemonic names for HTML 4.0 ...
       'nbsp',   ' ', 	# non-breaking space
       'rdquo',  '¡', 	# double quote, right
       'cent',   '¢', 	# cents sign
       'pound',  '£', 	# pound sign
       'curren', '¤', 	# currency sign
       'dbquo',  '¥', 	# double quote, low ("dbquo" is the name do_cmd_quotedblbase asks for)
       'brvbar', '¦', 	# broken vertical bar
       'sect',   '§', 	# section mark
       'copy',   '©', 	# copyright mark
       'laquo',  '«', 	# left guillemet
       'raquo',  '»', 	# right guillemet
       'not',    '¬', 	# logical not
       'shy',    '­', 	# soft hyphen
       'reg',    '®', 	# registered mark
       'deg',    '°', 	# degree sign (requested by do_cmd_mathdegree)
       'plusmn', '±', 	# plus-or-minus
       'sup1',   '¹', 	# superscript 1
       'sup2',   '²', 	# superscript 2
       'sup3',   '³', 	# superscript 3
       'ldquo',  '´', 	# double quote, left
       'micro',  'µ', 	# micro sign
       'para',   '¶', 	# paragraph mark
       'middot', '·', 	# centered dot
       'frac14', '¼', 	# one quarter
       'frac12', '½', 	# one half
       'frac34', '¾', 	# three quarters
       'times',  '×', 	# multiplication sign
       'divide', '÷', 	# division sign
       'rsquo',  'ÿ', 	# single quote, right

# These are character types without arguments ...
       'grave' , "`",
       'circ'  , '^',
       'tilde' , '~',
       'dot'   , '.'
);

# 8-bit character => LaTeX source that produces it.  Where two candidates
# exist, the text-mode macro is kept as a disabled line above the active
# \ensuremath form.
%iso_8859_13_character_map_inv
     = (
       '^' , '\\^{}',
       '>' , '\\ensuremath{>}',
       '<' , '\\ensuremath{<}',
       '&' , '\\&',
       '~' , '\\~{}',
       ' ' , '\\nobreakspace{}',
       '¡' , "{''}",
       '¢' , '\\textcent{}',
       '£' , '\\pounds{}',
       '¤' , '\\textcurrency{}',
       '¥' , '\\quotedblbase{}',
       '¦' , '\\textbrokenbar{}',
       '§' , '\\S{}',
       '¨' , '\\O{}',
       '©' , '\\copyright{}',
       'ª' , '\\c{R}',
       '«' , '\\guillemotleft{}',
       '¬' , '\\lnot{}',
       '­' , '\\-',
#      '®' , '\\textregistered{}',
       '®' , '\\ensuremath{\\circledR}',
       '¯' , '\\AE{}',
#      '°' , '\\mathdegree{}',
       '°' , '\\ensuremath{^{\\circ}}',
       '±' , '\\ensuremath{\\pm}',
#      '²' , '\\mathtwosuperior{}',
       '²' , '\\ensuremath{^{2}}',
#      '³' , '\\maththreesuperior{}',
       '³' , '\\ensuremath{^{3}}',
       '´' , '{``}',
       'µ' , '\\ensuremath{\\mu}',
       '¶' , '\\P{}',
#      '·' , '\\textperiodcentered{}',
       '·' , '\\ensuremath{\\cdot{}}',
       '¸' , '\\o{}',
#      '¹' , '\\mathonesuperior{}',
       '¹' , '\\ensuremath{^{1}}',
       'º' , '\\c{r}',
       '»' , '\\guillemotright{}',
#      '¼' , '\\textonequarter{}',
       '¼' , '\\ensuremath{\\frac{1}{4}}',
#      '½' , '\\textonehalf{}',
       '½' , '\\ensuremath{\\frac{1}{2}}',
#      '¾' , '\\textthreequarters{}',
       '¾' , '\\ensuremath{\\frac{3}{4}}',
       '¿' , '\\ae{}',
       'À' , '\\k{A}',
       'Á' , '\\k{I}',
       'Â' , '\\={A}',
       'Ã' , '\\\'{C}',
       'Ä' , '\\"{A}',
       'Å' , '\\r{A}',
       'Æ' , '\\k{E}',
       'Ç' , '\\={E}',
       'È' , '\\v{C}',
       'É' , '\\\'{E}',
       'Ê' , '\\\'{Z}',
       'Ë' , '\\.{E}',
       'Ì' , '\\c{G}',
       'Í' , '\\c{K}',
       'Î' , '\\={I}',
       'Ï' , '\\c{L}',
       'Ð' , '\\v{S}',
       'Ñ' , '\\\'{N}',
       'Ò' , '\\c{N}',
       'Ó' , '\\\'{O}',
       'Ô' , '\\={O}',
       'Õ' , '\\~{O}',
       'Ö' , '\\"{O}',
       '×' , '\\ensuremath{\\times}',
       'Ø' , '\\k{U}',
       'Ù' , '\\L{}',
       'Ú' , '\\\'{S}',
       'Û' , '\\={U}',
       'Ü' , '\\"{U}',
       'Ý' , '\\.{Z}',
       'Þ' , '\\v{Z}',
       'ß' , '\\ss{}',
       'à' , '\\k{a}',
       'á' , '\\k{\\i}',
       'â' , '\\={a}',
       'ã' , '\\\'{c}',
       'ä' , '\\"{a}',
       'å' , '\\r{a}',
       'æ' , '\\k{e}',
       'ç' , '\\={e}',
       'è' , '\\v{c}',
       'é' , '\\\'{e}',
       'ê' , '\\\'{z}',
       'ë' , '\\.{e}',
       'ì' , '\\c{g}',
       'í' , '\\c{k}',
       'î' , '\\={\\i}',
       'ï' , '\\c{l}',
       'ð' , '\\v{s}',
       'ñ' , '\\\'{n}',
       'ò' , '\\c{n}',
       'ó' , '\\\'{o}',
       'ô' , '\\={o}',
       'õ' , '\\~{o}',
       'ö' , '\\"{o}',
       '÷' , '\\ensuremath{\\div}',
       'ø' , '\\k{u}',
       'ù' , '\\l{}',
       'ú' , '\\\'{s}',
       'û' , '\\={u}',
       'ü' , '\\"{u}',
       'ý' , '\\.{z}',
       'þ' , '\\v{z}',
       'ÿ' , "{'}",
);

1;