### File: latin8.pl
### Version 0.1,  September 10, 1999
### Written by Ross Moore
###
### ISO_8859-14 encoding information
###
### based on latin1.pl

## Copyright (C) 1999 by Ross Moore
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

#
# NOTE(review): every non-ASCII literal in this file is meant to be ONE byte
# in the ISO-8859-14 code page.  Viewed as Latin-1 the glyphs look unrelated
# to their names (e.g. 'Bdot' shows as an inverted exclamation mark); that is
# expected.  Do not re-encode this file -- confirm your editor keeps the
# original 8-bit encoding.
#
# All variables below are deliberately package globals (no 'my'):
# presumably they are read by the program that loads this file -- TODO confirm.
#

$CHARSET = "iso-8859-14";
$INPUTENC='latin8'; # empty implies 'latin1'

#Character ranges for lower --> upper-case conversion
# The two strings are position-parallel: the n-th octal code (or range) in
# $sclower corresponds to the n-th code (or range) in $scupper.

$sclower = "\\242\\245\\253\\270\\272\\274\\261\\263\\265\\271\\277\\276\\340-\\376\\377";
$scupper = "\\241\\244\\246\\250\\252\\254\\260\\262\\264\\267\\273\\275\\300-\\336\\257";

#extra pattern match preceding lower --> upper-case conversion
$scextra = "s/\\337/ss/g";

# Decimal character code => replacement text; 223 is the same code as the
# octal 337 that $scextra rewrites to "ss".
%extra_small_caps = ( '223' , 'ss' );

# Decimal code of a lower-case letter => decimal code of its upper-case
# counterpart.  These are the same pairs that $sclower/$scupper express
# in octal.
%low_entities = (
    '224' => '192',
    '225' => '193',
    '226' => '194',
    '227' => '195',
    '228' => '196',
    '229' => '197',
    '230' => '198',
    '231' => '199',
    '232' => '200',
    '233' => '201',
    '234' => '202',
    '235' => '203',
    '236' => '204',
    '237' => '205',
    '238' => '206',
    '239' => '207',
    '240' => '208',
    '241' => '209',
    '242' => '210',
    '243' => '211',
    '244' => '212',
    '245' => '213',
    '246' => '214',
    '247' => '215',
    '248' => '216',
    '249' => '217',
    '250' => '218',
    '251' => '219',
    '252' => '220',
    '253' => '221',
    '254' => '222',
    '255' => '175',
    '162' => '161',
    '165' => '164',
    '171' => '166',
    '184' => '168',
    '186' => '170',
    '188' => '172',
    '177' => '176',
    '179' => '178',
    '181' => '180',
    '185' => '183',
    '191' => '187',
    '190' => '189',
);

# Command handlers.  Each one returns the result of &iso_map() for a fixed
# (name, kind) pair, immediately followed by its first argument $_[0].
# &iso_map is not defined in this file; it must be provided by the program
# that loads it.  (Presumably each do_cmd_X handles the LaTeX command \X --
# TODO confirm against the loader.)

sub do_cmd_ss { join('', &iso_map("sz", "lig"), $_[0]);}
sub do_cmd_textregistered { join('', &iso_map("reg", ""), $_[0]);}
sub do_cmd_P { join('', &iso_map("para", ""), $_[0]);}
sub do_cmd_S { join('', &iso_map("sect", ""), $_[0]);}
#sub do_cmd_minus { join('', &iso_map("shy", ""), $_[0]);}
sub do_cmd_copyright { join('', &iso_map("copy", ""), $_[0]);}
sub do_cmd_pounds { join('', &iso_map("pound", ""), $_[0]);}

# Entity-style name => the single character in this encoding.
#
# There is intentionally NO 'eth' entry: the byte that Latin-1 uses for eth
# is w-circumflex here (see 'wcirc' below and its inverse mapping to
# '\^{w}'), so mapping 'eth' to it would emit the wrong letter.
%iso_8859_14_character_map = (
    'AElig'  => 'Æ',  # capital AE diphthong (ligature)
    'Aacute' => 'Á',  # capital A, acute accent
    'Acirc'  => 'Â',  # capital A, circumflex accent
    'Agrave' => 'À',  # capital A, grave accent
    'Aring'  => 'Å',  # capital A, ring
    'Atilde' => 'Ã',  # capital A, tilde
    'Auml'   => 'Ä',  # capital A, dieresis or umlaut mark
    'Bdot'   => '¡',  # capital B, with dot
    'Ccedil' => 'Ç',  # capital C, cedilla
    'Cdot'   => '¤',  # capital C, with dot
    'Ddot'   => '¦',  # capital D, with dot
    'Eacute' => 'É',  # capital E, acute accent
    'Ecirc'  => 'Ê',  # capital E, circumflex accent
    'Egrave' => 'È',  # capital E, grave accent
    'Euml'   => 'Ë',  # capital E, dieresis or umlaut mark
    'Fdot'   => '°',  # capital F, with dot
    'Gdot'   => '²',  # capital G, with dot
    'Iacute' => 'Í',  # capital I, acute accent
    'Icirc'  => 'Î',  # capital I, circumflex accent
    'Igrave' => 'Ì',  # capital I, grave accent
    'Iuml'   => 'Ï',  # capital I, dieresis or umlaut mark
    'Mdot'   => '´',  # capital M, with dot
    'Ntilde' => 'Ñ',  # capital N, tilde
    'Oacute' => 'Ó',  # capital O, acute accent
    'Ocirc'  => 'Ô',  # capital O, circumflex accent
    'Ograve' => 'Ò',  # capital O, grave accent
    'Oslash' => 'Ø',  # capital O, slash
    'Otilde' => 'Õ',  # capital O, tilde
    'Ouml'   => 'Ö',  # capital O, dieresis or umlaut mark
    'Pdot'   => '·',  # capital P, with dot
    'Sdot'   => '»',  # capital S, with dot
    'Tdot'   => '×',  # capital T, with dot
    'Uacute' => 'Ú',  # capital U, acute accent
    'Ucirc'  => 'Û',  # capital U, circumflex accent
    'Ugrave' => 'Ù',  # capital U, grave accent
    'Uuml'   => 'Ü',  # capital U, dieresis or umlaut mark
    'Wacute' => 'ª',  # capital W, acute accent
    'Wcirc'  => 'Ð',  # capital W, circumflex accent
    'Wgrave' => '¨',  # capital W, grave accent
    'Wuml'   => '½',  # capital W, dieresis or umlaut mark
    'Yacute' => 'Ý',  # capital Y, acute accent
    'Ygrave' => '¬',  # capital Y, grave accent
    'Ycirc'  => 'Þ',  # capital Y, circumflex accent
    'Yuml'   => '¯',  # capital Y, dieresis or umlaut mark
#
    'aacute' => 'á',  # small a, acute accent
    'acirc'  => 'â',  # small a, circumflex accent
    'aelig'  => 'æ',  # small ae diphthong (ligature)
    'agrave' => 'à',  # small a, grave accent
    'amp'    => '&',  # ampersand
    'aring'  => 'å',  # small a, ring
    'atilde' => 'ã',  # small a, tilde
    'auml'   => 'ä',  # small a, dieresis or umlaut mark
    'bdot'   => '¢',  # small b, with dot
    'cdot'   => '¥',  # small c, with dot
    'ccedil' => 'ç',  # small c, cedilla
    'ddot'   => '«',  # small d, with dot
    'eacute' => 'é',  # small e, acute accent
    'ecirc'  => 'ê',  # small e, circumflex accent
    'egrave' => 'è',  # small e, grave accent
    'euml'   => 'ë',  # small e, dieresis or umlaut mark
    'fdot'   => '±',  # small f, with dot
    'gdot'   => '³',  # small g, with dot
    'gt'     => '>',  # greater than
    'iacute' => 'í',  # small i, acute accent
    'icirc'  => 'î',  # small i, circumflex accent
    'igrave' => 'ì',  # small i, grave accent
    'iuml'   => 'ï',  # small i, dieresis or umlaut mark
    'lt'     => '<',  # less than
    'mdot'   => 'µ',  # small m, with dot
    'ntilde' => 'ñ',  # small n, tilde
    'oacute' => 'ó',  # small o, acute accent
    'ocirc'  => 'ô',  # small o, circumflex accent
    'ograve' => 'ò',  # small o, grave accent
    'oslash' => 'ø',  # small o, slash
    'otilde' => 'õ',  # small o, tilde
    'ouml'   => 'ö',  # small o, dieresis or umlaut mark
    'pdot'   => '¹',  # small p, with dot
    'sdot'   => '¿',  # small s, with dot
    'szlig'  => 'ß',  # small sharp s, German (sz ligature)
    'tdot'   => '÷',  # small t, with dot
    'uacute' => 'ú',  # small u, acute accent
    'ucirc'  => 'û',  # small u, circumflex accent
    'ugrave' => 'ù',  # small u, grave accent
    'uuml'   => 'ü',  # small u, dieresis or umlaut mark
    'wacute' => 'º',  # small w, acute accent
    'wcirc'  => 'ð',  # small w, circumflex accent
    'wgrave' => '¸',  # small w, grave accent
    'wuml'   => '¾',  # small w, dieresis or umlaut mark
    'yacute' => 'ý',  # small y, acute accent
    'ycirc'  => 'þ',  # small y, circumflex accent
    'ygrave' => '¼',  # small y, grave accent
    'yuml'   => 'ÿ',  # small y, dieresis or umlaut mark
    'quot'   => '"',  # double quote

# These have HTML mnemonic names for HTML 4.0 ...
    'nbsp'   => ' ',  # non-breaking space
    'pound'  => '£',  # pound sign
    'sect'   => '§',  # section mark
    'copy'   => '©',  # copyright mark
    'shy'    => '­',  # soft hyphen (invisible in most editors)
    'reg'    => '®',
    'para'   => '¶',  # paragraph mark

# These are character types without arguments ...
    'grave'  => "`",
);

# Single character in this encoding => LaTeX source that produces it.
# Commented-out entries are alternatives that are not in effect.
%iso_8859_14_character_map_inv = (
    '^' => '\\^{}',
    '>' => '\\ensuremath{>}',
    '<' => '\\ensuremath{<}',
    '&' => '\\&',
    '~' => '\\~{}',
    ' ' => '\\nobreakspace{}',
    '¡' => '\\.{B}',
    '¢' => '\\.{b}',
    '£' => '\\pounds{}',
    '¤' => '\\.{C}',
    '¥' => '\\.{c}',
    '¦' => '\\.{D}',
    '§' => '\\S{}',
    '¨' => '\\`{W}',
    '©' => '\\copyright{}',
    'ª' => '\\\'{W}',
    '«' => '\\.{d}',
    '¬' => '\\`{Y}',
    '­' => '\\-',
#   '®' => '\\textregistered{}',
    '®' => '\\ensuremath{\\circledR}',
    '¯' => '\\"{Y}',
    '°' => '\\.{F}',
    '±' => '\\.{f}',
    '²' => '\\.{G}',
    '³' => '\\.{g}',
    '´' => '\\.{M}',
    'µ' => '\\.{m}',
    '¶' => '\\P{}',
    '·' => '\\.{P}',
    '¸' => '\\`{w}',
    '¹' => '\\.{p}',
    'º' => '\\\'{w}',
    '»' => '\\.{S}',
    '¼' => '\\`{y}',
    '½' => '\\"{W}',
    '¾' => '\\"{w}',
    '¿' => '\\.{s}',
    'À' => '\\`{A}',
    'Á' => '\\\'{A}',
    'Â' => '\\^{A}',
    'Ã' => '\\~{A}',
    'Ä' => '\\"{A}',
#   'Å' => '\\AA{}',
    'Å' => '\\r{A}',
    'Æ' => '\\AE{}',
    'Ç' => '\\c{C}',
    'È' => '\\`{E}',
    'É' => '\\\'{E}',
    'Ê' => '\\^{E}',
    'Ë' => '\\"{E}',
    'Ì' => '\\`{I}',
    'Í' => '\\\'{I}',
    'Î' => '\\^{I}',
    'Ï' => '\\"{I}',
    'Ð' => '\\^{W}',
    'Ñ' => '\\~{N}',
    'Ò' => '\\`{O}',
    'Ó' => '\\\'{O}',
    'Ô' => '\\^{O}',
    'Õ' => '\\~{O}',
    'Ö' => '\\"{O}',
    '×' => '\\.{T}',
    'Ø' => '\\O{}',
    'Ù' => '\\`{U}',
    'Ú' => '\\\'{U}',
    'Û' => '\\^{U}',
    'Ü' => '\\"{U}',
    'Ý' => '\\\'{Y}',
    'Þ' => '\\^{Y}',
    'ß' => '\\ss{}',
    'à' => '\\`{a}',
    'á' => '\\\'{a}',
    'â' => '\\^{a}',
    'ã' => '\\~{a}',
    'ä' => '\\"{a}',
#   'å' => '\\aa{}',
    'å' => '\\r{a}',
    'æ' => '\\ae{}',
    'ç' => '\\c{c}',
    'è' => '\\`{e}',
    'é' => '\\\'{e}',
    'ê' => '\\^{e}',
    'ë' => '\\"{e}',
    'ì' => '\\`{\\i}',
    'í' => '\\\'{\\i}',
    'î' => '\\^{\\i}',
    'ï' => '\\"{\\i}',
    'ð' => '\\^{w}',
    'ñ' => '\\~{n}',
    'ò' => '\\`{o}',
    'ó' => '\\\'{o}',
    'ô' => '\\^{o}',
    'õ' => '\\~{o}',
    'ö' => '\\"{o}',
    '÷' => '\\.{t}',
    'ø' => '\\o{}',
    'ù' => '\\`{u}',
    'ú' => '\\\'{u}',
    'û' => '\\^{u}',
    'ü' => '\\"{u}',
    'ý' => '\\\'{y}',
    'þ' => '\\^{y}',
    'ÿ' => '\\"{y}'
);

1;