### File: unicode.pl
### Version 0.2,  December 12, 1997
### Written by Ross Moore
###
### ISO_10646 encoding information
###
###
### Copyright (C) 1997 by Ross Moore
###
### Version 0.2, December 12, 1997
### added lower --> upper-case conversions
###
### Version 0.1, October 15, 1997
### information extracted from i18n.pl
### contains...
###
### Language definitions for HTML 2.1 (I18N, Internationalization)
### Written by Marcus E. Hennecke
### Version 0.3,  March 6, 1996
### Version 0.2,  February 2, 1996

## Copyright (C) 1995 by Marcus E. Hennecke
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# Save previous settings before loading latin1.pl; $CHARSET is restored
# from the saved copy right after the require.
$PREV_CHARSET = $CHARSET if ($CHARSET);
$PREV_charset = $charset if ($charset);

require("$LATEX2HTMLVERSIONS${dd}latin1.pl");

# Keep the caller's character set when there was one; otherwise this file
# declares ISO 10646 itself.
$CHARSET = $PREV_CHARSET if ($PREV_CHARSET);
$CHARSET = "iso-10646" unless ($PREV_CHARSET);

# Unless UTF output is disabled ($NO_UTF), the emitted charset is 'utf-8';
# with $NO_UTF the previous lowercase charset wins, falling back to $CHARSET.
$charset = ($NO_UTF ? ($PREV_charset ? $PREV_charset : $CHARSET) : 'utf-8');

# Filled by make_unicode_map, read by convert_to_unicode.
%unicode_table = ();

# This creates a table of translations to Unicode &#bignumber; entities,
# used for converting embedded 8-bit font characters in the range \200-\377
#
# The source table is the package hash named after $PREV_CHARSET with every
# '-' turned into '_' and '_character_map' appended.  For each of its
# (entity name => value) pairs whose value looks like a numeric reference
# (matches /\#\d+;/), %unicode_table{value} is set to the entry that
# %iso_10646_character_map holds for the same entity name.
#
# Does nothing when $PREV_CHARSET is unset.  Entity names are visited in
# sorted order, so if two names share one value the alphabetically last
# name determines the stored translation.
sub make_unicode_map {
    return unless ($PREV_CHARSET);
    print "\n*** Constructing conversion $PREV_CHARSET -> Unicode ***\n";

    (my $map_name = $PREV_CHARSET) =~ tr/-/_/;
    $map_name .= '_character_map';

    # Look the hash up by name instead of string-eval'ing code built from
    # $PREV_CHARSET; an unknown name simply yields an empty hash.
    no strict 'refs';
    my $source_map = \%{$map_name};

    for my $ent (sort keys %$source_map) {
        my $key = $source_map->{$ent};
        $unicode_table{$key} = $iso_10646_character_map{$ent}
            if ($key =~ /\#\d+;/);
        print "\n$key : $ent : ".$unicode_table{$key} if ($VERBOSITY > 2);
    }
}

# Rewrites, in place, every byte in the range \200-\377 of the first
# argument.  MRO: the argument is modified by reference through $_[0].
# Each such byte becomes the %unicode_table entry stored under its
# "&#<ord>;" form, or that "&#<ord>;" string itself when the table has no
# (true) entry for it.
#
# Returns $_[0] untouched when $NO_UTF is set and $USE_UTF is not;
# otherwise the return value is that of the substitution operator.
sub convert_to_unicode {
    return($_[0]) if ($NO_UTF && !$USE_UTF);
    $_[0] =~ s{([\200-\377])}{
        my $ref = "\&#".ord($1).";";
        $unicode_table{$ref} || $ref
    }eg;
}

#
# Handlers for the LaTeX commands \oe \OE \l \L \ng: each prepends the
# result of iso_map (defined outside this file) to the remaining text.
#

sub do_cmd_oe { join('', &iso_map("oe", "lig"), $_[0]);}
sub do_cmd_OE { join('', &iso_map("OE", "lig"), $_[0]);}
sub do_cmd_l { join('', &iso_map("l", "strok"), $_[0]);}
sub do_cmd_L { join('', &iso_map("L", "strok"), $_[0]);}
sub do_cmd_ng { join('', &iso_map("eng", ""), $_[0]);}

# inhibit later wrapping for an image
$raw_arg_cmds{'l'} = $raw_arg_cmds{'L'} = -1 ;
$raw_arg_cmds{'oe'} = $raw_arg_cmds{'OE'} = -1 ;

# this maps lowercase characters to non-entity equivalents
# e.g.
# german sharp-s --> 'ss'
%extra_small_caps = (%iso_8859_xsc);

# this maps lowercase characters to their uppercase equivalents
# (decimal code point of the lowercase form => code point of the uppercase)
%low_entities = (
    %iso_8859_low_ents,
    '255', '376',
    '257', '256',   '259', '258',   '261', '260',   '263', '262',
    '265', '264',   '267', '266',   '269', '268',   '271', '270',
    '273', '272',   '275', '274',   '277', '276',   '279', '278',
    '281', '280',   '283', '282',   '285', '284',   '287', '286',
    '289', '288',   '291', '290',   '293', '292',   '295', '294',
    '297', '296',   '299', '298',   '301', '300',   '303', '302',
    '305', '304',   '307', '306',   '309', '308',   '311', '310',
    '314', '313',   '316', '315',   '318', '317',   '320', '319',
    '322', '321',   '324', '323',   '326', '325',   '328', '327',
    '331', '330',   '333', '332',   '335', '334',   '337', '336',
    '339', '338',   '341', '340',   '343', '342',   '345', '344',
    '347', '346',   '349', '348',   '351', '350',   '353', '352',
    '355', '354',   '357', '356',   '359', '358',   '361', '360',
    '363', '362',   '365', '364',   '367', '366',   '369', '368',
    '371', '370',   '373', '372',   '375', '374',
    '378', '377',   '380', '379',   '382', '381',
    # Greek alphabet
    '945', '913',   '946', '914',   '947', '915',   '948', '916',
    '949', '917',   '950', '918',   '951', '919',   '952', '920',
    '953', '921',   '954', '922',   '955', '923',   '956', '924',
    '957', '925',   '958', '926',   '959', '927',   '960', '928',
    '961', '929',
#   '962', '930',   (deliberately disabled)
    '963', '931',   '964', '932',   '965', '933',   '966', '934',
    '967', '935',   '968', '936',   '969', '937',
);

# Entity name => character, extending the ISO 8859-1 table with Latin
# Extended-A, spacing modifiers, Greek, Hebrew, punctuation, letter-like
# symbols, arrows, mathematical operators and miscellaneous symbols.
#
# NOTE(review): the values below are literal characters, whereas
# convert_to_unicode and make_unicode_map in this file work with '&#NNN;'
# numeric references -- confirm which representation the callers expect.
%iso_10646_character_map = (
    %iso_8859_1_character_map,
    'Amacr', 'Ā',      # capital A, macron accent
    'amacr', 'ā',      # small a, macron accent
    'Abreve', 'Ă',     # capital A, breve accent
    'abreve', 'ă',     # small a, breve accent
    'Aogon', 'Ą',      # capital A, ogonek accent
    'aogon', 'ą',      # small a, ogonek accent
    'Cacute', 'Ć',     # capital C, acute accent
    'cacute', 'ć',     # small c, acute accent
    'Ccirc', 'Ĉ',      # capital C, circumflex accent
    'ccirc', 'ĉ',      # small c, circumflex accent
    'Cdot', 'Ċ',       # capital C, dot above
    'cdot', 'ċ',       # small c, dot above
    'Ccaron', 'Č',     # capital C, caron accent
    'ccaron', 'č',     # small c, caron accent
    'Dcaron', 'Ď',     # capital D, caron accent
    'dcaron', 'ď',     # small d, caron accent
    'Dstrok', 'Đ',     # capital D, stroke
    'dstrok', 'đ',     # small d, stroke
    'Emacr', 'Ē',      # capital E, macron accent
    'emacr', 'ē',      # small e, macron accent
    'Ebreve', 'Ĕ',     # capital E, breve accent
    'ebreve', 'ĕ',     # small e, breve accent
    'Edot', 'Ė',       # capital E, dot above
    'edot', 'ė',       # small e, dot above
    'Eogon', 'Ę',      # capital E, ogonek accent
    'eogon', 'ę',      # small e, ogonek accent
    'Ecaron', 'Ě',     # capital E, caron accent
    'ecaron', 'ě',     # small e, caron accent
    'Gcirc', 'Ĝ',      # capital G, circumflex accent
    'gcirc', 'ĝ',      # small g, circumflex accent
    'Gbreve', 'Ğ',     # capital G, breve accent
    'gbreve', 'ğ',     # small g, breve accent
    'Gdot', 'Ġ',       # capital G, dot above
    'gdot', 'ġ',       # small g, dot above
    'Gcedil', 'Ģ',     # capital G, cedilla accent
    'gcedil', 'ģ',     # small g, cedilla accent
    'Hcirc', 'Ĥ',      # capital H, circumflex accent
    'hcirc', 'ĥ',      # small h, circumflex accent
    'Hstrok', 'Ħ',     # capital H, stroke
    'hstrok', 'ħ',     # small h, stroke
    'Itilde', 'Ĩ',     # capital I, tilde accent
    'itilde', 'ĩ',     # small i, tilde accent
    'Imacr', 'Ī',      # capital I, macron accent
    'imacr', 'ī',      # small i, macron accent
    'Ibreve', 'Ĭ',     # capital I, breve accent
    'ibreve', 'ĭ',     # small i, breve accent
    'Iogon', 'Į',      # capital I, ogonek accent
    'iogon', 'į',      # small i, ogonek accent
    'Idot', 'İ',       # capital I, dot above
    'inodot', 'ı',     # small i, no dot
    'IJlig', 'IJ',     # capital IJ ligature
    'ijlig', 'ij',     # small ij ligature
    'Jcirc', 'Ĵ',      # capital J, circumflex accent
    'jcirc', 'ĵ',      # small j, circumflex accent
    'Kcedil', 'Ķ',     # capital K, cedilla accent
    'kcedil', 'ķ',     # small k, cedilla accent
    'kgreen', 'ĸ',     # small kra (Greenlandic)
    'Lacute', 'Ĺ',     # capital L, acute accent
    'lacute', 'ĺ',     # small l, acute accent
    'Lcedil', 'Ļ',     # capital L, cedilla accent
    'lcedil', 'ļ',     # small l, cedilla accent
    'Lcaron', 'Ľ',     # capital L, caron accent
    'lcaron', 'ľ',     # small l, caron accent
    'Lmiddot', 'Ŀ',    # capital L, middle dot
    'lmiddot', 'ŀ',    # small l, middle dot
    'Lstrok', 'Ł',     # capital L, stroke
    'lstrok', 'ł',     # small l, stroke
    'Nacute', 'Ń',     # capital N, acute accent
    'nacute', 'ń',     # small n, acute accent
    'Ncedil', 'Ņ',     # capital N, cedilla accent
    'ncedil', 'ņ',     # small n, cedilla accent
    'Ncaron', 'Ň',     # capital N, caron accent
    'ncaron', 'ň',     # small n, caron accent
    'napo', 'ʼn',      # small n, preceded by apostrophe
    'Eng', 'Ŋ',        # capital Eng (Sami)
    'eng', 'ŋ',        # small eng (Sami)
    'Omacr', 'Ō',      # capital O, macron accent
    'omacr', 'ō',      # small o, macron accent
    'Obreve', 'Ŏ',     # capital O, breve accent
    'obreve', 'ŏ',     # small o, breve accent
    'Odblac', 'Ő',     # capital O, double acute accent
    'odblac', 'ő',     # small o, double acute accent
    'OElig', 'Œ',      # capital OE ligature
    'oelig', 'œ',      # small oe ligature
    'Racute', 'Ŕ',     # capital R, acute accent
    'racute', 'ŕ',     # small r, acute accent
    'Rcedil', 'Ŗ',     # capital R, cedilla accent
    'rcedil', 'ŗ',     # small r, cedilla accent
    'Rcaron', 'Ř',     # capital R, caron accent
    'rcaron', 'ř',     # small r, caron accent
    'Sacute', 'Ś',     # capital S, acute accent
    'sacute', 'ś',     # small s, acute accent
    'Scirc', 'Ŝ',      # capital S, circumflex accent
    'scirc', 'ŝ',      # small s, circumflex accent
    'Scedil', 'Ş',     # capital S, cedilla accent
    'scedil', 'ş',     # small s, cedilla accent
    'Scaron', 'Š',     # capital S, caron accent
    'scaron', 'š',     # small s, caron accent
    'Tcedil', 'Ţ',     # capital T, cedilla accent
    'tcedil', 'ţ',     # small t, cedilla accent
    'Tcaron', 'Ť',     # capital T, caron accent
    'tcaron', 'ť',     # small t, caron accent
    'Tstrok', 'Ŧ',     # capital T, stroke
    'tstrok', 'ŧ',     # small t, stroke
    'Utilde', 'Ũ',     # capital U, tilde accent
    'utilde', 'ũ',     # small u, tilde accent
    'Umacr', 'Ū',      # capital U, macron accent
    'umacr', 'ū',      # small u, macron accent
    'Ubreve', 'Ŭ',     # capital U, breve accent
    'ubreve', 'ŭ',     # small u, breve accent
    'Uring', 'Ů',      # capital U, ring above
    'uring', 'ů',      # small u, ring above
    'Udblac', 'Ű',     # capital U, double acute accent
    'udblac', 'ű',     # small u, double acute accent
    'Uogon', 'Ų',      # capital U, ogonek accent
    'uogon', 'ų',      # small u, ogonek accent
    'Wcirc', 'Ŵ',      # capital W, circumflex accent
    'wcirc', 'ŵ',      # small w, circumflex accent
    'Ycirc', 'Ŷ',      # capital Y, circumflex accent
    'ycirc', 'ŷ',      # small y, circumflex accent
    'Yuml', 'Ÿ',       # capital Y, diaeresis accent
    'Zacute', 'Ź',     # capital Z, acute accent
    'zacute', 'ź',     # small z, acute accent
    'Zdot', 'Ż',       # capital Z, dot above
    'zdot', 'ż',       # small z, dot above
    'Zcaron', 'Ž',     # capital Z, caron accent
    'zcaron', 'ž',     # small z, caron accent
#
    'fnof', 'ƒ',
#
#    'apos', 'ʼ',      # apostrophe
    'circ', 'ˆ',
    'caron', 'ˇ',
    'breve', '˘',
    'dot', '˙',
    'ring', '˚',
    'ogon', '˛',
    'tilde', '˜',
    'dblac', '˝',
#
    'Alpha', 'Α',
    'Beta', 'Β',
    'Gamma', 'Γ',
    'Delta', 'Δ',
    'Epsilon', 'Ε',
    'Zeta', 'Ζ',
    'Eta', 'Η',
    'Theta', 'Θ',
    'Iota', 'Ι',
    'Kappa', 'Κ',
    'Lambda', 'Λ',
    'Mu', 'Μ',
    'Nu', 'Ν',
    'Xi', 'Ξ',
    'Omicron', 'Ο',
    'Pi', 'Π',
    'Rho', 'Ρ',
#    'Sigmaf', '΢',
    'Sigma', 'Σ',
    'Tau', 'Τ',
    'Upsilon', 'Υ',
    'Phi', 'Φ',
    'Chi', 'Χ',
    'Psi', 'Ψ',
    'Omega', 'Ω',
    'alpha', 'α',
    'beta', 'β',
    'gamma', 'γ',
    'delta', 'δ',
    'epsilon', 'ε',
    'zeta', 'ζ',
    'eta', 'η',
    'theta', 'θ',
    'iota', 'ι',
    'kappa', 'κ',
    'lambda', 'λ',
    'mu', 'μ',
    'nu', 'ν',
    'xi', 'ξ',
    'omicron', 'ο',
    'pi', 'π',
    'rho', 'ρ',
    'sigmaf', 'ς',
    'sigma', 'σ',
    'tau', 'τ',
    'upsilon', 'υ',
    'phi', 'φ',
    'chi', 'χ',
    'psi', 'ψ',
    'omega', 'ω',
    'thetasym', 'ϑ',
    'upsih', 'ϒ',
    'piv', 'ϖ',
#
    'sheva', 'ְ',
    'hatafsegol', 'ֱ',
    'hatafpatah', 'ֲ',
    'hatafqamats', 'ֳ',
    'hiriq', 'ִ',
    'tzere', 'ֵ',
    'segol', 'ֶ',
    'patah', 'ַ',
    'qamats', 'ָ',
    'holam', 'ֹ',
#    '', 'ֺ',
    'qubuts', 'ֻ',
    'dagesh', 'ּ',
    'meteg', 'ֽ',
    'maqaf', '־',
    'rafe', 'ֿ',
    'paseq', '׀',
    'sofpasuq', '׃',
    # NOTE(review): this value is not in the same code range as 'geresh'
    # below -- verify the intended code point for gershayim.
    'gershayim', '׈',
    'doublevav', 'װ',
    'vavyod', 'ױ',
    'doubleyod', 'ײ',
    'geresh', '׳',
#
    'alef', 'א',
    'bet', 'ב',
    'gimel', 'ג',
    'dalet', 'ד',
    'he', 'ה',
    'vav', 'ו',
    'zayin', 'ז',
    'het', 'ח',
    'tet', 'ט',
    'yod', 'י',
    'finalkaf', 'ך',
    'kaf', 'כ',
    'lamed', 'ל',
    'finalmem', 'ם',
    'mem', 'מ',
    'finalnun', 'ן',
    'nun', 'נ',
    'samekh', 'ס',
    'ayin', 'ע',
    'finalpe', 'ף',
    'pe', 'פ',
    'finaltsadi', 'ץ',
    'tsadi', 'צ',
    'qof', 'ק',
    'resh', 'ר',
    'shin', 'ש',
    'tav', 'ת',
#
    'ensp', ' ',
    'emsp', ' ',
    'thinsp', ' ',
    'zwnj', '‌',
    'zwj', '‍',
    'lrm', '‎',
    'rlm', '‏',
    'ndash', '–',
    'mdash', '—',
    'lsquo', '‘',
    'rsquo', '’',
    'sbquo', '‚',
    'ldquo', '“',
    'rdquo', '”',
    'bdquo', '„',
    'dagger', '†',
    'Dagger', '‡',
#
    'bull', '•',
    'hellip', '…',     # horiz ellipsis
    'permil', '‰',     # per mille (per thousand)
    'prime', '′',      # prime; e.g. feet
    'Prime', '″',      # double-prime; e.g. inches
    'lsaquo', '‹',
    'rsaquo', '›',
    'oline', '‾',
    'frasl', '⁄',      # fraction-slash
    'euro', '€',       # Euro sign
#
    'image', 'ℑ',      # black-letter I
    'weierp', '℘',     # Weierstrass P
    'real', 'ℜ',       # black-letter R
    'trade', '™',      # trademark   # NS4(Mac)
    'alefsym', 'ℵ',    # aleph       # NS4(Mac)
#    'aleph', 'ℵ',     # aleph       # NS4(Mac)
#
    'larr', '←',
    'uarr', '↑',
    'rarr', '→',
    'darr', '↓',
    'harr', '↔',
    'crarr', '↵',      # carriage-return arrow
    'lArr', '⇐',
    'uArr', '⇑',
    'rArr', '⇒',
    'dArr', '⇓',
    'hArr', '⇔',
#
    'forall', '∀',
    'part', '∂',
    'exist', '∃',
    'empty', '∅',
    'nabla', '∇',
    'isin', '∈',
    'notin', '∉',
    'ni', '∋',
    'prod', '∏',
    'sum', '∑',
    'minus', '−',
    'lowast', '∗',
    'radic', '√',
    'prop', '∝',
    'infin', '∞',
    'ang', '∠',
    'and', '∧',
    'or', '∨',
    'cap', '∩',
    'cup', '∪',
    'int', '∫',
    'there4', '∴',
    'sim', '∼',
    'cong', '≅',
    'asymp', '≈',
    'ne', '≠',
    'equiv', '≡',
    'le', '≤',
    'ge', '≥',
    'sub', '⊂',
    'sup', '⊃',
    'nsub', '⊄',
    'sube', '⊆',
    'supe', '⊇',
    'oplus', '⊕',
    'otimes', '⊗',
    'perp', '⊥',
    'sdot', '⋅',       # dot operator
#
    'lceil', '⌈',
    'rceil', '⌉',
    'lfloor', '⌊',
    'rfloor', '⌋',
    'lang', '〈',
    'rang', '〉',
#
    'loz', '◊',
#
    'spades', '♠',
    'clubs', '♣',
    'hearts', '♥',
    'diams', '♦'
);

# Build the previous-charset -> Unicode table now that the map above exists.
# Explicit () so the current @_ is not implicitly forwarded.
&make_unicode_map() if ($PREV_CHARSET);

# Character => LaTeX source that produces it; the reverse direction of the
# table above.  Entries without a known LaTeX equivalent are commented out.
%iso_10646_character_map_inv = (
    %iso_8859_1_character_map_inv,
    'Ā', '\\={A}',      'ā', '\\={a}',
    'Ă', '\\u{A}',      'ă', '\\u{a}',
    'Ą', '\\k{A}',      'ą', '\\k{a}',
    'Ć', '\\\'{C}',     'ć', '\\\'{c}',
    'Ĉ', '\\^{C}',      'ĉ', '\\^{c}',
    'Ċ', '\\.{C}',      'ċ', '\\.{c}',
    'Č', '\\v{C}',      'č', '\\v{c}',
    'Ď', '\\v{D}',      'ď', '\\v{d}',
    'Đ', '\\DH{}',      'đ', '\\dh{}',
    'Ē', '\\={E}',      'ē', '\\={e}',
    'Ĕ', '\\u{E}',      'ĕ', '\\u{e}',
    'Ė', '\\.{E}',      'ė', '\\.{e}',
    'Ę', '\\k{E}',      'ę', '\\k{e}',
    'Ě', '\\v{E}',      'ě', '\\v{e}',
    'Ĝ', '\\^{G}',      'ĝ', '\\^{g}',
    'Ğ', '\\u{G}',      'ğ', '\\u{g}',
    'Ġ', '\\.{G}',      'ġ', '\\.{g}',
    'Ģ', '\\c{G}',      'ģ', '\\c{g}',
    'Ĥ', '\\^{H}',      'ĥ', '\\^{h}',
#    'Ħ', '\\?{H}',     # Don't know how in LaTeX
#    'ħ', '\\?{h}',     # Don't know how in LaTeX
    'Ĩ', '\\~{I}',      'ĩ', '\\~{\i}',
    'Ī', '\\={I}',      'ī', '\\={\i}',
    'Ĭ', '\\u{I}',      'ĭ', '\\u{\i}',
    'Į', '\\k{I}',      'į', '\\k{i}',
    'İ', '\\.{I}',      'ı', '\\i{}',
#    'IJ', '\\??',      # Don't know how in LaTeX
#    'ij', '\\??',      # Don't know how in LaTeX
    'Ĵ', '\\^{J}',      'ĵ', '\\^{\j}',
    'Ķ', '\\c{K}',      'ķ', '\\c{k}',
#    'ĸ', '\\??',       # Don't know how in LaTeX
    'Ĺ', '\\\'{L}',     'ĺ', '\\\'{l}',
    'Ļ', '\\c{L}',      'ļ', '\\c{l}',
    'Ľ', '\\v{L}',      'ľ', '\\v{l}',
#    'Ŀ', '\\?{L}',     # Don't know how in LaTeX
#    'ŀ', '\\?{l}',     # Don't know how in LaTeX
    'Ł', '\\L',         'ł', '\\l',
    'Ń', '\\\'{N}',     'ń', '\\\'{n}',
    'Ņ', '\\c{N}',      'ņ', '\\c{n}',
    'Ň', '\\v{N}',      'ň', '\\v{n}',
    'ʼn', '\'n',        # Probably never occurs
#    'Ŋ', '\\??',       # Don't know how in LaTeX
#    'ŋ', '\\??',       # Don't know how in LaTeX
    'Ō', '\\={O}',      'ō', '\\={o}',
    'Ŏ', '\\u{O}',      'ŏ', '\\u{o}',
    'Ő', '\\H{O}',      'ő', '\\H{o}',
    'Œ', '\\OE',        'œ', '\\oe',
    'Ŕ', '\\\'{R}',     'ŕ', '\\\'{r}',
    'Ŗ', '\\c{R}',      'ŗ', '\\c{r}',
    'Ř', '\\v{R}',      'ř', '\\v{r}',
    'Ś', '\\\'{S}',     'ś', '\\\'{s}',
    'Ŝ', '\\^{S}',      'ŝ', '\\^{s}',
    'Ş', '\\c{S}',      'ş', '\\c{s}',
    'Š', '\\v{S}',      'š', '\\v{s}',
    'Ţ', '\\c{T}',      'ţ', '\\c{t}',
    'Ť', '\\v{T}',      'ť', '\\v{t}',
#    'Ŧ', '\\?{T}',     # Don't know how in LaTeX
#    'ŧ', '\\?{t}',     # Don't know how in LaTeX
    'Ũ', '\\~{U}',      'ũ', '\\~{u}',
    'Ū', '\\={U}',      'ū', '\\={u}',
    'Ŭ', '\\u{U}',      'ŭ', '\\u{u}',
    'Ů', '\\r{U}',      'ů', '\\r{u}',
    'Ű', '\\H{U}',      'ű', '\\H{u}',
    'Ų', '\\k{U}',      'ų', '\\k{u}',
    'Ŵ', '\\^{W}',      'ŵ', '\\^{w}',
    'Ŷ', '\\^{Y}',      'ŷ', '\\^{y}',
    'Ÿ', '\\"{Y}',
    'Ź', '\\\'{Z}',     'ź', '\\\'{z}',
    'Ż', '\\.{Z}',      'ż', '\\.{z}',
    'Ž', '\\v{Z}',      'ž', '\\v{z}',
    'ƒ', '\\ensuremath{f}',
    # NOTE(review): \hash for the circumflex modifier looks odd -- confirm.
    'ˆ', '\\hash{}',
    '˜', '\\~{\\phantom{x}}',
    'Α', '\\Alpha ',
    'Β', '\\Beta ',
    'Γ', '\\Gamma ',
    'Δ', '\\Delta ',
    'Ε', '\\Epsilon ',
    'Ζ', '\\Zeta ',
    'Η', '\\Eta ',
    'Θ', '\\Theta ',
    'Ι', '\\Iota ',
    'Κ', '\\Kappa ',
    'Λ', '\\Lambda ',
    'Μ', '\\Mu ',
    'Ν', '\\Nu ',
    'Ξ', '\\Xi ',
    'Ο', '\\Omicron ',
    'Π', '\\Pi ',
    'Ρ', '\\Rho ',
    'Σ', '\\Sigma ',
    'Τ', '\\Tau ',
    'Υ', '\\Upsilon ',
    'Φ', '\\Phi ',
    'Χ', '\\Chi ',
    'Ψ', '\\Psi ',
    'Ω', '\\Omega ',
    'α', '\\alpha ',
    # trailing space added: every other macro here ends in one so that
    # following text cannot fuse with the macro name
    'β', '\\beta ',
    'γ', '\\gamma ',
    'δ', '\\delta ',
    'ε', '\\epsilon ',
    'ζ', '\\zeta ',
    'η', '\\eta ',
    'θ', '\\theta ',
    'ι', '\\iota ',
    'κ', '\\kappa ',
    'λ', '\\lambda ',
    'μ', '\\mu ',
    'ν', '\\nu ',
    'ξ', '\\xi ',
    'ο', '\\omicron ',
    'π', '\\pi ',
    'ρ', '\\rho ',
    'ς', '\\varsigma ',
    'σ', '\\sigma ',
    'τ', '\\tau ',
    'υ', '\\upsilon ',
    'φ', '\\phi ',
    'χ', '\\chi ',
    'ψ', '\\psi ',
    'ω', '\\omega ',
    'ϑ', '\\vartheta ',
    'ϒ', '\\upsilon ',  # this is wrong, but close
    'ϖ', '\\varpi ',
    'א' , '\\alef ',
    'ב' , '\\bet ',
    'ג' , '\\gimel ',
    'ד' , '\\dalet ',
    'ה' , '\\he ',
    'ו' , '\\vav ',
    'ז' , '\\zayin ',
    'ח' , '\\het ',
    'ט' , '\\tet ',
    'י' , '\\yod ',
    'ך' , '\\finalkaf ',
    'כ' , '\\kaf ',
    'ל' , '\\lamed ',
    'ם' , '\\finalmem ',
    'מ' , '\\mem ',
    'ן' , '\\finalnun ',
    'נ' , '\\nun ',
    'ס' , '\\samekh ',
    'ע' , '\\ayin ',
    'ף' , '\\finalpe ',
    'פ' , '\\pe ',
    'ץ' , '\\finaltsadi ',
    'צ' , '\\tsadi ',
    'ק' , '\\qof ',
    'ר' , '\\resh ',
    'ש' , '\\shin ',
    'ת' , '\\tav ',
    ' ', '\\;',
    ' ', '\\>',
    ' ', '\\,',
    '‌', '\\goodbreak{}',
    '‍', '\\nobreak{}',
#    '‎',               # l-r text marker
#    '‏',               # r-l text marker
    '–', '{--}',
    '—', '{---}',
    # NOTE(review): in the single-quoted strings for the opening quotes the
    # backslash before ` is kept literally, while the double-quoted closing
    # forms drop theirs -- confirm the asymmetry is intended.
    '‘', '\`{}',
    '’', "\'{}",
    '‚', '\\quotesinglbase{}',
    '“', '\`\`',
    '”', "\'\'",
    '„', '\\quotedblbase{}',
    '†', '\\dagger{}',
    '‡', '\\ddagger{}',
    '•', '\\ensuremath{\\bullet}',
    '…', '\\dots{}',
    '‰', '\\textperthousand{}',     # per mille
    '′', '\\ensuremath{^{\\prime}}',
    '″', '\\ensuremath{^{\\prime\\prime}}',
    '‹', '\\leftguilsingl{}',
    '›', '\\rightguilsingl{}',
    '‾', '\\ensuremath{\\overline{\phantom{x}}}',
    '⁄', '\\ensuremath{/}',         # was misspelled \emsuremath
    '€', '\\texteuro{}',            # Euro sign
    'ℑ', '\\ensuremath{\\Im}',
    '℘', '\\ensuremath{\\wp}',
    'ℜ', '\\ensuremath{\\Re}',
    '™', '\\trademark{}',
    'ℵ', '\\ensuremath{\\aleph}',
    '←', '\\leftarrow ',
    '↑', '\\uparrow ',
    '→', '\\rightarrow ',
    '↓', '\\downarrow ',
    '↔', '\\leftrightarrow ',
    '↵', '\\downharpoonleft ',
    '⇐', '\\Leftarrow ',
    '⇑', '\\Uparrow ',
    '⇒', '\\Rightarrow ',
    '⇓', '\\Downarrow ',
    '⇔', '\\Leftrightarrow '
);

1;