#include "tag.h"

/* Number of buckets in the entity-name hash table. */
#define HASHSIZE 161

/* Bucket heads; each chain is threaded through the `next` field of entities[]. */
static EntList *hashtab[HASHSIZE];

/*
 * HTML character entity names and their document-character-set code points
 * (decimal ISO 10646 values).  The table is terminated by a NULL name.
 */
static EntList entities[] = {
    /* Portions (C) International Organization for Standardization 1986
       Permission to copy in any form is granted for use with
       conforming SGML systems and applications as defined in
       ISO 8879, provided this notice is included in all copies. */

    /* Character entity set. Typical invocation:
         <!ENTITY % HTMLlat1 PUBLIC
           "-//W3C//ENTITIES Full Latin 1//EN//HTML">
         %HTMLlat1; */
    {"nbsp", 160}, {"iexcl", 161}, {"cent", 162}, {"pound", 163},
    {"curren", 164}, {"yen", 165}, {"brvbar", 166}, {"sect", 167},
    {"uml", 168}, {"copy", 169}, {"ordf", 170}, {"laquo", 171},
    {"not", 172}, {"shy", 173}, {"reg", 174}, {"macr", 175},
    {"deg", 176}, {"plusmn", 177}, {"sup2", 178}, {"sup3", 179},
    {"acute", 180}, {"micro", 181}, {"para", 182}, {"middot", 183},
    {"cedil", 184}, {"sup1", 185}, {"ordm", 186}, {"raquo", 187},
    {"frac14", 188}, {"frac12", 189}, {"frac34", 190}, {"iquest", 191},
    {"Agrave", 192}, {"Aacute", 193}, {"Acirc", 194}, {"Atilde", 195},
    {"Auml", 196}, {"Aring", 197}, {"AElig", 198}, {"Ccedil", 199},
    {"Egrave", 200}, {"Eacute", 201}, {"Ecirc", 202}, {"Euml", 203},
    {"Igrave", 204}, {"Iacute", 205}, {"Icirc", 206}, {"Iuml", 207},
    {"ETH", 208}, {"Ntilde", 209}, {"Ograve", 210}, {"Oacute", 211},
    {"Ocirc", 212}, {"Otilde", 213}, {"Ouml", 214}, {"times", 215},
    {"Oslash", 216}, {"Ugrave", 217}, {"Uacute", 218}, {"Ucirc", 219},
    {"Uuml", 220}, {"Yacute", 221}, {"THORN", 222}, {"szlig", 223},
    {"agrave", 224}, {"aacute", 225}, {"acirc", 226}, {"atilde", 227},
    {"auml", 228}, {"aring", 229}, {"aelig", 230}, {"ccedil", 231},
    {"egrave", 232}, {"eacute", 233}, {"ecirc", 234}, {"euml", 235},
    {"igrave", 236}, {"iacute", 237}, {"icirc", 238}, {"iuml", 239},
    {"eth", 240}, {"ntilde", 241}, {"ograve", 242}, {"oacute", 243},
    {"ocirc", 244}, {"otilde", 245}, {"ouml", 246}, {"divide", 247},
    {"oslash", 248}, {"ugrave", 249}, {"uacute", 250}, {"ucirc", 251},
    {"uuml", 252}, {"yacute", 253}, {"thorn", 254}, {"yuml", 255},

    /* Mathematical, Greek and Symbolic characters for HTML */

    /* Character entity set. Typical invocation:
         <!ENTITY % HTMLsymbol PUBLIC
           "-//W3C//ENTITIES Symbolic//EN//HTML">
         %HTMLsymbol; */

    /* Portions (C) International Organization for Standardization 1986:
       Permission to copy in any form is granted for use with
       conforming SGML systems and applications as defined in
       ISO 8879, provided this notice is included in all copies. */

    /* Relevant ISO entity set is given unless names are newly introduced.
       New names (i.e., not in ISO 8879 list) do not clash with any
       existing ISO 8879 entity names. ISO 10646 character numbers
       are given for each character, in hex. CDATA values are decimal
       conversions of the ISO 10646 values and refer to the document
       character set. Names are Unicode 2.0 names. */

    /* Latin Extended-B */
    {"fnof", 402},

    /* Greek */
    {"Alpha", 913}, {"Beta", 914}, {"Gamma", 915}, {"Delta", 916},
    {"Epsilon", 917}, {"Zeta", 918}, {"Eta", 919}, {"Theta", 920},
    {"Iota", 921}, {"Kappa", 922}, {"Lambda", 923}, {"Mu", 924},
    {"Nu", 925}, {"Xi", 926}, {"Omicron", 927}, {"Pi", 928},
    {"Rho", 929},
    /* (there is no Sigmaf, and no u+03A2 character either) */
    {"Sigma", 931}, {"Tau", 932}, {"Upsilon", 933}, {"Phi", 934},
    {"Chi", 935}, {"Psi", 936}, {"Omega", 937},
    {"alpha", 945}, {"beta", 946}, {"gamma", 947}, {"delta", 948},
    {"epsilon", 949}, {"zeta", 950}, {"eta", 951}, {"theta", 952},
    {"iota", 953}, {"kappa", 954}, {"lambda", 955}, {"mu", 956},
    {"nu", 957}, {"xi", 958}, {"omicron", 959}, {"pi", 960},
    {"rho", 961}, {"sigmaf", 962}, {"sigma", 963}, {"tau", 964},
    {"upsilon", 965}, {"phi", 966}, {"chi", 967}, {"psi", 968},
    {"omega", 969}, {"thetasym", 977}, {"upsih", 978}, {"piv", 982},

    /* General Punctuation */
    {"bull", 8226},
    /* bullet is NOT the same as bullet operator, u+2219 */
    {"hellip", 8230}, {"prime", 8242}, {"Prime", 8243},
    {"oline", 8254}, {"frasl", 8260},

    /* Letterlike Symbols */
    {"weierp", 8472}, {"image", 8465}, {"real", 8476}, {"trade", 8482},
    {"alefsym", 8501},
    /* alef symbol is NOT the same as hebrew letter alef, u+05D0 although
       the same glyph could be used to depict both characters */

    /* Arrows */
    {"larr", 8592}, {"uarr", 8593}, {"rarr", 8594}, {"darr", 8595},
    {"harr", 8596}, {"crarr", 8629},
    {"lArr", 8656},
    /* Unicode does not say that lArr is the same as the 'is implied by'
       arrow but also does not have any other character for that function.
       So ? lArr can be used for 'is implied by' as ISOtech suggests */
    {"uArr", 8657},
    {"rArr", 8658},
    /* Unicode does not say this is the 'implies' character but does not
       have another character with this function so ?
       rArr can be used for 'implies' as ISOtech suggests */
    {"dArr", 8659}, {"hArr", 8660},

    /* Mathematical Operators */
    {"forall", 8704}, {"part", 8706}, {"exist", 8707}, {"empty", 8709},
    {"nabla", 8711}, {"isin", 8712}, {"notin", 8713},
    {"ni", 8715},
    /* should there be a more memorable name than 'ni'? */
    {"prod", 8719},
    /* prod is NOT the same character as u+03A0 'greek capital letter pi'
       though the same glyph might be used for both */
    {"sum", 8721},
    /* sum is NOT the same character as u+03A3 'greek capital letter sigma'
       though the same glyph might be used for both */
    {"minus", 8722}, {"lowast", 8727}, {"radic", 8730}, {"prop", 8733},
    {"infin", 8734}, {"ang", 8736},
    /* logical and = u+2227, logical or = u+2228; 8869 is 'perp' (below) */
    {"and", 8743}, {"or", 8744},
    {"cap", 8745}, {"cup", 8746}, {"int", 8747}, {"there4", 8756},
    {"sim", 8764},
    /* tilde operator is NOT the same character as the tilde, u+007E,
       although the same glyph might be used to represent both */
    {"cong", 8773}, {"asymp", 8776}, {"ne", 8800}, {"equiv", 8801},
    {"le", 8804}, {"ge", 8805}, {"sub", 8834},
    {"sup", 8835},
    /* note that nsup, 'not a superset of, u+2283' is not covered by the
       Symbol font encoding and is not included. Should it be, for
       symmetry? It is in ISOamsn */
    {"nsub", 8836}, {"sube", 8838}, {"supe", 8839}, {"oplus", 8853},
    {"otimes", 8855}, {"perp", 8869},
    {"sdot", 8901},
    /* dot operator is NOT the same character as u+00B7 middle dot */

    /* Miscellaneous Technical */
    {"lceil", 8968}, {"rceil", 8969}, {"lfloor", 8970}, {"rfloor", 8971},
    {"lang", 9001},
    /* lang is NOT the same character as u+003C 'less than'
       or u+2039 'single left-pointing angle quotation mark' */
    {"rang", 9002},
    /* rang is NOT the same character as u+003E 'greater than'
       or u+203A 'single right-pointing angle quotation mark' */

    /* Geometric Shapes */
    {"loz", 9674},

    /* Miscellaneous Symbols */
    {"spades", 9824},
    /* black here seems to mean filled as opposed to hollow */
    {"clubs", 9827}, {"hearts", 9829}, {"diams", 9830},

    /* Special characters for HTML */

    /* Character entity set. Typical invocation:
         <!ENTITY % HTMLspecial PUBLIC
           "-//W3C//ENTITIES Special//EN//HTML">
         %HTMLspecial; */

    /* Portions (C) International Organization for Standardization 1986:
       Permission to copy in any form is granted for use with
       conforming SGML systems and applications as defined in
       ISO 8879, provided this notice is included in all copies. */

    /* Relevant ISO entity set is given unless names are newly introduced.
       New names (i.e., not in ISO 8879 list) do not clash with any
       existing ISO 8879 entity names. ISO 10646 character numbers
       are given for each character, in hex. CDATA values are decimal
       conversions of the ISO 10646 values and refer to the document
       character set. Names are Unicode 2.0 names. */

    /* C0 Controls and Basic Latin */
    {"quot", 34}, {"amp", 38}, {"lt", 60}, {"gt", 62},

    /* Latin Extended-A */
    {"OElig", 338}, {"oelig", 339},
    /* ligature is a misnomer, this is a separate character in some
       languages */
    {"Scaron", 352}, {"scaron", 353}, {"Yuml", 376},

    /* Spacing Modifier Letters */
    {"circ", 710}, {"tilde", 732},

    /* General Punctuation */
    {"ensp", 8194}, {"emsp", 8195}, {"thinsp", 8201}, {"zwnj", 8204},
    {"zwj", 8205}, {"lrm", 8206}, {"rlm", 8207}, {"ndash", 8211},
    {"mdash", 8212}, {"lsquo", 8216}, {"rsquo", 8217}, {"sbquo", 8218},
    {"ldquo", 8220}, {"rdquo", 8221}, {"bdquo", 8222}, {"dagger", 8224},
    {"Dagger", 8225}, {"permil", 8240},
    {"lsaquo", 8249},
    /* lsaquo is proposed but not yet ISO standardised */
    {"rsaquo", 8250},
    /* rsaquo is proposed but not yet ISO standardised */

    { NULL, 0, },
};

/*
 * Map a NUL-terminated string to a bucket index in [0, HASHSIZE).
 *
 * Bytes are folded in as unsigned char so the result does not depend on
 * whether plain `char` is signed on the target platform.
 */
static u_int
hash(char *s)
{
    u_int hashval = 0;

    for (; *s != '\0'; s++) {
        hashval = (unsigned char)*s + 31 * hashval;
    }
    return hashval % HASHSIZE;
}

/*
 * Look up an entity by its exact (case-sensitive) name, without the
 * surrounding '&' and ';'.
 *
 * Returns a pointer to the matching table entry, or NULL if `s` is NULL,
 * the name is unknown, or InitEntities() has not populated the hash table.
 */
EntList *
GetEntity(char *s)
{
    EntList *p;

    if (s == NULL) {
        return NULL;
    }
    for (p = hashtab[hash(s)]; p != NULL; p = p->next) {
        if (strcmp(s, p->name) == 0) {
            return p;
        }
    }
    return NULL;
}

/*
 * Build the hash table over entities[].  Must be called before GetEntity()
 * can find anything.
 *
 * The buckets are cleared first so that calling this more than once simply
 * rebuilds the table; without the reset, a second call would link entries
 * back onto chains they are already part of and create cycles.
 */
void
InitEntities(void)
{
    EntList *ep;
    u_int hashval;

    for (hashval = 0; hashval < HASHSIZE; hashval++) {
        hashtab[hashval] = NULL;
    }
    for (ep = entities; ep->name != NULL; ep++) {
        hashval = hash(ep->name);
        ep->next = hashtab[hashval];   /* push onto the front of the chain */
        hashtab[hashval] = ep;
    }
}