// --------------------------------------------------------------------------- // - XmlBuffer.cpp - // - afnix:xml module - xml character buffer class implementation - // --------------------------------------------------------------------------- // - This program is free software; you can redistribute it and/or modify - // - it provided that this copyright notice is kept intact. - // - - // - This program is distributed in the hope that it will be useful, but - // - without any warranty; without even the implied warranty of - // - merchantability or fitness for a particular purpose. In no event shall - // - the copyright holder be liable for any direct, indirect, incidental or - // - special damages arising in any way out of the use of this software. - // --------------------------------------------------------------------------- // - copyright (c) 1999-2007 amaury darsch - // --------------------------------------------------------------------------- #include "Vector.hpp" #include "Utility.hpp" #include "Unicode.hpp" #include "XmlBuffer.hpp" #include "Exception.hpp" namespace afnix { // ------------------------------------------------------------------------- // - private section - // ------------------------------------------------------------------------- // default buffer size static const long XML_BUFFER_SIZE = 1024; // default preserve space flag static const bool XML_BUFFER_PRSV = false; // the xml special characters static const t_quad XML_CHAR_AM = 0x00000026; // & static const t_quad XML_CHAR_NS = 0x00000023; // # static const t_quad XML_CHAR_DQ = 0x00000022; // " static const t_quad XML_CHAR_SQ = 0x00000027; // ' static const t_quad XML_CHAR_PC = 0x00000025; // % static const t_quad XML_CHAR_SC = 0x0000003B; // ; static const t_quad XML_CHAR_OP = 0x00000028; // ( static const t_quad XML_CHAR_CP = 0x00000029; // ) static const t_quad XML_CHAR_VL = 0x0000007C; // | static const t_quad XML_CHAR_LB = 0x0000005B; // [ static const t_quad XML_CHAR_RB = 0x0000005D; // ] // return true if the character is a valid XML 1.0 character static inline bool is_char_1_0 (const t_quad c) { if (c == 0x00000009) return true; if (c == 0x0000000A) return true; if (c == 0x0000000D) return true; if ((0x00000020<= c) && (c <= 0x0000D7FF)) return true; if ((0x0000E000<= c) && (c <= 0x0000FFFD)) return true; if ((0x00010000<= c) && (c <= 0x0010FFFF)) return true; return false; } // return true if the character is a valid XML 1.1 character static inline bool is_char_1_1 (const t_quad c) { if ((0x00000001<= c) && (c <= 0x0000D7FF)) return true; if ((0x0000E000<= c) && (c <= 0x0000FFFD)) return true; if ((0x00010000<= c) && (c <= 0x0010FFFF)) return true; return false; } // return true if the character is valid static inline bool is_char (XmlSystem::t_xmlv xmlv, const t_quad c) { switch (xmlv) { case XmlSystem::XML_1_0: return is_char_1_0 (c); case XmlSystem::XML_1_1: return is_char_1_1 (c); } throw Exception ("internal-error", "illegal xml is-char access"); } // return true if a buffer is valid static bool is_valid (XmlSystem::t_xmlv xmlv, const t_quad* s) { t_quad c = nilq; while ((c = *s++) != nilq) { if (is_char (xmlv, c) == false) return false; } return true; } // return true if the character a valid XML 1.0 space static inline bool is_spcc_1_0 (const t_quad c) { if (c == 0x00000020) return true; if (c == 0x00000009) return true; if (c == 0x0000000D) return true; if (c == 0x0000000A) return true; return false; } // return true if the character a valid XML 1.1 space static inline bool is_spcc_1_1 (const t_quad c) { if (c == 0x00000020) return true; if (c == 0x00000009) return true; if (c == 0x0000000D) return true; if (c == 0x0000000A) return true; return false; } // return true if the character is a valid space static inline bool is_spcc (XmlSystem::t_xmlv xmlv, const t_quad c) { switch (xmlv) { case XmlSystem::XML_1_0: return is_spcc_1_0 (c); case XmlSystem::XML_1_1: return is_spcc_1_1 (c); } throw Exception ("internal-error", "illegal xml is-spcc access"); } // return true if the character is a valid XML 1.0 base character static inline bool is_base_1_0 (const t_quad c) { if ((0x00000041 <= c) && (c <= 0x0000005A)) return true; if ((0x00000061 <= c) && (c <= 0x0000007A)) return true; if ((0x000000C0 <= c) && (c <= 0x000000D6)) return true; if ((0x000000D8 <= c) && (c <= 0x000000F6)) return true; if ((0x000000F8 <= c) && (c <= 0x000000FF)) return true; if ((0x00000100 <= c) && (c <= 0x00000131)) return true; if ((0x00000134 <= c) && (c <= 0x0000013E)) return true; if ((0x00000141 <= c) && (c <= 0x00000148)) return true; if ((0x0000014A <= c) && (c <= 0x0000017E)) return true; if ((0x00000180 <= c) && (c <= 0x000001C3)) return true; if ((0x000001CD <= c) && (c <= 0x000001F0)) return true; if ((0x000001F4 <= c) && (c <= 0x000001F5)) return true; if ((0x000001FA <= c) && (c <= 0x00000217)) return true; if ((0x00000250 <= c) && (c <= 0x000002A8)) return true; if ((0x000002BB <= c) && (c <= 0x000002C1)) return true; if (c == 0x00000386) return true; if ((0x00000388 <= c) && (c <= 0x0000038A)) return true; if (c == 0x0000038C) return true; if ((0x0000038E <= c) && (c <= 0x000003A1)) return true; if ((0x000003A3 <= c) && (c <= 0x000003CE)) return true; if ((0x000003D0 <= c) && (c <= 0x000003D6)) return true; if (c == 0x000003DA) return true; if (c == 0x000003DC) return true; if (c == 0x000003DE) return true; if (c == 0x000003E0) return true; if ((0x000003E2 <= c) && (c <= 0x000003F3)) return true; if ((0x00000401 <= c) && (c <= 0x0000040C)) return true; if ((0x0000040E <= c) && (c <= 0x0000044F)) return true; if ((0x00000451 <= c) && (c <= 0x0000045C)) return true; if ((0x0000045E <= c) && (c <= 0x00000481)) return true; if ((0x00000490 <= c) && (c <= 0x000004C4)) return true; if ((0x000004C7 <= c) && (c <= 0x000004C8)) return true; if ((0x000004CB <= c) && (c <= 0x000004CC)) return true; if ((0x000004D0 <= c) && (c <= 0x000004EB)) return true; if ((0x000004EE <= c) && (c <= 0x000004F5)) return true; if ((0x000004F8 <= c) && (c <= 0x000004F9)) return true; if ((0x00000531 <= c) && (c <= 0x00000556)) return true; if (c == 0x00000559) return true; if ((0x00000561 <= c) && (c <= 0x00000586)) return true; if ((0x000005D0 <= c) && (c <= 0x000005EA)) return true; if ((0x000005F0 <= c) && (c <= 0x000005F2)) return true; if ((0x00000621 <= c) && (c <= 0x0000063A)) return true; if ((0x00000641 <= c) && (c <= 0x0000064A)) return true; if ((0x00000671 <= c) && (c <= 0x000006B7)) return true; if ((0x000006BA <= c) && (c <= 0x000006BE)) return true; if ((0x000006C0 <= c) && (c <= 0x000006CE)) return true; if ((0x000006D0 <= c) && (c <= 0x000006D3)) return true; if (c == 0x000006D5) return true; if ((0x000006E5 <= c) && (c <= 0x000006E6)) return true; if ((0x00000905 <= c) && (c <= 0x00000939)) return true; if (c == 0x0000093D) return true; if ((0x00000958 <= c) && (c <= 0x00000961)) return true; if ((0x00000985 <= c) && (c <= 0x0000098C)) return true; if ((0x0000098F <= c) && (c <= 0x00000990)) return true; if ((0x00000993 <= c) && (c <= 0x000009A8)) return true; if ((0x000009AA <= c) && (c <= 0x000009B0)) return true; if (c == 0x000009B2) return true; if ((0x000009B6 <= c) && (c <= 0x000009B9)) return true; if ((0x000009DC <= c) && (c <= 0x000009DD)) return true; if ((0x000009DF <= c) && (c <= 0x000009E1)) return true; if ((0x000009F0 <= c) && (c <= 0x000009F1)) return true; if ((0x00000A05 <= c) && (c <= 0x00000A0A)) return true; if ((0x00000A0F <= c) && (c <= 0x00000A10)) return true; if ((0x00000A13 <= c) && (c <= 0x00000A28)) return true; if ((0x00000A2A <= c) && (c <= 0x00000A30)) return true; if ((0x00000A32 <= c) && (c <= 0x00000A33)) return true; if ((0x00000A35 <= c) && (c <= 0x00000A36)) return true; if ((0x00000A38 <= c) && (c <= 0x00000A39)) return true; if ((0x00000A59 <= c) && (c <= 0x00000A5C)) return true; if (c == 0x00000A5E) return true; if ((0x00000A72 <= c) && (c <= 0x00000A74)) return true; if ((0x00000A85 <= c) && (c <= 0x00000A8B)) return true; if (c == 0x00000A8D) return true; if ((0x00000A8F <= c) && (c <= 0x00000A91)) return true; if ((0x00000A93 <= c) && (c <= 0x00000AA8)) return true; if ((0x00000AAA <= c) && (c <= 0x00000AB0)) return true; if ((0x00000AB2 <= c) && (c <= 0x00000AB3)) return true; if ((0x00000AB5 <= c) && (c <= 0x00000AB9)) return true; if (c == 0x00000ABD) return true; if (c == 0x00000AE0) return true; if ((0x00000B05 <= c) && (c <= 0x00000B0C)) return true; if ((0x00000B0F <= c) && (c <= 0x00000B10)) return true; if ((0x00000B13 <= c) && (c <= 0x00000B28)) return true; if ((0x00000B2A <= c) && (c <= 0x00000B30)) return true; if ((0x00000B32 <= c) && (c <= 0x00000B33)) return true; if ((0x00000B36 <= c) && (c <= 0x00000B39)) return true; if (c == 0x00000B3D) return true; if ((0x00000B5C <= c) && (c <= 0x00000B5D)) return true; if ((0x00000B5F <= c) && (c <= 0x00000B61)) return true; if ((0x00000B85 <= c) && (c <= 0x00000B8A)) return true; if ((0x00000B8E <= c) && (c <= 0x00000B90)) return true; if ((0x00000B92 <= c) && (c <= 0x00000B95)) return true; if ((0x00000B99 <= c) && (c <= 0x00000B9A)) return true; if (c == 0x00000B9C) return true; if ((0x00000B9E <= c) && (c <= 0x00000B9F)) return true; if ((0x00000BA3 <= c) && (c <= 0x00000BA4)) return true; if ((0x00000BA8 <= c) && (c <= 0x00000BAA)) return true; if ((0x00000BAE <= c) && (c <= 0x00000BB5)) return true; if ((0x00000BB7 <= c) && (c <= 0x00000BB9)) return true; if ((0x00000C05 <= c) && (c <= 0x00000C0C)) return true; if ((0x00000C0E <= c) && (c <= 0x00000C10)) return true; if ((0x00000C12 <= c) && (c <= 0x00000C28)) return true; if ((0x00000C2A <= c) && (c <= 0x00000C33)) return true; if ((0x00000C35 <= c) && (c <= 0x00000C39)) return true; if ((0x00000C60 <= c) && (c <= 0x00000C61)) return true; if ((0x00000C85 <= c) && (c <= 0x00000C8C)) return true; if ((0x00000C8E <= c) && (c <= 0x00000C90)) return true; if ((0x00000C92 <= c) && (c <= 0x00000CA8)) return true; if ((0x00000CAA <= c) && (c <= 0x00000CB3)) return true; if ((0x00000CB5 <= c) && (c <= 0x00000CB9)) return true; if (c == 0x00000CDE) return true; if ((0x00000CE0 <= c) && (c <= 0x00000CE1)) return true; if ((0x00000D05 <= c) && (c <= 0x00000D0C)) return true; if ((0x00000D0E <= c) && (c <= 0x00000D10)) return true; if ((0x00000D12 <= c) && (c <= 0x00000D28)) return true; if ((0x00000D2A <= c) && (c <= 0x00000D39)) return true; if ((0x00000D60 <= c) && (c <= 0x00000D61)) return true; if ((0x00000E01 <= c) && (c <= 0x00000E2E)) return true; if (c == 0x00000E30) return true; if ((0x00000E32 <= c) && (c <= 0x00000E33)) return true; if ((0x00000E40 <= c) && (c <= 0x00000E45)) return true; if ((0x00000E81 <= c) && (c <= 0x00000E82)) return true; if (c == 0x00000E84) return true; if ((0x00000E87 <= c) && (c <= 0x00000E88)) return true; if (c == 0x00000E8A) return true; if (c == 0x00000E8D) return true; if ((0x00000E94 <= c) && (c <= 0x00000E97)) return true; if ((0x00000E99 <= c) && (c <= 0x00000E9F)) return true; if ((0x00000EA1 <= c) && (c <= 0x00000EA3)) return true; if (c == 0x00000EA5) return true; if (c == 0x00000EA7) return true; if ((0x00000EAA <= c) && (c <= 0x00000EAB)) return true; if ((0x00000EAD <= c) && (c <= 0x00000EAE)) return true; if (c == 0x00000EB0) return true; if ((0x00000EB2 <= c) && (c <= 0x00000EB3)) return true; if (c == 0x00000EBD) return true; if ((0x00000EC0 <= c) && (c <= 0x00000EC4)) return true; if ((0x00000F40 <= c) && (c <= 0x00000F47)) return true; if ((0x00000F49 <= c) && (c <= 0x00000F69)) return true; if ((0x000010A0 <= c) && (c <= 0x000010C5)) return true; if ((0x000010D0 <= c) && (c <= 0x000010F6)) return true; if (c == 0x00001100) return true; if ((0x00001102 <= c) && (c <= 0x00001103)) return true; if ((0x00001105 <= c) && (c <= 0x00001107)) return true; if (c == 0x00001109) return true; if ((0x0000110B <= c) && (c <= 0x0000110C)) return true; if ((0x0000110E <= c) && (c <= 0x00001112)) return true; if (c == 0x0000113C) return true; if (c == 0x0000113E) return true; if (c == 0x00001140) return true; if (c == 0x0000114C) return true; if (c == 0x0000114E) return true; if (c == 0x00001150) return true; if ((0x00001154 <= c) && (c <= 0x00001155)) return true; if (c == 0x00001159) return true; if ((0x0000115F <= c) && (c <= 0x00001161)) return true; if (c == 0x00001163) return true; if (c == 0x00001165) return true; if (c == 0x00001167) return true; if (c == 0x00001169) return true; if ((0x0000116D <= c) && (c <= 0x0000116E)) return true; if ((0x00001172 <= c) && (c <= 0x00001173)) return true; if (c == 0x00001175) return true; if (c == 0x0000119E) return true; if (c == 0x000011A8) return true; if (c == 0x000011AB) return true; if ((0x000011AE <= c) && (c <= 0x000011AF)) return true; if ((0x000011B7 <= c) && (c <= 0x000011B8)) return true; if (c == 0x000011BA) return true; if ((0x000011BC <= c) && (c <= 0x000011C2)) return true; if (c == 0x000011EB) return true; if (c == 0x000011F0) return true; if (c == 0x000011F9) return true; if ((0x00001E00 <= c) && (c <= 0x00001E9B)) return true; if ((0x00001EA0 <= c) && (c <= 0x00001EF9)) return true; if ((0x00001F00 <= c) && (c <= 0x00001F15)) return true; if ((0x00001F18 <= c) && (c <= 0x00001F1D)) return true; if ((0x00001F20 <= c) && (c <= 0x00001F45)) return true; if ((0x00001F48 <= c) && (c <= 0x00001F4D)) return true; if ((0x00001F50 <= c) && (c <= 0x00001F57)) return true; if (c == 0x00001F59) return true; if (c == 0x00001F5B) return true; if (c == 0x00001F5D) return true; if ((0x00001F5F <= c) && (c <= 0x00001F7D)) return true; if ((0x00001F80 <= c) && (c <= 0x00001FB4)) return true; if ((0x00001FB6 <= c) && (c <= 0x00001FBC)) return true; if (c == 0x00001FBE) return true; if ((0x00001FC2 <= c) && (c <= 0x00001FC4)) return true; if ((0x00001FC6 <= c) && (c <= 0x00001FCC)) return true; if ((0x00001FD0 <= c) && (c <= 0x00001FD3)) return true; if ((0x00001FD6 <= c) && (c <= 0x00001FDB)) return true; if ((0x00001FE0 <= c) && (c <= 0x00001FEC)) return true; if ((0x00001FF2 <= c) && (c <= 0x00001FF4)) return true; if ((0x00001FF6 <= c) && (c <= 0x00001FFC)) return true; if (c == 0x00002126) return true; if ((0x0000212A <= c) && (c <= 0x0000212B)) return true; if (c == 0x0000212E) return true; if ((0x00002180 <= c) && (c <= 0x00002182)) return true; if ((0x00003041 <= c) && (c <= 0x00003094)) return true; if ((0x000030A1 <= c) && (c <= 0x000030FA)) return true; if ((0x00003105 <= c) && (c <= 0x0000312C)) return true; if ((0x0000AC00 <= c) && (c <= 0x0000D7A3)) return true; return false; } // return true if the character is a valid XML 1.0 ideographic character static inline bool is_ideo_1_0 (const t_quad c) { if ((0x00004E00 <= c) && (c <= 0x00009FA5)) return true; if (c == 0x000030007) return true; if ((0x00003021 <= c) && (c <= 0x00003029)) return true; return false; } // return true if the character is a valid XML 1.0 combining character static inline bool is_comb_1_0 (const t_quad c) { if ((0x00000300 <= c) && (c <= 0x00000345)) return true; if ((0x00000360 <= c) && (c <= 0x00000361)) return true; if ((0x00000483 <= c) && (c <= 0x00000486)) return true; if ((0x00000591 <= c) && (c <= 0x000005A1)) return true; if ((0x000005A3 <= c) && (c <= 0x000005B9)) return true; if ((0x000005BB <= c) && (c <= 0x000005BD)) return true; if (c == 0x000005BF) return true; if ((0x000005C1 <= c) && (c <= 0x000005C2)) return true; if (c == 0x000005C4) return true; if ((0x0000064B <= c) && (c <= 0x00000652)) return true; if (c == 0x00000670) return true; if ((0x000006D6 <= c) && (c <= 0x000006DC)) return true; if ((0x000006DD <= c) && (c <= 0x000006DF)) return true; if ((0x000006E0 <= c) && (c <= 0x000006E4)) return true; if ((0x000006E7 <= c) && (c <= 0x000006E8)) return true; if ((0x000006EA <= c) && (c <= 0x000006ED)) return true; if ((0x00000901 <= c) && (c <= 0x00000903)) return true; if (c == 0x0000093C) return true; if ((0x0000093E <= c) && (c <= 0x0000094C)) return true; if (c == 0x0000094D) return true; if ((0x00000951 <= c) && (c <= 0x00000954)) return true; if ((0x00000962 <= c) && (c <= 0x00000963)) return true; if ((0x00000981 <= c) && (c <= 0x00000983)) return true; if (c == 0x000009BC) return true; if (c == 0x000009BE) return true; if (c == 0x000009BF) return true; if ((0x000009C0 <= c) && (c <= 0x000009C4)) return true; if ((0x000009C7 <= c) && (c <= 0x000009C8)) return true; if ((0x000009CB <= c) && (c <= 0x000009CD)) return true; if (c == 0x000009D7) return true; if ((0x000009E2 <= c) && (c <= 0x000009E3)) return true; if (c == 0x00000A02) return true; if (c == 0x00000A3C) return true; if (c == 0x00000A3E) return true; if (c == 0x00000A3F) return true; if ((0x00000A40 <= c) && (c <= 0x00000A42)) return true; if ((0x00000A47 <= c) && (c <= 0x00000A48)) return true; if ((0x00000A4B <= c) && (c <= 0x00000A4D)) return true; if ((0x00000A70 <= c) && (c <= 0x00000A71)) return true; if ((0x00000A81 <= c) && (c <= 0x00000A83)) return true; if (c == 0x00000ABC) return true; if ((0x00000ABE <= c) && (c <= 0x00000AC5)) return true; if ((0x00000AC7 <= c) && (c <= 0x00000AC9)) return true; if ((0x00000ACB <= c) && (c <= 0x00000ACD)) return true; if ((0x00000B01 <= c) && (c <= 0x00000B03)) return true; if (c == 0x00000B3C) return true; if ((0x00000B3E <= c) && (c <= 0x00000B43)) return true; if ((0x00000B47 <= c) && (c <= 0x00000B48)) return true; if ((0x00000B4B <= c) && (c <= 0x00000B4D)) return true; if ((0x00000B56 <= c) && (c <= 0x00000B57)) return true; if ((0x00000B82 <= c) && (c <= 0x00000B83)) return true; if ((0x00000BBE <= c) && (c <= 0x00000BC2)) return true; if ((0x00000BC6 <= c) && (c <= 0x00000BC8)) return true; if ((0x00000BCA <= c) && (c <= 0x00000BCD)) return true; if (c == 0x00000BD7) return true; if ((0x00000C01 <= c) && (c <= 0x00000C03)) return true; if ((0x00000C3E <= c) && (c <= 0x00000C44)) return true; if ((0x00000C46 <= c) && (c <= 0x00000C48)) return true; if ((0x00000C4A <= c) && (c <= 0x00000C4D)) return true; if ((0x00000C55 <= c) && (c <= 0x00000C56)) return true; if ((0x00000C82 <= c) && (c <= 0x00000C83)) return true; if ((0x00000CBE <= c) && (c <= 0x00000CC4)) return true; if ((0x00000CC6 <= c) && (c <= 0x00000CC8)) return true; if ((0x00000CCA <= c) && (c <= 0x00000CCD)) return true; if ((0x00000CD5 <= c) && (c <= 0x00000CD6)) return true; if ((0x00000D02 <= c) && (c <= 0x00000D03)) return true; if ((0x00000D3E <= c) && (c <= 0x00000D43)) return true; if ((0x00000D46 <= c) && (c <= 0x00000D48)) return true; if ((0x00000D4A <= c) && (c <= 0x00000D4D)) return true; if (c == 0x00000D57) return true; if (c == 0x00000E31) return true; if ((0x00000E34 <= c) && (c <= 0x00000E3A)) return true; if ((0x00000E47 <= c) && (c <= 0x00000E4E)) return true; if (c == 0x00000EB1) return true; if ((0x00000EB4 <= c) && (c <= 0x00000EB9)) return true; if ((0x00000EBB <= c) && (c <= 0x00000EBC)) return true; if ((0x00000EC8 <= c) && (c <= 0x00000ECD)) return true; if ((0x00000F18 <= c) && (c <= 0x00000F19)) return true; if (c == 0x00000F35) return true; if (c == 0x00000F37) return true; if (c == 0x00000F39) return true; if (c == 0x00000F3E) return true; if (c == 0x00000F3F) return true; if ((0x00000F71 <= c) && (c <= 0x00000F84)) return true; if ((0x00000F86 <= c) && (c <= 0x00000F8B)) return true; if ((0x00000F90 <= c) && (c <= 0x00000F95)) return true; if (c == 0x00000F97) return true; if ((0x00000F99 <= c) && (c <= 0x00000FAD)) return true; if ((0x00000FB1 <= c) && (c <= 0x00000FB7)) return true; if (c == 0x00000FB9) return true; if ((0x000020D0 <= c) && (c <= 0x000020DC)) return true; if (c == 0x000020E1) return true; if ((0x0000302A <= c) && (c <= 0x0000302F)) return true; if (c == 0x00003099) return true; if (c == 0x0000309A) return true; return false; } // return true if the character is a valid XML 1.0 digit character static inline bool is_digi_1_0 (const t_quad c) { if ((0x00000030 <= c) && (c <= 0x00000039)) return true; if ((0x00000660 <= c) && (c <= 0x00000669)) return true; if ((0x000006F0 <= c) && (c <= 0x000006F9)) return true; if ((0x00000966 <= c) && (c <= 0x0000096F)) return true; if ((0x000009E6 <= c) && (c <= 0x000009EF)) return true; if ((0x00000A66 <= c) && (c <= 0x00000A6F)) return true; if ((0x00000AE6 <= c) && (c <= 0x00000AEF)) return true; if ((0x00000B66 <= c) && (c <= 0x00000B6F)) return true; if ((0x00000BE7 <= c) && (c <= 0x00000BEF)) return true; if ((0x00000C66 <= c) && (c <= 0x00000C6F)) return true; if ((0x00000CE6 <= c) && (c <= 0x00000CEF)) return true; if ((0x00000D66 <= c) && (c <= 0x00000D6F)) return true; if ((0x00000E50 <= c) && (c <= 0x00000E59)) return true; if ((0x00000ED0 <= c) && (c <= 0x00000ED9)) return true; if ((0x00000F20 <= c) && (c <= 0x00000F29)) return true; return false; } // return true if the character is a valid XML 1.0 extender character static inline bool is_extd_1_0 (const t_quad c) { if (c == 0x000000B7) return true; if (c == 0x000002D0) return true; if (c == 0x000002D1) return true; if (c == 0x00000387) return true; if (c == 0x00000640) return true; if (c == 0x00000E46) return true; if (c == 0x00000EC6) return true; if (c == 0x00003005) return true; if ((0x00003031 <= c) && (c <= 0x00003035)) return true; if ((0x0000309D <= c) && (c <= 0x0000309E)) return true; if ((0x000030FC <= c) && (c <= 0x000030FE)) return true; return false; } // return true if the character is a valid XML 1.0 letter static inline bool is_letr_1_0 (const t_quad c) { if (is_base_1_0 (c) == true) return true; return is_ideo_1_0 (c); } // return true if the character is a valid XML 1.0 starting character static inline bool is_nams_1_0 (const t_quad c) { // letter if (is_letr_1_0 (c) == true) return true; // special characters _ : if (c == 0x0000005F) return true; if (c == 0x0000003A) return true; return false; } // return true if the character is a valid XML 1.0 name character static inline bool is_namc_1_0 (const t_quad c) { // letter if (is_letr_1_0 (c) == true) return true; // digit if (is_digi_1_0 (c) == true) return true; // special characters . - _ : if (c == 0x0000002E) return true; if (c == 0x0000002D) return true; if (c == 0x0000005F) return true; if (c == 0x0000003A) return true; // combining if (is_comb_1_0 (c) == true) return true; // extenders if (is_extd_1_0 (c) == true) return true; return false; } // return true if a buffer is valid XML 1.0 name static bool is_name_1_0 (const t_quad* s) { // check for nil first if (s == nilp) return false; // check for starting character t_quad c = *s++; if (is_nams_1_0 (c) == false) return false; // check the rest of the buffer while ((c = *s++) != nilq) { if (is_namc_1_0 (c) == false) return false; } return true; } // return true if the character is a valid XML 1.1 starting character static inline bool is_nams_1_1 (const t_quad c) { if (c == 0x0000003A) return true; if (c == 0x0000005F) return true; if ((0x00000041 <= c) && (c <= 0x0000005A)) return true; if ((0x00000061 <= c) && (c <= 0x0000007A)) return true; if ((0x000000C0 <= c) && (c <= 0x000000D6)) return true; if ((0x000000D8 <= c) && (c <= 0x000000F6)) return true; if ((0x000000F8 <= c) && (c <= 0x000002FF)) return true; if ((0x00000370 <= c) && (c <= 0x0000037D)) return true; if ((0x0000037F <= c) && (c <= 0x00001FFF)) return true; if ((0x0000200C <= c) && (c <= 0x0000200D)) return true; if ((0x00002070 <= c) && (c <= 0x0000218F)) return true; if ((0x00002C00 <= c) && (c <= 0x00002FEF)) return true; if ((0x00003001 <= c) && (c <= 0x0000D7FF)) return true; if ((0x0000F900 <= c) && (c <= 0x0000FDCF)) return true; if ((0x0000FDF0 <= c) && (c <= 0x0000FFFD)) return true; if ((0x00010000 <= c) && (c <= 0x000EFFFF)) return true; return false; } // return true if the character is a valid XML 1.1 name character static inline bool is_namc_1_1 (const t_quad c) { if (is_nams_1_1 (c) == true) return true; if (c == 0x0000002D) return true; if (c == 0x0000002E) return true; if ((0x00000030 <= c) && (c <= 0x00000039)) return true; if (c == 0x000000B7) return true; if ((0x00000300 <= c) && (c <= 0x0000036F)) return true; if ((0x0000203F <= c) && (c <= 0x00002040)) return true; return false; } // return true if the character is a valid name character static inline bool is_namc (XmlSystem::t_xmlv xmlv, const t_quad c) { switch (xmlv) { case XmlSystem::XML_1_0: return is_namc_1_0 (c); case XmlSystem::XML_1_1: return is_namc_1_1 (c); } throw Exception ("internal-error", "illegal xml is-namc access"); } // return true if a buffer is valid XML 1.1 name static bool is_name_1_1 (const t_quad* s) { // check for nil first if (s == nilp) return false; // check for starting character t_quad c = *s++; if (is_nams_1_1 (c) == false) return false; // check the rest of the buffer while ((c = *s++) != nilq) { if (is_namc_1_1 (c) == false) return false; } return true; } // return true if the character is a valid name string static inline bool is_name (XmlSystem::t_xmlv xmlv, const t_quad* s) { switch (xmlv) { case XmlSystem::XML_1_0: return is_name_1_0 (s); case XmlSystem::XML_1_1: return is_name_1_1 (s); } throw Exception ("internal-error", "illegal xml is-name access"); } // return true if the characters are a pe reference static inline bool is_pref (XmlSystem::t_xmlv xmlv, const t_quad* s) { // get the string length and check long slen = Unicode::strlen (s); if (slen == 0) return false; // check first character for % if (s[0] != XML_CHAR_PC) return false; // check last character for ; long smax = slen - 1; if (s[smax] != XML_CHAR_SC) return false; // check the content for valid charcaters switch (xmlv) { case XmlSystem::XML_1_0: for (long i = 1; i < smax; i++) if (is_namc_1_0 (s[i]) == false) return false; case XmlSystem::XML_1_1: for (long i = 1; i < smax; i++) if (is_namc_1_1 (s[i]) == false) return false; } return true; } // return true if the characters are a name or a pe reference static inline bool is_pnam (XmlSystem::t_xmlv xmlv, const t_quad* s) { // check for a pe ref if (is_pref (xmlv, s) == true) return true; // check for a name return is_name (xmlv, s); } // return true if the character is a valid enumeration character static inline bool is_enmc (XmlSystem::t_xmlv xmlv, const t_quad c) { if (is_namc (xmlv, c) == true) return true; if (is_spcc (xmlv, c) == true) return true; if (c == XML_CHAR_VL) return true; return false; } // ------------------------------------------------------------------------- // - class section - // ------------------------------------------------------------------------- // create a new buffer class with a default size of 1024 characters XmlBuffer::XmlBuffer (void) { d_prsv = XML_BUFFER_PRSV; d_xmlv = XmlSystem::getxmlv (); } // create a new buffer with a xml version XmlBuffer::XmlBuffer (XmlSystem::t_xmlv xmlv) { d_prsv = XML_BUFFER_PRSV; d_xmlv = xmlv; } // create a new buffer with a xml version and a string XmlBuffer::XmlBuffer (XmlSystem::t_xmlv xmlv, const String& xval) : XsoBuffer (xval) { d_prsv = XML_BUFFER_PRSV; d_xmlv = xmlv; } // copy construct this buffer XmlBuffer::XmlBuffer (const XmlBuffer& that) : XsoBuffer (that) { d_prsv = that.d_prsv; d_xmlv = that.d_xmlv; } // assign a buffer to this one XmlBuffer& XmlBuffer::operator = (const XmlBuffer& that){ // check for equality if (this == &that) return *this; // copy the base class XsoBuffer::operator = (that); d_prsv = that.d_prsv; d_xmlv = that.d_xmlv; // here it is return *this; } // reset this buffer void XmlBuffer::reset (void) { XsoBuffer::reset (); d_prsv = XML_BUFFER_PRSV; } // get the xml buffer version id String XmlBuffer::getxvid (void) const { return XmlSystem::toxvid(d_xmlv); } // create a new buffer with the same properties XmlBuffer XmlBuffer::dup (void) const { // create a new bufer XmlBuffer result = *this; // reset the content only result.clear (); return result; } // return true if the buffer is not empty after striping bool XmlBuffer::isnext (void) { stripl (); return (d_blen > 0); } // return true if the buffer has a quoted string starter bool XmlBuffer::isqstr (void) { stripl (); if (d_blen == 0) return false; if (p_ubuf[0] == XML_CHAR_DQ) return true; if (p_ubuf[0] == XML_CHAR_SQ) return true; return false; } // return true if the buffer has a subset declaration starter bool XmlBuffer::issubs (void) { stripl (); if (d_blen == 0) return false; if (p_ubuf[0] == XML_CHAR_LB) return true; return false; } // set the preserve space flag void XmlBuffer::setprsv (const bool prsv) { d_prsv = prsv; } // get the preserve space flag bool XmlBuffer::getprsv (void) const { return d_prsv; } // set the xml version void XmlBuffer::setxmlv (const String& xvid) { d_xmlv = XmlSystem::toxmlv (xvid); } // return true if the character is a space bool XmlBuffer::isspc (const t_quad c) const { return is_spcc (d_xmlv, c); } // retun true if the character must be skipped bool XmlBuffer::isskip (const t_quad c) const { bool result = isspc (c) && (d_prsv == false); return result; } // strip the buffer with leading space void XmlBuffer::stripl (void) { while (empty () == false) { t_quad c = read (); if (is_spcc (d_xmlv, c) == true) continue; pushback (c); break; } } // strip the buffer with trailing blank void XmlBuffer::stripr (void) { while (d_blen > 0) { t_quad c = p_ubuf[d_blen-1]; if (is_spcc (d_xmlv, c) == false) break; d_blen--; } } // strip the buffer with leading and trealing blanks void XmlBuffer::strip (void) { stripl (); stripr (); } // return the buffer content as a valid text String XmlBuffer::totext (void) const { // create a temporary buffer XmlBuffer xbuf = d_xmlv; // loop in the existing buffer and convert invalid characters // into a character entity repreentation for (long i = 0; i < d_blen; i++) { t_quad c = p_ubuf[i]; if (is_char (d_xmlv, c) == true) { xbuf.add (c); } else { xbuf.add (XmlSystem::tocesc (c)); } } // convert the buffer as a string return xbuf.tostring (); } // return the buffer content as a valid string String XmlBuffer::tostring (void) const { // mark the end of string t_quad* ubuf = Unicode::strdup (p_ubuf, d_blen); // check for valid characters if (is_valid (d_xmlv, ubuf) == false) { delete [] ubuf; throw Exception ("xml-error", "invalid character in buffer"); } // convert the string String result = ubuf; delete [] ubuf; return result; } // return the buffer content as a valid name String XmlBuffer::toname (void) const { // mark the end of string t_quad* ubuf = Unicode::strdup (p_ubuf, d_blen); // check for valid characters if (is_name (d_xmlv, ubuf) == false) { delete [] ubuf; throw Exception ("xml-error", "invalid character in buffer"); } // convert and clean String result = ubuf; delete [] ubuf; return result; } // return the buffer content as a valid name or pe reference String XmlBuffer::topnam (void) const { // mark the end of string t_quad* ubuf = Unicode::strdup (p_ubuf, d_blen); // check for valid characters if (is_pnam (d_xmlv, ubuf) == false) { delete [] ubuf; throw Exception ("xml-error", "invalid character in buffer"); } // convert and clean String result = ubuf; delete [] ubuf; return result; } // return the buffer content as a character reference t_quad XmlBuffer::tocref (void) const { // get the character value t_quad cval = XsoBuffer::tocref (); // check he character value if (is_char (d_xmlv, cval) == false) { throw Exception ("xml-error", "invalid character reference"); } return cval; } // get the next available general string in this buffer String XmlBuffer::getnstr (void) { // create a working buffer XmlBuffer buf (d_xmlv); // strip left the bufer stripl (); // read character from the buffer until it is empty while (empty () == false) { t_quad c = read (); if (is_spcc (d_xmlv, c) == true) break; buf.add (c); } // get the string name return buf.tostring (); } // get the next available name in this buffer String XmlBuffer::getname (void) { // create a working buffer XmlBuffer buf (d_xmlv); // strip left the bufer stripl (); // read character from the buffer until it is empty while (empty () == false) { t_quad c = read (); if (is_spcc (d_xmlv, c) == true) break; buf.add (c); } // get the string name return buf.toname (); } // get the next available name or pe reference in this buffer String XmlBuffer::getpnam (void) { // create a working buffer XmlBuffer buf (d_xmlv); // strip left the bufer stripl (); // check for empty if (empty () == true) return buf.tostring (); // check for pe reference if (get () == XML_CHAR_PC) { bool pend = false; while (empty () == false) { t_quad c = read (); if (is_spcc (d_xmlv, c) == true) break; buf.add (c); if (c == XML_CHAR_SC) { pend = true; break; } } if (pend == false) { throw Exception ("xml-error", "unterminated pe reference", buf.tostring ()); } } else { while (empty () == false) { t_quad c = read (); if (is_spcc (d_xmlv, c) == true) break; buf.add (c); } } // get the string name return buf.topnam (); } // get the next available quoted string in this buffer String XmlBuffer::getqstr (void) { // create a working buffer XmlBuffer buf (d_xmlv); // strip left the bufer stripl (); if (empty () == true) { throw Exception ("xml-error", "empty buffer while reading quoted string"); } // get the quote type t_quad c = read (); // check for double quote if (c == XML_CHAR_DQ) { while (empty () == false) { c = read (); if (c == XML_CHAR_DQ) return buf.tostring (); buf.add (c); } throw Exception("xml-error", "unterminated quoted string"); } // check fo single quote if (c == XML_CHAR_SQ) { while (empty () == false) { c = read (); if (c == XML_CHAR_SQ) return buf.tostring (); buf.add (c); } throw Exception("xml-error", "unterminated quoted string"); } // illegal starter throw Exception("xml-error", "illegal character starter for quoted string"); } // get the next available enumeration name in this buffer String XmlBuffer::getnenm (void) { // create a working buffer XmlBuffer buf (d_xmlv); // strip left the bufer stripl (); // read character from the buffer until it is empty while (empty () == false) { // check for end of enumeration if (get () == XML_CHAR_CP) break; // read next charcaters t_quad c = read (); // check for space if (is_spcc (d_xmlv, c) == true) break; // check for separator if (c == XML_CHAR_VL) break; buf.add (c); } // get the string name return buf.toname (); } // get the next available enumeration String XmlBuffer::getenum (void) { // create a working buffer XmlBuffer buf (d_xmlv); // strip left the bufer stripl (); if (empty () == true) { throw Exception ("xml-error", "empty enumeration string"); } // get the enumeration starter t_quad c = read (); // check for open parenthesis if (c != XML_CHAR_OP) { throw Exception ("xml-error", "invalid starter character for parenthesis", c); } buf.add (c); while (empty () == false) { c = read (); if (c == eofq) break; if (c == XML_CHAR_CP) { buf.add (c); return buf.tostring (); } if (is_enmc (d_xmlv, c) == false) { throw Exception("xml-error", "invalid enumeration character", c); } buf.add (c); } // unterminated enumeration throw Exception("xml-error", "unterminated enumeration string"); } // get the next a available enumeration name String XmlBuffer::getenam (void) { // create a working buffer XmlBuffer buf (d_xmlv); // strip left the bufer stripl (); if (empty () == true) { throw Exception ("xml-error", "empty buffer while reading enumeration string"); } // get the starter character t_quad c = get (); // check for open parenthesis if (c == XML_CHAR_OP) return getenum (); // this is a normal name return getname (); } // get the attribute default value String XmlBuffer::getxdef (void) { // check for predefined default declaration if (get() == XML_CHAR_NS) { String result = read (); result += getname (); return result; } // here it is a quoted string return getqstr (); } // return a subset declaration string String XmlBuffer::getsubs (void) { // create a working buffer XmlBuffer buf (d_xmlv); // check for starter t_quad c = read (); if (c != XML_CHAR_LB) { throw Exception ("xml-error", "invalid subset declaration starter", c); } // initialize counter and accumulate long bcnt = 1; while (empty () == false) { c = read (); if (c == XML_CHAR_RB) { if (--bcnt == 0) break; } if (c == XML_CHAR_LB) bcnt++; buf.add (c); } if (bcnt != 0) { throw Exception ("xml-error", "unterminated subset declaration"); } // here is the subset return buf.tostring (); } // get the next available attribute in this buffer Property XmlBuffer::getattr (void) { // create a working buffer XmlBuffer buf (d_xmlv); // initialize property name and value String name = ""; String pval = ""; // strip left the buffer stripl (); // get the name first s_name: t_quad c = read (); if (c == nilq) goto s_serr; if (is_spcc (d_xmlv, c) == true) goto s_echk; if (c == Unicode::toquad ('=')) { pushback (c); goto s_echk; } buf.add (c); goto s_name; // check the equal (=) character s_echk: c = read (); if (c == nilq) goto s_serr; if (is_spcc (d_xmlv, c) == true) goto s_echk; if (c == Unicode::toquad ('=')) { name = buf.toname (); buf.reset (); stripl (); goto s_pval; } goto s_serr; // get the " or ' starting character s_pval: c = read (); if (c == nilq) goto s_serr; if (is_spcc (d_xmlv, c) == true) goto s_pval; if (c == Unicode::toquad ('\'')) goto s_sqav; if (c == Unicode::toquad ('"')) goto s_dqav; goto s_serr; // single quoted attribute value s_sqav: c = read (); if (c == nilq) goto s_serr; if (c == Unicode::toquad ('\'')) goto s_done; buf.add (c); goto s_sqav; // double quoted attribute value s_dqav: c = read (); if (c == nilq) goto s_serr; if (c == Unicode::toquad ('"')) goto s_done; buf.add (c); goto s_dqav; // syntax error s_serr: throw Exception ("xml-error", "syntax error while reading attribute"); // get attribute value s_done: pval = buf.tostring (); return Property (name, pval); } // return true if the string is an enumeration bool XmlBuffer::isenum (const String& xval) { // create a working buffer XmlBuffer buf (d_xmlv, xval); // get the enumeration starter t_quad c = buf.read (); // check for open parenthesis if (c != XML_CHAR_OP) return false; // loop in the buffer while (buf.empty () == false) { c = buf.read (); if (c == XML_CHAR_CP) return true; if (is_enmc (d_xmlv, c) == false) break; } return false; } // return a string vector from an enumeration Strvec XmlBuffer::getxenm (const String& xval) { // create a working buffer XmlBuffer buf (d_xmlv, xval); // strip left the buffer buf.stripl (); if (buf.empty () == true) { throw Exception ("xml-error", "empty enumeration string"); } // get the enumeration starter t_quad c = buf.read (); // check for open parenthesis if (c != XML_CHAR_OP) { throw Exception ("xml-error", "invalid starter character for parenthesis", c); } // loop to get the enumeration name Strvec result; while (buf.empty () == false) { if (buf.get () == XML_CHAR_CP) break; String name = buf.getnenm (); result.add (name); } // check for proper termination if (buf.empty () == true) { throw Exception ("xml-error", "unterminated enumeration", xval); } if (buf.read () != XML_CHAR_CP) { throw Exception ("xml-error", "unterminated enumeration", xval); } buf.strip (); if (buf.empty () == false) { throw Exception ("xml-error", "trailing enumeration characters", xval); } return result; } }