/* xls2xml: Converts from Microsoft Excel files to XML. Copyright 1999 Roberto Arturo Tena Sanchez This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* Roberto Arturo Tena Sanchez */ #include #include #include #include int isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen); /* 10 19 */ int copy_unicode_string (struct str_sst_string * pdest, U8 ** punicode_string) { U8 * unicode_string; int rich_string; int extended_string; int compressed_string; U16 count_chars; U16 total_size; assert_return (xls2xml, punicode_string != NULL, 19); assert_return (xls2xml, *punicode_string != NULL, 19); assert_return (xls2xml, pdest != NULL, 19); unicode_string = *punicode_string; count_chars = _xls2xml_sreadU16 (unicode_string); compressed_string = !(*(unicode_string + 2) & 0x01); rich_string = *(unicode_string + 2) & 0x08; extended_string = *(unicode_string + 2) & 0x04; if (extended_string) /* like japanese version */ if (rich_string) { total_size = 3 + 4 + (compressed_string ? 1 : 2) * count_chars + 2 + 8 * _xls2xml_sreadU16 (unicode_string + 3) + _xls2xml_sreadU32 (unicode_string + 5); if (_xls2xml_sreadU32 (unicode_string + 5) > 65535) verbose ("Warning: length of ExtRst > 65535"); } else { total_size = 3 + 4 + (compressed_string ? 1 : 2) * count_chars + _xls2xml_sreadU32 (unicode_string + 3); if (_xls2xml_sreadU32 (unicode_string+3) > 65535) verbose ("Warning: length of ExtRst > 65535"); } else /* !extended_string */ total_size = 3 + (compressed_string ? 1 : 2) * count_chars + (rich_string ? (2 + 4 * _xls2xml_sreadU16 (unicode_string + 3)) : 0); /* line above means the same that the next lines: */ /* size_each_character = compressed_string ? 1 : 2; total_size = 3 + size_each_character * count_chars; * 3 + because first three bytes are: count_chars and flags * if (rich_string) total_size += 2 + 4 * _xls2xml_sreadU16 (unicode_string + 3); * 4 * because each formatting run is 4 bytes lenght * * 2 + because we must add the 2 bytes of formatting count info * */ pdest->len = total_size; pdest->string = malloc (total_size); test (pdest->string != NULL, 10); memcpy (pdest->string, unicode_string, total_size); *punicode_string += total_size; return 0; } /* this write the unicode string (as Excel write it) to a new xml node */ /* 10 15 */ int write_unicode_xml_child (xmlNodePtr father, xmlNodePtr * presult_child, char * children_name, U8 * unicodestring, U16 len_unicodestring, unsigned char ** presult_string) { int rich_string; int extended_string; int compressed_string; U16 count_chars; unsigned char * string; /* variables to handle formatting */ char fontrec[6]; /* max number 65535 */ xmlNodePtr formatting; xmlNodePtr child; U16 count_formatting; struct formatting_record { U16 start; U16 index; } * formatting_list; U16 i_formatting; U8 * p; count_chars = _xls2xml_sreadU16 (unicodestring); compressed_string = !(*(unicodestring + 2) & 0x01); rich_string = *(unicodestring + 2) & 0x08; extended_string = *(unicodestring + 2) & 0x04; verboseU16 (count_chars); verboseU16 (compressed_string); verboseU16 (rich_string); verboseU16 (extended_string); if (extended_string) /* we don't write extended strings yet */ verbose ("FIXME: write_unicode_xml_child(): extended strings") else /* no extended */ if (compressed_string) { if (!rich_string) { /* ***************************************************************** */ /* ***************************************************************** */ /* ***************************************************************** */ /* no extended, compressed, no rich string */ test (count_chars + 3 <= len_unicodestring, 15); string = calloc (1, 2*count_chars + 1); /* 2 * will needed if all char in string are >= 0x80 */ test (string != NULL, 10); /* UNICODE CONVERSION */ /* we need convert actual string from parameters->code_page to ucs2 */ verbose ("FIXME: write_unicode_xml_child(): convert from code page to ucs2"); /* by now, we use this function */ test (isolat1ToUTF8 (string, 2*count_chars, unicodestring+(rich_string ? 6 : 3), count_chars) != -1, 10); verbose_wonl ("->"); verbose_wonl (string); verbose ("<-"); if (presult_string != NULL) { *presult_string = string; return 0; } if (presult_child != NULL) { *presult_child = xmlNewChild (father, NULL, (char unsigned*)children_name, string); test (*presult_child != NULL, 10); } else test (xmlNewChild (father, NULL, (char unsigned*)children_name, string) != NULL, 10); free (string); } else /* rich_string */ { /* ***************************************************************** */ /* ***************************************************************** */ /* ***************************************************************** */ /* no extended, compressed, rich string */ test (presult_string == NULL, 15); count_formatting = _xls2xml_sreadU16 (unicodestring+3); test (count_formatting > 0, 15); test (count_formatting * 4 + count_chars + 5 <= len_unicodestring, 15); /* save formatting records */ formatting_list = malloc ((2+count_formatting) * sizeof (struct formatting_record)); test (formatting_list != NULL, 10); formatting_list[0].start = 0; formatting_list[0].index = 0xffff; /*dummy*/ p = unicodestring + 5 + count_chars; /* + 5 to jump info before actual string */ for (i_formatting = 1; i_formatting < count_formatting+1; i_formatting++) { formatting_list[i_formatting].start = _xls2xml_sreadU16 (p); verboseU16 (formatting_list[i_formatting].start); if (i_formatting > 0) test (formatting_list[i_formatting-1].start <= formatting_list[i_formatting].start, 15); p += 2; formatting_list[i_formatting].index = _xls2xml_sreadU16 (p); p += 2; } formatting_list[count_formatting+1].start = count_chars; formatting_list[count_formatting+1].index = 0xffff; /*dummy*/ child = xmlNewChild (father, NULL, (char unsigned*)children_name, NULL); test (child != NULL, 10); if (presult_child != NULL) *presult_child = child; for (i_formatting = 1; i_formatting < count_formatting + 2; i_formatting++) { U16 part_size; U8 * part_start; part_size = formatting_list[i_formatting].start - formatting_list[i_formatting-1].start; test (formatting_list[i_formatting].start > formatting_list[i_formatting-1].start, 15); part_start = unicodestring + 5 + formatting_list[i_formatting-1].start; string = calloc (1, 3*part_size + 1); /* 2 * will needed if all char in string are >= 0x80 */ test_exitf (string != NULL, 10, free (formatting_list)); /* UNICODE CONVERSION */ /* we need convert actual string from parameters->code_page to ucs2 */ verbose ("FIXME: write_unicode_xml_child(): convert from code page to ucs2"); /* by now, we use this function */ test (isolat1ToUTF8 (string, 3*part_size, part_start, part_size) != -1, 10); /*== part_size */ string[part_size] = 0; /* end very bad hack */ /* dont do the bad hack if we are visiting last (dummy) record */ if (i_formatting+1 <= count_formatting+2) if (formatting_list[i_formatting-1].start != formatting_list[i_formatting].start) { /* end very bad hack */ formatting = xmlNewChild (child, NULL, (unsigned char*)"formatting", string); if (i_formatting-1 > 0 || formatting_list[i_formatting].start == 0) { /* first part may not have formatting information */ sprintf (fontrec, "%d", formatting_list[i_formatting-1].index - (formatting_list[i_formatting-1].index > 3 ? 1 : 0)); xmlSetProp (formatting, (char unsigned*)"fontrefnum", (unsigned char*)fontrec); } } free (string); } free (formatting_list); } } else /* !compressed_string */ /* no extended, no compressed */ if (rich_string) { /* ***************************************************************** */ /* ***************************************************************** */ /* ***************************************************************** */ /* no extended, no compressed, rich string */ verbose ("FIXME: write_unicode_xml_child(): no extended, no compressed, rich string") /* FIXME: attention: next code is not tested at all !! */ test (presult_string == NULL, 15); count_formatting = _xls2xml_sreadU16 (unicodestring+3); test (count_formatting > 0, 15); test (count_formatting*4 + 2*count_chars + 5 <= len_unicodestring, 15); /* save formatting records */ formatting_list = malloc ((2+count_formatting) * sizeof (struct formatting_record)); test (formatting_list != NULL, 10); formatting_list[0].start = 0; formatting_list[0].index = 0xffff; /*dummy*/ p = unicodestring + 5 + 2*count_chars; /* + 5 to jump info before actual string */ for (i_formatting = 1; i_formatting < count_formatting+1; i_formatting++) { formatting_list[i_formatting].start = _xls2xml_sreadU16 (p); verboseU16 (formatting_list[i_formatting].start); if (i_formatting > 0) test (formatting_list[i_formatting-1].start <= formatting_list[i_formatting].start, 15); p += 2; formatting_list[i_formatting].index = _xls2xml_sreadU16 (p); p += 2; } formatting_list[count_formatting+1].start = 2*count_chars; formatting_list[count_formatting+1].index = 0xffff; /*dummy*/ child = xmlNewChild (father, NULL, (char unsigned*)children_name, NULL); test (child != NULL, 10); if (presult_child != NULL) *presult_child = child; for (i_formatting = 1; i_formatting < count_formatting+2; i_formatting++) { U16 part_size; U8 * part_start; part_size = formatting_list[i_formatting].start - formatting_list[i_formatting-1].start; test (formatting_list[i_formatting].start > formatting_list[i_formatting-1].start, 15); part_start = unicodestring + 5 + formatting_list[i_formatting-1].start; string = calloc (1, 3 * part_size + 1); /* 3 * will be needed if all chars in string are >= 0x0800 */ test_exitf (string != NULL, 10, free (formatting_list)); /* UNICODE CONVERSION */ test (fil_ucs2ToUTF8 (string, 3 * part_size, part_start, 2*part_size) == part_size, 10); string[part_size] = 0; /* end very bad hack */ /* dont do the bad hack if we are visiting last (dummy) record */ if (i_formatting+1 <= count_formatting+2) if (formatting_list[i_formatting-1].start != formatting_list[i_formatting].start) { /* end very bad hack */ formatting = xmlNewChild (child, NULL, (unsigned char*)"formatting", string); if (i_formatting-1 > 0 || formatting_list[i_formatting].start == 0) { /* first part may not have formatting information */ sprintf (fontrec, "%d", formatting_list[i_formatting-1].index - (formatting_list[i_formatting-1].index > 3 ? 1 : 0)); xmlSetProp (formatting, (char unsigned*)"fontrefnum", (unsigned char*)fontrec); } } free (string); } free (formatting_list); } else { /* ***************************************************************** */ /* ***************************************************************** */ /* ***************************************************************** */ /* no extended, no compressed, no rich string */ test (count_chars + 3 <= len_unicodestring, 15); string = calloc (1, 3*count_chars + 1); /* 3 * will needed if all char in string are >= 0x0800 */ test (string != NULL, 10); /* UNICODE CONVERSION */ test (fil_ucs2ToUTF8 (string, 3*count_chars, unicodestring+(rich_string ? 6 : 3), 2*count_chars) > 0, 10); if (presult_string != NULL) { *presult_string = string; return 0; } if (presult_child != NULL) { *presult_child = xmlNewChild (father, NULL, (char unsigned*)children_name, string); test (*presult_child != NULL, 10); } else test (xmlNewChild (father, NULL, (char unsigned*)children_name, string) != NULL, 10); free (string); } return 0; }