/*
   xls2xml: Converts from Microsoft Excel files to XML.
   Copyright 1999 Roberto Arturo Tena Sanchez

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
/*
   Roberto Arturo Tena Sanchez
 */

/* NOTE(review): the header names of the following six directives are missing
   in this copy of the file; restore them from the original source before
   building. */
#include
#include
#include
#include
#include
#include

/* stack ************ */

/* One entry of the string stack used while rebuilding a formula: a singly
   linked list whose head is the top of the stack. */
struct str_stack_record
{
  char * data;                      /* text of the (sub)expression */
  U16 precedence;                   /* precedence level of that text */
  struct str_stack_record * next;   /* record below this one, or NULL */
};
typedef struct str_stack_record stack_record;
typedef stack_record * pstack_record;
typedef pstack_record * ppstack_record;

/* Stack primitives (defined at the end of this file) */
static int push (ppstack_record pstack, char * data, U16 precedence);
static int push_get (ppstack_record pstack, char * data, U16 precedence);
static pstack_record pop (ppstack_record pstack);
static pstack_record top (pstack_record stack);
static void free_stack (ppstack_record pstack);
static int is_empty (pstack_record stack);
static size_t count_args_length (pstack_record stack, U32 nargs);
static void verbose_stack (pstack_record stack);

/* Precedence levels.  When an operator is written out, an operand whose
   precedence is lower than the operator's is wrapped in brackets. */
#define MAX_PRECEDENCE 65535
  /* 65535 is the max number that precedence (U16) can hold */
#define PLUS_MINUS_PRECEDENCE 500
#define BY_DIV_PRECEDENCE 510
#define PAREN_PRECEDENCE 515
#define NUM_PRECEDENCE 520
#define FUNC_PRECEDENCE NUM_PRECEDENCE
#define STRING_PRECEDENCE NUM_PRECEDENCE
#define CELLREF_PRECEDENCE NUM_PRECEDENCE
#define UNUARY_PRECEDENCE 530

/* Describes how one operator token (ptg) is written as text */
struct str_ptg_list_record
{
  U8 ptg;                    /* id number */
  char * prefix;             /* prefix string (before the first arg), or NULL */
  char * midfix;             /* midfix string (between the args), or NULL */
  char * sufix;              /* suffix string (after the last arg), or NULL */
  U8 args;                   /* number of args */
  U8 pre_space_before_fix;   /* number of fix (1=pre, 2=mid, 3=suf) which
                                can have space before (ptgAttr) */
  U8 post_space_after_fix;   /* number of fix (2=mid) which
                                can have space after (ptgAttr) */
  U16 precedence;            /* precedence level */
};
typedef struct str_ptg_list_record ptg_list_record;
typedef ptg_list_record * pptg_list_record;

/* ATTENTION: This list MUST be ordered by ptg number,
   because it is searched with bsearch */
static ptg_list_record ptg_list[] = {
  /* right now only 2 args are supported */
  { 0x03, NULL, "+", NULL, 2, 2, 2, PLUS_MINUS_PRECEDENCE },
  { 0x04, NULL, "-", NULL, 2, 2, 2, PLUS_MINUS_PRECEDENCE },
  { 0x05, NULL, "*", NULL, 2, 2, 2, BY_DIV_PRECEDENCE },
  { 0x06, NULL, "/", NULL, 2, 2, 2, BY_DIV_PRECEDENCE },
  { 0x12, "+", NULL, NULL, 1, 1, 2, UNUARY_PRECEDENCE },
  { 0x13, "-", NULL, NULL, 1, 1, 2, UNUARY_PRECEDENCE },
  { 0x15, "(", NULL, ")", 1, 1, 2, PAREN_PRECEDENCE }
};
/* number of entries in ptg_list and size of one entry (bsearch arguments) */
static U32 size_ptg_list = sizeof (ptg_list) / sizeof (ptg_list_record);
static U32 size_ptg_list_record = sizeof (ptg_list_record);
/* bsearch comparators: the key is the bare id, the element is a table record */
static int cmp_func_list_record (const void * p_a, const void * p_b);
static int cmp_ptg_list_record (const void * p_a, const void * p_b);

/* Describes one built-in function */
struct str_func_list_record
{
  U16 index;        /* index number */
  char * str;       /* string (the function name) */
  U8 args;          /* number of args; the ptgFuncVar case requires 0 here
                       and takes the real count from the token */
  U16 precedence;   /* precedence level */
};
typedef struct str_func_list_record func_list_record;
typedef func_list_record * pfunc_list_record;

/* ATTENTION: This list MUST be ordered by index number,
   because it is searched with bsearch */
static func_list_record func_list[] = {
  { 0x000f, "SIN", 1, FUNC_PRECEDENCE },
  { 0x0010, "COS", 1, FUNC_PRECEDENCE },
  { 0x0011, "TAN", 1, FUNC_PRECEDENCE }/*,
  { 0x0064, "CHOOSE", 0, FUNC_PRECEDENCE }*/ /* var arg */
};
/* FIXME CHOOSE : Args in stack are in reverse order!
*/ static U32 size_func_list = sizeof (func_list) / sizeof (func_list_record); static U32 size_func_list_record = sizeof (func_list_record); static int cmp_func_list_record (const void * p_a, const void * p_b); #define get_ptg_base(ptg) ((((ptg) & 0x40) ? ((ptg) | 0x20) : (ptg)) & 0x3F) #undef ends_local_msg #define ends_local_msg(msg) \ { \ free_stack (&stack); \ *presult_string = strdup ("Formula error: "#msg); \ } #undef ends_local_free_msg #define ends_local_free_msg(msg) \ { \ ends_local_msg (msg); \ free (new_result_string); \ } #undef add_spaces #define add_spaces(n,s) \ { \ n = malloc (1 + pre_spaces + strlen (s) + post_spaces); \ test_exitf (n != NULL, 10, ends_local_msg (not enough memory)); \ memset (n, ' ', pre_spaces); \ n [pre_spaces] = 0; \ strcat (n, s); \ memset (n + pre_spaces + strlen (s), ' ', post_spaces); \ n [pre_spaces + strlen (s) + post_spaces] = 0; \ pre_spaces = post_spaces = 0; \ } int fil_parsed_formula2str (char ** presult_string, U8 * parsed_expression, int * volatile_function) { char anumber[6]; /* max number 65535 */ char * pnumber; U16 length_parsed_expr; U8 * current_ptg; U8 current_ptg_base; pstack_record arg1, arg2; int bracket_arg1, bracket_arg2; pstack_record stack = NULL; size_t result_string_length; char * new_result_string; char * temp2_result_string; pptg_list_record pptg_record; U16 i; U16 func; pfunc_list_record pfunc_record; U16 pre_spaces, post_spaces, pre_fmla_spaces; U8 flags; test (presult_string != NULL, 19); *volatile_function = 0; pre_spaces = post_spaces = pre_fmla_spaces = 0; length_parsed_expr = _xls2xml_sreadU16 (parsed_expression); for (current_ptg = parsed_expression + 2; current_ptg - parsed_expression - 2 < length_parsed_expr; /* increment to current_ptg is inside the loop */ ) { current_ptg_base = get_ptg_base (*current_ptg); if (current_ptg_base > 0x7f) break; /* max no reserved ptg */ verbose_wonl ("ptg "); verboseU8 (current_ptg_base); switch (current_ptg_base) { case 0x03: /* ptgAdd */ case 0x04: /* 
ptgSub */ case 0x05: /* ptgMul */ case 0x06: /* ptgDiv */ case 0x12: /* ptgUplus */ case 0x13: /* ptgUminux */ case 0x15: /* ptgParen */ pptg_record = bsearch (¤t_ptg_base, ptg_list, size_ptg_list, size_ptg_list_record, &cmp_ptg_list_record); assert_return (xls2xml, pptg_record != NULL, 19); result_string_length = 0; if (pptg_record->args > 1) { arg2 = pop (&stack); test_exitf (arg2 != NULL, 0, ends_local_msg (unknown formula)); result_string_length += strlen (arg2->data); bracket_arg2 = arg2->precedence < pptg_record->precedence; if (bracket_arg2) result_string_length += 2; } arg1 = pop (&stack); test_exitf (arg1 != NULL, 0, ends_local_msg (unknown formula)); result_string_length += strlen (arg1->data); bracket_arg1 = arg1->precedence < pptg_record->precedence; if (bracket_arg1) result_string_length += 2; /* += 2 because there are _two_ brackets around the arg, if any */ result_string_length += 1 + pre_spaces + (pptg_record->prefix == NULL ? 0 : strlen (pptg_record->prefix)) + (pptg_record->midfix == NULL ? 0 : strlen (pptg_record->midfix)) + (pptg_record->sufix == NULL ? 
0 : strlen (pptg_record->sufix)) + post_spaces; /* 1 + because it's zero based string */ new_result_string = malloc (result_string_length); test_exitf (new_result_string != NULL, 10, ends_local_msg (not enough memory)); new_result_string[0] = 0; /* prefix */ if (pre_spaces && pptg_record->pre_space_before_fix == 1) { new_result_string [strlen(new_result_string) + pre_spaces] = 0; memset (new_result_string + strlen(new_result_string), ' ', pre_spaces); pre_spaces = 0; } if (pptg_record->prefix != NULL) strcat (new_result_string, pptg_record->prefix); /* arg1 */ if (bracket_arg1) strcat (new_result_string, "("); strcat (new_result_string, arg1->data); if (bracket_arg1) strcat (new_result_string, ")"); /* midfix */ if (pre_spaces && pptg_record->pre_space_before_fix == 2) { new_result_string [strlen(new_result_string) + pre_spaces] = 0; memset (new_result_string + strlen(new_result_string), ' ', pre_spaces); pre_spaces = 0; } if (pptg_record->midfix != NULL) strcat (new_result_string, pptg_record->midfix); if (post_spaces && pptg_record->post_space_after_fix == 2) { new_result_string [strlen(new_result_string) + post_spaces] = 0; memset (new_result_string+strlen(new_result_string), ' ', post_spaces); post_spaces = 0; } /* arg2 */ if (pptg_record->args > 1) { if (bracket_arg2) strcat (new_result_string, "("); strcat (new_result_string, arg2->data); if (bracket_arg2) strcat (new_result_string, ")"); } /* sufix */ if (pre_spaces && pptg_record->pre_space_before_fix == 3) { new_result_string [strlen(new_result_string) + pre_spaces] = 0; memset (new_result_string + strlen(new_result_string), ' ', pre_spaces); pre_spaces = 0; } if (pptg_record->sufix != NULL) strcat (new_result_string, pptg_record->sufix); verbose ("FIXME: is post_space_after_fix == 3 necessary?") /* if (post_spaces && pptg_record->post_space_after_fix == 3) { new_result_string [strlen(new_result_string) + post_spaces] = 0; memset (new_result_string+strlen(new_result_string), ' ', post_spaces); post_spaces 
= 0; } */ test_call_exitf (push_get (&stack, new_result_string, pptg_record->precedence), int, ends_local_free_msg (not enough memory)); /* don't free new_result_string, is taken by push_get */ verbose_stack (stack); current_ptg++; break; case 0x17: /* ptgStr */ switch (parameters->biff_version) { case BIFF_5_7: verbose ("FIXME: ptgStr: Dangerous - 1"); *(current_ptg - 1) = *(current_ptg + 1); *(current_ptg) = 0; *(current_ptg + 1) = 0; /* flags */ test_call_exitf ( write_unicode_xml_child (NULL, NULL, NULL, current_ptg - 1, 3 + _xls2xml_sreadU16 (current_ptg - 1), &new_result_string), int, ends_local_msg (erroneous string) ); /* 3 + because two bytes for length and one for flags */ current_ptg += 2 + *(current_ptg - 1); break; case BIFF_8: *(current_ptg) = *(current_ptg + 1); *(current_ptg + 1) = 0; test_call_exitf ( write_unicode_xml_child (NULL, NULL, NULL, current_ptg, 3 + _xls2xml_sreadU16 (current_ptg), &new_result_string), int, ends_local_msg (erroneous string formula) ); /* 3 + because two bytes for length and one for flags */ current_ptg += 3 + _xls2xml_sreadU16 (current_ptg); break; default: ends_local_msg (biff version unknown); return 15; } temp2_result_string = malloc (3 + strlen (new_result_string)); /* 3 + because two '' signs and one zero at end */ test_exitf (temp2_result_string != NULL, 10, ends_local_free_msg (not enough memory)); temp2_result_string[0] = 0; strcat (temp2_result_string, "\""); strcat (temp2_result_string, new_result_string); strcat (temp2_result_string, "\""); free (new_result_string); add_spaces (new_result_string, temp2_result_string); free (temp2_result_string); test_call_exitf (push_get (&stack, new_result_string, STRING_PRECEDENCE), int, ends_local_free_msg (not enough memory)); /* don't free new_result_string, is taken by push_get */ verbose_stack (stack); break; /* 0x17 */ case 0x19: /* ptgAttr */ verbose ("FIXME: ptgAttr: missing implementation: bitFAttrIf, bitFAttrChoose, bitFAttrSum, bitFAttrBaxcel"); flags = 
*(current_ptg + 1); switch (flags) { case 0x01: /* bitFAttrSemi */ *volatile_function = 1; verbose ("volatile function"); break; case 0x04: /* bitFAttrChoose */ /* ignore it */ verbose ("FIXME: CHOOSE implementation problem"); current_ptg += 2 * (_xls2xml_sreadU16 (current_ptg + 2) + 1); verbose ("ignoring bitFAttrChoose"); break; case 0x08: /* bitFAttrGoto */ /* ignore it */ verbose ("ignoring bitFAttrGoto"); break; case 0x40: /* bitFAttrSpace */ verbose ("FIXME: bitFAttrSpace: missing implementation: bitFEnter, bitFPreEnter, bitFPostEnter"); switch (*(current_ptg + 2)) { case 0x00: /* bitFSpace */ case 0x02: /* bitFPreSpace */ pre_spaces += *(current_ptg + 3); verboseU16 (pre_spaces); break; case 0x04: /* bitFPostSpace */ post_spaces += *(current_ptg + 3); verboseU16 (post_spaces); break; case 0x06: /* bitFPreFmlaSpace */ pre_fmla_spaces += *(current_ptg + 3); verboseU16 (pre_fmla_spaces); break; default: verboseU8 (*(current_ptg + 2)); verbose ("Warning: unknown ptgAttr bitFAttrSpace space"); ends_local_msg (unknown formula); return 0; } default: verboseU8 (flags); verbose ("Warning: unknown ptgAttr flag"); ends_local_msg (unknown formula); return 0; } current_ptg += 4; break; /* 0x19 */ case 0x1e: /* ptgInt */ sprintf (anumber, "%d", _xls2xml_sreadU16 (current_ptg + 1)); add_spaces (new_result_string, anumber); test_call_exitf (push_get (&stack, new_result_string, NUM_PRECEDENCE), int, ends_local_free_msg (not enough memory)); /* don't free new_result_string, is taken by push_get */ verbose_stack (stack); current_ptg += 3; break; /* 0x1e */ case 0x1f: /* ptgNum */ pnumber = IEEEnumber2str (current_ptg + 1, 1); test_exitf (pnumber != NULL, 15, ends_local_msg (erroneous number)); add_spaces (new_result_string, pnumber); free (pnumber); test_call_exitf (push (&stack, new_result_string, NUM_PRECEDENCE), int, ends_local_free_msg (not enough memory)); verbose_stack (stack); current_ptg += 9; break; /* 0x1f */ case 0x21: /* ptgFunc */ func = _xls2xml_sreadU16 
(current_ptg + 1); pfunc_record = bsearch (&func, func_list, size_func_list, size_func_list_record, &cmp_func_list_record); test_exitf (pfunc_record != NULL, 0, ends_local_msg (unknown formula)); assert_return (xls2xml, pfunc_record->str != NULL, 19); assert_return (xls2xml, pfunc_record->args != 0, 19); result_string_length = 3 + strlen (pfunc_record->str); /* 3 + because there are two brackets around the args and the end zero */ result_string_length += strlen (pfunc_record->str) + count_args_length (stack, pfunc_record->args); test_exitf (result_string_length > 0, 0, ends_local_msg (unknown formula)); new_result_string = malloc (result_string_length); test_exitf (new_result_string != NULL, 10, ends_local_msg (not enough memory)); new_result_string[0] = 0; strcat (new_result_string, pfunc_record->str); strcat (new_result_string, "("); for (i = 0; i < pfunc_record->args; i++) { arg1 = pop (&stack); assert_return (xls2xml, arg1 != NULL, 19); strcat (new_result_string, arg1->data); if (i && i < pfunc_record->args - 1) strcat (new_result_string, ","); free (arg1); } strcat (new_result_string, ")"); add_spaces (temp2_result_string, new_result_string); free (new_result_string); test_call_exitf (push_get (&stack, temp2_result_string, pfunc_record->precedence), int, ends_local_free_msg (not enough memory)); /* don't free new_result_string, is taken by push_get */ verbose_stack (stack); current_ptg += 3; break; /* 0x21 */ case 0x22: /* ptgFuncVar */ verbose ("FIXME: ptgFuncVar: missing implementation fPrompt and fCE"); func = _xls2xml_sreadU16 (current_ptg + 2) & 0x7fff; pfunc_record = bsearch (&func, func_list, size_func_list, size_func_list_record, &cmp_func_list_record); test_exitf (pfunc_record != NULL, 0, ends_local_msg (unknown formula)); assert_return (xls2xml, pfunc_record->str != NULL, 19); assert_return (xls2xml, pfunc_record->args == 0, 19); result_string_length = 3 + strlen (pfunc_record->str); /* 3 + because there are two brackets around the args and the end 
zero */ result_string_length += strlen (pfunc_record->str) + count_args_length (stack, _xls2xml_sreadU16 (current_ptg + 1) & 0x7f); test_exitf (result_string_length > 0, 0, ends_local_msg (unknown formula)); new_result_string = malloc (result_string_length); test_exitf (new_result_string != NULL, 10, ends_local_msg (not enough memory)); new_result_string[0] = 0; strcat (new_result_string, pfunc_record->str); strcat (new_result_string, "("); verboseU8 (_xls2xml_sreadU16 (current_ptg + 1) & 0x7f); for (i = 0; i < (_xls2xml_sreadU16 (current_ptg + 1) & 0x7f); i++) { verbose ("1"); arg1 = pop (&stack); assert_return (xls2xml, arg1 != NULL, 19); if (i) strcat (new_result_string, ","); strcat (new_result_string, arg1->data); free (arg1); } strcat (new_result_string, ")"); add_spaces (temp2_result_string, new_result_string); free (new_result_string); test_call_exitf (push_get (&stack, temp2_result_string, pfunc_record->precedence), int, ends_local_free_msg (not enough memory)); /* don't free new_result_string, is taken by push_get */ verbose_stack (stack); current_ptg += 4; break; /* 0x22 */ case 0x24: /* ptgRef */ switch (parameters->biff_version) { case BIFF_5_7: test_call_exitf (create_coord (&pnumber, _xls2xml_sreadU16 (current_ptg + 1) & 0x3fff, *(current_ptg + 3), _xls2xml_sreadU16 (current_ptg + 1) & 0x8000, _xls2xml_sreadU16 (current_ptg + 1) & 0x4000), int, ends_local_free_msg (not enough memory)); current_ptg += 4; break; case BIFF_8: test_call_exitf (create_coord (&pnumber, _xls2xml_sreadU16 (current_ptg + 1), _xls2xml_sreadU16 (current_ptg + 3) & 0x3fff, _xls2xml_sreadU16 (current_ptg + 3) & 0x8000, _xls2xml_sreadU16 (current_ptg + 3) & 0x4000), int, ends_local_free_msg (not enough memory)); current_ptg += 5; break; default: ends_local_msg (biff version unknown); return 15; } add_spaces (new_result_string, pnumber); free (pnumber); test_call_exitf (push_get (&stack, new_result_string, CELLREF_PRECEDENCE), int, ends_local_free_msg (not enough memory)); /* don't 
free pnumber, is taken by push_get */ verbose_stack (stack); break; /* 0x24 */ default: /* ptg unknown */ verbose ("Warning: unknown ptg"); ends_local_msg (unknown formula); return 0; }; } /* at this point, stack should have exactly one item: the string */ test_exitf (!is_empty (stack), 0, ends_local_msg (erroneous formula)); test_exitf (is_empty (stack->next), 0, ends_local_msg (erroneous formula)); /* join "=" sign with string */ result_string_length = 2 + pre_fmla_spaces + strlen (stack->data); verboseU16 (result_string_length); verbose (stack->data); /* 2 + because: one for "=" sign and another because zero based string */ *presult_string = malloc (result_string_length); test_exitf (*presult_string != NULL, 10, ends_local_msg (not enough memory)); (*presult_string)[0] = '='; if (pre_fmla_spaces) memset (1 + (*presult_string), ' ', pre_fmla_spaces); (*presult_string)[1 + pre_fmla_spaces] = 0; strcat (*presult_string, stack->data); free_stack (&stack); return 0; } int cmp_func_list_record (const void * p_a, const void * p_b) { U16 func; func_list_record * b; func = *((U16 *)p_a); b = (pfunc_list_record)p_b; if (func > b->index) return 1; else if (func < b->index) return -1; else return 0; } int cmp_ptg_list_record (const void * p_a, const void * p_b) { U8 ptg_base; ptg_list_record * b; ptg_base = *((U8 *)p_a); b = (pptg_list_record)p_b; if (ptg_base > b->ptg) return 1; else if (ptg_base < b->ptg) return -1; else return 0; } /* stack ******************************* */ int push (ppstack_record pstack, char * data, U16 precedence) { pstack_record new_record; test (pstack != NULL, 19); new_record = malloc (sizeof (stack_record)); test (new_record != NULL, 10); if (data == NULL) new_record->data = NULL; else { new_record->data = strdup (data); test_exitf (new_record->data != NULL, 10, free (new_record)); } new_record->precedence = precedence; new_record->next = *pstack; *pstack = new_record; return 0; } int push_get (ppstack_record pstack, char * data, U16 precedence) 
{ pstack_record new_record; test (pstack != NULL, 19); new_record = malloc (sizeof (stack_record)); test (new_record != NULL, 10); if (data == NULL) new_record->data = NULL; else { new_record->data = data; test_exitf (new_record->data != NULL, 10, free (new_record)); } new_record->precedence = precedence; new_record->next = *pstack; *pstack = new_record; return 0; } pstack_record pop (ppstack_record pstack) { pstack_record record; test (pstack != NULL, NULL); record = *pstack; if (*pstack != NULL) *pstack = (*pstack)->next; return record; } pstack_record top (pstack_record stack) { verbose ("FIXME: formula.c: convert top to macro for performance"); return stack; } void free_stack (ppstack_record pstack) { pstack_record record; if (pstack == NULL) return; while (*pstack != NULL) { record = *pstack; *pstack = (*pstack)->next; if (record->data != NULL) free (record->data); free (record); } } int is_empty (pstack_record stack) { verbose ("FIXME: formula.c: convert is_empty to macro for performance"); return stack == NULL; } size_t count_args_length (pstack_record stack, U32 nargs) { U32 i; size_t length; pstack_record record; length = 0; for (record = stack, i = 0; i < nargs; record = record->next, i++) { test (record != NULL, 0); length += strlen (record->data); } length += nargs - 1; /* commas beween args */ return length; } void verbose_stack (pstack_record stack) { pstack_record record; #ifdef XLS2XML_VERBOSE printf ("stack (from top to bottom):\n"); for (record = stack; record != NULL; record = record->next) printf ("[%s:%d]\n", record->data, record->precedence); printf ("end stack\n"); #endif }