/* * File: html.c * * Copyright (C) 1997 Raph Levien * Copyright (C) 1999 James McCollough * Copyright (C) 2000-2004 Jorge Arellano Cid * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ /* * Dillo HTML parsing routines */ /* Undefine if you want to unroll tables. For instance for PDAs */ #define USE_TABLES /* Define to 1 to ignore white space immediately after an open tag, * and immediately before a close tag. */ #define SGML_SPCDEL 0 #include /* for isspace and tolower */ #include /* for memcpy and memmove */ #include #include /* for sprintf */ #include /* for rint */ #include #include #include "msg.h" #include "list.h" #include "binaryconst.h" #include "colors.h" #include "dillo.h" #include "history.h" #include "nav.h" #include "menu.h" #include "commands.h" #include "dw.h" /* for Dw_cursor_hand */ #include "dw_gtk_viewport.h" #include "dw_gtk_scrolled_window.h" #include "dw_widget.h" #include "dw_page.h" #include "dw_bullet.h" #include "dw_button.h" #include "dw_hruler.h" #include "dw_embed_gtk.h" #include "dw_table.h" #include "dw_table_cell.h" #include "dw_list_item.h" #include "dw_style.h" #include "interface.h" #include "progressbar.h" #include "prefs.h" #include "misc.h" #include "capi.h" #include "html.h" #define DEBUG_LEVEL 10 #include "debug.h" typedef void (*TagOpenFunct) (DilloHtml *Html, char *Tag, gint Tagsize); typedef void (*TagCloseFunct) (DilloHtml *Html, gint TagIdx); #define TAB_SIZE 8 /* * Forward declarations */ static const char *Html_get_attr(DilloHtml *html, const char *tag, gint tagsize, const char *attrname); static const char *Html_get_attr2(DilloHtml *html, const char *tag, gint tagsize, const char *attrname, DilloHtmlTagParsingFlags flags); static char *Html_get_attr_wdef(DilloHtml *html, const char *tag, gint tagsize, const char 
*attrname, const char *def); static void Html_add_widget(DilloHtml *html, DwWidget *widget, char *width_str, char *height_str, DwStyle *style_attrs); static gint Html_write_raw(DilloHtml *html, char *buf, gint bufsize, gint Eof); static void Html_write(DilloHtml *html, char *Buf, gint BufSize, gint Eof); static void Html_close(DilloHtml *html, gint ClientKey); static void Html_callback(int Op, CacheClient_t *Client); static DilloHtml *Html_new(BrowserWindow *bw, const DilloUrl *url); static void Html_tag_open_input(DilloHtml *html, char *tag, gint tagsize); static void Html_add_input(DilloHtmlForm *form, DilloHtmlInputType type, GtkWidget *widget, const char *name, const char *init_str, DilloHtmlSelect *select, gboolean init_val); static void Html_submit_form(GtkWidget *submit, DilloHtmlLB *html_lb, gint click_x, gint click_y); static void Html_reset_form(GtkWidget *reset, DilloHtmlLB *html_lb); static gint Html_tag_index(char *tag); /* exported function */ DwWidget *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data); /* * Local Data */ /* The following array of font sizes has to be _strictly_ crescent */ static const gint FontSizes[] = {8, 10, 12, 14, 18, 24}; static const gint FontSizesNum = 6; static const gint FontSizesBase = 2; /* Parsing table structure */ typedef struct { gchar *name; /* element name */ unsigned char Flags; /* flags (explained near the table data) */ gchar EndTag; /* Is it Required, Optional or Forbidden */ guchar TagLevel; /* Used to heuristically parse bad HTML */ TagOpenFunct open; /* Open function */ TagCloseFunct close; /* Close function */ } TagInfo; static const TagInfo Tags[]; /* * Return the line number of the tag being processed by the parser. 
*/ static gint Html_get_line_number(DilloHtml *html) { gint i, ofs, line; const char *p = html->Start_Buf; g_return_val_if_fail(p != NULL, -1); ofs = html->CurrTagOfs; line = html->OldTagLine; for (i = html->OldTagOfs; i < ofs; ++i) if (p[i] == '\n') ++line; html->OldTagOfs = html->CurrTagOfs; html->OldTagLine = line; return line; } /* * Collect HTML error strings inside the linkblock. */ static void Html_msg(DilloHtml *html, const char *format, ... ) { va_list argp; gchar buf[512]; g_snprintf(buf, 512, "HTML warning: line %d, ", Html_get_line_number(html)); g_string_append(html->linkblock->page_bugs, buf); va_start(argp, format); g_vsnprintf(buf, 512, format, argp); va_end(argp); g_string_append(html->linkblock->page_bugs, buf); a_Interface_bug_meter_update(html->bw, ++html->linkblock->num_page_bugs); } /* * Wrapper for a_Url_new that adds an error detection message. * (if use_base_url is TRUE, html->linkblock->base_url is used) */ static DilloUrl *Html_url_new(DilloHtml *html, const gchar *url_str, const gchar *base_url, gint flags, gint32 posx, gint32 posy, gint use_base_url) { DilloUrl *url; gint n_ic, n_ic_spc; url = a_Url_new( url_str, (use_base_url) ? base_url : URL_STR_(html->linkblock->base_url), flags, posx, posy); if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) { const char *suffix = (n_ic) > 1 ? "s" : ""; n_ic_spc = URL_ILLEGAL_CHARS_SPC(url); if (n_ic == n_ic_spc) { MSG_HTML("URL has %d illegal character%s [%d space%s]\n", n_ic, suffix, n_ic_spc, suffix); } else if (n_ic_spc == 0) { MSG_HTML("URL has %d illegal character%s [%d in (00-1F or 7F)]\n", n_ic, suffix, n_ic); } else { MSG_HTML("URL has %d illegal character%s " "[%d space%s and %d in (00-1F or 7F)]\n", n_ic, suffix, n_ic_spc, n_ic_spc ? "s" : "", n_ic-n_ic_spc); } } return url; } /* * Set callback function and callback data for "html/text" MIME type. 
*/ DwWidget *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data) { DilloWeb *web = P; DilloHtml *html = Html_new(web->bw, web->url); *Data = (void *) html; *Call = (CA_Callback_t) Html_callback; return html->dw; } /* * We'll make the linkblock first to get it out of the way. */ static DilloHtmlLB *Html_lb_new(BrowserWindow *bw, const DilloUrl *url) { DilloHtmlLB *html_lb = g_new(DilloHtmlLB, 1); html_lb->bw = bw; html_lb->base_url = a_Url_dup(url); html_lb->num_forms_max = 1; html_lb->num_forms = 0; html_lb->forms = NULL; html_lb->num_links_max = 1; html_lb->num_links = 0; html_lb->links = NULL; a_Dw_image_map_list_init(&html_lb->maps); html_lb->link_color = prefs.link_color; html_lb->visited_color = prefs.visited_color; html_lb->num_page_bugs = 0; html_lb->page_bugs = g_string_new(""); return html_lb; } /* * Free the memory used by the linkblock */ static void Html_lb_free(void *lb) { gint i, j, k; DilloHtmlForm *form; DilloHtmlLB *html_lb = lb; DEBUG_MSG(3, "Html_lb_free\n"); a_Url_free(html_lb->base_url); for (i = 0; i < html_lb->num_forms; i++) { form = &html_lb->forms[i]; a_Url_free(form->action); for (j = 0; j < form->num_inputs; j++) { g_free(form->inputs[j].name); g_free(form->inputs[j].init_str); if (form->inputs[j].type == DILLO_HTML_INPUT_SELECT || form->inputs[j].type == DILLO_HTML_INPUT_SEL_LIST) { for (k = 0; k < form->inputs[j].select->num_options; k++) { g_free(form->inputs[j].select->options[k].value); } g_free(form->inputs[j].select->options); g_free(form->inputs[j].select); } } g_free(form->inputs); } g_free(html_lb->forms); for (i = 0; i < html_lb->num_links; i++) if (html_lb->links[i]) a_Url_free(html_lb->links[i]); g_free(html_lb->links); a_Dw_image_map_list_free(&html_lb->maps); g_string_free(html_lb->page_bugs, TRUE); g_free(html_lb); } /* * Set the URL data for image maps. 
*/ static void Html_set_link_coordinates(DilloHtmlLB *lb, gint link, gint x, gint y) { gchar data[64]; if (x != -1) { g_snprintf(data, 64, "?%d,%d", x, y); a_Url_set_ismap_coords(lb->links[link], data); } } /* * Handle the status function generated by the dw scroller, * and show the url in the browser status-bar. */ static void Html_handle_status(DwWidget *widget, gint link, gint x, gint y, DilloHtmlLB *lb) { DilloUrl *url; url = (link == -1) ? NULL : lb->links[link]; if (url) { Html_set_link_coordinates(lb, link, x, y); a_Interface_msg(lb->bw, "%s", URL_ALT_(url) ? URL_ALT_(url) : URL_STR_(url)); a_Dw_widget_set_cursor (widget, Dw_cursor_hand); lb->bw->status_is_link = 1; } else { if (lb->bw->status_is_link) a_Interface_msg(lb->bw, ""); a_Dw_widget_set_cursor (widget, NULL); } } /* * Popup the link menu ("link_pressed" callback of the page) */ static gboolean Html_link_menu(DwWidget *widget, gint link, gint x, gint y, GdkEventButton *event, DilloHtmlLB *lb) { DwWidget *widget_at_cursor; gboolean show_oi = FALSE; if (event->button == 3) { Html_set_link_coordinates(lb, link, x, y); a_Menu_popup_set_url(lb->bw, lb->links[link]); /* if we've got an image, prepare the image popup */ widget_at_cursor = a_Dw_gtk_scrolled_window_widget_at_viewport_point( GTK_DW_SCROLLED_WINDOW (lb->bw->docwin), event->x, event->y); if (widget_at_cursor && DW_IS_IMAGE (widget_at_cursor)) { DwImage *image = DW_IMAGE (widget_at_cursor); /* test image->url (it may have not started to arrive yet!) 
*/ if (image->url) { /* use the second URL for this popup */ gtk_object_set_data(GTK_OBJECT (lb->bw->menu_popup.over_image), "url2", GINT_TO_POINTER(2)); a_Menu_popup_set_url2(lb->bw, image->url); show_oi = TRUE; } } a_Menu_popup_ol_show_oi(lb->bw, show_oi); gtk_menu_popup(GTK_MENU(lb->bw->menu_popup.over_link), NULL, NULL, NULL, NULL, event->button, event->time); return TRUE; } return FALSE; } /* * Activate a link ("link_clicked" callback of the page) */ static gboolean Html_link_clicked(DwWidget *widget, gint link, gint x, gint y, GdkEventButton *event, DilloHtmlLB *lb) { Html_set_link_coordinates(lb, link, x, y); if (event->button == 1) a_Nav_push(lb->bw, lb->links[link]); else if (event->button == 2) { a_Nav_push_nw(lb->bw, lb->links[link]); } else { return FALSE; } if (DW_IS_PAGE (widget)) a_Dw_page_change_link_color (DW_PAGE (widget), link, lb->visited_color); return TRUE; } /* * Popup the image menu ("button_press_event" callback of image) */ static gboolean Html_image_menu(DwWidget *widget, gint32 x, gint32 y, GdkEventButton *event, BrowserWindow *bw) { DwImage *image = DW_IMAGE (widget); if (event->button == 3 && image->url) { a_Menu_popup_set_url(bw, image->url); a_Menu_popup_clear_url2(bw->menu_popup.over_image); gtk_menu_popup(GTK_MENU(bw->menu_popup.over_image), NULL, NULL, NULL, NULL, event->button, event->time); return TRUE; } return FALSE; } /* * Popup the page menu ("button_press_event" callback of the viewport) */ static int Html_page_menu(GtkWidget *viewport, GdkEventButton *event, BrowserWindow *bw) { gpointer bug_pix; if (event->button == 3) { /* set the working URL */ a_Menu_popup_set_url(bw, a_History_get_url(NAV_TOP(bw))); /* set "View page Bugs" sensitivity */ bug_pix = gtk_object_get_data(GTK_OBJECT(bw->status_bug_meter), "bug"); gtk_widget_set_sensitive(bw->viewbugs_menuitem, GTK_WIDGET_VISIBLE(GTK_WIDGET(bug_pix))); gtk_menu_popup(GTK_MENU(bw->menu_popup.over_page), NULL, NULL, NULL, NULL, event->button, event->time); return TRUE; } else 
return FALSE; } /* * Connect all signals of a page or an image. */ static void Html_connect_signals(DilloHtml *html, GtkObject *widget) { gtk_signal_connect (widget, "link_entered", GTK_SIGNAL_FUNC(Html_handle_status), (gpointer)html->linkblock); gtk_signal_connect (widget, "link_pressed", GTK_SIGNAL_FUNC(Html_link_menu), (gpointer)html->linkblock); gtk_signal_connect (widget, "link_clicked", GTK_SIGNAL_FUNC(Html_link_clicked), (gpointer)html->linkblock); } /* * Create a new link in the linkblock, set it as the url's parent * and return the index. */ static gint Html_set_new_link(DilloHtml *html, DilloUrl **url) { gint nl; nl = html->linkblock->num_links; a_List_add(html->linkblock->links, nl, html->linkblock->num_links_max); html->linkblock->links[nl] = (*url) ? *url : NULL; return html->linkblock->num_links++; } /* * Check an integer value to be inside a range. * Return: 'n' if valid, 'def' if not. */ static int Html_check_int(int n, int min, int max, int def) { return (n >= min && n <= max) ? n : def; } /* * Allocate and insert form information into the Html linkblock */ static gint Html_form_new(DilloHtmlLB *html_lb, DilloHtmlMethod method, const DilloUrl *action, DilloHtmlEnc enc) { gint nf; a_List_add(html_lb->forms, html_lb->num_forms, html_lb->num_forms_max); nf = html_lb->num_forms; html_lb->forms[nf].method = method; html_lb->forms[nf].action = a_Url_dup(action); html_lb->forms[nf].enc = enc; html_lb->forms[nf].num_inputs = 0; html_lb->forms[nf].num_inputs_max = 4; html_lb->forms[nf].inputs = NULL; html_lb->forms[nf].num_entry_fields = 0; html_lb->forms[nf].num_submit_buttons = 0; html_lb->num_forms++; _MSG("Html_form_new: action=%s nform=%d\n", action, nf); return nf; } /* * Change one toplevel attribute. var should be an identifier. val is * only evaluated once, so you can safely use a function call for it. 
*/ #define HTML_SET_TOP_ATTR(html, var, val) \ do { \ DwStyle style_attrs, *old_style; \ \ old_style = (html)->stack[(html)->stack_top].style; \ style_attrs = *old_style; \ style_attrs.var = (val); \ (html)->stack[(html)->stack_top].style = \ a_Dw_style_new (&style_attrs, (html)->bw->main_window->window); \ a_Dw_style_unref (old_style); \ } while (FALSE) /* * Set the font at the top of the stack. BImask specifies which * attributes in BI should be changed. */ static void Html_set_top_font(DilloHtml *html, gchar *name, gint size, gint BI, gint BImask) { DwStyleFont font_attrs; font_attrs = *html->stack[(html)->stack_top].style->font; if ( name ) font_attrs.name = name; if ( size ) font_attrs.size = size; if ( BImask & 1 ) font_attrs.weight = (BI & 1) ? 700 : 400; if ( BImask & 2 ) font_attrs.style = (BI & 2) ? (prefs.use_oblique ? DW_STYLE_FONT_STYLE_OBLIQUE : DW_STYLE_FONT_STYLE_ITALIC) : DW_STYLE_FONT_STYLE_NORMAL; HTML_SET_TOP_ATTR (html, font, a_Dw_style_font_new (&font_attrs)); } /* * Evaluates the ALIGN attribute (left|center|right|justify) and * sets the style at the top of the stack. */ static void Html_tag_set_align_attr(DilloHtml *html, char *tag, gint tagsize) { const char *align, *charattr; if ((align = Html_get_attr(html, tag, tagsize, "align"))) { if (g_strcasecmp (align, "left") == 0) HTML_SET_TOP_ATTR (html, text_align, DW_STYLE_TEXT_ALIGN_LEFT); else if (g_strcasecmp (align, "right") == 0) HTML_SET_TOP_ATTR (html, text_align, DW_STYLE_TEXT_ALIGN_RIGHT); else if (g_strcasecmp (align, "center") == 0) HTML_SET_TOP_ATTR (html, text_align, DW_STYLE_TEXT_ALIGN_CENTER); else if (g_strcasecmp (align, "justify") == 0) HTML_SET_TOP_ATTR (html, text_align, DW_STYLE_TEXT_ALIGN_JUSTIFY); else if (g_strcasecmp (align, "char") == 0) { /* todo: Actually not supported for

etc. */ HTML_SET_TOP_ATTR (html, text_align, DW_STYLE_TEXT_ALIGN_STRING); if ((charattr = Html_get_attr(html, tag, tagsize, "char"))) { if (charattr[0] == 0) /* todo: ALIGN=" ", and even ALIGN="&32;" will reult in * an empty string (don't know whether the latter is * correct, has to be clarified with the specs), so * that for empty strings, " " is assumed. */ HTML_SET_TOP_ATTR (html, text_align_char, ' '); else HTML_SET_TOP_ATTR (html, text_align_char, charattr[0]); } else /* todo: Examine LANG attr of . */ HTML_SET_TOP_ATTR (html, text_align_char, '.'); } } } /* * Evaluates the VALIGN attribute (top|bottom|middle|baseline) and * sets the style in style_attrs. Returns TRUE when set. */ static gboolean Html_tag_set_valign_attr(DilloHtml *html, char *tag, gint tagsize, DwStyle *style_attrs) { const char *attr; if ((attr = Html_get_attr(html, tag, tagsize, "valign"))) { if (g_strcasecmp (attr, "top") == 0) style_attrs->valign = DW_STYLE_VALIGN_TOP; else if (g_strcasecmp (attr, "bottom") == 0) style_attrs->valign = DW_STYLE_VALIGN_BOTTOM; else if (g_strcasecmp (attr, "baseline") == 0) style_attrs->valign = DW_STYLE_VALIGN_BASELINE; else style_attrs->valign = DW_STYLE_VALIGN_MIDDLE; return TRUE; } else return FALSE; } /* * Add a new DwPage into the current DwPage, for indentation. * left and right are the horizontal indentation amounts, space is the * vertical space around the block. 
*/ static void Html_add_indented_widget(DilloHtml *html, DwWidget *page, int left, int right, int space) { DwStyle style_attrs, *style; style_attrs = *html->stack[html->stack_top].style; a_Dw_style_box_set_val(&style_attrs.margin, 0); a_Dw_style_box_set_val(&style_attrs.border_width, 0); a_Dw_style_box_set_val(&style_attrs.padding, 0); /* Activate this for debugging */ #if 0 a_Dw_style_box_set_val(&style_attrs.border_width, 1); a_Dw_style_box_set_border_color (&style_attrs, a_Dw_style_shaded_color_new(style_attrs.color->color_val, html->bw->main_window->window)); a_Dw_style_box_set_border_style(&style_attrs, DW_STYLE_BORDER_DASHED); #endif style_attrs.margin.left = left; style_attrs.margin.right = right; style = a_Dw_style_new (&style_attrs, html->bw->main_window->window); a_Dw_page_add_parbreak (DW_PAGE (html->dw), space, style); a_Dw_page_add_widget (DW_PAGE (html->dw), page, style); a_Dw_page_add_parbreak (DW_PAGE (html->dw), space, style); html->stack[html->stack_top].page = html->dw = page; html->stack[html->stack_top].hand_over_break = TRUE; a_Dw_style_unref (style); /* Handle it when the user clicks on a link */ Html_connect_signals(html, GTK_OBJECT(page)); } /* * Create and add a new indented DwPage to the current DwPage */ static void Html_add_indented(DilloHtml *html, int left, int right, int space) { DwWidget *page = a_Dw_page_new (); Html_add_indented_widget (html, page, left, right, space); } /* * Given a font_size, this will return the correct 'level'. * (or the closest, if the exact level isn't found). */ static gint Html_fontsize_to_level(gint fontsize) { gint i, level; gdouble normalized_size = fontsize / prefs.font_factor, approximation = FontSizes[FontSizesNum-1] + 1; for (i = level = 0; i < FontSizesNum; i++) if (approximation >= fabs(normalized_size - FontSizes[i])) { approximation = fabs(normalized_size - FontSizes[i]); level = i; } else { break; } return level; } /* * Given a level of a font, this will return the correct 'size'. 
*/
/* (the level is clamped to the valid index range of FontSizes, and the
 *  table value is scaled by prefs.font_factor and rounded with rint) */
static gint Html_level_to_fontsize(gint level)
{
   level = MAX(0, level);
   level = MIN(FontSizesNum - 1, level);

   return rint(FontSizes[level]*prefs.font_factor);
}

/*
 * Miscellaneous initializations for a DwPage:
 * creates the toplevel page widget, stores it in html->dw and
 * html->stack[0].page, builds the base style for stack[0] and connects
 * the link, page-menu, bug-meter and destroy signals.
 * Must be called while html->dw is still NULL.
 */
static void Html_set_dwpage(DilloHtml *html)
{
   DwWidget *widget;
   DwPage *page;
   DwStyle style_attrs;
   DwStyleFont font;

   g_return_if_fail (html->dw == NULL);

   widget = a_Dw_page_new ();
   page = DW_PAGE (widget);
   html->dw = html->stack[0].page = widget;

   /* Create a dummy font, attribute, and tag for the bottom of the stack. */
   font.name = prefs.vw_fontname; /* Helvetica */
   font.size = Html_level_to_fontsize(FontSizesBase);
   font.weight = 400;
   font.style = DW_STYLE_FONT_STYLE_NORMAL;

   a_Dw_style_init_values (&style_attrs, html->bw->main_window->window);
   style_attrs.font = a_Dw_style_font_new (&font);
   style_attrs.color = a_Dw_style_color_new (prefs.text_color,
                                             html->bw->main_window->window);
   html->stack[0].style = a_Dw_style_new (&style_attrs,
                                          html->bw->main_window->window);

   html->stack[0].table_cell_style = NULL;

   /* Handle it when the user clicks on a link */
   Html_connect_signals(html, GTK_OBJECT(widget));

   /* Page menu: the "while_alive" connections below all name the page as
    * the object they are tied to */
   gtk_signal_connect_while_alive (
      GTK_OBJECT(GTK_BIN(html->bw->docwin)->child), "button_press_event",
      GTK_SIGNAL_FUNC(Html_page_menu), (gpointer)html->bw, GTK_OBJECT (page));

   /* Connect the "bug meter" button-press to the linkblock */
   gtk_signal_connect_while_alive(
      GTK_OBJECT (html->bw->status_bug_meter), "clicked",
      GTK_SIGNAL_FUNC (a_Commands_view_page_bugs_callback),
      (gpointer)html->linkblock, GTK_OBJECT (page));
   gtk_signal_connect_while_alive(
      GTK_OBJECT (html->bw->status_bug_meter), "clicked1",
      GTK_SIGNAL_FUNC (a_Commands_view_page_bugs_callback),
      (gpointer)html->linkblock, GTK_OBJECT (page));
   /* also connect with the "View page Bugs" menuitem */
   gtk_signal_connect_while_alive(
      GTK_OBJECT (html->bw->viewbugs_menuitem), "activate",
      GTK_SIGNAL_FUNC (a_Commands_view_page_bugs_callback),
      (gpointer)html->linkblock, GTK_OBJECT (page));

   /* Destroy the linkblock when the DwPage is destroyed */
   gtk_signal_connect_object(GTK_OBJECT(page), "destroy",
                             GTK_SIGNAL_FUNC(Html_lb_free),
                             (gpointer)html->linkblock);
}

/*
 * Create and initialize a new DilloHtml structure:
 * allocates the parser state, its linkblock and a 16-entry state stack
 * whose bottom entry is a placeholder named "none", then sets up the
 * toplevel page with Html_set_dwpage.
 */
static DilloHtml *Html_new(BrowserWindow *bw, const DilloUrl *url)
{
   DilloHtml *html;

   html = g_new(DilloHtml, 1);

   html->Start_Buf = NULL;
   html->Start_Ofs = 0;
   html->CurrTagOfs = 0;
   html->OldTagOfs = 0;
   html->OldTagLine = 1;   /* line counting starts at 1 */

   html->DocType = DT_NONE;    /* assume Tag Soup 0.0!   :-) */
   html->DocTypeVersion = 0.0f;

   html->dw = NULL;
   html->bw = bw;
   html->linkblock = Html_lb_new(bw, url);

   html->stack_max = 16;
   html->stack_top = 0;
   html->stack = g_new(DilloHtmlState, html->stack_max);
   html->stack[0].tag_name = g_strdup("none");
   html->stack[0].style = NULL;
   html->stack[0].table_cell_style = NULL;
   html->stack[0].parse_mode = DILLO_HTML_PARSE_MODE_INIT;
   html->stack[0].table_mode = DILLO_HTML_TABLE_MODE_NONE;
   html->stack[0].cell_text_align_set = FALSE;
   html->stack[0].list_type = HTML_LIST_NONE; /* no list element open */
   html->stack[0].list_number = 0;
   html->stack[0].tag_idx = -1;               /* MUST not be used */
   html->stack[0].page = NULL;
   html->stack[0].table = NULL;
   html->stack[0].ref_list_item = NULL;
   html->stack[0].current_bg_color = prefs.bg_color;
   html->stack[0].hand_over_break = FALSE;

   html->Stash = g_string_new("");
   html->StashSpace = FALSE;

   html->SPCBuf = NULL;

   html->pre_column = 0;
   html->PreFirstChar = FALSE;
   html->PrevWasCR = FALSE;
   html->PrevWasOpenTag = FALSE;
   html->SPCPending = FALSE;
   html->InVisitedLink = FALSE;
   html->ReqTagClose = FALSE;
   html->CloseOneTag = FALSE;
   html->TagSoup = TRUE;
   html->NameVal = NULL;

   html->Num_HTML = html->Num_HEAD = html->Num_BODY = html->Num_TITLE = 0;

   html->InFlags = 0;

   html->attr_data = g_string_sized_new(1024);

   /* stack[0].style and stack[0].page get their real values here */
   Html_set_dwpage(html);

   return html;
}

/*
 * Initialize the stash buffer:
 * switches the top of the stack to STASH parse mode and empties the stash.
 */
static void Html_stash_init(DilloHtml *html)
{
   html->stack[html->stack_top].parse_mode = DILLO_HTML_PARSE_MODE_STASH;
   html->StashSpace = FALSE;
   g_string_truncate(html->Stash, 0);
}

/* Entities list from the HTML 4.01 DTD */
typedef struct {
   char *entity;   /* entity name, without the leading '&' and trailing ';' */
   int isocode;    /* character code the entity stands for */
} Ent_t;

/*
 * The table is looked up with bsearch() using strcmp() on the names, so
 * the entries have to stay sorted in strcmp() order (uppercase letters
 * before lowercase ones), and NumEnt has to match the number of entries.
 * Most codes are written in octal; a few in decimal.
 */
#define NumEnt 252
static const Ent_t Entities[NumEnt] = {
   {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302}, {"Agrave",0300},
   {"Alpha",01621}, {"Aring",0305}, {"Atilde",0303}, {"Auml",0304},
   {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647}, {"Dagger",020041},
   {"Delta",01624}, {"ETH",0320}, {"Eacute",0311}, {"Ecirc",0312},
   {"Egrave",0310}, {"Epsilon",01625}, {"Eta",01627}, {"Euml",0313},
   {"Gamma",01623}, {"Iacute",0315}, {"Icirc",0316}, {"Igrave",0314},
   {"Iota",01631}, {"Iuml",0317}, {"Kappa",01632}, {"Lambda",01633},
   {"Mu",01634}, {"Ntilde",0321}, {"Nu",01635}, {"OElig",0522},
   {"Oacute",0323}, {"Ocirc",0324}, {"Ograve",0322}, {"Omega",01651},
   {"Omicron",01637}, {"Oslash",0330}, {"Otilde",0325}, {"Ouml",0326},
   {"Phi",01646}, {"Pi",01640}, {"Prime",020063}, {"Psi",01650},
   {"Rho",01641}, {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336},
   {"Tau",01644}, {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333},
   {"Ugrave",0331}, {"Upsilon",01645}, {"Uuml",0334}, {"Xi",01636},
   {"Yacute",0335}, {"Yuml",0570}, {"Zeta",01626}, {"aacute",0341},
   {"acirc",0342}, {"acute",0264}, {"aelig",0346}, {"agrave",0340},
   {"alefsym",020465}, {"alpha",01661}, {"amp",38}, {"and",021047},
   {"ang",021040}, {"aring",0345}, {"asymp",021110}, {"atilde",0343},
   {"auml",0344}, {"bdquo",020036}, {"beta",01662}, {"brvbar",0246},
   {"bull",020042}, {"cap",021051}, {"ccedil",0347}, {"cedil",0270},
   {"cent",0242}, {"chi",01707}, {"circ",01306}, {"clubs",023143},
   {"cong",021105}, {"copy",0251}, {"crarr",020665}, {"cup",021052},
   {"curren",0244}, {"dArr",020723}, {"dagger",020040}, {"darr",020623},
   {"deg",0260}, {"delta",01664}, {"diams",023146}, {"divide",0367},
   {"eacute",0351}, {"ecirc",0352}, {"egrave",0350}, {"empty",021005},
   {"emsp",020003}, {"ensp",020002}, {"epsilon",01665}, {"equiv",021141},
   {"eta",01667}, {"eth",0360}, {"euml",0353}, {"euro",020254},
   {"exist",021003}, {"fnof",0622}, {"forall",021000}, {"frac12",0275},
   {"frac14",0274}, {"frac34",0276}, {"frasl",020104}, {"gamma",01663},
   {"ge",021145}, {"gt",62}, {"hArr",020724}, {"harr",020624},
   {"hearts",023145}, {"hellip",020046}, {"iacute",0355}, {"icirc",0356},
   {"iexcl",0241}, {"igrave",0354}, {"image",020421}, {"infin",021036},
   {"int",021053}, {"iota",01671}, {"iquest",0277}, {"isin",021010},
   {"iuml",0357}, {"kappa",01672}, {"lArr",020720}, {"lambda",01673},
   {"lang",021451}, {"laquo",0253}, {"larr",020620}, {"lceil",021410},
   {"ldquo",020034}, {"le",021144}, {"lfloor",021412}, {"lowast",021027},
   {"loz",022712}, {"lrm",020016}, {"lsaquo",020071}, {"lsquo",020030},
   {"lt",60}, {"macr",0257}, {"mdash",020024}, {"micro",0265},
   {"middot",0267}, {"minus",021022}, {"mu",01674}, {"nabla",021007},
   {"nbsp",32}, {"ndash",020023}, {"ne",021140}, {"ni",021013},
   {"not",0254}, {"notin",021011}, {"nsub",021204}, {"ntilde",0361},
   {"nu",01675}, {"oacute",0363}, {"ocirc",0364}, {"oelig",0523},
   {"ograve",0362}, {"oline",020076}, {"omega",01711}, {"omicron",01677},
   {"oplus",021225}, {"or",021050}, {"ordf",0252}, {"ordm",0272},
   {"oslash",0370}, {"otilde",0365}, {"otimes",021227}, {"ouml",0366},
   {"para",0266}, {"part",021002}, {"permil",020060}, {"perp",021245},
   {"phi",01706}, {"pi",01700}, {"piv",01726}, {"plusmn",0261},
   {"pound",0243}, {"prime",020062}, {"prod",021017}, {"prop",021035},
   {"psi",01710}, {"quot",34}, {"rArr",020722}, {"radic",021032},
   {"rang",021452}, {"raquo",0273}, {"rarr",020622}, {"rceil",021411},
   {"rdquo",020035}, {"real",020434}, {"reg",0256}, {"rfloor",021413},
   {"rho",01701}, {"rlm",020017}, {"rsaquo",020072}, {"rsquo",020031},
   {"sbquo",020032}, {"scaron",0541}, {"sdot",021305}, {"sect",0247},
   {"shy",0255}, {"sigma",01703}, {"sigmaf",01702}, {"sim",021074},
   {"spades",023140}, {"sub",021202}, {"sube",021206}, {"sum",021021},
   {"sup",021203}, {"sup1",0271}, {"sup2",0262}, {"sup3",0263},
   {"supe",021207}, {"szlig",0337}, {"tau",01704}, {"there4",021064},
   {"theta",01670}, {"thetasym",01721}, {"thinsp",020011}, {"thorn",0376},
   {"tilde",01334}, {"times",0327}, {"trade",020442}, {"uArr",020721},
   {"uacute",0372}, {"uarr",020621}, {"ucirc",0373}, {"ugrave",0371},
   {"uml",0250}, {"upsih",01722}, {"upsilon",01705}, {"uuml",0374},
   {"weierp",020430}, {"xi",01676}, {"yacute",0375}, {"yen",0245},
   {"yuml",0377}, {"zeta",01666}, {"zwj",020015}, {"zwnj",020014}
};


/*
 * Comparison function for binary search
 * (orders two Ent_t records by their entity names, using strcmp)
 */
static int Html_entity_comp(const void *a, const void *b)
{
   return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);
}

/*
 * Binary search of 'key' in entity list
 * Return: the index of the matching entry in Entities, or -1 when 'key'
 * is not an entity name. The match is case sensitive.
 */
static int Html_entity_search(char *key)
{
   Ent_t *res, EntKey;

   /* bsearch compares whole records, so wrap the key in one */
   EntKey.entity = key;
   res = bsearch(&EntKey, Entities, NumEnt, sizeof(Ent_t), Html_entity_comp);
   if ( res )
      return (res - Entities);
   return -1;
}

/*
 * Switch a few UCS encodings to latin1.
*/ static gint Html_try_ucs2latin1(gint isocode) { gint ret; switch (isocode) { case 0x2018: case 0x2019: ret = '\''; break; case 0x201c: case 0x201d: ret = '"'; break; case 0x2013: case 0x2014: ret = '-'; break; case 0x2039: ret = '<'; break; case 0x203a: ret = '>'; break; case 0x2022: ret = 176; break; default: ret = -1; break; } return ret; } /* * Switch a few 'undefined for HTML' ASCII encodings to latin1. */ static gint Html_try_ascii2latin1(gint isocode) { gint ret; switch (isocode) { case 145: case 146: ret = '\''; break; case 147: case 148: ret = '"'; break; case 149: ret = 176; break; case 150: case 151: ret = '-'; break; default: ret = isocode; break; } return ret; } /* * Given an entity, return the ISO-Latin1 character code. * Returns a negative value (error code) if not a valid entity. * * The first character *token is assumed to be == '&' * * For valid entities, *entsize is set to the length of the parsed entity. */ static gint Html_parse_entity(DilloHtml *html, const gchar *token, gint toksize, gint *entsize) { gint isocode, i; gchar *tok, *s, c; token++; tok = s = toksize ? 
g_strndup(token, (guint)toksize) : g_strdup(token); isocode = -1; if (*s == '#') { /* numeric character reference */ errno = 0; if (*++s == 'x' || *s == 'X') { if (isxdigit(*++s)) { /* strtol with base 16 accepts leading "0x" - we don't */ if (*s == '0' && s[1] == 'x') { s++; isocode = 0; } else { isocode = strtol(s, &s, 16); } } } else if (isdigit(*s)) { isocode = strtol(s, &s, 10); } if (!isocode || errno || isocode > 0x7fffffffL) { /* this catches null bytes, errors and codes >=2^31 */ MSG_HTML("numeric character reference out of range\n"); isocode = -2; } if (isocode != -1) { if (*s == ';') s++; else if (prefs.show_extra_warnings) MSG_HTML("numeric character reference without trailing ';'\n"); } } else if (isalpha(*s)) { /* character entity reference */ while (isalnum(*++s) || strchr(":_.-", *s)); c = *s; *s = 0; if (c != ';' || (i = Html_entity_search(tok)) == -1) { if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) || html->DocType == DT_XHTML) MSG_HTML("undefined character entity '%s'\n", tok); isocode = -3; } else isocode = Entities[i].isocode; if (c == ';') s++; else if (prefs.show_extra_warnings) MSG_HTML("character entity reference without trailing ';'\n"); } *entsize = s-tok+1; g_free(tok); if (isocode >= 128 && isocode <= 159) { MSG_HTML("code positions 128-159 are not defined for ISO Latin-1\n"); isocode = Html_try_ascii2latin1(isocode); } else if (isocode > 255) /* Try a few UCS translations to Latin1 */ isocode = Html_try_ucs2latin1(isocode); else if (isocode == -1 && prefs.show_extra_warnings) MSG_HTML("literal '&'\n"); return isocode; } /* * Convert all the entities in a token to plain ISO character codes. Takes * a token and its length, and returns a newly allocated string. 
*/
/* Besides entities, a few UTF-8 byte sequences (E2 80 xx and C2 A0) are
 * replaced by Latin1 look-alikes. The caller owns the returned string. */
static char *
 Html_parse_entities(DilloHtml *html, gchar *token, gint toksize)
{
   gchar *esc_set = "&\xE2\xC2";
   gchar *new_str;
   gint i, j, isocode, entsize;

   new_str = g_strndup(token, toksize);
   /* Nothing to translate: the plain copy is the answer */
   if (new_str[strcspn(new_str, esc_set)] == 0)
      return new_str;

   /* Rewrite the copy in place; no replacement is longer than the
    * input sequence it stands for. */
   for (i = j = 0; i < toksize; i++) {
      if (token[i] == '&' &&
          (isocode = Html_parse_entity(html, token+i,
                                       toksize-i, &entsize)) >= 0) {
         new_str[j++] = (gchar) isocode;
         i += entsize-1;
      } else if (i+2 < toksize &&
                 token[i] == '\xE2' && token[i+1] == '\x80') {
         /* (the length test comes first so that no byte past the token
          *  is read) */
         /* Hack: for parsing some UTF-8 characters into latin1 */
         switch (token[i+2]) {
         case '\x94':
            new_str[j++] = '-';
            new_str[j++] = '-';
            break;
         case '\x98':
         case '\x99':
            new_str[j++] = '\'';
            break;
         case '\x9C':
         case '\x9D':
            new_str[j++] = '"';
            break;
         case '\xA2':
            new_str[j++] = '*';
            new_str[j++] = ' ';
            break;
         default: /* unhandled */
            new_str[j++] = '\xE2';
            break;
         }
         i += 2;
      } else if (i+1 < toksize &&
                 token[i] == '\xC2' && token[i+1] == '\xA0') {
         /* Hack: for parsing some UTF-8 characters into latin1 */
         new_str[j++] = ' ';
         ++i;
      } else {
         new_str[j++] = token[i];
      }
   }
   new_str[j] = '\0';
   return new_str;
}

/*
 * Parse spaces
 *
 * What happens to a run of white space depends on the parse mode at the
 * top of the stack:
 *  > STASH    --> only noted, as a pending separator for the stash
 *  > VERBATIM --> appended to the stash unchanged
 *  > PRE      --> line ends become line breaks, TABs are expanded to the
 *                 next multiple of TAB_SIZE, other characters are added
 *                 as text
 *  > otherwise it is kept in html->SPCBuf and flagged as pending
 */
static void Html_process_space(DilloHtml *html, char *space, gint spacesize)
{
   gint i, offset;
   DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode;

   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ) {
      /* a separator is only wanted once the stash has some content */
      html->StashSpace = (html->Stash->len > 0);
      html->SPCPending = FALSE;

   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) {
      char *Pword = g_strndup(space, spacesize);
      g_string_append(html->Stash, Pword);
      g_free(Pword);
      html->SPCPending = FALSE;

   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) {
      /* re-scan the string for characters that cause line breaks */
      for (i = 0; i < spacesize; i++) {
         /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
         if (!html->PreFirstChar &&
             (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
            a_Dw_page_add_linebreak(DW_PAGE (html->dw),
                                    html->stack[(html)->stack_top].style);
            html->pre_column = 0;
         }
         html->PreFirstChar = FALSE;

         /* cr and lf should not be rendered -- they appear as a break */
         switch (space[i]) {
         case '\r':
         case '\n':
            break;
         case '\t':
            /* NOTE(review): this literal was split in two by extraction
             * damage; the element name is restored as best guess. */
            if (prefs.show_extra_warnings)
               MSG_HTML("TAB character inside <PRE>\n");
            /* pad with spaces up to the next tab stop */
            offset = TAB_SIZE - html->pre_column % TAB_SIZE;
            a_Dw_page_add_text(DW_PAGE (html->dw),
                               g_strnfill(offset, ' '),
                               html->stack[html->stack_top].style);
            html->pre_column += offset;
            break;
         default:
            a_Dw_page_add_text(DW_PAGE (html->dw),
                               g_strndup(space + i, 1),
                               html->stack[html->stack_top].style);
            html->pre_column++;
            break;
         }

         /* remembered so that the "\n" of a "\r\n" pair is not counted
          * as a second line break */
         html->PrevWasCR = (space[i] == '\r');
      }
      html->SPCPending = FALSE;

   } else {
      if (SGML_SPCDEL && html->PrevWasOpenTag) {
         /* SGML_SPCDEL ignores white space immediately after an open tag */
         html->SPCPending = FALSE;
      } else {
         /* keep the most recent run of spaces as pending */
         g_free(html->SPCBuf);
         html->SPCBuf = g_strndup(space, spacesize);
         html->SPCPending = TRUE;
      }

      if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY )
         html->StashSpace = (html->Stash->len > 0);
   }
}
      
      /*
       * Route one word to its destination, according to the parse mode of
       * the element on top of the stack:
       *  > STASH, STASH_AND_BODY --> html->Stash, with entities parsed
       *  > VERBATIM              --> html->Stash, untouched
       *  > everything but STASH and VERBATIM also goes to the page through
       *    a_Dw_page_add_text() (PRE gets special white-space handling).
       */
      static void Html_process_word(DilloHtml *html, char *word, gint size)
      {
         gint pos, run_start;
         gchar *text;
         DilloHtmlParseMode mode = html->stack[html->stack_top].parse_mode;
         gboolean stash_parsed = (mode == DILLO_HTML_PARSE_MODE_STASH ||
                                  mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY);
         gboolean stash_only = (mode == DILLO_HTML_PARSE_MODE_STASH ||
                                mode == DILLO_HTML_PARSE_MODE_VERBATIM);

         /* First stage: feed the stash */
         if (stash_parsed) {
            if (html->StashSpace) {
               /* a space seen earlier separates this word from the last */
               g_string_append_c(html->Stash, ' ');
               html->StashSpace = FALSE;
            }
            text = Html_parse_entities(html, word, size);
            g_string_append(html->Stash, text);
            g_free(text);

         } else if (mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
            /* raw copy: entities are left alone in this mode */
            text = g_strndup(word, size);
            g_string_append(html->Stash, text);
            g_free(text);
         }

         /* Second stage: feed the page (stash-only modes render nothing) */
         if (mode == DILLO_HTML_PARSE_MODE_PRE) {
            /* Entities may expand to white space, so the parsed word is
             * split into alternating runs of space and non-space. */
            text = Html_parse_entities(html, word, size);
            pos = 0;
            while (text[pos]) {
               run_start = pos;
               if (isspace(text[pos])) {
                  do {
                     ++pos;
                  } while (text[pos] && isspace(text[pos]));
                  Html_process_space(html, text + run_start, pos - run_start);
               } else {
                  do {
                     ++pos;
                  } while (text[pos] && !isspace(text[pos]));
                  a_Dw_page_add_text(DW_PAGE (html->dw),
                                     g_strndup(text + run_start,
                                               pos - run_start),
                                     html->stack[html->stack_top].style);
                  html->pre_column += pos - run_start;
                  html->PreFirstChar = FALSE;
               }
            }
            g_free(text);

         } else if (!stash_only) {
            /* Emit the pending space first, unless SGML_SPCDEL drops it
             * because it directly follows an open tag. */
            if (html->SPCPending && (!SGML_SPCDEL || !html->PrevWasOpenTag))
               a_Dw_page_add_space(DW_PAGE (html->dw),
                                   html->stack[html->stack_top].style);

            /* White-space entities inside the word are not collapsed
             * (too much overhead for a rare case of ill-formed HTML);
             * control white space is simply turned into blanks. */
            text = Html_parse_entities(html, word, size);
            g_strdelimit(text, "\t\f\n\r", ' ');
            /* the string is handed over, it is not freed here */
            a_Dw_page_add_text(DW_PAGE (html->dw),
                               text,
                               html->stack[html->stack_top].style);
         }

         html->PrevWasOpenTag = FALSE;
         html->SPCPending = FALSE;
      }
      
      /*
       * Tell whether the element name in tagstr (e.g. "p") is the name of the
       * tag held in (tag, tagsize), where tag has the initial '<' already
       * skipped (e.g. "P align=center>").  The comparison ignores case.
       *
       * Return value: TRUE on match, FALSE otherwise.
       */
      static gboolean Html_match_tag(const char *tagstr, char *tag, gint tagsize)
      {
         gint pos = 0;

         /* every character of tagstr must be matched inside the tag */
         while (pos < tagsize && tagstr[pos] != '\0') {
            if (tolower(tagstr[pos]) != tolower(tag[pos]))
               return FALSE;
            ++pos;
         }

         /* the tag ended before a delimiter could follow the name */
         if (pos >= tagsize)
            return FALSE;

         /* The name must end here: white space, '>' or '/'.
          * ('/' is for xml compatibility: "empty/>" will be matched.) */
         if (isspace(tag[pos]) || tag[pos] == '>' || tag[pos] == '/')
            return TRUE;
         return FALSE;
      }
      
      /*
       * Called right after popping the stack: when the popped element was
       * rendering into a different (nested) DwPage than the new top of the
       * stack, finish that page and switch html->dw back.
       */
      static void Html_eventually_pop_dw(DilloHtml *html)
      {
         /* same page as before: nothing to hand back */
         if (html->dw == html->stack[html->stack_top].page)
            return;

         /* The pop already happened, so the popped element (and its
          * hand_over_break flag) sits at stack_top + 1. */
         if (html->stack[html->stack_top + 1].hand_over_break)
            a_Dw_page_hand_over_break(DW_PAGE(html->dw),
                                      html->stack[(html)->stack_top].style);

         a_Dw_page_flush(DW_PAGE(html->dw));
         html->dw = html->stack[html->stack_top].page;
      }
      
      /*
       * Push a new element onto the stack.  The new item starts as a copy of
       * the former top, so it inherits all of its attributes.
       */
      static void Html_push_tag(DilloHtml *html, gint tag_idx)
      {
         gint new_top = html->stack_top + 1;

         /* make room for one more item */
         a_List_add(html->stack, new_top, html->stack_max);

         /* Copy the whole former top and then overwrite just the name and
          * the index, instead of copying field by field. */
         html->stack[new_top] = html->stack[new_top - 1];
         html->stack[new_top].tag_name = g_strdup(Tags[tag_idx].name);
         html->stack[new_top].tag_idx = tag_idx;
         html->stack_top = new_top;

         /* The copied style pointers are shared with the item below, so each
          * one gets its own reference (dropped again when popping). */
         a_Dw_style_ref (html->stack[new_top].style);
         if (html->stack[new_top].table_cell_style)
            a_Dw_style_ref (html->stack[new_top].table_cell_style);

         html->dw = html->stack[new_top].page;
      }
      
      /*
       * Push the tag (used to force an element with an optional open tag
       * onto the stack).
       * Note: for now it just forwards to Html_push_tag(), but things may
       * change...
       */
      static void Html_force_push_tag(DilloHtml *html, gint tag_idx)
      {
         Html_push_tag(html, tag_idx);
      }
      
      /*
       * Unconditionally remove the element on top of the stack, releasing
       * what Html_push_tag() acquired for it.
       */
      static void Html_real_pop_tag(DilloHtml *html)
      {
         /* drop the style references held by the top item */
         a_Dw_style_unref (html->stack[html->stack_top].style);
         if (html->stack[html->stack_top].table_cell_style)
            a_Dw_style_unref (html->stack[html->stack_top].table_cell_style);

         /* free the name, then shrink the stack */
         g_free(html->stack[html->stack_top].tag_name);
         html->stack_top--;

         /* the popped element may have owned a nested page */
         Html_eventually_pop_dw(html);
      }
      
      /*
       * Default close function for tags.
       * (conditional cleanup of the stack)
       *
       * When html->CloseOneTag is set, only the top element is popped and the
       * flag is cleared.  Otherwise the stack is searched, from the top down,
       * for an element with index TagIdx.
       *
       * If 'w3c_mode' is enabled (prefs.w3c_plus_heuristics is off):
       *   1.- The search only walks past elements whose close tag is
       *       optional, so it stops at the first one requiring a close tag.
       *   2.- If that one matches, clean all the optional-close tags in
       *       between.
       *   3.- Cleanup the matching tag. (on error, give a warning message)
       *
       * If 'w3c_mode' is NOT enabled:
       *   1.- Search the stack for a matching tag based on tag level (only
       *       elements with a lower TagLevel are walked past).
       *   2.- If it exists, clean all the tags in between.
       *   3.- Cleanup the matching tag. (on error, give a warning message)
       */
      static void Html_tag_cleanup_at_close(DilloHtml *html, gint TagIdx)
      {
         gint w3c_mode = !prefs.w3c_plus_heuristics;
         gint stack_idx, cmp = 1;
         gint new_idx = TagIdx;

         if (html->CloseOneTag) {
            /* re-entered from the loop below: close this and only this tag */
            Html_real_pop_tag(html);
            html->CloseOneTag = FALSE;
            return;
         }

         /* Look for the candidate tag to close.
          * cmp ends up 0 only when the loop stopped on a matching element. */
         stack_idx = html->stack_top;
         while (stack_idx &&
                (cmp = (new_idx != html->stack[stack_idx].tag_idx)) &&
                ((w3c_mode &&
                  Tags[html->stack[stack_idx].tag_idx].EndTag == 'O') ||
                 (!w3c_mode &&
                  Tags[html->stack[stack_idx].tag_idx].TagLevel <
                  Tags[new_idx].TagLevel))) {
            --stack_idx;
         }

         /* clean, up to the matching tag */
         if (cmp == 0 && stack_idx > 0) {
            /* There's a valid matching tag in the stack.
             * Each close function is expected to come back here with
             * CloseOneTag set, which pops one element per iteration. */
            while (html->stack_top >= stack_idx) {
               gint toptag_idx = html->stack[html->stack_top].tag_idx;
               /* Warn when we decide to close an open tag (for !w3c_mode) */
               if (html->stack_top > stack_idx &&
                   Tags[toptag_idx].EndTag != 'O')
                  MSG_HTML("  - forcing close of open tag: <%s>\n",
                           Tags[toptag_idx].name);

               /* Close this and only this tag */
               html->CloseOneTag = TRUE;
               Tags[toptag_idx].close (html, toptag_idx);
            }

         } else {
            /* The format needs one conversion per argument, otherwise the
             * message cannot name the offending and the expected element. */
            MSG_HTML("unexpected closing tag: </%s>. -- expected </%s>\n",
                     Tags[new_idx].name, html->stack[stack_idx].tag_name);
         }
      }
      
      /*
       * Cleanup (conditional), and Pop the tag (if it matches).
       * All the work is delegated to Html_tag_cleanup_at_close().
       */
      static void Html_pop_tag(DilloHtml *html, gint TagIdx)
      {
         Html_tag_cleanup_at_close(html, TagIdx);
      }
      
      /*
       * Some parsing routines.
       */
      
      /*
       * Used by Html_parse_length
       *
       * Read a number from the start of attr and classify it by the
       * character that follows: '%' gives a percentage, '*' a relative
       * length, anything else an absolute length (value truncated to gint).
       * If endptr is not NULL, *endptr is set to the first character that
       * was not consumed.
       */
      static DwStyleLength Html_parse_length_or_multi_length (const gchar *attr,
                                                              gchar **endptr)
      {
         DwStyleLength length;
         gchar *rest;
         double value = strtod (attr, &rest);

         if (*rest == '%') {
            rest++;
            length = DW_STYLE_CREATE_PER_LENGTH (value / 100);
         } else if (*rest == '*') {
            rest++;
            length = DW_STYLE_CREATE_REL_LENGTH (value);
         } else {
            /* A "px" suffix is deliberately not consumed here: it seems not
             * to be allowed by the HTML 4.01 SPEC. */
            length = DW_STYLE_CREATE_ABS_LENGTH ((gint)value);
         }

         if (endptr)
            *endptr = rest;
         return length;
      }
      
      
      /*
       * Parse an attribute value of type %Length; (pixels or percentage).
       *
       * Return value: the parsed length or percentage, or
       * DW_STYLE_LENGTH_AUTO when attr is NULL, when the value is a relative
       * ("N*") length, or when anything other than white space follows the
       * number (in that last case a message is issued too).
       */
      static DwStyleLength Html_parse_length (DilloHtml *html, const gchar *attr)
      {
         DwStyleLength l;
         gchar *end;

         /* strtod() must not be handed a NULL pointer */
         if (attr == NULL)
            return DW_STYLE_LENGTH_AUTO;

         l = Html_parse_length_or_multi_length (attr, &end);

         /* relative lengths are not allowed as &Length; */
         if (DW_STYLE_IS_REL_LENGTH (l))
            return DW_STYLE_LENGTH_AUTO;

         /* Allow only whitespaces after the number.  The cast keeps the
          * argument of isspace() in range when char is signed. */
         if (*end && !isspace ((unsigned char) *end)) {
            MSG_HTML("Garbage after length: %s\n", attr);
            return DW_STYLE_LENGTH_AUTO;
         }

         return l;
      }
      
      /*
       * Parse the value of a color attribute.
       * Return value: whatever a_Color_parse() yields for subtag and
       * default_color; a message is issued when it reports an error.
       */
      static gint32
       Html_color_parse(DilloHtml *html, const char *subtag, gint32 default_color)
      {
         gint32 parsed;
         gint failed = 1;

         parsed = a_Color_parse(subtag, default_color, &failed);
         if (failed)
            MSG_HTML("color is not in \"#RRGGBB\" format\n");

         return parsed;
      }
      
      /*
       * Check that 'val' is composed of characters inside [A-Za-z0-9:_.-]
       * Note: ID can't have entities, but this check is enough (no '&').
       *
       * A message is issued when a character outside the set is found, or
       * when the first character is not a letter (this includes an empty
       * value).
       *
       * Return value: 1 if every character belongs to the set, 0 otherwise.
       * (a bad first character alone only produces the message).
       */
      static gint
       Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
      {
         gint i;
         unsigned char c;

         /* The <ctype.h> functions need an unsigned char value; bytes above
          * 0x7F are rejected outright so the result does not depend on the
          * locale (the allowed set is plain ASCII). */
         for (i = 0; (c = (unsigned char) val[i]) != 0; ++i)
            if (!((c < 0x80 && isalnum(c)) || strchr(":_.-", c)))
               break;

         c = (unsigned char) val[0];
         if (val[i] || !(c < 0x80 && isalpha(c)))
            MSG_HTML("'%s' value is not of the form "
                     "[A-Za-z][A-Za-z0-9:_.-]*\n", attrname);

         /* val[i] is 0 only when the scan reached the end of the string */
         return !(val[i]);
      }
      
      /*
       * Handle DOCTYPE declaration
       *
       * Follows the convention that HTML 4.01
       * doctypes which include a full w3c DTD url are treated as
       * standards-compliant, but 4.01 without the url and HTML 4.0 and
       * earlier are not. XHTML doctypes are always standards-compliant
       * whether or not an url is present.
       *
       * Note: I'm not sure about this convention. The W3C validator
       * recognizes the "HTML Level" with or without the URL. The convention
       * comes from mozilla (see URLs below), but Dillo doesn't have the same
       * rendering modes, so it may be better to chose another behaviour. --Jcid
       * 
       * http://www.mozilla.org/docs/web-developer/quirks/doctypes.html
       * http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html
       *
       * This is not a full DOCTYPE parser, just enough for what Dillo uses.
       */
      static void Html_parse_doctype(DilloHtml *html, char *tag, gint tagsize)
      {
         /* NOTE(review): the next line looks truncated in this copy of the
          * file.  Its string literal is never closed, and the declarations of
          * p, ntag and of the signature strings used below (XHTML1, XHTML11,
          * HTML40, HTML32, HTML20 and the matching _url strings) are not
          * present, nor is the start of the if/else chain.  Restore them from
          * a pristine copy before building -- TODO confirm. */
         char *HTML_sig    = "DocType = DT_HTML;
               html->DocTypeVersion = 4.01f;
            } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
                       a_Misc_stristr(p + strlen(XHTML1), XHTML1_url)) {
               /* p starts with XHTML1, and XHTML1_url was found in the rest
                * (a_Misc_stristr is presumably a case-insensitive search) */
               html->DocType = DT_XHTML;
               html->DocTypeVersion = 1.0f;
            } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
                       a_Misc_stristr(p + strlen(XHTML11), XHTML11_url)) {
               /* same test as above, with the XHTML11 pair of strings */
               html->DocType = DT_XHTML;
               html->DocTypeVersion = 1.1f;
            } else if (!strncmp(p, HTML40, strlen(HTML40))) {
               /* from here on only the prefix is compared, no URL is needed */
               html->DocType = DT_HTML;
               html->DocTypeVersion = 4.0f;
            } else if (!strncmp(p, HTML32, strlen(HTML32))) {
               html->DocType = DT_HTML;
               html->DocTypeVersion = 3.2f;
            } else if (!strncmp(p, HTML20, strlen(HTML20))) {
               html->DocType = DT_HTML;
               html->DocTypeVersion = 2.0f;
            }
            /* no match: DocType and DocTypeVersion are left untouched */
         }

         /* ntag is released here; its allocation is not visible above */
         g_free(ntag);
      }
      
      /*
       * Handle open HTML element: flag that we are inside HTML and count the
       * opening (a repeated one only produces a message).
       */
      static void Html_tag_open_html(DilloHtml *html, char *tag, gint tagsize)
      {
         /* setting an already set flag changes nothing */
         html->InFlags |= IN_HTML;

         if (++html->Num_HTML > 1)
            MSG_HTML("HTML element was already open\n");
      }
      
      /*
       * Handle close HTML element
       * todo: may add some checks here
       */
      static void Html_tag_close_html(DilloHtml *html, gint TagIdx)
      {
         /* Beware of pages with multiple HTML close tags: the flag is only
          * cleared when exactly one HTML element was opened. */
         if (html->Num_HTML == 1)
            html->InFlags &= ~IN_HTML;

         Html_pop_tag(html, TagIdx);
      }
      
      /*
       * Handle open HEAD element.
       * A HEAD found inside BODY is rejected: a message is issued and
       * html->ReqTagClose is set.  Otherwise the HEAD flag is raised and the
       * opening is counted (a repeated one only produces a message).
       */
      static void Html_tag_open_head(DilloHtml *html, char *tag, gint tagsize)
      {
         if (html->InFlags & IN_BODY) {
            MSG_HTML("HEAD element must go before the BODY section\n");
            html->ReqTagClose = TRUE;
            return;
         }

         /* setting an already set flag changes nothing */
         html->InFlags |= IN_HEAD;

         if (++html->Num_HEAD > 1)
            MSG_HTML("HEAD element was already open\n");
      }
      
      /*
       * Handle close HEAD element
       * Note: as a side effect of Html_test_section() this function is called
       *       twice when the head element is closed implicitly.
       */
      static void Html_tag_close_head(DilloHtml *html, gint TagIdx)
      {
         /* complain only while HEAD is still flagged as open */
         if ((html->InFlags & IN_HEAD) && html->Num_TITLE == 0)
            MSG_HTML("HEAD section lacks the TITLE element\n");

         /* clearing a flag that is not set changes nothing */
         html->InFlags &= ~IN_HEAD;

         Html_pop_tag(html, TagIdx);
      }
      
      /*
       * Handle open TITLE
       * Counts the element (Html_tag_close_head() reports a HEAD without
       * TITLE based on this counter) and calls stash init; the title string
       * is later read back from html->Stash by Html_tag_close_title().
       */
      static void Html_tag_open_title(DilloHtml *html, char *tag, gint tagsize)
      {
         ++html->Num_TITLE;
         Html_stash_init(html);
      }
      
      /*
       * Handle close TITLE
       * Inside HEAD, the stashed text becomes the page title in the browser
       * window and in the history; anywhere else only a message is issued.
       */
      static void Html_tag_close_title(DilloHtml *html, gint TagIdx)
      {
         if (!(html->InFlags & IN_HEAD)) {
            MSG_HTML("the TITLE element must be inside the HEAD section\n");
         } else {
            /* title is only valid inside HEAD */
            a_Interface_set_page_title(html->linkblock->bw, html->Stash->str);
            a_History_set_title(NAV_TOP(html->linkblock->bw), html->Stash->str);
         }

         Html_pop_tag(html, TagIdx);
      }
      
      /*
       * Handle open SCRIPT
       * Initializes the stash, where the embedded code will be stored, and
       * switches the top of the stack to MODE_VERBATIM.
       * MODE_VERBATIM is used because MODE_STASH catches entities.
       */
      static void Html_tag_open_script(DilloHtml *html, char *tag, gint tagsize)
      {
         Html_stash_init(html);
         /* set after the init call; NOTE(review): presumably because
          * Html_stash_init() picks a parse mode itself -- confirm */
         html->stack[html->stack_top].parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
      }
      
      /*
       * Handle close SCRIPT
       * Nothing is done with the stashed code yet; the element is just
       * popped.
       */
      static void Html_tag_close_script(DilloHtml *html, gint TagIdx)
      {
         /* eventually the stash will be sent to an interpreter for parsing */
         Html_pop_tag(html, TagIdx);
      }
      
      /*
       * Handle open STYLE
       * Store the contents to the stash where (in the future) the style
       * sheet interpreter can get it.  As with SCRIPT, the contents are
       * taken verbatim.
       */
      static void Html_tag_open_style(DilloHtml *html, char *tag, gint tagsize)
      {
         Html_stash_init(html);
         /* verbatim: words and spaces are stashed untouched */
         html->stack[html->stack_top].parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
      }
      
      /*
       * Handle close STYLE
       * Nothing is done with the stashed style sheet yet; the element is
       * just popped.
       */
      static void Html_tag_close_style(DilloHtml *html, gint TagIdx)
      {
         /* eventually the stash will be sent to an interpreter for parsing */
         Html_pop_tag(html, TagIdx);
      }
      
      /*
       * Handle open BODY element
       * Flags and counts the element, then (unless prefs.force_my_colors is
       * set) applies the bgcolor, text, link and vlink attributes, and
       * finally switches the top of the stack to MODE_BODY.
       * A repeated BODY only produces a message: its attributes are ignored.
       */
      static void Html_tag_open_body(DilloHtml *html, char *tag, gint tagsize)
      {
         const char *attrbuf;
         DwPage *page;
         DwStyle style_attrs, *style;
         gint32 color;

         if (!(html->InFlags & IN_BODY))
            html->InFlags |= IN_BODY;
         ++html->Num_BODY;

         if (html->Num_BODY > 1) {
            /* the flag and the counter above are already updated here */
            MSG_HTML("BODY element was already open\n");
            return;
         }
         if (html->InFlags & IN_HEAD) {
            /* if we're here, it's bad XHTML, no need to recover */
            MSG_HTML("unclosed HEAD element\n");
         }

         /* NOTE(review): 'page' is not read again in this function */
         page = DW_PAGE (html->dw);

         if (!prefs.force_my_colors) {
            if ((attrbuf = Html_get_attr(html, tag, tagsize, "bgcolor"))) {
               color = Html_color_parse(html, attrbuf, prefs.bg_color);
               /* white backgrounds are replaced unless explicitly allowed;
                * the force_my_colors test can't be true inside this branch */
               if ( (color == 0xffffff && !prefs.allow_white_bg) ||
                    prefs.force_my_colors )
                  color = prefs.bg_color;

               /* give the page widget a copy of its style with the new
                * background; the local reference is dropped afterwards */
               style_attrs = *html->dw->style;
               style_attrs.background_color =
                  a_Dw_style_color_new (color, html->bw->main_window->window);
               style = a_Dw_style_new (&style_attrs, html->bw->main_window->window);
               a_Dw_widget_set_style (html->dw, style);
               a_Dw_style_unref (style);
               /* remembered on the stack; read back below for the visited
                * color */
               html->stack[html->stack_top].current_bg_color = color;
            }

            if ((attrbuf = Html_get_attr(html, tag, tagsize, "text"))) {
               color = Html_color_parse(html, attrbuf, prefs.text_color);
               /* NOTE(review): the macro is defined elsewhere; the second
                * argument looks like the name of a style field and the third
                * like its new value -- confirm */
               HTML_SET_TOP_ATTR
                  (html, color,
                   a_Dw_style_color_new (color, html->bw->main_window->window));
            }

            if ((attrbuf = Html_get_attr(html, tag, tagsize, "link")))
               html->linkblock->link_color = Html_color_parse(html, attrbuf,
                                                              prefs.link_color);

            if ((attrbuf = Html_get_attr(html, tag, tagsize, "vlink")))
               html->linkblock->visited_color =
                  Html_color_parse(html, attrbuf, prefs.visited_color);

            if (prefs.contrast_visited_color) {
               /* get a color that has a "safe distance" from text, link and bg */
               html->linkblock->visited_color =
                  a_Color_vc(html->linkblock->visited_color,
                             html->stack[html->stack_top].style->color->color_val,
                             html->linkblock->link_color,
                             html->stack[html->stack_top].current_bg_color);
            }
         }

         html->stack[html->stack_top].parse_mode = DILLO_HTML_PARSE_MODE_BODY;
      }
      
      /*
       * Handle close BODY element
       */
      static void Html_tag_close_body(DilloHtml *html, gint TagIdx)
      {
         /* Some tag soup pages use multiple BODY tags: the flag is only
          * cleared when exactly one BODY element was opened. */
         if (html->Num_BODY == 1)
            html->InFlags &= ~IN_BODY;

         Html_pop_tag(html, TagIdx);
      }
      
      /*
       * 

      * todo: what's the point between adding the parbreak before and * after the push? */ static void Html_tag_open_p(DilloHtml *html, char *tag, gint tagsize) { a_Dw_page_add_parbreak(DW_PAGE (html->dw), 9, html->stack[(html)->stack_top].style); Html_tag_set_align_attr (html, tag, tagsize); } /* * */ static void Html_tag_open_table(DilloHtml *html, char *tag, gint tagsize) { #ifdef USE_TABLES DwWidget *table; DwStyle style_attrs, *tstyle, *old_style; const char *attrbuf; gint32 border = 0, cellspacing = 1, cellpadding = 2, bgcolor; #endif a_Dw_page_add_parbreak(DW_PAGE (html->dw), 0, html->stack[(html)->stack_top].style); #ifdef USE_TABLES if ((attrbuf = Html_get_attr(html, tag, tagsize, "border"))) border = *attrbuf ? Html_check_int(strtol(attrbuf,NULL,10), 0,100,1) : 1; if ((attrbuf = Html_get_attr(html, tag, tagsize, "cellspacing"))) cellspacing = Html_check_int(strtol(attrbuf, NULL, 10), 0, 100, 1); if ((attrbuf = Html_get_attr(html, tag, tagsize, "cellpadding"))) cellpadding = Html_check_int(strtol(attrbuf, NULL, 10), 0, 100, 2); /* The style for the table */ style_attrs = *html->stack[html->stack_top].style; /* When dillo was started with the --debug-rendering option, there * is always a border around the table. 
*/ if (dillo_dbg_rendering) a_Dw_style_box_set_val (&style_attrs.border_width, MIN (border, 1)); else a_Dw_style_box_set_val (&style_attrs.border_width, border); a_Dw_style_box_set_border_color (&style_attrs, a_Dw_style_shaded_color_new ( html->stack[html->stack_top].current_bg_color, html->bw->main_window->window)); a_Dw_style_box_set_border_style (&style_attrs, DW_STYLE_BORDER_OUTSET); style_attrs.border_spacing = cellspacing; if ((attrbuf = Html_get_attr(html, tag, tagsize, "width"))) { int dw_len = Html_parse_length (html, attrbuf); int len = strtol(attrbuf, NULL, 10); if ((DW_STYLE_IS_PER_LENGTH(dw_len) && Html_check_int(len, 0, 100, -1) != -1) || (DW_STYLE_IS_ABS_LENGTH(dw_len) && Html_check_int(len, 0, 5000, -1) != -1) || (DW_STYLE_IS_REL_LENGTH(dw_len) && Html_check_int(len, 0, 100, -1) != -1)) { style_attrs.width = dw_len; } } if ((attrbuf = Html_get_attr(html, tag, tagsize, "align"))) { if (g_strcasecmp (attrbuf, "left") == 0) style_attrs.text_align = DW_STYLE_TEXT_ALIGN_LEFT; else if (g_strcasecmp (attrbuf, "right") == 0) style_attrs.text_align = DW_STYLE_TEXT_ALIGN_RIGHT; else if (g_strcasecmp (attrbuf, "center") == 0) style_attrs.text_align = DW_STYLE_TEXT_ALIGN_CENTER; } if (!prefs.force_my_colors && (attrbuf = Html_get_attr(html, tag, tagsize, "bgcolor"))) { bgcolor = Html_color_parse(html, attrbuf, -1); if (bgcolor != -1) { if (bgcolor == 0xffffff && !prefs.allow_white_bg) bgcolor = prefs.bg_color; html->stack[html->stack_top].current_bg_color = bgcolor; style_attrs.background_color = a_Dw_style_color_new (bgcolor, html->bw->main_window->window); } } tstyle = a_Dw_style_new (&style_attrs, html->bw->main_window->window); /* The style for the cells */ style_attrs = *html->stack[html->stack_top].style; /* When dillo was started with the --debug-rendering option, there * is always a border around the cells. */ if (dillo_dbg_rendering) a_Dw_style_box_set_val (&style_attrs.border_width, 1); else a_Dw_style_box_set_val (&style_attrs.border_width, border ? 
1 : 0); a_Dw_style_box_set_val (&style_attrs.padding, cellpadding); a_Dw_style_box_set_border_color (&style_attrs, tstyle->border_color.top); a_Dw_style_box_set_border_style (&style_attrs, DW_STYLE_BORDER_INSET); old_style = html->stack[html->stack_top].table_cell_style; html->stack[html->stack_top].table_cell_style = a_Dw_style_new (&style_attrs, html->bw->main_window->window); if (old_style) a_Dw_style_unref (old_style); table = a_Dw_table_new (); a_Dw_page_add_widget (DW_PAGE (html->dw), table, tstyle); a_Dw_style_unref (tstyle); html->stack[html->stack_top].table_mode = DILLO_HTML_TABLE_MODE_TOP; html->stack[html->stack_top].cell_text_align_set = FALSE; html->stack[html->stack_top].table = table; #endif } /* * used by
      and */ static void Html_tag_open_table_cell(DilloHtml *html, char *tag, gint tagsize, DwStyleTextAlignType text_align) { #ifdef USE_TABLES DwWidget *col_page; gint colspan = 1, rowspan = 1; const char *attrbuf; DwStyle style_attrs, *style, *old_style; gint32 bgcolor; gboolean new_style; switch (html->stack[html->stack_top].table_mode) { case DILLO_HTML_TABLE_MODE_NONE: MSG_HTML(" or outside \n"); return; case DILLO_HTML_TABLE_MODE_TOP: MSG_HTML("\n"); /* a_Dw_table_add_cell takes care that dillo does not crash. */ /* continues */ case DILLO_HTML_TABLE_MODE_TR: case DILLO_HTML_TABLE_MODE_TD: /* todo: check errors? */ if ((attrbuf = Html_get_attr(html, tag, tagsize, "colspan"))) colspan = Html_check_int(strtol(attrbuf, NULL, 10), 0, 1000, 1); if ((attrbuf = Html_get_attr(html, tag, tagsize, "rowspan"))) rowspan = Html_check_int(strtol(attrbuf, NULL, 10), 0, 1000, 1); /* text style */ old_style = html->stack[html->stack_top].style; style_attrs = *old_style; if (!html->stack[html->stack_top].cell_text_align_set) style_attrs.text_align = text_align; if (Html_get_attr(html, tag, tagsize, "nowrap")) style_attrs.white_space = DW_STYLE_WHITE_SPACE_NOWRAP; else style_attrs.white_space = DW_STYLE_WHITE_SPACE_NORMAL; html->stack[html->stack_top].style = a_Dw_style_new (&style_attrs, html->bw->main_window->window); a_Dw_style_unref (old_style); Html_tag_set_align_attr (html, tag, tagsize); /* cell style */ style_attrs = *html->stack[html->stack_top].table_cell_style; new_style = FALSE; if ((attrbuf = Html_get_attr(html, tag, tagsize, "width"))) { style_attrs.width = Html_parse_length (html, attrbuf); new_style = TRUE; } if (Html_tag_set_valign_attr (html, tag, tagsize, &style_attrs)) new_style = TRUE; if (!prefs.force_my_colors && (attrbuf = Html_get_attr(html, tag, tagsize, "bgcolor"))) { bgcolor = Html_color_parse(html, attrbuf, -1); if (bgcolor != -1) { if (bgcolor == 0xffffff && !prefs.allow_white_bg) bgcolor = prefs.bg_color; new_style = TRUE; 
style_attrs.background_color = a_Dw_style_color_new (bgcolor, html->bw->main_window->window); html->stack[html->stack_top].current_bg_color = bgcolor; } } if (html->stack[html->stack_top].style->text_align == DW_STYLE_TEXT_ALIGN_STRING) col_page = a_Dw_table_cell_new (a_Dw_table_get_cell_ref (DW_TABLE (html->stack[html->stack_top].table))); else col_page = a_Dw_page_new (); if (new_style) { style = a_Dw_style_new (&style_attrs, html->bw->main_window->window); a_Dw_widget_set_style (col_page, style); a_Dw_style_unref (style); } else a_Dw_widget_set_style (col_page, html->stack[html->stack_top].table_cell_style); a_Dw_table_add_cell (DW_TABLE (html->stack[html->stack_top].table), col_page, colspan, rowspan); html->stack[html->stack_top].page = html->dw = col_page; /* Handle it when the user clicks on a link */ Html_connect_signals(html, GTK_OBJECT(col_page)); break; default: /* compiler happiness */ break; } html->stack[html->stack_top].table_mode = DILLO_HTML_TABLE_MODE_TD; #endif } /* * */ static void Html_tag_open_tr(DilloHtml *html, char *tag, gint tagsize) { const char *attrbuf; DwStyle style_attrs, *style, *old_style; gint32 bgcolor; #ifdef USE_TABLES switch (html->stack[html->stack_top].table_mode) { case DILLO_HTML_TABLE_MODE_NONE: _MSG("Invalid HTML syntax: outside
      or outside
      */ static void Html_tag_open_td(DilloHtml *html, char *tag, gint tagsize) { Html_tag_open_table_cell (html, tag, tagsize, DW_STYLE_TEXT_ALIGN_LEFT); } /* * */ static void Html_tag_open_th(DilloHtml *html, char *tag, gint tagsize) { Html_set_top_font(html, NULL, 0, 1, 1); Html_tag_open_table_cell (html, tag, tagsize, DW_STYLE_TEXT_ALIGN_CENTER); } /* *
      \n"); return; case DILLO_HTML_TABLE_MODE_TOP: case DILLO_HTML_TABLE_MODE_TR: case DILLO_HTML_TABLE_MODE_TD: style = NULL; if (!prefs.force_my_colors && (attrbuf = Html_get_attr(html, tag, tagsize, "bgcolor"))) { bgcolor = Html_color_parse(html, attrbuf, -1); if (bgcolor != -1) { if (bgcolor == 0xffffff && !prefs.allow_white_bg) bgcolor = prefs.bg_color; style_attrs = *html->stack[html->stack_top].style; style_attrs.background_color = a_Dw_style_color_new (bgcolor, html->bw->main_window->window); style = a_Dw_style_new (&style_attrs, html->bw->main_window->window); html->stack[html->stack_top].current_bg_color = bgcolor; } } a_Dw_table_add_row (DW_TABLE (html->stack[html->stack_top].table), style); if (style) a_Dw_style_unref (style); if (Html_get_attr (html, tag, tagsize, "align")) { html->stack[html->stack_top].cell_text_align_set = TRUE; Html_tag_set_align_attr (html, tag, tagsize); } style_attrs = *html->stack[html->stack_top].table_cell_style; if (Html_tag_set_valign_attr (html, tag, tagsize, &style_attrs)) { old_style = html->stack[html->stack_top].table_cell_style; html->stack[html->stack_top].table_cell_style = a_Dw_style_new (&style_attrs, html->bw->main_window->window); a_Dw_style_unref (old_style); } else break; default: break; } html->stack[html->stack_top].table_mode = DILLO_HTML_TABLE_MODE_TR; #else a_Dw_page_add_parbreak(DW_PAGE (html->dw), 0, html->stack[(html)->stack_top].style); #endif } /* * ,