%{
/*
 * C-HTML scanner (flex specification).
 *
 * Assumes the input contains no ISO-2022 escape sequences.
 * References: [1] RFC 1866, [2] http://www.nttdocomo.co.jp/i/tag/
 *
 * This prologue declares the scanner's global state and the helper macros
 * used by the rule actions.  Most macros either return from yylex() or
 * switch the start condition, so they are only meaningful inside actions.
 */

/*
 * NOTE(review): the three bare #include directives below carry no header
 * name in this copy of the file; restore them from the original source.
 */
#include
#include
#include
#include "html2hdml.h"
#include "html2hdml-parse.c"

/* Element token recorded by ELEM_RET(); consulted by the ATTR_RET_n() macros. */
int gl_elem;
/* Start condition saved by PUSHSTATE() and restored by POPSTATE() (one level only). */
int gl_prev_c;
/* Length of text accumulated by APPEND_TEXT(); 0 means nothing is pending. */
int gl_text_len = 0;
/* Line counter, incremented by the line-break rules. */
int gl_lineno = 1;
/* Last value passed to RETURN(); -1 means no token has been returned yet. */
int gl_last_ret = -1; /* no such token */
/* Not referenced in the visible part of this file. */
int gl_dummyspc_printed = 0;

/* Shorthand for the string member of the semantic value. */
#define YYLVAL yylval.str

/* Store my_strdup(yytext) in YYLVAL (my_strdup is defined elsewhere). */
#define SETLVAL() (YYLVAL = my_strdup(yytext))

/*
 * If text is pending (gl_text_len != 0): give everything after the first
 * gl_text_len characters back to the input with yyless(), terminate yytext
 * there, clear the pending length, and return a TEXT token carrying that
 * text.  Otherwise just load YYLVAL from yytext and fall through.
 *
 * This expands to a bare if/else and may return from yylex(), so the
 * statements following it in an action run only when no text was pending.
 */
#define FLUSH_TEXT() \
    if (gl_text_len) { \
        yyless(gl_text_len); \
        yytext[gl_text_len] = '\0'; \
        gl_text_len = 0; \
        SETLVAL(); \
        RETURN(TEXT); \
    } else { SETLVAL(); }

/*
 * Ask flex to append the next match to the current yytext (yymore) and
 * remember the length accumulated so far for a later FLUSH_TEXT().
 */
#define APPEND_TEXT() { \
    yymore(); \
    gl_text_len = yyleng; \
    /*yymore();*/ }

/* Return token `ret` with an empty string as its semantic value. */
#define RETURN_DUMMYSPC(ret) { \
    YYLVAL = my_strdup(""); \
    RETURN(ret); \
    }

/*
 * Unless the previously returned token was ' ' or '\n', push the whole
 * current match back with yyless(0) and return a ' ' token with an empty
 * value; the pushed-back input is scanned again on the next call.
 */
#define INSERT_DUMMYSPC() \
    if (gl_last_ret != ' ' && gl_last_ret != '\n') { \
        yyless(0); \
        RETURN_DUMMYSPC(' '); \
    }

/* Record `ret` in gl_last_ret and return it; `ret` is evaluated twice. */
#define RETURN(ret) { \
    gl_last_ret = ret; \
    return ret; }

/* Switch back to the start condition saved by the last PUSHSTATE(). */
#define POPSTATE() { \
    BEGIN gl_prev_c; }

/* Save the current start condition in gl_prev_c and enter `state`. */
#define PUSHSTATE(state) { \
    gl_prev_c = YYSTATE; \
    BEGIN state; }

/*
 * Element name matched: copy yytext into YYLVAL, enter C_ATTRNAME,
 * remember the element token in gl_elem, and return it.
 */
#define ELEM_RET(i) { \
    SETLVAL(); \
    BEGIN C_ATTRNAME; \
    gl_elem = i; \
    RETURN(i); \
    }

/*
 * Attribute name matched: copy yytext into YYLVAL and enter C_EQ, then
 * return token `j` when condition `i` is true, UNKNOWNATTR otherwise.
 */
#define ATTR_RET(i, j) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (i) { RETURN(j); } \
    else { RETURN(UNKNOWNATTR); } \
    }

/*
 * ATTR_RET_1 .. ATTR_RET_10: same as ATTR_RET(), with the condition spelled
 * out as "gl_elem equals one of e1..eN".  Token `ret` is returned when the
 * element recorded by ELEM_RET() is in the list, UNKNOWNATTR otherwise.
 */
#define ATTR_RET_1(ret, e1) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }
#define ATTR_RET_2(ret, e1, e2) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1 || gl_elem == e2) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }
#define ATTR_RET_3(ret, e1, e2, e3) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1 || gl_elem == e2 || gl_elem == e3) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }
#define ATTR_RET_4(ret, e1, e2, e3, e4) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1 || gl_elem == e2 || gl_elem == e3 || \
        gl_elem == e4) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }
#define ATTR_RET_5(ret, e1, e2, e3, e4, e5) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1 || gl_elem == e2 || gl_elem == e3 || \
        gl_elem == e4 || gl_elem == e5) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }
#define ATTR_RET_6(ret, e1, e2, e3, e4, e5, e6) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1 || gl_elem == e2 || gl_elem == e3 || \
        gl_elem == e4 || gl_elem == e5 || gl_elem == e6) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }
#define ATTR_RET_7(ret, e1, e2, e3, e4, e5, e6, e7) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1 || gl_elem == e2 || gl_elem == e3 || \
        gl_elem == e4 || gl_elem == e5 || gl_elem == e6 || \
        gl_elem == e7) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }
#define ATTR_RET_8(ret, e1, e2, e3, e4, e5, e6, e7, e8) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1 || gl_elem == e2 || gl_elem == e3 || \
        gl_elem == e4 || gl_elem == e5 || gl_elem == e6 || \
        gl_elem == e7 || gl_elem == e8) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }
#define ATTR_RET_9(ret, e1, e2, e3, e4, e5, e6, e7, e8, e9) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1 || gl_elem == e2 || gl_elem == e3 || \
        gl_elem == e4 || gl_elem == e5 || gl_elem == e6 || \
        gl_elem == e7 || gl_elem == e8 || gl_elem == e9) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }
#define ATTR_RET_10(ret, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10) { \
    YYLVAL = my_strdup(yytext); \
    BEGIN C_EQ; \
    if (gl_elem == e1 || gl_elem == e2 || gl_elem == e3 || \
        gl_elem == e4 || gl_elem == e5 || gl_elem == e6 || \
        gl_elem == e7 || gl_elem == e8 || gl_elem == e9 || \
        gl_elem == e10) { RETURN(ret); } \
    else { RETURN(UNKNOWNATTR); } \
    }

/*
 * Replace flex's default input reader: the scanner refills its buffer by
 * calling my_yyinput(), whose return value is stored as the byte count.
 */
#undef YY_INPUT
#define YY_INPUT(b, r, ms) (r=my_yyinput(b, ms))
int my_yyinput(char *buf, int max_size);
%}
/* Patterns match regardless of letter case; no yywrap() is required at EOF. */
%option case-insensitive
%option noyywrap
/* Exclusive start conditions; ELEM_RET enters C_ATTRNAME, ATTR_RET* enter C_EQ. */
%x C_ELEM C_ATTRNAME C_EQ C_ATTRVALUE
%x C_AVAL_Q C_AVAL_DQ
%x C_TAGC
%x C_COM
%x C_NONASCII
/* NAME: one letter, digit, '.' or '-' (letters of either case, see the option above). */
NAME [A-Z0-9\.\-]
/* SP: one blank, tab, CR or LF. */
SP [ \t\r\n]
/* SPGT: SP or '>'.  NSPGT: any character that is neither SP nor '>'. */
SPGT [> \t\r\n]
NSPGT [^> \t\r\n]
/* SPEQ: one blank, tab, CR, LF or '='. */
SPEQ [= \t\r\n]
/* SPGE: as SPEQ, plus '>'.  NSPGE: any character not in SPGE. */
SPGE [=> \t\r\n]
NSPGE [^=> \t\r\n]
%%
%{
    /*
     * The first rule group applies in every start condition.  It matches
     * a line break (CR LF, CR, or LF) only when another line-break character
     * follows it (trailing context).  Each action first flushes pending text
     * with FLUSH_TEXT(), which may return a TEXT token; when it does not
     * return, the action counts the line and calls yymore() so that the next
     * match is appended to this one.
     */
%}
<*>{
\r\n/[\r\n] { FLUSH_TEXT(); gl_lineno++; yymore(); }
\r/[\r] { FLUSH_TEXT(); gl_lineno++; yymore(); }
\n/[\r\n] { FLUSH_TEXT(); gl_lineno++; yymore(); }
}
"" {
    /*
     * Flush pending text, then return a COMMENT token.
     * NOTE(review): the pattern reads as an empty string in this copy of the
     * file; given the COMMENT token it was presumably a markup-comment
     * pattern whose text was lost -- confirm against the original source.
     */
    FLUSH_TEXT(); RETURN(COMMENT); }
"