/* c_parser.c - a C 99 parser */ /* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* Copyright 2000, 2001 Dibyendu Majumdar. * Author : Dibyendu Majumdar * Email : dibyendu@mazumdar.demon.co.uk * Website: www.mazumdar.demon.co.uk * */ /* * 12 Jan 2000 - Started * 13 Jan 2000 - Completed 1st version * 14-15 Jan 2000 - Fixed many bugs, added TRACE() * 16 Jan 2001 - Streamlined TokMap, and token test macros * 16 Jan 2001 - Documented FIRST sets, ambiguities * 17 Jan 2001 - Started using CVS * 17 Jan 2001 - Imported typedef handling from UPS sources * 18 Jan 2001 - First version that can parse typedefs * BUG: a typedef name can be hidden by declaring a function, * variable, or enumerator with the same name. Currently * cannot handle this. * 19 Jan 2001 - Started work on C99 grammer. * 19 Jan 2001 - Added support for designators in initializers * 19 Jan 2001 - Modified external declaration so that these can no longer * begin without a declaration specifier. * 19 Jan 2001 - Selection (if, switch) and iteration (for, do, while) * statements now enclosed in blocks, sub statements also * enclosed in blocks. * 19 Jan 2001 - First expression of for can now be a declaration. * 19 Jan 2001 - Compound statements can now have declartions and statements * interspersed. * 20 Jan 2001 - Tentative code for parsing compound literals. * 20 Jan 2001 - Structured trace messages to show parser in action. * 16 Nov 2001 - Tidied up for release. As you can see I have not worked on this since Jan, 2001. * It is unlikely I will be able to spend time on this in the immediate future. Hence, * I am releasing it "as is". */ #include #include #include #include #include #include "c_lex.h" #include "list.h" /*** * Various FIRST SETS * ------------------ * declarator * IDENTIFIER * ( * declaration_specifier * AUTO REGISTER STATIC EXTERN TYPEDEF VOID CHAR * SHORT INT LONG FLOAT DOUBLE SIGNED UNSIGNED STRUCT UNION * ENUM TYPEDEF_NAME CONST VOLATILE * declaration * declaration_specifier * external_declaration * declaration declarator * expression * ( ++ -- & * + - ~ ! SIZEOF IDENTIFIER FLOATING_CONSTANT * INTEGER_CONSTANT CHARACTER_CONSTANT STRING_CONSTANT * statement * expression ; { IF SWITCH WHILE DO FOR GOTO CONTINUE BREAK * RETURN CASE DEFAULT IDENTIFIER */ /*** * Ambiguity * 1) A declaration and a function definition look alike. * 2) An expression and a statement look alike. * 3) A type cast and an expression look alike. * 4) A labeled statment and an expression statement look alike. * 5) An assignment and a conditional expression look alike. */ #define is_assign_operator(tok) \ (TokMap[tok] & TOK_ASSGNOP) #define is_type_name(tok) \ (TokMap[tok] & TOK_DECL_SPEC || tok == IDENTIFIER && Cursym != 0 && Cursym->object_type == OBJ_TYPEDEF_NAME) #define is_declaration(tok) \ is_type_name(tok) #ifdef C89 #define is_external_declaration(tok) \ (is_declaration(tok) || tok == STAR || tok == LPAREN || tok == IDENTIFIER) #else #define is_external_declaration(tok) \ is_declaration(tok) #endif #define is_expression(tok) \ (TokMap[tok] & TOK_EXPR) #define is_statement(tok) \ (TokMap[tok] & TOK_STMT) #define is_function_body(tok) \ (tok == LBRACE || (is_declaration(tok) && tok != TYPEDEF)) #if 1 #define TRACEIN(s) do { if (DebugLevel == 2) printf("%*s%s {\n", TraceLevel, "", s); fflush(stdout); TraceLevel++; } while(0); #define TRACEOUT(s) do { --TraceLevel; if (DebugLevel == 2) printf("%*s} (%s)\n", TraceLevel, "", s); fflush(stdout); } while(0); #else #define TRACEIN(s) #define TRACEOUT(s) #endif enum { TOK_UNKNOWN = 0, TOK_EXPR = 1, TOK_CONSTANT = 2, TOK_TYPE_SPECIFIER_QUALIFIER = 4, TOK_TYPE_SPECIFIER = 8, TOK_TYPE_QUALIFIER = 16, TOK_TAG = 32, TOK_STRUCT = 64, TOK_STORAGE_CLASS = 256, TOK_STMT = 1024, TOK_DECL_SPEC = 2048, TOK_ASSGNOP = 4096, }; enum { LEVEL_GLOBAL = 0, LEVEL_FUNCTION = 1, LEVEL_STATEMENT = 2 }; enum { OBJ_TYPEDEF_NAME = 1, OBJ_FUNCTION_DECL = 2, OBJ_FUNCTION_DEFN = 4, OBJ_ENUMERATOR = 8, OBJ_VARIABLE = 16, OBJ_PARAMETER = 32, OBJ_IDENTIFIER = 2+4+8+16+32 }; typedef struct symbol_t { link_t link; const char *name; int storage_class; int object_type; } symbol_t; typedef struct { link_t link; int level; /* nesting level */ list_t symbols; /* list of symbols */ } symtab_t; static unsigned long TokMap[BADTOK]; static int DebugLevel = 0; static int TraceLevel = 0; static int Level = 0; static int Saw_ident = 0; static int Is_func = 0; static token_t tok; static int Parsing_struct = 0; static int Parsing_oldstyle_parmdecl = 0; static int Storage_class[100]; static int stack_ptr = -1; static list_t identifiers; /* head of the identifiers list */ static list_t labels; static list_t types; static symtab_t *Cursymtab; /* current symbol table */ static symtab_t *Curlabels; static symtab_t *Curtypes; static symbol_t *Cursym = 0; static void init_tokmap(void); static const char * object_name(int object_type); static symtab_t * new_symbol_table(list_t *owner); static void init_symbol_table(void); static symbol_t * find_symbol(symtab_t *tab, const char *name, int all_scope); static void enter_scope(void); static void exit_scope(void); static void install_symbol(const char *name, int storage_class, int object_type); static void match(token_t expected_tok); static bool check_not_typedef(void); static void constant_expression(void); static void expression(void); static void primary_expression(void); static void postfix_operator(void); static void postfix_operators(void); static void sizeof_expression(void); static void unary_expression(void); static void multiplicative_expression(void); static void additive_expression(void); static void shift_expression(void); static void relational_expression(void); static void equality_expression(void); static void and_expression(void); static void exclusive_or_expression(void); static void inclusive_or_expression(void); static void logical_and_expression(void); static void logical_or_expression(void); static void conditional_expression(void); static void assignment_expression(void); static void labeled_statement(void); static void case_statement(void); static void default_statement(void); static void if_statement(void); static void switch_statement(void); static void while_statement(void); static void do_while_statement(void); static void for_statement(void); static void break_statement(void); static void continue_statement(void); static void goto_statement(void); static void return_statement(void); static void empty_statement(void); static void expression_statement(void); static void statement(void); static void compound_statement(void); static void enumerator(void); static void enum_specifier(void); static void member(void); static void members(void); static void struct_or_union_specifier(void); static void type_name(void); static void declaration_specifiers(int no_storage_class); static void pointer(void); static void direct_declarator(int abstract); static void parameter_list(int *new_style); static void suffix_declarator(void); static void declarator(int abstract); static void designator(void); static void initializer(int recurse); static void function_definition(void); static int init_declarator(int check_if_function); static void declaration(void); static void translation_unit(void); static const char * mygetline(char *arg); static void init_tokmap(void) { TokMap[ FLOATING_CONSTANT ] = TOK_CONSTANT|TOK_EXPR|TOK_STMT ; TokMap[ INTEGER_CONSTANT ] = TOK_CONSTANT|TOK_EXPR|TOK_STMT ; TokMap[ STRING_CONSTANT ] = TOK_CONSTANT|TOK_EXPR|TOK_STMT ; TokMap[ CHARACTER_CONSTANT ] = TOK_CONSTANT|TOK_EXPR|TOK_STMT ; TokMap[ IDENTIFIER ] = TOK_EXPR|TOK_STMT ; TokMap[ SIZEOF ] = TOK_EXPR|TOK_STMT ; TokMap[ AND ] = TOK_EXPR|TOK_STMT ; TokMap[ PLUSPLUS ] = TOK_EXPR|TOK_STMT ; TokMap[ MINUSMINUS ] = TOK_EXPR|TOK_STMT ; TokMap[ STAR ] = TOK_EXPR|TOK_STMT ; TokMap[ PLUS ] = TOK_EXPR|TOK_STMT ; TokMap[ MINUS ] = TOK_EXPR|TOK_STMT ; TokMap[ TILDE ] = TOK_EXPR|TOK_STMT ; TokMap[ LPAREN ] = TOK_EXPR|TOK_STMT ; TokMap[ NOT ] = TOK_EXPR|TOK_STMT ; TokMap[ TYPEDEF_NAME ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC; TokMap[ CHAR ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC ; TokMap[ FLOAT ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC ; TokMap[ DOUBLE ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC ; TokMap[ SHORT ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC ; TokMap[ INT ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC ; TokMap[ UNSIGNED ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC ; TokMap[ SIGNED ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC ; TokMap[ VOID ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC ; TokMap[ STRUCT ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_STRUCT|TOK_TAG|TOK_DECL_SPEC ; TokMap[ UNION ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_STRUCT|TOK_TAG|TOK_DECL_SPEC ; TokMap[ ENUM ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TAG|TOK_DECL_SPEC ; TokMap[ LONG ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_SPECIFIER|TOK_DECL_SPEC ; TokMap[ CONST ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_QUALIFIER|TOK_DECL_SPEC ; TokMap[ VOLATILE ] = TOK_TYPE_SPECIFIER_QUALIFIER|TOK_TYPE_QUALIFIER|TOK_DECL_SPEC ; TokMap[ STATIC ] = TOK_STORAGE_CLASS|TOK_DECL_SPEC ; TokMap[ EXTERN ] = TOK_STORAGE_CLASS|TOK_DECL_SPEC ; TokMap[ AUTO ] = TOK_STORAGE_CLASS|TOK_DECL_SPEC ; TokMap[ REGISTER ] = TOK_STORAGE_CLASS|TOK_DECL_SPEC ; TokMap[ TYPEDEF ] = TOK_STORAGE_CLASS|TOK_DECL_SPEC ; TokMap[ IF ] = TOK_STMT ; TokMap[ BREAK ] = TOK_STMT ; TokMap[ CASE ] = TOK_STMT ; TokMap[ CONTINUE ] = TOK_STMT ; TokMap[ DEFAULT ] = TOK_STMT ; TokMap[ DO ] = TOK_STMT ; TokMap[ ELSE ] = TOK_STMT ; TokMap[ FOR ] = TOK_STMT ; TokMap[ GOTO ] = TOK_STMT ; TokMap[ RETURN ] = TOK_STMT ; TokMap[ SWITCH ] = TOK_STMT ; TokMap[ WHILE ] = TOK_STMT ; TokMap[ LBRACE ] = TOK_STMT; TokMap[ SEMI ] = TOK_STMT; TokMap[ EQUALS ] = TOK_ASSGNOP ; TokMap[ PLUS_EQUALS ] = TOK_ASSGNOP ; TokMap[ MINUS_EQUALS ] = TOK_ASSGNOP ; TokMap[ STAR_EQUALS ] = TOK_ASSGNOP ; TokMap[ SLASH_EQUALS ] = TOK_ASSGNOP ; TokMap[ PERCENT_EQUALS ] = TOK_ASSGNOP ; TokMap[ LSHIFT_EQUALS ] = TOK_ASSGNOP ; TokMap[ RSHIFT_EQUALS ] = TOK_ASSGNOP ; TokMap[ AND_EQUALS ] = TOK_ASSGNOP ; TokMap[ XOR_EQUALS ] = TOK_ASSGNOP ; TokMap[ OR_EQUALS ] = TOK_ASSGNOP ; } static const char * object_name(int object_type) { const char *name; switch(object_type) { case OBJ_TYPEDEF_NAME: name = "typedef"; break; case OBJ_FUNCTION_DECL: name = "function_decl"; break; case OBJ_FUNCTION_DEFN: name = "function_defn"; break; case OBJ_ENUMERATOR: name = "enumerator"; break; case OBJ_VARIABLE: name = "variable"; break; case OBJ_PARAMETER: name = "parameter"; break; case OBJ_IDENTIFIER: name = "identifier"; break; default: name = "invalid"; break; } return name; } static symtab_t * new_symbol_table(list_t *owner) { symtab_t *tab; tab = calloc(1, sizeof(symtab_t)); list_init(&tab->symbols); list_append(owner, tab); return tab; } static void init_symbol_table(void) { list_init(&identifiers); Cursymtab = new_symbol_table(&identifiers); list_init(&labels); list_init(&types); Curtypes = new_symbol_table(&types); } static symbol_t * find_symbol(symtab_t *tab, const char *name, int all_scope) { symbol_t *sym; assert(tab != 0); for (sym = (symbol_t *)list_first(&tab->symbols); sym != 0; sym = (symbol_t *)list_next(&tab->symbols, sym)) { if (strcmp(sym->name, name) == 0) { return sym; } } if (!all_scope) return 0; tab = (symtab_t *)list_prev(&identifiers, tab); if (tab == 0) return 0; return find_symbol(tab, name, all_scope); } static void enter_scope(void) { symtab_t *tab; Level++; if (DebugLevel == 3) { printf("%*sEntering scope %d\n", TraceLevel, "", Level); } if (Level == LEVEL_STATEMENT) { /* Although we increment the level when we parse * function parameters, and again when we enter the * function body, ANSI C requires these to be in the same * scope. */ return; } /* tab = calloc(1, sizeof(symtab_t)); list_init(&tab->symbols); list_append(&identifiers, tab); Cursymtab = tab; */ Cursymtab = new_symbol_table(&identifiers); } static void exit_scope(void) { symtab_t *tab; if (DebugLevel == 3) { printf("%*sExiting scope %d\n", TraceLevel, "", Level); } Level--; /* Although we increment the Level when we parse * function parameters, and again when we enter the * function body, ANSI C requires these to be in the same * scope. */ if (Level == LEVEL_FUNCTION) return; tab = Cursymtab; Cursymtab = (symtab_t *)list_prev(&identifiers, Cursymtab); assert(Cursymtab != 0); list_remove(&identifiers, tab); } static void install_symbol(const char *name, int storage_class, int object_type) { symbol_t *sym; sym = find_symbol(Cursymtab, name, 0); if (sym != 0) { fprintf(stderr, "Error: redeclaration of symbol %s as %s\n", name, object_name(object_type)); fprintf(stderr, "Error: previously declared as %s\n", object_name(sym->object_type)); /* fprintf(stderr, "Level = %d\n", Level); */ /* exit(1); */ } if (DebugLevel == 3) { printf("%*sInstalling %s name %s\n", TraceLevel, "", object_name(object_type), name); if (sym) { printf("%*s\tOverriding %s name %s\n", TraceLevel, "", object_name(sym->object_type), sym->name); } } sym = safe_calloc(1, sizeof(symbol_t)); sym->name = string_copy(name, strlen(name)); sym->storage_class = storage_class; sym->object_type = object_type; list_append(&Cursymtab->symbols, sym); } static void match(token_t expected_tok) { if (tok != expected_tok) { printf("Parse failed: Expected %s, ", tokname(expected_tok)); printf("got %s\n", tokname(tok)); /* printf("Level = %d\n", Level); */ exit(1); } else { if (DebugLevel) { const char *cp = 0; if (TokMap[tok] & TOK_CONSTANT) { cp = Lexeme->constant->co_val; } else if (tok == IDENTIFIER) { cp = Lexeme->identifier->id_name; } if (cp == 0) printf("%*s[%s]\n", TraceLevel, "", tokname(tok)); else printf("%*s[%s(%s)]\n", TraceLevel, "", tokname(tok), cp); } tok = lex_get_token(); } } static bool check_not_typedef(void) { return (Cursym == 0 || Cursym->object_type != OBJ_TYPEDEF_NAME); } static void constant_expression(void) { TRACEIN("constant_expression"); conditional_expression(); /* fold constant */ TRACEOUT("constant_expression"); } static void expression(void) { TRACEIN("expression"); if (!is_expression(tok)) { TRACEOUT("expression"); return; } assignment_expression(); while (tok == COMMA) { match(COMMA); assignment_expression(); } TRACEOUT("expression"); } static void primary_expression(void) { TRACEIN("primary_expression"); if (tok == IDENTIFIER) { check_not_typedef(); match(IDENTIFIER); } else if (TokMap[tok] & TOK_CONSTANT) { match(tok); } /* parenthesized expression handled in unary_expression() */ TRACEOUT("primary_expression"); } static void postfix_operator(void) { TRACEIN("postfix_operator"); if (tok == LBRAC) { match(LBRAC); expression(); match(RBRAC); } else if (tok == LPAREN) { match(LPAREN); if (tok != RPAREN) { assignment_expression(); while (tok == COMMA) { match(COMMA); assignment_expression(); } } match(RPAREN); } else if (tok == DOT || tok == ARROW) { match(tok); match(IDENTIFIER); } else if (tok == PLUSPLUS || tok == MINUSMINUS) { match(tok); } TRACEOUT("postfix_operator"); } static void postfix_operators(void) { TRACEIN("postfix_operators"); while (tok == LBRAC || tok == LPAREN || tok == DOT || tok == ARROW || tok == PLUSPLUS || tok == MINUSMINUS) { postfix_operator(); } TRACEOUT("postfix_operators"); } static void sizeof_expression(void) { TRACEIN("sizeof_expression"); match(SIZEOF); if (tok == LPAREN) { int found_typename = 0; match(LPAREN); if (is_type_name(tok)) { type_name(); found_typename = 1; } else { expression(); } match(RPAREN); #if 1 /* as per comp.std.c */ if (found_typename && tok == LBRACE) { initializer(0); postfix_operators(); } #endif else if (!found_typename) { postfix_operators(); } } else { unary_expression(); } TRACEOUT("sizeof_expression"); } static void unary_expression(void) { TRACEIN("unary_expression"); if (tok == SIZEOF) { sizeof_expression(); } else if (tok == LPAREN) { int found_typename = 0; match(LPAREN); if (is_type_name(tok)) { type_name(); found_typename = 1; } else { expression(); } match(RPAREN); if (found_typename && tok == LBRACE) { initializer(0); postfix_operators(); } else if (!found_typename) { postfix_operators(); } else { unary_expression(); } } else if (tok == PLUSPLUS || tok == MINUSMINUS || tok == AND || tok == STAR || tok == PLUS || tok == MINUS || tok == TILDE || tok == NOT) { match(tok); unary_expression(); } else { primary_expression(); postfix_operators(); } TRACEOUT("unary_expression"); } static void multiplicative_expression(void) { TRACEIN("multiplicative_expression"); unary_expression(); while (tok == STAR || tok == SLASH || tok == PERCENT) { match(tok); unary_expression(); } TRACEOUT("multiplicative_expression"); } static void additive_expression(void) { TRACEIN("additive_expression"); multiplicative_expression(); while (tok == PLUS || tok == MINUS) { match(tok); multiplicative_expression(); } TRACEOUT("additive_expression"); } static void shift_expression(void) { TRACEIN("shift_expression"); additive_expression(); while (tok == LSHIFT || tok == RSHIFT) { match(tok); additive_expression(); } TRACEOUT("shift_expression"); } static void relational_expression(void) { TRACEIN("relational_expression"); shift_expression(); while (tok == GREATERTHAN || tok == LESSTHAN || tok == GTEQ || tok == LESSEQ) { match(tok); shift_expression(); } TRACEOUT("relational_expression"); } static void equality_expression(void) { TRACEIN("equality_expression"); relational_expression(); while (tok == EQEQ || tok == NOTEQ) { match(tok); relational_expression(); } TRACEOUT("equality_expression"); } static void and_expression(void) { TRACEIN("and_expression"); equality_expression(); while (tok == AND) { match(AND); equality_expression(); } TRACEOUT("and_expression"); } static void exclusive_or_expression(void) { TRACEIN("exclusive_or_expression"); and_expression(); while (tok == XOR) { match(XOR); and_expression(); } TRACEOUT("exclusive_or_expression"); } static void inclusive_or_expression(void) { TRACEIN("inclusive_or_expression"); exclusive_or_expression(); while (tok == OR) { match(OR); exclusive_or_expression(); } TRACEOUT("inclusive_or_expression"); } static void logical_and_expression(void) { TRACEIN("logical_and_expression"); inclusive_or_expression(); while (tok == ANDAND) { match(ANDAND); inclusive_or_expression(); } TRACEOUT("logical_and_expression"); } static void logical_or_expression(void) { TRACEIN("logical_or_expression"); logical_and_expression(); while (tok == OROR) { match(OROR); logical_and_expression(); } TRACEOUT("logical_or_expression"); } static void conditional_expression(void) { TRACEIN("conditional_expression"); logical_or_expression(); if (tok == QUERY) { match(QUERY); expression(); match(COLON); conditional_expression(); } TRACEOUT("conditional_expression"); } static void assignment_expression(void) { TRACEIN("assignment_expression"); conditional_expression(); if (is_assign_operator(tok)) { /* TODO: check that previous expression was unary */ match(tok); assignment_expression(); } TRACEOUT("assignment_expression"); } static void labeled_statement(void) { TRACEIN("labeled_statement"); match(IDENTIFIER); match(COLON); statement(); TRACEOUT("labeled_statement"); } static void case_statement(void) { TRACEIN("case_statement"); match(CASE); constant_expression(); match(COLON); statement(); TRACEOUT("case_statement"); } static void default_statement(void) { TRACEIN("default_statement"); match(DEFAULT); match(COLON); statement(); TRACEOUT("default_statement"); } static void if_statement(void) { TRACEIN("if_statement"); enter_scope(); match(IF); match(LPAREN); expression(); match(RPAREN); enter_scope(); statement(); exit_scope(); if (tok == ELSE) { enter_scope(); match(ELSE); statement(); exit_scope(); } exit_scope(); TRACEOUT("if_statement"); } static void switch_statement(void) { TRACEIN("switch_statement"); enter_scope(); match(SWITCH); match(LPAREN); expression(); match(RPAREN); enter_scope(); statement(); exit_scope(); exit_scope(); TRACEOUT("switch_statement"); } static void while_statement(void) { TRACEIN("while_statement"); enter_scope(); match(WHILE); match(LPAREN); expression(); match(RPAREN); enter_scope(); statement(); exit_scope(); exit_scope(); TRACEOUT("while_statement"); } static void do_while_statement(void) { TRACEIN("do_while_statement"); enter_scope(); match(DO); enter_scope(); statement(); exit_scope(); match(WHILE); match(LPAREN); expression(); match(RPAREN); exit_scope(); match(SEMI); TRACEOUT("do_while_statement"); } static void for_statement(void) { TRACEIN("for_statement"); enter_scope(); match(FOR); match(LPAREN); if (tok != SEMI) { if (is_declaration(tok)) { declaration(); } else { expression(); match(SEMI); } } else { match(SEMI); } if (tok != SEMI) expression(); match(SEMI); if (tok != RPAREN) expression(); match(RPAREN); enter_scope(); statement(); exit_scope(); exit_scope(); TRACEOUT("for_statement"); } static void break_statement(void) { TRACEIN("break_statement"); match(BREAK); match(SEMI); TRACEOUT("break_statement"); } static void continue_statement(void) { TRACEIN("continue_statement"); match(CONTINUE); match(SEMI); TRACEOUT("continue_statement"); } static void goto_statement(void) { TRACEIN("goto_statement"); match(GOTO); match(IDENTIFIER); match(SEMI); TRACEOUT("goto_statement"); } static void return_statement(void) { TRACEIN("return_statement"); match(RETURN); if (tok != SEMI) expression(); match(SEMI); TRACEOUT("return_statement"); } static void empty_statement(void) { TRACEIN("empty_statement"); match(SEMI); TRACEOUT("empty_statement"); } static void expression_statement(void) { TRACEIN("expression_statement"); if (tok == IDENTIFIER && lex_colon_follows()) { labeled_statement(); } else { expression(); match(SEMI); } TRACEOUT("expression_statement"); } static void statement(void) { TRACEIN("statement"); switch (tok) { case IDENTIFIER: expression_statement(); break; case CASE: case_statement(); break; case DEFAULT: default_statement(); break; case IF: if_statement(); break; case SWITCH: switch_statement(); break; case WHILE: while_statement(); break; case DO: do_while_statement(); break; case FOR: for_statement(); break; case BREAK: break_statement(); break; case CONTINUE: continue_statement(); break; case GOTO: goto_statement(); break; case RETURN: return_statement(); break; case LBRACE: compound_statement(); break; case SEMI: empty_statement(); break; default: if (is_expression(tok)) expression_statement(); break; } TRACEOUT("statement"); } static void compound_statement(void) { TRACEIN("compound_statement"); enter_scope(); match(LBRACE); while (tok != RBRACE) { if (is_declaration(tok)) { if (tok == IDENTIFIER && lex_colon_follows()) statement(); else declaration(); } else { statement(); } } exit_scope(); match(RBRACE); TRACEOUT("compound_statement"); } static void enumerator(void) { TRACEIN("enumerator"); if (tok == IDENTIFIER) { check_not_typedef(); install_symbol(Lexeme->identifier->id_name, Storage_class[stack_ptr], OBJ_ENUMERATOR); match(IDENTIFIER); } else { TRACEOUT("enumerator"); return; } if (tok == EQUALS) { match(EQUALS); constant_expression(); } TRACEOUT("enumerator"); } static void enum_specifier(void) { TRACEIN("enum_specifier"); if (tok == ENUM) { match(ENUM); } else { TRACEOUT("enum_specifier"); return; } if (tok == IDENTIFIER) { match(IDENTIFIER); } if (tok == LBRACE) { match(LBRACE); enumerator(); while (tok == COMMA) { match(COMMA); enumerator(); } match(RBRACE); } TRACEOUT("enum_specifier"); } static void member(void) { TRACEIN("member"); if (tok != COLON) declarator(0); if (tok == COLON) { match(COLON); constant_expression(); } TRACEOUT("member"); } static void members(void) { TRACEIN("members"); do { stack_ptr++; declaration_specifiers(1); member(); while (tok == COMMA) { match(COMMA); member(); } match(SEMI); stack_ptr--; } while (tok != RBRACE); TRACEOUT("members"); } static void struct_or_union_specifier(void) { TRACEIN("struct_or_union_specifier"); Parsing_struct++; match(tok); if (tok == IDENTIFIER) match(IDENTIFIER); if (tok == LBRACE) { match(LBRACE); members(); match(RBRACE); } Parsing_struct--; TRACEOUT("struct_or_union_specifier"); } static void type_name(void) { TRACEIN("type_name"); stack_ptr++; declaration_specifiers(1); declarator(1); stack_ptr--; TRACEOUT("type_name"); } static void declaration_specifiers(int no_storage_class) { bool type_found = FALSE; TRACEIN("declaration_specifiers"); assert(stack_ptr >= 0 && stack_ptr < 100); Storage_class[stack_ptr] = 0; while (is_declaration(tok)) { if (no_storage_class && (TokMap[tok] & TOK_STORAGE_CLASS)) { fprintf(stderr, "Parse failed: unexpected storage class %s\n", tokname(tok)); exit(1); } if (tok == IDENTIFIER && type_found) break; if ((TokMap[tok] & TOK_TYPE_SPECIFIER) || tok == IDENTIFIER) { type_found = TRUE; } if (TokMap[tok] & TOK_STRUCT) { struct_or_union_specifier(); break; } else if (tok == ENUM) { enum_specifier(); break; } else { bool savedtok = 0; if (TokMap[tok] & TOK_STORAGE_CLASS) { Storage_class[stack_ptr] = tok; } else if (tok == IDENTIFIER) { savedtok = tok; } match(tok); if (savedtok == IDENTIFIER) break; } } TRACEOUT("declaration_specifiers"); } static void pointer(void) { TRACEIN("pointer"); while (tok == STAR) { match(STAR); while (TokMap[tok] & TOK_TYPE_QUALIFIER) { match(tok); } } TRACEOUT("pointer"); } static void direct_declarator(int abstract) { TRACEIN("direct_declarator"); if (tok == LPAREN) { match(LPAREN); declarator(abstract); match(RPAREN); } else { if (!abstract) { if (tok == IDENTIFIER) { Saw_ident = 1; if (Storage_class[stack_ptr] == TYPEDEF) { install_symbol(Lexeme->identifier->id_name, TYPEDEF, OBJ_TYPEDEF_NAME); } else if (!Parsing_struct && !Parsing_oldstyle_parmdecl) { install_symbol(Lexeme->identifier->id_name, Storage_class[stack_ptr], OBJ_IDENTIFIER); } match(IDENTIFIER); } } } TRACEOUT("direct_declarator"); } static void parameter_list(int *new_style) { TRACEIN("parameter_list"); if (tok == IDENTIFIER && (Cursym == 0 || Cursym->object_type != OBJ_TYPEDEF_NAME)) { *new_style = 0; install_symbol(Lexeme->identifier->id_name, AUTO, OBJ_PARAMETER); match(IDENTIFIER); while (tok == COMMA) { match(COMMA); if (tok == IDENTIFIER) { check_not_typedef(); install_symbol(Lexeme->identifier->id_name, AUTO, OBJ_PARAMETER); match(tok); } else match(IDENTIFIER); } } else { /* * CHECK: When defining a function, each declarator in a * parameter list must contain an identifier. */ *new_style = 1; stack_ptr++; declaration_specifiers(0); declarator(0); stack_ptr--; while (tok == COMMA) { match(COMMA); if (tok == ELLIPSIS) { match(ELLIPSIS); break; } stack_ptr++; declaration_specifiers(0); declarator(0); stack_ptr--; } } TRACEOUT("parameter_list"); } static void suffix_declarator(void) { TRACEIN("suffix_declarator"); if (tok == LBRAC) { match(LBRAC); constant_expression(); match(RBRAC); } else if (tok == LPAREN) { int new_style = 0; enter_scope(); match(LPAREN); parameter_list(&new_style); match(RPAREN); if (new_style && tok != LBRACE) exit_scope(); Is_func = 1; } TRACEOUT("suffix_declarator"); } static void declarator(int abstract) { TRACEIN("declarator"); if (tok == STAR) { pointer(); } direct_declarator(abstract); while (tok == LBRAC || tok == LPAREN) { suffix_declarator(); } TRACEOUT("declarator"); } static void designator(void) { TRACEIN("designator"); if (tok == LBRAC) { match(LBRAC); constant_expression(); match(RBRAC); } else if (tok == DOT) { match(DOT); if (tok == IDENTIFIER) { check_not_typedef(); match(tok); } } TRACEOUT("designator"); } static void initializer(int recurse) { TRACEIN("initializer"); if (tok == LBRACE) { match(LBRACE); initializer(recurse+1); while (tok == COMMA) { match(COMMA); initializer(recurse+1); } match(RBRACE); } else if (recurse && (tok == LBRAC || tok == DOT)) { while (tok == LBRAC || tok == DOT) { designator(); } match(EQUALS); initializer(0); } else { assignment_expression(); } TRACEOUT("initializer"); } static void function_definition(void) { TRACEIN("function_definition"); if (tok == LBRACE) { compound_statement(); } else { Parsing_oldstyle_parmdecl++; while (is_declaration(tok)) { /* * CHECK: The only storage class permitted is * register and initialization is not permitted. * if no declaration is given for a parameter, * its type is taken to be int. */ declaration(); } Parsing_oldstyle_parmdecl--; compound_statement(); } exit_scope(); TRACEOUT("function_definition"); } static int init_declarator(int check_if_function) { int old_Is_func, old_Saw_ident; int func_defn = 0; TRACEIN("init_declarator"); old_Saw_ident = Saw_ident; old_Is_func = Is_func; Saw_ident = 0; Is_func = 0; declarator(0); func_defn = check_if_function && Level == LEVEL_FUNCTION && Is_func && Saw_ident && is_function_body(tok); if (Is_func) { /* * CHECK: The only storage class specifiers allowed among the * declaration specifiers are extern or static. */ /* * CHECK: A function may not return a function or an array. */ } Is_func = old_Is_func; Saw_ident = old_Saw_ident; if (func_defn) { function_definition(); TRACEOUT("init_declarator"); return 1; } else { if (tok == EQUALS) { /* * CHECK: not allowed when parsing old style function parameters * or a prototype. */ match(EQUALS); initializer(0); } } TRACEOUT("init_declarator"); return 0; } /* * 3) a declaration must have at least one declarator, or its type specifier * must declare a structure tag, a union tag, or the members of an enumeration. * 4) empty declarations are not permitted. */ static void declaration(void) { TRACEIN("declaration"); stack_ptr++; declaration_specifiers(0); if ( tok == SEMI ) { match(SEMI); goto success; } /* 2) the first declarator at global level may start a function * definition. */ if (init_declarator(Level == LEVEL_GLOBAL) == 1) { goto success; } while ( tok == COMMA ) { match(COMMA); init_declarator(0); } match(SEMI); success: stack_ptr--; TRACEOUT("declaration"); } /* * 1) translation unit consists of a sequence of external declarations. * which are either declarations or function definitions. * 2) only at this level can functions be defined. */ static void translation_unit(void) { TRACEIN("translation_unit"); Level = LEVEL_GLOBAL; tok = lex_get_token(); while (tok != 0) { if (is_external_declaration(tok)) { /* * a function definition looks like a declaration, * hence is initially parsed as one. * the check for 2) is in init_declarator(). */ declaration(); } else if (tok == SEMI) { /*ERROR(4): empty declarations are not permitted */ match(tok); } else { fprintf(stderr, "Parse failed: unexpected input %s\n", tokname(tok)); exit(1); } assert(Level == LEVEL_GLOBAL); } TRACEOUT("translation_unit"); } token_t name_type(const char *name) { Cursym = find_symbol(Cursymtab, name, 1); return IDENTIFIER; } static const char * mygetline(char *arg) { static char line[512]; line[0] = 0; return fgets(line, sizeof line, (FILE *)arg); } void parser_main(int argc, char *argv[]) { lex_env_t mylex = {0}; FILE *fp; const char *cp = getenv("DEBUG"); if (cp != 0) { DebugLevel = atoi(cp); } if (argc != 2) exit(1); fp = fopen(argv[1], "r"); if (fp == 0) exit(1); Lex_env = &mylex; Lex_env->le_getline = mygetline; Lex_env->le_getline_arg = (char *)fp; Lexeme = &Lex_env->le_lexeme; init_tokmap(); init_symbol_table(); #if 0 putenv("LEX_DEBUG=1"); #endif translation_unit(); return; }