## C99 grammar for PGE, following Annex A of the C99 draft n869.pdf
## (search the web for "n869.pdf").
##
## NOTE(review): the copy of this file that was reviewed had lost its line
## structure and every <subrule> call / $<name> alias.  Subrule calls below were
## re-inserted from the matching Annex A productions; alias names such as
## $<type>, $<op>, $<directive> are placeholders.  Confirm both against the
## upstream revision before relying on capture names.  Section A.3 is the least
## certain part.
##
## Behavioural fixes made in this revision (each is also marked "FIX" in place):
##   * init_declarator_list matched the literal text "init_declarator"
##   * expression lacked the repetition on its ", assignment" group
##   * assignment_operator and punctuator lacked plain "="
##   * punctuator / keyword / pp_number / integer_constant tried a short
##     alternative before a longer one that starts with the same text
##   * octal_constant rejected a lone "0"
##   * additive operators shared the multiplicative precedence level
##   * 'infix:<' used is equiv() with a precedence level instead of a name
##   * the #undef directive was spelled "undefine"
##   * direct_abstract_declarator_1 had an unescaped "*" quantifier in "[ * ]"

## A.1 Lexical grammar

grammar C99::Grammar;

## A.1.1 Lexical elements

## Whitespace as seen by every `rule`: blanks, // comments and /* */ comments.
token ws {
    [
    | <'//'> \N* \n
    | <'/*'> .*? <'*/'>
    | \s+
    ]*
}

rule c99_token {
    | <keyword>
    | <identifier>
    | <constant>
    | <string_literal>
    | <punctuator>
}

token pound { <'\#'> }

## NOTE(review): the reviewed copy had one more stripped alternative here than
## Annex A lists; its name could not be recovered.
regex preprocessing_token {
    | <header_name>
    | <identifier>
    | <pp_number>
    | <character_constant>
    | <string_literal>
    | <punctuator>
    | <-[# \r\n\t]>\S*    ## <-[#]-\S>\S*  ##non-whitespace
}

## A.1.2 Keywords

## FIX: "double" is listed before "do"; alternatives are tried in order, so
## the shorter word used to win on the input "double".
token keyword {
    | auto      | enum      | restrict  | unsigned
    | break     | extern    | return    | void
    | case      | float     | short     | volatile
    | char      | for       | signed    | while
    | const     | goto      | sizeof    | _Bool
    | continue  | if        | static    | _Complex
    | default   | inline    | struct    | _Imaginary
    | double    | do        | int       | switch
    | long      | typedef
    | else      | register  | union
}

## A.1.3 Identifiers

#token identifier { <identifier_nondigit> [ <identifier_nondigit> | \d ]* }
token identifier { <identifier_nondigit> [ <identifier_nondigit> | <digit> ]* }

token identifier_nondigit {
    | <alpha>
    | <[_]>
    | <universal_character_name>
}

## A.1.4 Universal character names

token universal_character_name {
    | <'\u'> <xdigit>**{4}
    | <'\U'> <xdigit>**{8}
}

## A.1.5 Constants

## Floating constants are tried before integer constants so that "1.5" is not
## cut short at "1"; character constants come before enumeration constants so
## that the "L" prefix is not taken as an identifier.
token constant {
    | <floating_constant>
    | <integer_constant>
    | <character_constant>
    | <enumeration_constant>
}

## FIX: hexadecimal is tried first because octal_constant now accepts a bare
## "0", which is also the first character of "0x...".
token integer_constant {
    [
    | <hexadecimal_constant>
    | <decimal_constant>
    | <octal_constant>
    ]
    <integer_suffix>?
}

token decimal_constant { <[1..9]> <digit>* }
#token decimal_constant { <[1..9]> \d* }

## FIX: was `0 <[0..7]>+`, which rejected the constant "0".
token octal_constant { 0 <[0..7]>* }

token hexadecimal_constant { 0 <[xX]> <xdigit>+ }

token integer_suffix {
    | <[uU]> [ll?|LL?]?
    | [ll?|LL?] <[uU]>?
}

token floating_constant {
    | <decimal_floating_constant>
    | <hexadecimal_floating_constant>
}

token decimal_floating_constant {
    [
    | <fractional_constant> <exponent_part>?
    | <digit_sequence> <exponent_part>
    ]
    <floating_suffix>?
}

token hexadecimal_prefix { 0 <[xX]> }

token hexadecimal_floating_constant {
    <hexadecimal_prefix>
    [
    | <hexadecimal_fractional_constant>
    | <hexadecimal_digit_sequence>
    ]
    <binary_exponent_part>
    <floating_suffix>?
}

token fractional_constant {
    | <digit_sequence>? \. <digit_sequence>
    | <digit_sequence> \.
}

token exponent_part { <[eE]> <[\+\-]>? <digit_sequence> }

#token digit_sequence { \d+ }
token digit_sequence { <digit>+ }

token hexadecimal_fractional_constant {
    | <hexadecimal_digit_sequence>? \. <hexadecimal_digit_sequence>
    | <hexadecimal_digit_sequence> \.
}

token binary_exponent_part { <[pP]> <[\+\-]>? <digit_sequence> }

token hexadecimal_digit_sequence { <xdigit>+ }

token floating_suffix { <[fFlL]> }

token enumeration_constant { <identifier> }

token character_constant { <[L]>? \' <c_char>+ \' }

## NOTE(review): this token had lost its name in the reviewed copy; "c_char" is
## the Annex A name and is what character_constant refers to above.
token c_char {
    | <-['\\\n]>
    | <escape_sequence>
}

## FIX: a universal character name carries its own backslash, so it is now a
## separate alternative instead of being tried after "\" was already consumed.
token escape_sequence {
    | <universal_character_name>
    | \\ [
         | <['"?\\abfnrtv]>
         | <[0..7]>**{1..3}
         | x <xdigit>+
         ]
}

## A.1.6 String literals

token string_literal { <[L]>? " <s_char>* " }

token s_char {
    | <-["\\\n]>
    | <escape_sequence>
}

## A.1.7 Punctuators

## FIX: alternatives are ordered longest-first (previously e.g. "<" was tried
## before "<=", "." before "...", "%:" before "%:%:"), and the plain "="
## punctuator, which was absent, has been added.
token punctuator {
    | <'%:%:'>
    | <'...'> | <'<<='> | <'>>='>
    | <'->'> | <'++'> | <'--'>
    | <'<<'> | <'>>'>
    | <'<='> | <'>='> | <'=='> | <'!='>
    | <'&&'> | <'||'>
    | <[*/%+\-&^|]> <'='>
    | <'##'>
    | <'<:'> | <':>'> | <'<%'> | <'%>'> | <'%:'>
    | \[ | \]
    | <[(){}.]>
    | <[&*+\-~!/%]>
    | <[<>]>
    | <[^|]>
    | <[?:;]>
    | <[=]>
    | <[,#]>
}

## A.1.8 Header names

## NOTE(review): alias names are placeholders.
token header_name {
    | \< $<name>:=<h_char>+ \>
    | " $<name>:=<q_char>+ "
}

token h_char { <-[\n>]> }

token q_char { <-[\n"]> }

## A.1.9 Preprocessing numbers

## FIX: the signed-exponent alternative is tried before identifier_nondigit,
## which would otherwise consume the "e"/"p" and strand the sign.
token pp_number {
    \.? \d
    [
    | \d
    | <[eEpP]> <[\+\-]>
    | <identifier_nondigit>
    | \.
    ]*
}

## A.2 Phrase structure grammar
## XXX: see below (A.2.1 Expressions follows the preprocessing section)

## A.2.2 Declarations

rule declaration { <declaration_specifier>+ <init_declarator_list>? ; }

rule declaration_specifier {
    | <storage_class_specifier>
    | <type_specifier>
    | <type_qualifier>
    | <function_specifier>
}

## FIX: the first element was the bare word `init_declarator`, i.e. a literal
## string match rather than a call to the rule.
rule init_declarator_list { <init_declarator> [ , <init_declarator> ]* }

rule init_declarator { <declarator> [ = <initializer> ]? }

token storage_class_specifier { typedef | extern | static | auto | register }

rule type_specifier {
    | void | char | short | int | long | float | double
    | signed | unsigned | _Bool | _Complex | _Imaginary
    | <struct_or_union_specifier>
    | <enum_specifier>
    | <typedef_name>
}

## FIX: the braced form is tried first; with the tag-only form first, a rule
## never reached the member list of "struct tag { ... }".
## NOTE(review): alias name $<type> is a placeholder.
rule struct_or_union_specifier {
    [ $<type>:=<'struct'> | $<type>:=<'union'> ]
    [
    | <identifier>? \{ <struct_declaration>+ \}
    | <identifier>
    ]
}

## Declarators are comma-separated, per the Annex A struct-declarator-list.
rule struct_declaration {
    [ <type_specifier> | <type_qualifier> ]+
    <struct_declarator> [ , <struct_declarator> ]*
    ;
}

## The bit-field form is tried first so that "name : width" is not cut at "name".
rule struct_declarator {
    | <declarator>? : <constant_expression>
    | <declarator>
}

## FIX: the enumerator list repetition was `+`, which demanded at least two
## enumerators.
rule enum_specifier {
    enum
    [
    | <identifier>? \{ <enumerator> [ , <enumerator> ]* [,]? \}
    | <identifier>
    ]
}

rule enumerator { <enumeration_constant> [ = <constant_expression> ]? }

token type_qualifier { const | restrict | volatile }

token function_specifier { inline }

rule declarator { <pointer>? <direct_declarator> }

rule direct_declarator {
    [ <identifier> | \( <declarator> \) ]
    [
    | \[ <assignment_expression>? \]
    | \[ \* \]
    | \( <parameter_type_list> \)
    | \( <identifier_list>? \)
    ]*
}

rule pointer { [ \* <type_qualifier>* ]+ }

rule parameter_type_list {
    <parameter_declaration> [ , <parameter_declaration> ]* [ , <'...'> ]?
}

rule parameter_declaration {
    <declaration_specifier>+ [ <declarator> | <abstract_declarator> ]?
}

rule identifier_list { <identifier> [ , <identifier> ]* }

rule type_name {
    [ <type_specifier> | <type_qualifier> ]+ <abstract_declarator>?
}

rule abstract_declarator {
    | <pointer>? <direct_abstract_declarator>
    | <pointer>
}

## Legacy (misspelled) name kept so existing callers continue to resolve.
rule abstract_declatator { <abstract_declarator> }

rule direct_abstract_declarator {
    [
    | \( <abstract_declarator> \)
    | \[ <assignment_expression>? \]
    | \( <parameter_type_list>? \)
    ]
    <direct_abstract_declarator_1>*
}

## FIX: `\[ * \]` had an unescaped `*` (a quantifier); the sibling rule
## direct_declarator spells the same production `\[ \* \]`.
rule direct_abstract_declarator_1 {
    | \[ <assignment_expression>? \]
    | \[ \* \]
    | \( <parameter_type_list>? \)
}

rule typedef_name { <identifier> }

rule initializer {
    | <assignment_expression>
    | \{ <initializer_list> [,]? \}
}

rule initializer_list {
    <designation>? <initializer> [ , <designation>? <initializer> ]*
}

rule designation { <designator>+ = }

rule designator {
    | \[ <constant_expression> \]
    | \. <identifier>
}

## A.2.3 Statements

## expression_statement is tried last so that keyword-led statements such as
## "return;" are not taken as an expression consisting of an identifier.
rule statement {
    | <labeled_statement>
    | <compound_statement>
    | <selection_statement>
    | <iteration_statement>
    | <jump_statement>
    | <expression_statement>
}

rule labeled_statement {
    [
    | case <constant_expression> \:
    | default \:
    | <identifier> \:
    ]
    <statement>
}

rule compound_statement { \{ [ <declaration> | <statement> ]* \} }

rule expression_statement { <expression>? ; }

rule selection_statement {
    | if \( <expression> \) <statement> [ else <statement> ]?
    | switch \( <expression> \) <statement>
}

rule iteration_statement {
    | while \( <expression> \) <statement>
    | do <statement> while \( <expression> \) ;
    | for \(
          [
          | <expression>? ; <expression>? ; <expression>?
          | <declaration> <expression>? ; <expression>?
          ]
      \) <statement>
}

rule jump_statement {
    | goto <identifier> ;
    | continue ;
    | break ;
    | return <expression>? ;
}

## A.2.4 External definitions

rule translation_unit { [ <function_definition> | <declaration> ]+ }

rule function_definition {
    <declaration_specifier>+ <declarator> <declaration>* <compound_statement>
}

## A.3 preprocessing directives
##
## NOTE(review): this section was the most heavily damaged; rule bodies follow
## the Annex A.3 productions and alias names are placeholders.

## Optional horizontal whitespace, a newline, then any further whitespace.
token wsnws { <?ws_minus_n>? \n <?ws>? }

regex preprocessing_file { <group>? }

## Legacy (misspelled) name kept so existing callers continue to resolve.
regex prereprocessing_file { <preprocessing_file> }

regex group { <group_line>+ }

regex group_line {
    | <if_section>
    | <control_line>
    | <source_line>
}

## A text line; per Annex A.3 it does not begin with "#".
regex source_line { <?ws_minus_n> <!before \#> <pp_tokens>? <wsnws> }

rule if_section { <if_group> <elif_group>* <else_group>? <endif_line> }

## Like `ws`, but a bare newline is not consumed (a // comment still ends at,
## and includes, its newline).
token ws_minus_n {
    [
    | <'//'> \N* \n
    | <'/*'> .*? <'*/'>
    | \t
    | <' '>
    ## | <\s-<[\n]>
    ]*
}

regex if_group {
    [
    | \# <?ws_minus_n>? $<directive>:=<'ifndef'> <?ws_minus_n> <identifier>
    | \# <?ws_minus_n>? $<directive>:=<'ifdef'> <?ws_minus_n> <identifier>
    | \# <?ws_minus_n>? $<directive>:=<'if'> <?ws_minus_n> <constant_expression>
    ]
    <wsnws> <group>?
}

regex elif_group {
    \# <?ws_minus_n> $<directive>:=<'elif'> <?ws_minus_n> <constant_expression>
    <wsnws> <group>?
}

regex else_group {
    \# <?ws_minus_n> $<directive>:=<'else'> <wsnws> <group>?
}

regex endif_line {
    \# <?ws_minus_n> $<directive>:=<'endif'> <wsnws>
}

## FIX: the directive is "undef" in C; it was spelled "undefine" here, so real
## "#undef NAME" lines were never recognised as control lines.
## The function-like #define form is tried before the object-like form so that
## "#define F(x) ..." is not read as object-like macro F with body "(x) ...".
regex control_line {
    | \# <?ws_minus_n>? $<directive>:=<'include'> <?ws_minus_n>
         $<tokens>:=<pp_tokens> <wsnws>
    | \# <?ws_minus_n>? $<directive>:=<'define'> <?ws_minus_n>
         [
         | $<name>:=<identifier_p>
           $<params>:=( [ <identifier_list> [, \.\.\.]? | [ \.\.\.] ]? \))
         | $<name>:=<identifier>
         ]
         <?ws_minus_n>? $<replacement>:=<pp_tokens>? <wsnws>
    | \# <?ws_minus_n>?? $<directive>:=<'undef'> <?ws_minus_n> <identifier> <wsnws>
    | \# <?ws_minus_n>?? $<directive>:=<'line'> <?ws_minus_n>
         $<tokens>:=<pp_tokens> <wsnws>
    | \# <?ws_minus_n>?? $<directive>:=<'error'> <?ws_minus_n>
         $<tokens>:=<pp_tokens>? <wsnws>
    | \# <?ws_minus_n>?? $<directive>:=<'pragma'> <?ws_minus_n>
         $<tokens>:=<pp_tokens>? <wsnws>
    | \# <wsnws>
}

## An identifier immediately followed by "(" (the Annex A "lparen" case).
token identifier_p { <identifier> \( }

regex pp_tokens { <preprocessing_token> [ <?ws_minus_n> <preprocessing_token> ]* }

## A.2.1 Expressions

rule primary_expression {
    | <identifier>
    | <constant>
    | <string_literal>
    | \( <expression> \)
}

## The compound-literal form "(type){...}" is tried before primary_expression
## so that its parenthesised type name is not taken as "( expression )".
rule postfix_expression {
    [
    | \( <type_name> \) \{ <initializer_list> [\,]? \}
    | <primary_expression>
    ]
    [
    | <'++'>
    | <'--'>
    | \[ <expression> \]
    | \( [ <assignment_expression> [ \, <assignment_expression> ]* ]? \)
    | \. <identifier>
    | <'->'> <identifier>
    | \( <type_name> \) \{ <initializer_list> [\,]? \}
    ]*
}

## NOTE(review): alias name $<op> is a placeholder.
rule unary_expression {
    [<'++'>|<'--'>|<'sizeof'>]?
    [
    | <postfix_expression>
    | $<op>:=<[&*+\-~!]> <cast_expression>
    | <'sizeof'> \( <type_name> \)
    ]
}

rule cast_expression { [ \( <type_name> \) ]* <unary_expression> }

#proto 'term:' is precedence('22=')
#    is parsed(primary_expression)
#    is pastrule('past_term') { ... }

## postfix expressions
#proto 'postfix:++' is precedence('20=') { ... }
#proto 'postcircumfix:[]' is equiv('postfix:++') { ... }
#rule postfix_expression { \[ \] }
#proto 'postcircumfix:()' is equiv('postfix:++') { ... }
#rule postfix_expression { \( [ [\, ]* ]? \) }
#proto 'infix:.' is equiv('postfix:++') { ... }
#rule postfix_expression { . }
#proto 'infix:->' is equiv('postfix:++') { ... }
#rule postfix_expression { \-\> }
#proto 'postfix:--' is equiv('postfix:++') { ... }
#rule postfix_expression { \( \) \{ [,]? \} }

## unary-expression
#proto 'prefix:++' is precedence('18=') { ... }
#proto 'prefix:--' is equiv('prefix:++') { ... }
#proto 'prefix:&' is equiv('prefix:++') { ... }
#proto 'prefix:*' is equiv('prefix:++') { ... }
#proto 'prefix:+' is equiv('prefix:++') { ... }
#proto 'prefix:-' is equiv('prefix:++') { ... }
#proto 'prefix:~' is equiv('prefix:++') { ... }
#proto 'prefix:!' is equiv('prefix:++') { ... }
#proto 'prefix:sizeof' is equiv('prefix:++') { ... }
#proto 'prefix:sizeof' is equiv('prefix:++') { ... }
#rule unary_expression { sizeof }
#rule unary_expression { sizeof [\( \) ] }

## cast-expression
## XXX: PGE doesn't have a precircumfix:() function (yet?)
#proto 'precircumfix:()' is precedence('17=') {...}
#rule cast_expression { [\( \) ]* }

## Operands of the operator table are parsed by cast_expression.
proto 'term:' is precedence('22=')
    is parsed(cast_expression)
    is pastrule('cast_expression') { ... }

## multiplicative
## FIX: raised from '16=' to '17=' so that it binds tighter than the additive
## operators below, which also use '16='.  ('17=' is otherwise only mentioned
## by the commented-out precircumfix:() proto above.)
proto 'infix:*' is precedence('17=') is post('mul') { ... }
proto 'infix:/' is equiv('infix:*') is post('div') { ... }
proto 'infix:%' is equiv('infix:*') is post('mod') { ... }

## additive
proto 'infix:+' is precedence('16=') is post('add') { ... }
proto 'infix:-' is equiv('infix:+') is post('sub') { ... }

## shift-expression
proto 'infix:<<' is precedence('15=') { ... }
proto 'infix:>>' is precedence('15=') { ... }

## relational-expression
## chaining binary
## FIX: was `is equiv('14=')`; equiv names another operator, whereas '14=' is a
## precedence level.
## NOTE(review): C relational/equality operators are left-associative rather
## than chained; confirm that assoc('chain') is intended.
proto 'infix:<' is precedence('14=')
    is assoc('chain')
    is pasttype('chain') { ... }
proto 'infix:<=' is equiv('infix:<') is pasttype('chain') { ... }
proto 'infix:>' is equiv('infix:<') is pasttype('chain') { ... }
proto 'infix:>=' is equiv('infix:<') is pasttype('chain') { ... }

## equality-expression
proto 'infix:==' is precedence('13=')
    is assoc('chain')
    is pasttype('chain') { ... }
proto 'infix:!=' is equiv('infix:==') is pasttype('chain') { ... }

proto 'infix:&' is precedence('12=') { ... }
proto 'infix:^' is precedence('11=') { ... }
proto 'infix:|' is precedence('10=') { ... }

proto 'infix:&&' is precedence('09=') is pasttype('cond') { ... }
proto 'infix:||' is precedence('08=') is pasttype('cond') { ... }

## ternary
#proto 'ternary:? :' is precedence('07=') is assoc('right')
#    is pasttype('cond')
#    is parsed('conditional_expression')
#    { ... }

## Hands the current match to the operator-precedence table stored in the
## global '$optable' and returns whatever its "parse" method returns.
rule operator_precedence_parser {
    {{
        .local pmc optable
        .local pmc match_result
        #optable = get_hll_namespace ['Cardinal'; 'Grammar']
        optable = get_root_global [ 'parrot'; 'C99'], '$optable'
        match_result = optable."parse"(match)
        .return (match_result)
    }}
}

rule conditional_expression {
    <operator_precedence_parser>
    [ \? <expression> : <operator_precedence_parser> ]*
}

rule assignment_expression {
    <conditional_expression>
    [ <assignment_operator> <conditional_expression> ]*
}

## FIX: simple assignment "=" was absent from this list.
token assignment_operator {
    | <'*='> | <'/='> | <'%='> | <'+='> | <'-='>
    | <'<<='> | <'>>='> | <'&='> | <'^='> | <'|='>
    | <'='>
}

## FIX: the comma group had no quantifier, unlike the other list rules in this
## grammar (e.g. identifier_list), so it matched exactly once.
rule expression { <assignment_expression> [, <assignment_expression> ]* }

rule constant_expression { <conditional_expression> }

## assignment
#proto 'infix:=' is precedence('06=') is assoc('right')
#    is parsed('assignment_expression')
#    is pasttype('assign')
#    { ... }
#proto 'infix:*=' is equiv('infix:=') is parsed('assignment_expression') { ... }
#proto 'infix:/=' is equiv('infix:=') is parsed('assignment_expression') { ... }
#proto 'infix:%=' is equiv('infix:=') is parsed('assignment_expression') { ... }
#proto 'infix:+=' is equiv('infix:=') is parsed('assignment_expression') { ... }
#proto 'infix:-=' is equiv('infix:=') is parsed('assignment_expression') { ... }
#proto 'infix:<<=' is equiv('infix:=') is parsed('assignment_expression') { ... }
#proto 'infix:>>=' is equiv('infix:=') is parsed('assignment_expression') { ... }
#proto 'infix:&=' is equiv('infix:=') is parsed('assignment_expression') { ... }
#proto 'infix:^=' is equiv('infix:=') is parsed('assignment_expression') { ... }
#proto 'infix:|=' is equiv('infix:=') is parsed('assignment_expression') { ... }