# -*- ruby -*- # # xmldtd.ry # # Copyright (C) Ueno Katsuhiro 2000 # # $Id: xmldtd.ry,v 1.2 2000/12/19 11:36:13 katsu Exp $ # # # from XML 1.0 specification: # #[30] extSubset ::= TextDecl? extSubsetDecl #[31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep )* # #[77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' #[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') #[80] EncodingDecl ::= S 'encoding' Eq ( '"' EncName '"' | "'" EncName "'" ) # #[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl # | PI | Comment #[28a] DeclSep ::= PEReference | S # #[61] conditionalSect ::= includeSect | ignoreSect #[62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' #[63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' #[64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* #[65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) # #[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' #[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children #[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' # | '(' S? '#PCDATA' S? ')' #[47] children ::= (choice | seq) ('?' | '*' | '+')? #[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? #[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' #[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' # #[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' #[53] AttDef ::= S Name S AttType S DefaultDecl #[54] AttType ::= StringType | TokenizedType | EnumeratedType #[55] StringType ::= 'CDATA' #[56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' # | 'NMTOKEN' | 'NMTOKENS' #[57] EnumeratedType ::= NotationType | Enumeration #[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' #[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' #[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | ( ('#FIXED' S)? AttValue ) #[10] AttValue ::= '"' ([^<&"] | Reference)* '"' # | "'" ([^<&'] | Reference)* "'" # #[70] EntityDecl ::= GEDecl | PEDecl #[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' #[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' #[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
#[74] PEDef ::= EntityValue | ExternalID #[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' # | "'" ([^%&'] | PEReference | Reference)* "'" #[75] ExternalID ::= 'SYSTEM' S SystemLiteral # | 'PUBLIC' S PubidLiteral S SystemLiteral #[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") #[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" #[76] NDataDecl ::= S 'NDATA' S Name # #[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' #[83] PublicID ::= 'PUBLIC' S PubidLiteral # #[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' #[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) # #[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' class XMLDTDScanner options no_result_var rule extSubset: # none # | PI extSubsetDecls | extSubsetDecl extSubsetDecls extSubset: TextDecl extSubsetDecls { p val[0] } | extSubsetDecls extSubsetDecls: # none # | extSubsetDecls PI { p [ :PI, val[1] ] } | extSubsetDecls PEReference { p [ :PI, val[1] ] } | extSubsetDecls extSubsetDecl extSubsetDecl: '' | '' | ignore '[' { @src.ignore = true } ignoreSectContents ']]>' { @src.ignore = false } include: INCLUDE | _INCLUDE ignore: IGNORE | _IGNORE ignoreSectContents: # none # | '' markupDecl: elementDecl | attlistDecl | entityDecl | notationDecl | Comment { p [ :Comment, val[0] ] } elementDecl: ELEMENT _name contentspec contentspec: _EMPTY | _ANY | ' (' childrenContent ')' repmark repmark: # none # | '?' 
| '*' | '+' childrenContent: seq | choice cp: name repmark | '(' childrenContent ')' repmark seq: cp | seq ',' cp choice: cp '|' cp | choice '|' cp attlistDecl: ATTLIST _name attDef attDef: # none # | attDef _name attType defaultDecl attType: _CDATA | _ID | _IDREF | _IDREFS | _ENTITY | _ENTITIES | _NMTOKEN | _NMTOKENS | _NOTATION ' (' namelist ')' | ' (' namelist ')' namelist: name | namelist '|' name defaultDecl: _HREQUIRED | _HIMPLIED | _FIXED _String | _String entityDecl: ENTITY entity entity: _name entityDef { on_general_entity(val[0], val[1]) } | ' %' _name peDef { on_parameter_entity(val[1], val[2]) } entityDef: _String | externalID ndataDecl ndataDecl: # none # | _NDATA _name peDef: _String | externalID externalID: _SYSTEM _String { [ nil, val[1] ] } | _PUBLIC _String _String { [ val[1], val[2] ] } notationDecl: NOTATION _name notationID notationID: externalID | _PUBLIC _String { [ val[1], nil ] } _name: _Name | _keywords name: Name | _name | keywords keywords: INCLUDE | IGNORE | ELEMENT | ATTLIST | ENTITY | NOTATION # | HPCDATA _keywords: _INCLUDE | _IGNORE | _EMPTY | _ANY | _CDATA | _ID | _IDREF | _IDREFS | _ENTITY | _ENTITIES | _NMTOKEN | _NMTOKENS | _NOTATION | _SYSTEM | _PUBLIC | _NDATA # | _HREQUIRED | _HIMPLIED | _HFIXED | _HPCDATA end ---- header ---- # # xmldtd.rb : generated by racc # require 'xmlscan' require 'strscan' ---- inner ---- Keywords = {} KeywordsWithSpace = {} [ 'ELEMENT', 'ATTLIST', 'INCLUDE', 'IGNORE', '#PCDATA', 'ENTITY', 'NOTATION', ].each { |i| Keywords[i] = i.tr('#', 'H').intern } [ 'EMPTY', 'ANY', 'CDATA', 'ID', 'IDREF', 'IDREFS', 'ENTITIES', 'NMTOKEN', 'NMTOKENS', '#REQUIRED', '#IMPLIED', '#FIXED', 'SYSTEM', 'PUBLIC', 'NDATA', 'INCLUDE', 'IGNORE', '#PCDATA', 'ENTITY', 'NOTATION' ].each { |i| KeywordsWithSpace[i] = ('_' + i.tr('#', 'H')).intern } class DTDTokenizer # \s* => PI # \s+--.*?-- => Comment following space # --.*?-- => Comment # \s* ' '[' # \s*]]> => ']]>' # \s* ' Name following space # \w+ => Name # \s*> => '>' # \s+( 
=> ' (' # ( => '(' # \s*| => '|' # \s*, => ',' # \s*) => ')' # \s+\* => ' *' # \* => '*' # \s+\? => ' ?' # \? => '?' # \s+\+ => ' +' # \+ => '+' # \s+".*?" => String following space # ".*?" => String # \s+'.*?' => String following space # '.*?' => String # \s*%\w+; => PEReference # \s+% => ' %' PortWrapper = XMLScanner::XMLSource::PortWrapper def initialize(src) @src = PortWrapper.new(src) @ignore = false @buf = [] nextline end def lineno @src.lineno end def path @src.path end attr_reader :ignore def ignore=(f) @ignore = (f != nil and f != false) end def pushback(str) @buf.push @scan if @scan str = ' ' + str @scan = StringScanner.new(str << ' ') self end private def nextline @scan = (@buf.shift or ((s = @src.gets) and StringScanner.new(s))) end def scan_until(re, t) ret = @scan.scan_until(re) unless ret then ret = @scan.rest while true raise ParseError, "unterminated #{t} meets EOF" unless nextline s = @scan.scan_until(re) break if s ret << @scan.rest end ret << s end ret end def skip_until(re) ret = @scan.skip_until(re) unless ret then ret = @scan.restsize while true return nil unless nextline n = @scan.skip_until(re) break if n ret += @scan.restsize end ret += n end ret end def skip_space begin n = @scan.skip(/\s+/) break unless n end while @scan.empty? and nextline n end def scan_pi unless @scan.scan(/([^\s\?\>]+)(?:\s+|(?=\?>))/) then raise ParseError, "parse error at `/, 'PI') s.chop!.chop! if name == 'xml' then [ :TextDecl, s ] else [ :PI, [ name, s ] ] end end def on_name(s, space) if space then [ (KeywordsWithSpace[s] or :_Name), s ] else [ (Keywords[s] or :Name), s ] end end def on_pe_ref(s) [ :PEReference, s ] end def on_delimiter(c) skip_space [ c, c ] end public Delim = '\\s!"%&\'\\(\\)\\*\\+,\\/<=>\\?\\[\\]\\|' def next_token nextline while @scan and @scan.empty? 
space = skip_space if @scan if not @scan then [ false, :eof ] elsif @ignore then if @scan.skip_until(//) then s = @scan[0] [ s, s ] else [ false, :eof ] end elsif s = @scan.scan(/[^-#{Delim}][^#{Delim}]*/o) then on_name s, space elsif s = @scan.scan(/<\?||--|[\[>()|,*?+%'"]/) then if s == '--' then s = scan_until(/--/, 'comment') s.chop!.chop! if space then [ :_Comment, s ] else [ :Comment, s ] end elsif s == '