# ports//devel/py-simpleparse/work/SimpleParse-2.1.0a1/simpleparsegrammar.py

'''Default SimpleParse EBNF grammar as a generator with productions

This module defines the original SimpleParse
grammar.  It uses the generator objects directly
as this is the first grammar being written.
'''
from simpleparse.objectgenerator import *
from simpleparse import generator, baseparser
import string
from simpleparse.dispatchprocessor import *
# Probe for the unicode builtin: merely naming it raises NameError on
# interpreters that do not provide it.
try:
	unicode
except NameError:
	HAVE_UNICODE = 0
else:
	HAVE_UNICODE = 1

# note that whitespace is slightly different
# due to a bug with NULL-matching repeating groups
# we make all the ts references ts?
#
# Shared element tokens: each Name below refers to a production by its
# name, and the same object is reused in several of the definitions
# further down.  "ts" is created with report = 0, which corresponds to
# the <ts> (unreported) declaration in the textual grammar at the end
# of this module.
whitespace = Name (value = "ts", report = 0)
element_token = Name( value = "element_token" )
literal = Name ( value = "literal")
group = Name ( value = "group")
# refers to the production called "range" (defined below)
characterrange = Name ( value = "range")
name = Name ( value = "name")


# Generator holding the hand-built definitions of the EBNF grammar;
# Parser.buildTagger builds its tag-table from this object.
SPGenerator = generator.Generator ()

# declarationset := declaration+
SPGenerator.addDefinition(
	"declarationset",
	Name (value = "declaration", repeating = 1),
)



# declaration := ts, (unreportedname/expandedname/name), ts, ':', ':'?, '=', seq_group
SPGenerator.addDefinition (
	"declaration",
	SequentialGroup (
		children = [
			whitespace,
			FirstOfGroup (
				children = [
					Name (value = "unreportedname", ),
					Name (value = "expandedname", ),
					Name (value = "name", ),
				],
			),
			whitespace,
			# the second ':' is optional, so both ":=" and "::=" are accepted
			Literal (value = ":"),
			Literal (value = ":", optional=1),
			Literal (value = "=",),
			Name( value = "seq_group"),
		],
	)
)

# >group< := '(', seq_group, ')'
SPGenerator.addDefinition (
	"group",
	SequentialGroup (
		children = [
			Literal (value ="("),
			Name( value= "seq_group"),
			Literal (value =")"),
		],
		# corresponds to the ">group<" (expanded) form in the textual grammar
		expanded = 1,
	)
)

# Alternatives allowed at each position of a seq_group:
# (error_on_fail/fo_group/element_token).  The same FirstOfGroup object
# is used for the first item and for the repeated items of seq_group.
_seq_children = FirstOfGroup(
	children = [
		Name(value="error_on_fail"),
		Name(value="fo_group"),
		Name(value="element_token"),
	],
)

# seq_group := ts, (error_on_fail/fo_group/element_token),
#              (ts, seq_indicator, ts, (error_on_fail/fo_group/element_token))*, ts
SPGenerator.addDefinition (
	"seq_group",
	SequentialGroup (
		children = [
			whitespace,
			_seq_children,
			SequentialGroup(
				children = [
					whitespace,
					Name( value="seq_indicator"),
					whitespace,
					_seq_children,
				],
				repeating = 1, optional = 1,
			),
			whitespace,
		],
	),
)

# fo_group := element_token, (ts, fo_indicator, ts, element_token)+
SPGenerator.addDefinition (
	"fo_group",
	SequentialGroup (
		children = [
			element_token,
			SequentialGroup(
				children = [
					whitespace,
					Name( value="fo_indicator"),
					whitespace,
					element_token,
				],
				# not optional: at least one "/ element_token" must follow
				repeating = 1,
			),
		],
	)
)
# <seq_indicator> := ','   (created with report=0)
SPGenerator.addDefinition (
	"seq_indicator",
	Literal(value = ",", report=0 ),
)	
# <fo_indicator> := "/"   (created with report=0)
SPGenerator.addDefinition (
	"fo_indicator",
	Literal(value = "/", report=0 ),
)	

# element_token := lookahead_indicator?, ts, negpos_indicator?, ts,
#                  (literal/range/group/name), ts, occurence_indicator?,
#                  ts, error_on_fail?
SPGenerator.addDefinition (
	"element_token",
	SequentialGroup (
		children = [
			Name (value = "lookahead_indicator", optional = 1),
			whitespace,
			Name (value = "negpos_indicator", optional = 1),
			whitespace,
			FirstOfGroup (
				children = [
					literal,
					characterrange,
					group,
					name,
				]
			),
			whitespace,
			Name (value = "occurence_indicator", optional = 1),
			whitespace,
			Name (value = "error_on_fail", optional = 1),
		]
	)
)

# negpos_indicator := [-+]
SPGenerator.addDefinition (
	"negpos_indicator",
	Range (value = "+-" )
)
# lookahead_indicator := "?"
SPGenerator.addDefinition (
	"lookahead_indicator",
	Literal(value = "?" ),
)	

# occurence_indicator := [+*?]
SPGenerator.addDefinition (
	"occurence_indicator",
	Range (value = "+*?" ),
)	
# error_on_fail := "!", (ts,literal)?
SPGenerator.addDefinition (
	"error_on_fail",
	SequentialGroup (
		children = [
			Literal (value ="!"),
			SequentialGroup (
				children = [
					whitespace,
					Name( value="literal"),
				],
				optional = 1,
			),
		],
	),
)

# unreportedname := '<', ts, name, ts, '>'
SPGenerator.addDefinition (
	"unreportedname",
	SequentialGroup (
		children = [
			Literal (value ="<"),
			whitespace,
			name,
			whitespace,
			Literal (value =">"),
		]
	)
)
# expandedname := '>', ts, name, ts, '<'
SPGenerator.addDefinition (
	"expandedname",
	SequentialGroup (
		children = [
			Literal (value =">"),
			whitespace,
			name,
			whitespace,
			Literal (value ="<"),
		]
	)
)

# name := [a-zA-Z_],[a-zA-Z0-9_]*
SPGenerator.addDefinition (
	"name",
	SequentialGroup (
		children = [
			Range(value ='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'),
			Range(value ='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789', optional= 1, repeating= 1),
		]
	)
)

SPGenerator.addDefinition (
	"ts", # ( [ \011-\015]+ / comment )*
	FirstOfGroup (
		children = [
			Range(value =' \011\012\013\014\015', repeating=1),
			Name( value = "comment" ),
		],
		repeating = 1, optional=1,
	)
)
SPGenerator.addDefinition (
	"comment", # '#', -'\n'*, '\n'
	SequentialGroup (
		children = [
			Literal ( value ="#"),
			# everything up to (not including) the newline
			Literal (value ="\n", negative = 1, repeating = 1, optional=1),
			# the terminating newline is required
			Literal (value = "\n",),
		],
	),
)

SPGenerator.addDefinition (
	"literalDecorator", # literalDecorator    :=  [c]
	Range( value = 'c' )
)

SPGenerator.addDefinition (
	"literal",  # literalDecorator?, ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'")  /  ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')
	SequentialGroup(
		children = [
			Name( value = 'literalDecorator', optional=1 ),
			FirstOfGroup (
				children = [
					# single-quoted form
					SequentialGroup (
						children = [
							Literal (value ="'"),
							FirstOfGroup (
								children = [
									Name (value = "CHARNOSNGLQUOTE"),
									Name (value = "ESCAPEDCHAR"),
								],
								optional = 1, repeating = 1,
							),
							Literal (value ="'"),
						],
					),
					# double-quoted form
					SequentialGroup (
						children = [
							Literal (value ='"'),
							FirstOfGroup (
								children = [
									Name (value = "CHARNODBLQUOTE"),
									Name (value = "ESCAPEDCHAR"),
								],
								optional = 1, repeating = 1,
							),
							Literal (value ='"'),
						],
					)
				],
			),
		],
	)
)

SPGenerator.addDefinition (
	"range",   # '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
	SequentialGroup (
		children =[
			Literal (value ="["),
			# a ']' directly after the opening '[' is taken as a member
			Name (value ="CHARBRACE",optional = 1),
			# a '-' at the start or at the end is taken as a member
			Name (value ="CHARDASH",optional = 1),
			FirstOfGroup(
				children = [
					Name (value ="CHARRANGE"),
					Name (value ="CHARNOBRACE"),
				],
				optional = 1, repeating = 1,
			),
			Name (value ="CHARDASH",optional = 1),
			Literal (value ="]"),
		],
	)
)
SPGenerator.addDefinition (
	"CHARBRACE",   # CHARBRACE           :=  ']'
	Literal (value = "]"),
)
SPGenerator.addDefinition (
	"CHARDASH",   # CHARDASH            :=  '-'
	Literal (value = "-"),
)
SPGenerator.addDefinition (
	"CHARRANGE",   # CHARRANGE           :=  CHARNOBRACE, '-', CHARNOBRACE
	SequentialGroup (
		children =[
			Name (value ="CHARNOBRACE"),
			Literal (value ="-"),
			Name (value ="CHARNOBRACE"),
		],
	),
)
SPGenerator.addDefinition (
	"CHARNOBRACE",   # CHARNOBRACE         :=  ESCAPEDCHAR/CHAR
	FirstOfGroup(
		children =[
			Name (value ="ESCAPEDCHAR"),
			Name (value ="CHAR"),
		],
	),
)
SPGenerator.addDefinition (
	"CHAR",   # CHAR                :=  -[]]
	Literal (
		value ="]",
		negative = 1,
	),
)

# NOTE: unlike the textual grammar at the end of this module, this
# definition accepts both 'x' and 'X' before the hex digits and has no
# alternatives for the \u / \U unicode escapes.
SPGenerator.addDefinition (
	"ESCAPEDCHAR",   # '\\',( SPECIALESCAPEDCHAR / ([xX],HEXESCAPEDCHAR) / OCTALESCAPEDCHAR  )
	SequentialGroup (
		children =[
			Literal (value ="\\"),
			FirstOfGroup(
				children = [
					Name (value ="SPECIALESCAPEDCHAR"),
					SequentialGroup(
						children = [
							Range( value = 'xX' ),
							Name( value="HEXESCAPEDCHAR"),
						]
					),
					Name (value ="OCTALESCAPEDCHAR"),
				],
			),
		],
	)
)

SPGenerator.addDefinition (
	"SPECIALESCAPEDCHAR",   # [\\abfnrtv"']
	Range(value ='\\abfnrtv"\''),
)

SPGenerator.addDefinition (
	"OCTALESCAPEDCHAR",   # [0-7],[0-7]?,[0-7]?
	SequentialGroup (
		children =[
			Range (value ="01234567"),
			Range (value ="01234567", optional = 1),
			Range (value ="01234567", optional = 1),
		],
	)
)
SPGenerator.addDefinition (
	"HEXESCAPEDCHAR",   # [0-9a-fA-F],[0-9a-fA-F]
	SequentialGroup (
		children =[
			Range (value ="0123456789abcdefABCDEF"),
			Range (value ="0123456789abcdefABCDEF"),
		],
	)
)


SPGenerator.addDefinition (
	"CHARNODBLQUOTE",   # -[\\"]+
	Range(value ='\\"', negative = 1, repeating = 1),
)
SPGenerator.addDefinition (
	"CHARNOSNGLQUOTE",   # -[\\']+
	Range(value ="\\'", negative = 1, repeating = 1),
)

# The grammar written out in its own EBNF syntax.  This string is not
# referenced anywhere else in the visible source (NOTE(review): it is
# presumably used by tests or for documentation -- confirm).
# It is not an exact transcription of the SPGenerator definitions above:
#  * ESCAPEDCHAR here adds the "u"/"U" unicode escapes and only accepts
#    a lowercase 'x', whereas the definition above accepts [xX] and has
#    no unicode alternatives
#  * unreportedname/expandedname here have no ts around the name,
#    whereas the definitions above allow it
declaration = r"""declarationset      :=  declaration+
declaration         :=  ts, (unreportedname/expandedname/name) ,ts,':',':'?,'=',seq_group

element_token       :=  lookahead_indicator?, ts, negpos_indicator?,ts, (literal/range/group/name),ts, occurence_indicator?, ts, error_on_fail?

negpos_indicator    :=  [-+]
lookahead_indicator :=  "?"
occurence_indicator :=  [+*?]
error_on_fail       :=  "!", (ts,literal)?

>group<             :=  '(',seq_group, ')'
seq_group           :=  ts,(error_on_fail/fo_group/element_token),
                          (ts, seq_indicator, ts,
                              (error_on_fail/fo_group/element_token)
                          )*, ts

fo_group            :=  element_token, (ts, fo_indicator, ts, element_token)+


# following two are likely something peoples might want to
# replace in many instances...
<fo_indicator>      :=  "/"
<seq_indicator>     :=  ','

unreportedname      :=  '<', name, '>'
expandedname        :=  '>', name, '<'
name                :=  [a-zA-Z_],[a-zA-Z0-9_]*
<ts>                :=  ( [ \011-\015]+ / comment )*
comment             :=  '#',-'\n'*,'\n'
literal             :=  literalDecorator?,("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'")  /  ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')
literalDecorator    :=  [c]



range               :=  '[',CHARBRACE?,CHARDASH?, (CHARRANGE/CHARNOBRACE)*, CHARDASH?,']'
CHARBRACE           :=  ']'
CHARDASH            :=  '-'
CHARRANGE           :=  CHARNOBRACE, '-', CHARNOBRACE
CHARNOBRACE         :=  ESCAPEDCHAR/CHAR
CHAR                :=  -[]]
ESCAPEDCHAR         :=  '\\',( SPECIALESCAPEDCHAR / ('x',HEXESCAPEDCHAR) / ("u",UNICODEESCAPEDCHAR_16) /("U",UNICODEESCAPEDCHAR_32)/OCTALESCAPEDCHAR  )
SPECIALESCAPEDCHAR  :=  [\\abfnrtv"']
OCTALESCAPEDCHAR    :=  [0-7],[0-7]?,[0-7]?
HEXESCAPEDCHAR      :=  [0-9a-fA-F],[0-9a-fA-F]
CHARNODBLQUOTE      :=  -[\\"]+
CHARNOSNGLQUOTE     :=  -[\\']+
UNICODEESCAPEDCHAR_16 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]
UNICODEESCAPEDCHAR_32 := [0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F],[0-9a-fA-F]
"""

### Now the interpreter objects...
class Parser(baseparser.BaseParser):
	"""EBNF-to-generator parser for grammars in SimpleParse format

	An instance is constructed from the text of an EBNF grammar.  The
	text is parsed starting at the 'declarationset' production and
	post-processed by an SPGrammarProcessor; the generator built by
	that processor is then published as self.generator.

	Unlike most Parsers, an instance is meant to be created anew for
	each piece of data (i.e. each EBNF) it parses, so the standard
	API is bent quite a lot.
	"""
	_rootProduction = 'declarationset'
	def __init__( self, ebnf, prebuilts=(), methodSource=None, definitionSources=() ):
		"""Parse ebnf and store the resulting generator on self.generator

		ebnf -- grammar text in simpleparse EBNF format
		prebuilts -- (name, table) pairs handed to SPGrammarProcessor
		methodSource -- accepted, but not used by this method
		definitionSources -- definition sources handed to
			SPGrammarProcessor

		Raises ValueError when parsing stops before the end of ebnf.
		"""
		grammarProcessor = SPGrammarProcessor( prebuilts, definitionSources )
		parseResult = self.parse( ebnf, self._rootProduction, processor=grammarProcessor )
		success, tags, stopped = parseResult
		if stopped != len(ebnf):
			# report where the parse stalled and a sample of what is left
			stoppedLine = lines(0, stopped, ebnf)
			remainder = ebnf[stopped:stopped+100]
			raise ValueError(
				"""Unable to complete parsing of the EBNF, stopped at line %s (%s chars of %s)
Unparsed:\n%s..."""%(stoppedLine, stopped, len(ebnf), remainder)
			)
		self.generator = grammarProcessor.generator
	def buildTagger( self, name=None, processor = None ):
		"""Return the tag-table that SPGenerator builds for production name"""
		tagger = SPGenerator.buildParser( name, processor )
		return tagger

class SPGrammarProcessor( DispatchProcessor ):
	"""Processing object for post-processing an EBNF into a new generator"""
	### top level
	def __init__( self, prebuilts=(), definitionSources=() ):
		"""Set up the generator which will receive the processed productions

		prebuilts -- sequence of (name, table) pairs; a table that is an
			ElementToken is registered as given, anything else is
			wrapped in Prebuilt(value=table) first
		definitionSources -- objects passed, one at a time, to the
			generator's addDefinitionSource
		"""
		target = generator.Generator()
		self.generator = target
		for (name, table) in prebuilts:
			if not isinstance( table, ElementToken):
				table = Prebuilt(value=table)
			target.addDefinition( name, table)
		for source in definitionSources:
			target.addDefinitionSource( source )
	
	def declaration( self, (tag, left, right, sublist), buffer):
		'''Base declaration from the grammar, a "production" or "rule"'''
		name = sublist[0]
		expanded = 0
		if name[0] == "unreportedname":
			name = name[3][0]
			# note that the info is stored in the wrong place :(
			report = 0
		elif name[0] == 'expandedname':
			report = 1
			expanded = 1
			name = name[3][0]
		else:
			report = 1
		name = getString( name, buffer )
		self.currentProduction = name
		content = dispatch( self, sublist[1], buffer )
		content.report = report
		content.expanded = expanded
		self.generator.addDefinition(
			name,
			content,
		)
		del self.currentProduction

	### element configuration
	def element_token( self, (tag, left, right, sublist), buffer):
		'''get the children, then configure'''
		base = None
		negative = 0
		optional = 0
		repeating = 0
		lookahead = 0
		errorOnFail = None
		for tup in sublist:
			result = dispatch( self, tup, buffer )
			if tup[0] == 'negpos_indicator':
				negative = result
			elif tup[0] == 'occurence_indicator':
				optional, repeating = result
			elif tup[0] == 'lookahead_indicator':
				lookahead = result
			elif tup[0] == 'error_on_fail':
				# we do some extra work here
				errorOnFail = result
				self._config_error_on_fail( errorOnFail, (tag,left,tup[1],[]), buffer )
			else:
				base = result
		base.optional = optional
		base.negative = negative
		base.repeating = repeating
		base.lookahead = lookahead
		if errorOnFail:
			base.errorOnFail = errorOnFail
		return base

	### generator-node-builders
	def seq_group( self, (tag, left, right, sublist), buffer):
		"""Process a sequential-group into a SequentialGroup element token"""
		children = dispatchList( self, sublist, buffer )
		errorOnFail = None
		result = []
		for (item,tup) in map(None,children,sublist):
			if isinstance( item, ErrorOnFail ):
				errorOnFail = item
			else:
				if errorOnFail:
					item.errorOnFail = errorOnFail.copy()
					self._config_error_on_fail(
						item.errorOnFail,
						tup,
						buffer
					)
				result.append( item )
		if len(result) == 1:
			# single-item sequential group (very common)
			return result[0]
		elif not result:
			raise ValueError( """SequentialGroup on line %s doesn't have an element-token child! grammar was %s"""%( lines(0,left, buffer), buffer[left:left+25]))
		base = SequentialGroup(
			children = result,
		)
		return base
	def fo_group( self, (tag, left, right, sublist), buffer):
		"""Process a first-of-group into a FirstOf element token"""
		children = dispatchList( self, sublist, buffer )
		if len(children) == 1:
			# this should never happen, but if it does, we can deal with it I suppose...
			return children[0]
		base = FirstOfGroup(
			children = children
		)
		return base
		
	def literal( self, (tag, left, right, sublist), buffer):
		'''Turn a literal result into a literal generator'''
		if sublist and sublist[0][0] == 'literalDecorator':
			# right now only have the one decorator...
			sublist = sublist[1:]
			classObject = CILiteral
		else:
			classObject = Literal
		elements = dispatchList( self, sublist, buffer)
		### Should check for CILiteral with non-CI string or single-character value!
		return classObject( value = string.join(elements, "" ) )

	def range( self, (tag, left, right, sublist), buffer):
##		if hasattr( Range, 'requiresExpandedSet') and Range.requiresExpandedSet:
		return Range(
			value = string.join(dispatchList( self, sublist, buffer),''),
		)
##		else:
##			# need to build up a new-syntax version of the range...
##			# escape ^ to \^
##			# escape \ to \\
##			# escape - to \-
##			# make sure range-sets are in proper order...
##			raise NotImplementedError( """Haven't got the new CharSet version implemented yet""")
	def name( self, tup, buffer):
		"""Build a Name element token referring to the matched name"""
		referenced = getString(tup, buffer)
		return Name( value = referenced )
	### simple translators
	# indicator character -> (optional, repeating), as unpacked by
	# element_token from the result of occurence_indicator
	occurenceIndicatorMap = {
		'*': (1,1),
		'+': (0,1),
		'?': (1,0),
	}
	def occurence_indicator( self, tup, buffer):
		'''Look up the (optional, repeating) pair for the matched indicator'''
		indicator = getString(tup, buffer)
		return self.occurenceIndicatorMap[indicator]
	def lookahead_indicator( self, tup, buffer ):
		"""The indicator carries no data: being dispatched at all means "yes" """
		present = 1
		return present
	def error_on_fail( self, (tag,left,right,children), buffer ):
		"""If present, we are going to make the current object an errorOnFail type,

		If there's a string literal child, then we use it to create the
		"message" attribute of the errorOnFail object.
		"""
		err = ErrorOnFail()
		if children:
			(tag,left,right,children) = children[0]
			message = string.join( dispatchList( self, children, buffer), "")
			err.message = message
		return err
	def _config_error_on_fail( self, errorOnFail, tup, buffer ):
		"""Record on errorOnFail what was expected and, if known, where

		expected is the slice of buffer delimited by tup[1] and tup[2];
		production is only set while self.currentProduction exists.
		"""
		start, stop = tup[1], tup[2]
		# what we expected to find...
		errorOnFail.expected = buffer[start:stop]
		if not hasattr( self, "currentProduction"):
			return
		errorOnFail.production = self.currentProduction
		

	# indicator character -> value element_token stores as "negative"
	negposIndicatorMap = {
		'+': 0,
		'-': 1,
	}
	def negpos_indicator( self, tup, buffer ):
		'''Look up whether the matched indicator means "negative"'''
		indicator = getString(tup, buffer)
		return self.negposIndicatorMap[indicator]

	def CHARNODBLQUOTE( self, tup, buffer):
		"""Return the matched text unchanged"""
		matched = getString(tup, buffer)
		return matched
	# the other plain-character productions are handled identically
	CHAR = CHARNOSNGLQUOTE = CHARNODBLQUOTE
	def ESCAPEDCHAR( self, (tag, left, right, sublist), buffer):
		return string.join(dispatchList( self, sublist, buffer), "")
	# character following the backslash -> character it stands for;
	# the keys are the members of the SPECIALESCAPEDCHAR range
	specialescapedmap = {
	'a':'\a',
	'b':'\b',
	'f':'\f',
	'n':'\n',
	'r':'\r',
	't':'\t',
	'v':'\v',
	'\\':'\\',
	'"':'"',
	"'":"'",
	}
	def SPECIALESCAPEDCHAR( self, tup, buffer):
		"""Translate the matched escape letter through specialescapedmap"""
		letter = getString(tup, buffer)
		return self.specialescapedmap[ letter ]
	def OCTALESCAPEDCHAR(self, tup, buffer):
		"""Return the character whose code is the matched text read as octal"""
		digits = getString(tup, buffer)
		code = string.atoi( digits, 8 )
		return chr(code)
	def HEXESCAPEDCHAR( self, tup , buffer):
		"""Return the character whose code is the matched text read as hexadecimal"""
		digits = getString(tup, buffer)
		code = string.atoi( digits, 16 )
		return chr(code)
	def CHARNOBRACE( self, (tag, left, right, sublist), buffer):
		return string.join(dispatchList( self, sublist, buffer), "")
	def CHARRANGE( self, (tag, left, right, sublist), buffer):
		'''Create a string from first to second item'''
		# following should never raise an error, as there's only one possible format...
		try:
			first, second = map( ord, dispatchList( self, sublist, buffer))
		except TypeError:
			import pdb
			pdb.set_trace ()
		if second < first:
			second, first = first, second
		return string.join(map( chr, range(first, second+1),), '')
	def CHARDASH( self, tup , buffer):
		"""A dash matched as CHARDASH stands for a literal '-'"""
		dash = '-'
		return dash
	def CHARBRACE( self, tup , buffer):
		"""A brace matched as CHARBRACE stands for a literal ']'"""
		brace = ']'
		return brace

	if HAVE_UNICODE:
		def UNICODEESCAPEDCHAR_16( self, (tag, left, right, sublist), buffer):
			"""Only available in unicode-aware Python versions"""
			char = unichr(int( buffer[left:right], 16 ))
			return char
		### Only available in wide-unicode Python versions (rare)
		UNICODEESCAPEDCHAR_32 = UNICODEESCAPEDCHAR_16
	else:
		# ignore unicode-specific characters, though this isn't a particularly
		# useful approach, I don't see a better option at the moment...
		def UNICODEESCAPEDCHAR_16( self, (tag, left, right, sublist), buffer):
			"""Only available in unicode-aware Python versions"""
			return ""
			
		def UNICODEESCAPEDCHAR_32( self, (tag, left, right, sublist), buffer):
			"""Only available in wide-unicode Python versions (rare)"""
			return ""
# syntax highlighted by Code2HTML, v. 0.9.1