"""nesteddict.py - Simple dictionary parser"""

import string
import re


class NestedDict:
    """Parse text describing a nested dictionary.

    After construction, ``self.data`` holds the parsed dictionary, or
    None if the text could not be parsed.  Values are either strings,
    sequences/dicts (when the value text evaluates to a tuple, list or
    dict), or nested dictionaries.

    The format uses indent levels (leading spaces) to indicate nesting:

        key: value
        key: value
        key:
            key: value
            key: value
            key:
                key: value
        (etc)

    Blank lines and lines whose first non-blank character is '#' are
    ignored.  Only the first ':' on a line separates key from value, so
    values may themselves contain colons.
    """

    def __init__(self, text, report_errors=0):
        """Parse ``text`` immediately and store the result in ``self.data``.

        text          -- the whole document as a single string
        report_errors -- when true, parse errors are printed to stdout
        """
        self.report_errors = report_errors
        lines = text.split('\n')
        # parse the file into dictionaries...
        self.data = self.parse(lines)

    def parse(self, lines):
        """Parse a list of lines and return a dictionary (None on error).

        The caller's list is left untouched: parsing consumes a reversed
        copy so that the next line is always a cheap pop from the end.
        """
        stack = list(reversed(lines))
        # -1 so that every line, including unindented ones, belongs to
        # the top-level block.
        return self.parse1(stack, -1)

    def show(self, data=None, indent=0):
        """Print ``data`` (default: ``self.data``) as an indented tree.

        Plain values are printed before nested dictionaries at each
        level.  Each indent unit is rendered as two spaces, and nested
        levels are shown with ``indent + 2``.
        """
        if data is None:
            data = self.data
        if data is None:
            # Parsing failed; there is nothing to show.
            return
        pad = "  " * indent
        # show non-dicts first
        for key in data:
            if not isinstance(data[key], dict):
                print(pad + "%s: %s" % (key, data[key]))
        for key in data:
            if isinstance(data[key], dict):
                print(pad + "%s:" % key)
                self.show(data[key], indent + 2)

    def error(self, message):
        """Print ``message`` if error reporting was enabled."""
        if self.report_errors:
            # Two spaces after the colon reproduce the output of the
            # original ``print "Error: ", message`` statement.
            print("Error:  %s" % (message,))

    # the functions below here are utility functions...

    def remove_comment(self, line):
        """Return ``line`` with everything from the first '#' removed."""
        return line.partition('#')[0]

    def indent(self, line):
        """Return the indent level (number of leading spaces) of ``line``.

        Returns 0 for an empty or all-space line.
        """
        stripped = line.lstrip(' ')
        if not stripped:
            # shouldn't get here since all lines are non-blank, but just
            # in case...
            return 0
        return len(line) - len(stripped)

    def is_comment_or_blank(self, line):
        """Return true if ``line`` is blank or a comment.

        A comment is a line whose first non-whitespace character is
        '#', so comments may be indented to match the block they
        annotate.
        """
        stripped = line.strip()
        return not stripped or stripped.startswith('#')

    def pop(self, lines):
        """Remove and return the next non-blank, non-comment line.

        ``lines`` is in reverse order and is altered: skipped lines are
        discarded.  Returns None when no such line remains.
        """
        while lines:
            line = lines.pop()
            if not self.is_comment_or_blank(line):
                return line
        return None

    def peek(self, lines):
        """Return the next non-blank, non-comment line without consuming it.

        Blank and comment lines in front of it are discarded from
        ``lines``.  Returns None when no such line remains.
        """
        line = self.pop(lines)
        if line is not None:
            lines.append(line)
        return line

    def eval(self, value):
        """Try to evaluate ``value`` into a sequence or dictionary.

        Returns the evaluated object if ``value`` is a string that
        starts with '(', '[' or '{' and evaluates to a tuple, list or
        dict.  Returns None if the string is empty or only whitespace.
        In every other case (including evaluation failure and
        non-string input) the original object is returned unchanged.
        """
        if not isinstance(value, str):
            return value
        s = value.strip()
        if not s:
            return None
        # crude sanity check so we don't end up eval'ing garbage
        if s[0] in ('(', '[', '{'):
            try:
                # NOTE(security): eval() executes arbitrary expressions,
                # so only feed this parser text from a trusted source.
                # ast.literal_eval would be safer but rejects non-literal
                # expressions that are accepted here today.
                new_object = eval(value)
            except Exception:
                return value
            if isinstance(new_object, (tuple, list, dict)):
                return new_object
        return value

    def parse1(self, lines, last_block_indent):
        """Parse one block of lines and return it as a dictionary.

        lines             -- remaining lines in reverse order; consumed
                             as they are parsed
        last_block_indent -- indent of the line that opened this block;
                             the block ends at the first line indented
                             no deeper than this

        Each line is either "key: value" or "key:" followed by a more
        deeply indented block.  Returns None after reporting an error.
        """
        data = {}
        line = self.peek(lines)
        while line is not None:
            if self.indent(line) <= last_block_indent:
                # Dedent: this block is finished.  The line stays on the
                # stack for the enclosing block to handle.
                return data

            line = self.pop(lines)
            current_indent = self.indent(line)

            # Split on the first ':' only so values may contain colons
            # (URLs, dict literals, quoted strings, ...).
            key, colon, value = line.partition(':')
            if not colon:
                self.error("Expected ':'.")
                return None
            key = key.strip()
            value = value.strip()
            if not key:
                self.error("Expected key.")
                return None

            if value:
                # key: value pair - store it, evaluating it into a
                # sequence or dict when possible
                data[key] = self.eval(value)
            else:
                # key: followed by an indented block - parse and store it
                block = self.parse1(lines, current_indent)
                if block is None:
                    # error was reported already, so just propagate
                    return None
                data[key] = block

            # get the next non-comment line
            line = self.peek(lines)
        return data


if __name__ == '__main__':
    text = """
level1-1: 'foo'
level1-2: 'bar'
level1-3:
    level2-1: 'a'
    level2-2: 'b'
    level2-3:
        level3-1: (1.0, 33, 45.7, 0.0, 0, 'some string')
    level2-4: 'c'
level1-4: ('rab', 0.4444)
"""
    level = NestedDict(text)
    if level.data is not None:
        level.show()
        print(level.data)
    else:
        print("Parse Error.")