"""Tests for Martel's Parser module.

Covers Parser.Parser, Parser.RecordParser and Parser.HeaderFooterParser
driven by the various RecordReader classes.  Run the module directly to
execute every test in sequence.
"""

import Martel
from Martel import RecordReader, Parser
from xml.sax import handler, saxutils
from StringIO import StringIO

# NOTE: Do not write formats like this, eg, with "(.|\n)*".  Those
# depend on the implementation using a RecordReader-like interface.
# Instead, you need to write them so they could be used even as one
# huge regexp.


def test_reader_parser():
    """Check Parser.Parser on valid, over-long and truncated input.

    The valid string must parse without error; input with trailing
    garbage and input that stops short must each raise
    Parser.ParserPositionException.
    """
    record = Martel.Group("start", Martel.Rep(Martel.Str("abc"))) + \
             Martel.Group("end", Martel.Rep(Martel.Str("xyz")))
    parser = record.make_parser()

    # Rebuild a bare Parser from the generated tag table.
    parser = Parser.Parser(parser.tagtable)
    parser.setErrorHandler(handler.ErrorHandler())

    parser.parseString("abc" * 10 + "xyz")

    # Extra character after a complete match.
    try:
        parser.parseString("abc" * 10 + "xyzQ")
    except Parser.ParserPositionException:
        pass
    else:
        raise AssertionError("didn't get a position exception")

    # Input ends part-way through the "xyz" token.
    try:
        parser.parseString("abc" * 10 + "x")
    except Parser.ParserPositionException:
        pass
    else:
        raise AssertionError("didn't get a position exception")


class CountErrors(handler.ErrorHandler):
    """SAX error handler that counts error callbacks instead of acting.

    error_count tracks calls to error(); fatal_error_count tracks calls
    to fatalError().
    """

    def __init__(self):
        self.error_count = 0
        self.fatal_error_count = 0

    def error(self, exception):
        """Record one recoverable error; the exception is ignored."""
        self.error_count += 1

    def fatalError(self, exception):
        """Record one fatal error; the exception is ignored."""
        self.fatal_error_count += 1


class CountRecords(handler.ContentHandler):
    """SAX content handler that counts start events for a single tag."""

    def __init__(self, tag):
        # Initialise the base class explicitly (explicit call rather than
        # super() so it also works with Python 2 old-style classes).
        handler.ContentHandler.__init__(self)
        self.tag = tag
        self.count = 0

    def startElement(self, tag, attrs):
        """Increment the counter when the element name equals self.tag."""
        if tag == self.tag:
            self.count += 1


def test_record_parser():
    """Check that RecordParser reports bad records and keeps going.

    Six "X"-prefixed records are fed in; the two whose second line is
    not made of "a" characters must be reported through error(), not
    fatalError(), and the remaining four must be counted.
    """
    record = Martel.Group("A", Martel.Str("X\n") + Martel.Re("a*\n"))
    p = record.make_parser()

    parser = Parser.RecordParser("blah", {}, p.tagtable, (0, 1, {}),
                                 RecordReader.StartsWith, ("X",))

    err = CountErrors()
    parser.setErrorHandler(err)
    count = CountRecords("A")
    parser.setContentHandler(count)

    parser.parseString("X\na\nX\nb\nX\naaa\nX\naaaa\nX\nq\nX\na\n")

    assert err.fatal_error_count == 0, err.fatal_error_count
    assert err.error_count == 2, err.error_count
    assert count.count == 4, count.count


def test_header_footer1():
    """Header, three records and a footer, compared as generated XML."""
    s = """\
header
XX
record 1
//
record 2
//
record 3
//
footer
"""
    # NOTE(review): the XML declaration, the element markup and the footer
    # element in this expected output were reconstructed (they were missing
    # from the reviewed copy) -- confirm against a real run.
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>header
XX
</header><record>record 1
//
</record><record>record 2
//
</record><record>record 3
//
</record><footer>footer
</footer></hf>"""

    debug_level = 1

    # Don't use regexps like these in your code - for testing only!
    header = Martel.Group("header", Martel.Re(r"header(.|\n)*"))
    record = Martel.Group("record", Martel.Re(r"rec(.|\n)*"))
    footer = Martel.Group("footer", Martel.Re(r"footer(.|\n)*"))

    header = header.make_parser(debug_level = debug_level)
    record = record.make_parser(debug_level = debug_level)
    footer = footer.make_parser(debug_level = debug_level)

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.EndsWith, ("XX\n", ), header.tagtable,
        RecordReader.EndsWith, ("//\n", ), record.tagtable,
        RecordReader.StartsWith, ("f", ), footer.tagtable,
        (0, debug_level, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(s))

    result = outfile.getvalue()
    assert result == gold, (result, gold)


def test_header_footer2():
    """Header and records with a footer reader that reads nothing."""
    # Have a header but no footer
    s = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID 2
This is some more data
"""
    # NOTE(review): the XML declaration and element markup in this expected
    # output were reconstructed -- confirm against a real run.
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>
This is some misc. header text
that goes on until the end.
</header><record>ID 1
This is some data
</record><record>ID 2
This is some more data
</record></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    header = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header = header.make_parser()
    record = record.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header.tagtable,
        RecordReader.StartsWith, ("ID", ), record.tagtable,
        RecordReader.Nothing, (), (),
        (0, 1, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(s))

    text = outfile.getvalue()
    assert text == gold, (text, gold)


def test_header_footer3():
    """Records and a footer with a header reader that reads nothing."""
    # Have a footer but no header
    s = """\
ID 1
This is some data
//
ID 2
This is some more data
//
Okay, that was all of the data.
"""
    # NOTE(review): the XML declaration, the element markup and the footer
    # element in this expected output were reconstructed -- confirm against
    # a real run.
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><record>ID 1
This is some data
//
</record><record>ID 2
This is some more data
//
</record><footer>Okay, that was all of the data.
</footer></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    # Require at least 5 characters (just to be safe)
    footer = Martel.Group("footer", Martel.Re(r".....(.|\n)*"))

    record = record.make_parser()
    footer = footer.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Nothing, (), (),
        RecordReader.EndsWith, ("//\n", ), record.tagtable,
        RecordReader.Everything, (), footer.tagtable,
        (0, 1, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(s))

    text = outfile.getvalue()
    assert text == gold, (text, gold)


def test_header_footer4():
    """Header and records with no footer reader at all (None)."""
    # Have a header but no footer - and not footer reader
    s = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID 2
This is some more data
"""
    # NOTE(review): the XML declaration and element markup in this expected
    # output were reconstructed -- confirm against a real run.
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>
This is some misc. header text
that goes on until the end.
</header><record>ID 1
This is some data
</record><record>ID 2
This is some more data
</record></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    header = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header = header.make_parser()
    record = record.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header.tagtable,
        RecordReader.StartsWith, ("ID", ), record.tagtable,
        None, (), (),
        (0, 1, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(s))

    text = outfile.getvalue()
    assert text == gold, (text, gold)


def test_header_footer5():
    """Bad records are skipped when there is no footer reader.

    Seven "ID" records are supplied; the three with a non-numeric ID
    must each produce one error() call and the other four be counted.
    """
    # Make sure I can skip records when there are not footer records
    s = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID A
This is some more data
ID 3
This is again some more data
ID Q
This blah
ID W
QWE
ID 987
To be
ID 897
Or not to be
"""
    header = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header = header.make_parser()
    record = record.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header.tagtable,
        RecordReader.StartsWith, ("ID", ), record.tagtable,
        None, (), (),
        (0, 1, {}))

    count = CountRecords("record")
    hf.setContentHandler(count)
    err = CountErrors()
    hf.setErrorHandler(err)
    hf.parseFile(StringIO(s))

    assert err.error_count == 3, err.error_count
    assert err.fatal_error_count == 0, err.fatal_error_count
    assert count.count == 4, count.count


def test_header_footer6():
    """Bad records are skipped when a footer follows the records.

    Same record set as test_header_footer5, but each record ends with a
    "//" line and the input finishes with a FOOTER line.
    """
    # Make sure I can skip records when there are footer records
    s = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
//
ID A
This is some more data
//
ID 3
This is again some more data
//
ID Q
This blah
//
ID W
QWE
//
ID 987
To be
//
ID 897
Or not to be
//
FOOTER
"""
    header = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header = header.make_parser()
    record = record.make_parser()
    footer = footer.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header.tagtable,
        RecordReader.EndsWith, ("//", ), record.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer.tagtable,
        (0, 1, {}))

    count = CountRecords("record")
    hf.setContentHandler(count)
    err = CountErrors()
    hf.setErrorHandler(err)
    hf.parseFile(StringIO(s))

    assert err.error_count == 3, err.error_count
    assert err.fatal_error_count == 0, err.fatal_error_count
    assert count.count == 4, count.count


def test_header_footer7():
    """A two-line header followed directly by the footer, no records."""
    # header and footer but with no record data
    s = """\
This is some misc. header text
that goes on until the end.
FOOTER
"""
    header = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header = header.make_parser()
    record = record.make_parser()
    footer = footer.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header.tagtable,
        RecordReader.EndsWith, ("//", ), record.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer.tagtable,
        (0, 1, {}))

    count = CountRecords("record")
    hf.setContentHandler(count)
    err = CountErrors()
    hf.setErrorHandler(err)
    hf.parseFile(StringIO(s))

    assert err.error_count == 0, err.error_count
    assert err.fatal_error_count == 0, err.fatal_error_count
    assert count.count == 0, count.count


def test_header_footer8():
    """Line-counted header, records and footer, with stray extra data.

    Each dataset row is (input, expected record count, expected
    error() count, expected fatalError() count).
    """
    # header, record and footer, but with extra data
    # The header reader is CountLines(2), so the header text has to span
    # exactly two lines.
    # NOTE(review): where the line break falls inside the header sentence
    # was reconstructed; the header pattern accepts any text.
    s1 = """Two lines in the
header.
Data 1
Data 2
Data Q
Data 4
FOOTER Abc
FOOTER B
"""
    s2 = """Two lines in the
header.
Data 1
Data 2
Data Q
Data 4
FOOTER Abc
"""
    s3 = """Two lines in the
header.
Data 1
Data 4
FOOTER Abc
"""
    s4 = """Two lines in the
header.
Data Q
FOOTER Abc
"""
    s5 = """Two lines in the
header.
FOOTER Abc
"""
    dataset = (
        (s1, 3, 1, 1),
        (s2, 3, 1, 0),
        (s3, 2, 0, 0),
        (s4, 0, 1, 0),
        (s5, 0, 0, 0),
        )

    header = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record = Martel.Group("record", Martel.Re(r"Data \d+\n"))
    footer = Martel.Group("footer", Martel.Re("FOOTER \w+\n"))

    header = header.make_parser()
    record = record.make_parser()
    footer = footer.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header.tagtable,
        RecordReader.CountLines, (1, ), record.tagtable,
        RecordReader.CountLines, (1, ), footer.tagtable,
        (0, 1, {}))

    # The same parser is reused; fresh counting handlers per input.
    for s, rec_count, err_count, fatal_count in dataset:
        count = CountRecords("record")
        hf.setContentHandler(count)
        err = CountErrors()
        hf.setErrorHandler(err)
        hf.parseFile(StringIO(s))

        assert err.error_count == err_count, (s, err.error_count, err_count)
        assert err.fatal_error_count == fatal_count, \
               (s, err.fatal_error_count, fatal_count)
        assert count.count == rec_count, (s, count.count, rec_count)


def test():
    """Run every test in this module in definition order."""
    test_reader_parser()
    test_record_parser()
    test_header_footer1()
    test_header_footer2()
    test_header_footer3()
    test_header_footer4()
    test_header_footer5()
    # Previously defined but never invoked from here.
    test_header_footer6()
    test_header_footer7()
    test_header_footer8()


if __name__ == "__main__":
    test()