"""Run HTML Tidy over every ``*.pt`` file found below ``plone_dir``.

For each template, the Tidy diagnostics that do not match an entry in
``ignored_errors`` are printed under the template's path.  The walk is
started when this module is executed.
"""
import os

from mx.Tidy import tidy

# Root directory that is searched recursively for ``*.pt`` files.
plone_dir = '/home/runyaga/Zope-2.6.0b2-linux2-x86/lib/python/Products/CMFPlone'

# Substrings of Tidy diagnostics that should not be reported.  A diagnostic
# line is dropped as soon as it contains any one of these fragments.
ignored_errors = [
    'unknown attribute "xmlns:metal"',
    'unknown attribute "xmlns:tal"',
    'unknown attribute "xmlns:i18n"',
    'unknown attribute "tal:',
    'unknown attribute "i18n:',
    'unknown attribute "metal:',
    # NOTE(review): this generic fragment also matches the two more specific
    # "...:block is not recognized" entries below; they are kept as listed.
    ' is not recognized',
    'tal:block is not recognized',
    'metal:block is not recognized',
    ' has XML attribute "xml:lang"',
    'inserting missing \'title\' element',
    'unknown attribute "onfocus"',  # why does Tidy not like this?
    'discarding unexpected ',
    'trimming empty ',
    ' lacks "summary" attribute',
    'img lacks "src" attribute',
    'img lacks "alt" attribute',
    'This document has errors that must be fixed before',
    'using HTML Tidy to generate a tidied up version.',
]


def _is_ignored(message):
    """Return True if *message* contains any fragment of ``ignored_errors``."""
    for fragment in ignored_errors:
        if fragment in message:
            return True
    return False


def check_pt(filename):
    """Run Tidy on one file and print the diagnostics that are not ignored.

    *filename* is opened for reading and handed to ``tidy`` with
    ``output_markup=0``.  Nothing is printed when every non-blank line of
    Tidy's error output matches ``ignored_errors``; otherwise the file name
    is printed, followed by the remaining lines, each indented by a tab.
    """
    source = open(filename)
    try:
        # Only the error text is used; the counts and the tidied output that
        # tidy() returns alongside it are discarded.
        (nerrors, nwarnings, outputdata, errordata) = \
            tidy(source, output_markup=0)
    finally:
        # Close the handle even when tidy() raises.
        source.close()

    reported = []
    for err in errordata.split('\n'):
        if err.strip() and not _is_ignored(err):
            reported.append('\t' + err + '\n')

    if reported:
        print(filename)
        print(''.join(reported))


def visit(arg, dirname, files):
    """Check every ``*.pt`` entry of *files*, resolved relative to *dirname*.

    *arg* is accepted for compatibility with the ``os.path.walk`` callback
    signature and is not used.
    """
    for name in files:
        if name.endswith('.pt'):
            check_pt(os.path.join(dirname, name))


def _walk_templates(top):
    """Call ``visit`` for every directory below *top* (inclusive)."""
    # os.walk replaces os.path.walk, which no longer exists in Python 3.  It
    # passes file names only, so a directory whose name happens to end in
    # '.pt' is never handed to check_pt().
    for dirname, _subdirs, filenames in os.walk(top):
        visit([], dirname, filenames)


_walk_templates(plone_dir)