#!/usr/bin/python
from HTMLParser import HTMLParser
import sys
class Html2Breve(HTMLParser):
    """Convert HTML into Breve-style template source text.

    Each parser event is written to an output stream as it arrives:

    * ``<tag a="v">`` ... ``</tag>`` becomes ``tag ( a_ = "v" ) [ ... ]``,
      with the children indented one level deeper;
    * self-closing tags become a bare ``tag`` (plus attributes, if any);
    * text becomes a quoted string literal;
    * ``<script>`` bodies are wrapped in ``inlineJS ( ''' ... ''' ),``;
    * entity references are written as the bare entity name.

    Siblings are separated by appending a comma to the previous line.
    """

    # Parser state.  These class attributes act as defaults; the first
    # assignment through ``self`` shadows them on the instance.
    __depth = 0          # current indentation, in spaces
    __indent = 4         # spaces added per nesting level
    __injs = False       # True while inside a <script> element
    __jsdepth = 0        # extra indentation applied to script text
    __out = sys.stdout   # default output stream
    # Kind of the last item written: a tag name after an opening tag or a
    # script boundary, 'data', 'entity', or None after a closing or
    # self-closing tag.
    __prev = None
    # Values of __prev after which the next sibling needs a comma.
    __needscomma = (None, 'data', 'entity')

    def __init__(self, out=None):
        """Create a converter.

        out -- optional object with a ``write`` method that receives the
               generated text; when omitted, the class-level default
               stream (``sys.stdout``) is used.
        """
        # Explicit base call: works with old- and new-style base classes.
        HTMLParser.__init__(self)
        if out is not None:
            self.__out = out

    def _separator(self):
        """Return ',' if the previously written sibling needs one, else ''."""
        if self.__depth > 0 and self.__prev in self.__needscomma:
            return ','
        return ''

    def _escape(self, text):
        """Escape text for use inside a double-quoted string literal."""
        # Backslashes first, so the escapes added below are not doubled.
        return (text.replace('\\', '\\\\')
                    .replace('"', '\\"')
                    .replace('\n', '\\n'))

    def fmt_attrs(self, attrs):
        """Format (name, value) pairs as `` ( name_ = "value", ... )``.

        Returns '' when attrs is empty or None.  A value of None (an
        attribute written without a value) is rendered as an empty string,
        and quotes, backslashes and newlines in values are escaped so the
        generated literal stays well-formed.
        """
        if not attrs:
            return ''
        pairs = []
        for name, value in attrs:
            if value is None:
                value = ''
            pairs.append('%s_ = "%s"' % (name, self._escape(value)))
        return ' ( %s )' % ', '.join(pairs)

    def handle_starttag(self, tag, attrs):
        """Open ``tag ( attrs ) [`` and indent; scripts are delegated."""
        if tag == 'script':
            self.handle_startjs(tag, attrs)
            return
        self.__out.write('%s\n%s%s%s [' % (
            self._separator(),
            ' ' * self.__depth,
            tag,
            self.fmt_attrs(attrs),
        ))
        self.__depth += self.__indent
        self.__prev = tag

    def handle_endtag(self, tag):
        """Dedent and close the current element with ``]``."""
        if tag == 'script':
            self.handle_endjs(tag)
            return
        self.__depth -= self.__indent
        self.__out.write('\n%s]' % (' ' * self.__depth))
        # None marks "an element just closed": the next sibling gets a comma.
        self.__prev = None

    def handle_startendtag(self, tag, attrs):
        """Write a self-closing tag as ``tag ( attrs )`` with no brackets."""
        self.__out.write('%s\n%s%s%s' % (
            self._separator(),
            ' ' * self.__depth,
            tag,
            self.fmt_attrs(attrs),
        ))
        self.__prev = None

    def handle_data(self, data):
        """Write text as a string literal, or as raw code inside a script.

        Surrounding whitespace is stripped and whitespace-only data is
        dropped.  Multi-line text is emitted as a ''' literal, re-indented
        to the current depth.
        """
        data = data.strip()
        if not data:
            return

        if self.__injs:
            # Script text is copied verbatim; a chunk starting with '}'
            # dedents before it is written, one starting with '{' indents
            # whatever follows.
            if data.startswith('}'):
                self.__jsdepth -= self.__indent
            self.__out.write('\n%s%s' % (
                ' ' * (self.__depth + self.__jsdepth), data))
            if data.startswith('{'):
                self.__jsdepth += self.__indent
            return

        pad = ' ' * self.__depth
        if '\n' in data:
            # Keep the literal well-formed: escape backslashes and any
            # embedded ''' before re-indenting the continuation lines.
            text = data.replace('\\', '\\\\').replace("'''", "\\'\\'\\'")
            text = text.replace('\n', '\n' + pad)
            literal = "'''\n%s%s\n%s'''" % (pad, text, pad)
        else:
            literal = '"%s"' % self._escape(data)
        self.__out.write('%s\n%s%s' % (self._separator(), pad, literal))
        self.__prev = 'data'

    def handle_startjs(self, tag, attrs):
        """Open an ``inlineJS ( '''`` block for a <script> element.

        attrs is accepted for symmetry with handle_starttag but is not
        written to the output.
        """
        self.__injs = True
        self.__jsdepth = self.__indent
        # Emit the sibling separator like every other item, so a script
        # following another element does not produce two adjacent items
        # without a comma.
        self.__out.write("%s\n%sinlineJS ( '''" % (
            self._separator(), ' ' * self.__depth))
        self.__prev = tag

    def handle_endjs(self, tag):
        """Close the ``inlineJS`` block opened by handle_startjs."""
        self.__injs = False
        self.__jsdepth = 0
        # Start a new line so the closing quotes are not glued to the last
        # line of script text.  The trailing comma is the separator for
        # the next sibling, which is why __prev is set to the tag name (a
        # value that never triggers a second comma).
        self.__out.write("\n%s''' )," % (' ' * self.__depth))
        self.__prev = tag

    def handle_entityref(self, name):
        """Write an entity reference (``&name;``) as the bare name."""
        # Goes through the configured stream like every other handler.
        self.__out.write('%s\n%s%s' % (
            self._separator(),
            ' ' * self.__depth,
            name,
        ))
        self.__prev = 'entity'
def usage():
    """Print a short command-line usage message to standard output.

    The message names the script (``sys.argv[0]``) followed by the HTML
    file argument it requires.
    """
    # A parenthesised single argument is valid both as a Python 2 print
    # statement and as a Python 3 print() call.
    print('''
Usage:
%s <file.html>
''' % sys.argv[0])
if __name__ == '__main__':
    # Command-line entry point: convert the HTML file named by the first
    # argument and write the result to standard output.
    if len(sys.argv) < 2:
        usage()
        raise SystemExit

    # Read inside a ``with`` block so the file handle is closed even if
    # reading fails.  Each line is stripped of surrounding whitespace and
    # the lines are re-joined with '\n' before parsing.
    with open(sys.argv[1], 'rU') as source:
        lines = [line.strip() for line in source]

    p = Html2Breve()
    p.feed('\n'.join(lines))
    p.close()