#!/usr/bin/env python
"""
Small utility that parses Netscape bookmarks.
"""

# TODO:
#  LAST_CHARSET: put in ...
#  H3:LAST_MODIFIED: Put in XBEL 1.2?
#  Cross references?
#  Descriptions

from xml.sax import sax2exts,handler
import bookmark
import string, htmlentitydefs

# --- SAX handler for Netscape bookmarks

class NetscapeHandler(handler.ContentHandler):
    """SAX content handler that fills a bookmark.Bookmarks object.

    Element and attribute names are compared case-insensitively.  The
    resulting collection is available as the ``bms`` attribute.

    parser -- optional parser object driving this handler.  When a
              <META http-equiv="content-type"> element carrying a
              ``charset=`` value is seen, the charset is forwarded to
              this parser via setProperty(handler.property_encoding, ...).
              If omitted, the handler falls back to a module-level
              ``the_parser`` (set when this file runs as a script), and
              otherwise leaves the encoding alone.
    """

    def __init__(self, parser=None):
        handler.ContentHandler.__init__(self)
        self.bms = bookmark.Bookmarks()
        self.parser = parser
        self.cur_elem = None    # lower-cased name of the element collecting text
        self.added = None
        self.href = None
        self.visited = None
        self.modified = None
        self.latest = None      # most recently created folder or bookmark
        self.desc = ""          # pending <DD> text, not yet attached to an item
        self.bookmark = ""      # text collected inside the current <A>
        self.url = None

    def _attach_description(self):
        """Move pending <DD> text onto the latest item, if it has none yet."""
        # self.latest is None until the first folder/bookmark is created.
        if self.desc and self.latest is not None and not self.latest.desc:
            self.latest.desc = self.desc

    def _set_encoding(self, content):
        """Forward the charset found in a META content value to the parser."""
        parts = content.split("charset=")
        if len(parts) != 2:
            return
        parser = self.parser
        if parser is None:
            # Backward compatibility: the script entry point publishes the
            # parser as the module global ``the_parser``.
            parser = globals().get('the_parser')
        if parser is not None:
            parser.setProperty(handler.property_encoding, parts[1])

    def startElement(self, name, attrs):
        """Dispatch on the lower-cased element name and record attributes."""
        name = name.lower()
        d = {}
        for key, value in attrs.items():
            d[key.lower()] = value

        ## print 'start', name, d
        if name == "h3":
            # A folder heading: create the folder now, its title text
            # arrives through characters().
            self.cur_elem = "h3"
            if "folded" in d:
                self.folded = "yes"
            else:
                self.folded = "no"
            self.id = d.get('id')
            self.added = d.get('add_date', "")
            self.modified = d.get('last_modified', "")

            folder = self.bms.add_folder('', None)
            folder.id = self.id
            folder.folded = self.folded
            folder.added = self.added
            self.latest = folder

        elif name == "a":
            # A bookmark: remember its attributes; the bookmark itself is
            # added in endElement() once the link text is complete.
            self.cur_elem = "a"
            self.bookmark = ""
            self.added = d.get('add_date')
            self.visited = d.get('last_visit')
            self.modified = d.get('last_modified')
            self.url = d["href"]    # href is required on bookmark anchors

        elif name == 'title':
            self.cur_elem = 'title'
            self.bms.title = ""

        elif name == 'h1':
            self.cur_elem = 'h1'
            self.bms.desc = ""

        elif name == 'hr':
            self.bms.add_separator()

        elif name == 'meta':
            if d.get('http-equiv', '').lower() == 'content-type':
                self._set_encoding(d.get('content', ''))

        elif name in ('dt', 'dl'):
            # A new item or list ends any description being collected.
            self._attach_description()
            self.desc = ""
            self.cur_elem = ''

        elif name == 'dd':
            self.cur_elem = 'dd'
            self.desc = ""

    def characters(self, data):
        """Append character data to whatever the current element collects."""
        ## print 'char', self.cur_elem, data
        if self.cur_elem == "h3":
            self.latest.title += data
        elif self.cur_elem == "a":
            self.bookmark = self.bookmark + data
        elif self.cur_elem == "title":
            self.bms.title = self.bms.title + data
        elif self.cur_elem == "h1":
            self.bms.desc = self.bms.desc + data
        elif self.cur_elem == "dd":
            self.desc = self.desc + data

    def skippedEntity(self, name):
        """Feed the replacement text of an HTML entity to characters()."""
        text = htmlentitydefs.entitydefs.get(name)
        if text is None:
            # Unknown entity: keep the reference as literal text.
            text = '&' + name + ';'
        self.characters(text)

    def endElement(self, name):
        """Finish a bookmark, folder heading or folder list."""
        name = name.lower()
        ## print 'end', name

        if name == "a":
            self.latest = self.bms.add_bookmark(self.bookmark,
                                                added = self.added,
                                                visited = self.visited,
                                                modified = self.modified,
                                                href = self.url)
            # Text after </A> is not part of the link title.
            self.cur_elem = None
        elif name == "h3":
            self.cur_elem = None
        elif name == "dl":
            self.bms.leave_folder()
        elif name == self.cur_elem:
            self.cur_elem = None

    def endDocument(self):
        """Attach a description still pending when the input ends."""
        self._attach_description()

# --- Test-program

if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print("")
        print("A simple utility to convert Netscape bookmarks to XBEL.")
        print("")
        print("Usage: ")
        print("  ns_parse.py <infile> [<outfile>]")
        sys.exit(1)

    the_parser = sax2exts.SGMLParserFactory.make_parser()
    ns_handler = NetscapeHandler(the_parser)
    the_parser.setContentHandler(ns_handler)

    # For Netscape 4, default to Latin-1
    the_parser.setProperty(handler.property_encoding, "iso-8859-1")

    infile = open(sys.argv[1], 'r')
    try:
        the_parser.parse(infile)
    finally:
        infile.close()

    bms = ns_handler.bms
    if len(sys.argv) == 3:
        out = open(sys.argv[2], "w")
        try:
            bms.dump_xbel(out)
        finally:
            out.close()
    else:
        bms.dump_xbel()

    # Done

## ns_handler=NetscapeHandler()
## p=saxexts.SGMLParserFactory.make_parser()
## p.setDocumentHandler(ns_handler)
## p.parseFile(open(r"/home/amk/.netscape/bookmarks.html"))
## ns_handler.bms.dump_xbel()