from hachoir_metadata.metadata import RootMetadata, registerExtractor
from hachoir_metadata.safe import fault_tolerant
from hachoir_parser.misc import TorrentFile, TrueTypeFontFile, OLE2_File, PcfFile
from hachoir_core.field import isString
from hachoir_core.error import warning
from hachoir_parser import guessParser
class TorrentMetadata(RootMetadata):
KEY_TO_ATTR = {
u"announce": "url",
u"comment": "comment",
u"creation_date": "creation_date",
}
INFO_TO_ATTR = {
u"length": "file_size",
u"name": "filename",
}
def extract(self, torrent):
for field in torrent[0]:
if field.name in self.KEY_TO_ATTR:
key = self.KEY_TO_ATTR[field.name]
value = field.value
setattr(self, key, value)
elif field.name == "info" and "value" in field:
self.processInfo(field["value"])
def processInfo(self, info):
for field in info:
if field.name in self.INFO_TO_ATTR:
key = self.INFO_TO_ATTR[field.name]
value = field.value
setattr(self, key, value)
elif field.name == "piece_length":
self.comment = "Piece length: %s" % field.display
class TTF_Metadata(RootMetadata):
NAMEID_TO_ATTR = {
0: "copyright", # Copyright notice
3: "title", # Unique font identifier
5: "version", # Version string
8: "author", # Manufacturer name
11: "url", # URL Vendor
14: "copyright", # License info URL
}
def extract(self, ttf):
if "header" in ttf:
self.extractHeader(ttf["header"])
if "names" in ttf:
self.extractNames(ttf["names"])
@fault_tolerant
def extractHeader(self, header):
self.creation_date = header["created"].value
self.last_modification = header["modified"].value
self.comment = u"Smallest readable size in pixels: %s pixels" % header["lowest"].value
self.comment = u"Font direction: %s" % header["font_dir"].display
@fault_tolerant
def extractNames(self, names):
offset = names["offset"].value
for header in names.array("header"):
key = header["nameID"].value
foffset = offset + header["offset"].value
field = names.getFieldByAddress(foffset*8)
if not field or not isString(field):
continue
value = field.value
if key not in self.NAMEID_TO_ATTR:
continue
key = self.NAMEID_TO_ATTR[key]
if key == "version" and value.startswith(u"Version "):
# "Version 1.2" => "1.2"
value = value[8:]
setattr(self, key, value)
class OLE2_Metadata(RootMetadata):
SUMMARY_ID_TO_ATTR = {
2: ("title", False),
4: ("author", False),
6: ("comment", False),
8: ("author", False),
9: ("version", True), # Revision number
12: ("creation_date", False),
13: ("last_modification", False),
14: ("nb_page", False),
15: ("comment", True), # Nb. words
16: ("comment", True), # Nb. characters
18: ("producer", False),
}
def extract(self, ole2):
if "summary[0]" in ole2:
self.useSummary(ole2["summary[0]"])
def useSummary(self, summary):
# FIXME: Remove this hack
# Problem: there is no method to get all fragments from a file
summary.parent._feedAll()
# ---
stream = summary.getSubIStream()
summary = guessParser(stream)
if not summary:
print "Unable to create summary parser"
if "os" in summary:
self.os = summary["os"].display
if "section[0]" not in summary:
return
summary = summary["section[0]"]
for property in summary.array("property_index"):
self.useProperty(summary, property)
@fault_tolerant
def useProperty(self, summary, property):
field = summary.getFieldByAddress(property["offset"].value*8)
if not field:
return
if not field.hasValue():
return
value = field.value
try:
key, use_prefix = self.SUMMARY_ID_TO_ATTR[property["id"].value]
except LookupError:
# warning("Skip %s[%s]=%s" % (
# property["id"].display, property["id"].value, value))
return
if use_prefix:
value = "%s: %s" % (property["id"].display, value)
setattr(self, key, value)
class PcfMetadata(RootMetadata):
PROP_TO_KEY = {
'CHARSET_REGISTRY': 'charset',
'COPYRIGHT': 'copyright',
'WEIGHT_NAME': 'font_weight',
'FOUNDRY': 'author',
'FONT': 'title',
'_XMBDFED_INFO': 'producer',
}
def extract(self, pcf):
if "properties" in pcf:
self.useProperties(pcf["properties"])
def useProperties(self, properties):
last = properties["total_str_length"]
offset0 = last.address + last.size
for index in properties.array("property"):
# Search name and value
value = properties.getFieldByAddress(offset0+index["value_offset"].value*8)
if not value:
continue
value = value.value
if not value:
continue
name = properties.getFieldByAddress(offset0+index["name_offset"].value*8)
if not name:
continue
name = name.value
if name not in self.PROP_TO_KEY:
warning("Skip %s=%r" % (name, value))
continue
key = self.PROP_TO_KEY[name]
setattr(self, key, value)
registerExtractor(TorrentFile, TorrentMetadata)
registerExtractor(TrueTypeFontFile, TTF_Metadata)
registerExtractor(OLE2_File, OLE2_Metadata)
registerExtractor(PcfFile, PcfMetadata)
syntax highlighted by Code2HTML, v. 0.9.1