# -*- coding: utf-8 -*-

# Copyright (C) 2003-2006 by Igor E. Novikov
# Copyright (C) 1998, 2000 by Bernhard Herzog
#
# This library is covered by GNU Library General Public License.
# For more info see COPYRIGHTS file in sK1 root directory.

# A simple parser for PostScript files that conform to the Document
# Structuring Conventions (DSC).
#
# In its current form this is mainly intended for parsing EPS files and
# extracting the information necessary for Sketch (BoundingBox and
# resource dependencies)
#

import re
import string
from string import split, strip, atof

import streamfilter

try:
    from app.events.warn import warn, INTERNAL
except ImportError:
    # The application's warning machinery is not importable (e.g. when
    # this module is run as a script): fall back to a no-op.
    def warn(*args):
        pass
    INTERNAL = None

# match a line containing a DSC-comment.
rx_dsccomment = re.compile('^%%([a-zA-Z+]+):?')

# match the beginning of an EPS file.
rx_eps_magic = re.compile('^%!.*EPSF')

# the characters with codes 33 to 126. A header line `%X' only counts
# as a comment line if X is one of these.
endcommentchars = string.maketrans('', '')[33:127]

# value of a header comment whose real value is given in the trailer.
ATEND = '(atend)'

# Attribute names of EpsInfo that a comment keyword must never
# overwrite when unknown comments are stored with setattr.
_RESERVED_KEYS = ('atend', 'NeedResources', 'SupplyResources', 'print_info')


class DSCError(Exception):
    # Raised when a file is not an EPS file or a required DSC comment
    # is missing.
    pass


#
#	Class EpsInfo
#
# The instance variables of this class are the key/value pairs extracted
# from the header comments of an EPS file.
#
# BoundingBox:
#
#	The bounding box of the document as a tuple of floats. The DSC
#	say that the BoundingBox should be given in UINTs but since some
#	programs (incorrectly) use floats here we also use float here.
#
# DocumentNeededResources:
#
#	A dictionary describing the resources needed by the document.
#	The information is stored in the *keys* of the dictionary.
#
#	A key has the form (TYPE, VALUE) where TYPE is a string giving
#	the resource type (such as 'font') and value is a string
#	describing the resource (such as 'Times-Roman')
#
# DocumentSuppliedResources:
#
#	The resources supplied by the document in the same format as
#	DocumentNeededResources.
#
# atend:
#
#	True, if any comment in the header had a value of `(atend)'.
#	(Used internally by the parsing functions)
#
# Every other header comment is stored as a string attribute named
# after the comment keyword.

class EpsInfo:

    def __init__(self):
        self.DocumentSuppliedResources = {}
        self.DocumentNeededResources = {}
        self.BoundingBox = None
        self.atend = 0

    def NeedResources(self, type, resources):
        # Record each name in RESOURCES as a needed resource of type TYPE.
        for res in resources:
            self.DocumentNeededResources[(type, res)] = 1

    def SupplyResources(self, type, resources):
        # Record each name in RESOURCES as a supplied resource of type
        # TYPE.
        for res in resources:
            self.DocumentSuppliedResources[(type, res)] = 1

    def print_info(self):
        # print the contents of self in a readable manner. (for debugging)
        print('BoundingBox:\t%s' % repr(self.BoundingBox))
        for title, resources in (
                ('DocumentNeededResources: [', self.DocumentNeededResources),
                ('DocumentSuppliedResources: [', self.DocumentSuppliedResources)):
            # one line per dictionary: the title, the keys and a closing
            # bracket, all separated by single spaces.
            parts = [title]
            for res in resources:
                parts.append(str(res))
            parts.append(']')
            print(' '.join(parts))
        for key, value in self.__dict__.items():
            if key not in ('BoundingBox', 'DocumentNeededResources',
                           'DocumentSuppliedResources', 'atend'):
                print('%s\t%s' % (key, value))


def IsEpsFileStart(data):
    # return true if data might be the beginning of an Encapsulated
    # PostScript file. (The result is the match object of rx_eps_magic
    # or None.)
    return rx_eps_magic.match(data)


def _register_resources(register, value, ignored_format):
    # Handle the value of a %%Document...Resources comment.
    #
    # VALUE has the form `TYPE NAME NAME ...'. Font names are passed to
    # REGISTER (a bound NeedResources or SupplyResources method); for
    # any other resource type a warning built from IGNORED_FORMAT is
    # issued. An empty VALUE is ignored.
    if not value:
        return
    fields = split(value, None, 1)
    res_type = fields[0]
    # a comment may name a type without listing any resources.
    if len(fields) > 1:
        names = fields[1]
    else:
        names = ''
    if res_type == 'font':
        register(res_type, split(names))
    else:
        # XXX: might occasionally be interesting for the user
        warn(INTERNAL, ignored_format, res_type, names)


def _store_comment(info, key, value):
    # Store the DSC comment KEY with the (stripped) text VALUE in INFO.
    if key in ('BoundingBox', 'DocumentNeededResources',
               'DocumentNeededFonts', 'DocumentSuppliedResources'):
        if value == ATEND:
            # the real value follows in the %%Trailer section.
            info.atend = 1
        elif key == 'BoundingBox':
            # the bounding box should be given in UINTs
            # but may also (incorrectly) be float.
            info.BoundingBox = tuple(map(atof, split(value)))
        elif key == 'DocumentNeededResources':
            _register_resources(info.NeedResources, value,
                                'needed resource %s %s ignored')
        elif key == 'DocumentNeededFonts':
            info.NeedResources('font', split(value))
        else:
            _register_resources(info.SupplyResources, value,
                                'supplied resource %s %s ignored')
    elif key not in _RESERVED_KEYS:
        # any other comment simply becomes a string attribute. Reserved
        # names are skipped so that a file cannot replace the methods
        # or the atend flag of INFO.
        setattr(info, key, value)


def parse_header(file, info):
    # Parse the header section of FILE and store the information found
    # in the INFO object which is assumed to be an instance of EpsInfo.
    #
    # This works for the %%Trailer section as well so that parsing the
    # beginning (until %%EndComments) and end (from %%Trailer) if
    # necessary with the same INFO object should get all information
    # available.
    #
    # FILE only needs a readline method. Parsing stops at %%EndComments,
    # at %%EOF, at the first line that is not a comment line or at the
    # end of the file.
    last_key = ''
    line = file.readline()
    while line:
        match = rx_dsccomment.match(line)
        if match:
            key = match.group(1)
            if key == 'EndComments' or key == 'EOF':
                break
            if key == '+':
                # continuation line: it belongs to the previous comment.
                key = last_key
            # key is only empty for a continuation line that has no
            # comment to continue; such a line is ignored.
            if key:
                _store_comment(info, key, strip(line[match.end(0):]))
            last_key = key
        else:
            # the header comments end at a line not beginning with %X,
            # where X is a printable character not in SPACE, TAB, NL
            # XXX: It is probably wrong to do this in the %%Trailer
            if line[0] != '%':
                break
            if len(line) == 1 or line[1] not in endcommentchars:
                break
        line = file.readline()


def _skip_data(file, fields):
    # Skip the data announced by a %%BeginData comment.
    #
    # FIELDS is the list of whitespace separated arguments of the
    # comment. The first one is the amount of data; if the third one is
    # `Lines' the amount is counted in lines, otherwise in bytes.
    if not fields:
        # should never happen in a conforming document...
        return
    try:
        count = int(fields[0])
    except ValueError:
        # malformed count: nothing can be skipped reliably.
        return
    if len(fields) >= 3 and fields[2] == 'Lines':
        for i in range(count):
            file.readline()
        return
    blocksize = 4000
    while count > 0:
        chunk = file.read(min(count, blocksize))
        if not chunk:
            # the file ends before the announced amount of data; stop
            # here instead of looping forever.
            break
        count = count - len(chunk)


def skip_to_comment(file, comment):
    # Read lines from FILE until a line with a DSC comment COMMENT is
    # found. Handles (it should at least) (binary) data and embedded
    # documents correctly (i.e. isn't confused by embedded documents
    # containing COMMENT as well, if they are enclosed in
    # Begin/EndDocument comments).
    #
    # The file is positioned right after the line containing the
    # comment. Raise a DSCError if the comment is not found
    line = file.readline()
    while line:
        match = rx_dsccomment.match(line)
        if match:
            key = match.group(1)
            if key == comment:
                return
            elif key == 'BeginDocument':
                # skip embedded document
                skip_to_comment(file, 'EndDocument')
            elif key == 'BeginData':
                _skip_data(file, split(strip(line[match.end(0):])))
        line = file.readline()
    raise DSCError('DSC-Comment %s not found' % comment)


def parse_eps_file(filename):
    # Extract information from the EPS file FILENAME. Return an instance
    # of EpsInfo with the appropriate parameters. Raise a DSCError, if
    # the file is not an EPS file.
    stream = streamfilter.LineDecode(open(filename, 'r'))
    try:
        info = EpsInfo()
        if not IsEpsFileStart(stream.readline()):
            raise DSCError('%s is not an EPS file' % filename)
        parse_header(stream, info)
        if info.atend:
            # some values were deferred to the trailer: read them too.
            skip_to_comment(stream, 'Trailer')
            parse_header(stream, info)
    finally:
        # close the stream even if the file turns out to be invalid.
        stream.close()
    return info


#
#
#

if __name__ == '__main__':
    import sys

    stream = open(sys.argv[1], 'r')
    try:
        info = EpsInfo()
        parse_header(stream, info)
        if info.atend:
            skip_to_comment(stream, 'Trailer')
            parse_header(stream, info)
    finally:
        stream.close()
    info.print_info()