"""Tools for manipulating data from nmrview .xpk peaklist files.

xpktools.py: A python module containing function definitions and classes
useful for manipulating data from nmrview .xpk peaklist files.

********** INDEX of functions and classes **********

XpkEntry class: A class suited for handling single lines of
    non-header data from an nmrview .xpk file.  This class
    provides methods for extracting data by the field name
    which is listed in the last line of the peaklist header.
"""

import string  # no longer used below; kept so the module's imports are unchanged
import sys

# * * * * * INITIALIZATIONS * * * * *
HEADERLEN = 6
# * * * * * _______________ * * * * *


def _chomp(text):
    """Return ``text`` up to, but not including, its first newline."""
    return text.split("\n")[0]


class XpkEntry:
    """Hold the fields of a single data line from an nmrview .xpk file.

    Usage: XpkEntry(xpkentry, xpkheadline) where xpkentry is the line
    from an nmrview .xpk file and xpkheadline is the line from
    the header file that gives the names of the entries,
    which is typically the sixth line of the header (counting from 1).

    Variables are accessed by their name in the header line, as in
    self.fields["H1.P"], which returns the H1.P entry for example.
    self.fields["entrynum"] returns the line number (1st field of line).
    """

    def __init__(self, entry, headline):
        # Holds all fields from the input line in a dictionary;
        # keys are the data labels from the .xpk header.
        self.fields = {}

        datlist = entry.split()
        headlist = headline.split()

        # datlist[0] is the entry number, so data value k+1 pairs with
        # header label k.  A header shorter than the data still raises
        # IndexError, exactly as before.
        for i, value in enumerate(datlist[1:]):
            self.fields[headlist[i]] = value

        # An empty input line simply has no "entrynum" key.
        if datlist:
            self.fields["entrynum"] = datlist[0]


class Peaklist:
    """Read an entire .xpk file.

    The six header lines are available as the attributes firstline,
    axislabels, dataset, sw, sf and datalabels.
    The data lines are available as the list ``data``.
    All lines are stored without their trailing newline.
    """

    def __init__(self, infn):
        # The context manager closes the file even if reading fails.
        with open(infn, "r") as infile:
            # Read in the header lines
            self.firstline = _chomp(infile.readline())
            self.axislabels = _chomp(infile.readline())
            self.dataset = _chomp(infile.readline())
            self.sw = _chomp(infile.readline())
            self.sf = _chomp(infile.readline())
            self.datalabels = _chomp(infile.readline())

            # Read in the data lines to a list
            self.data = []
            line = infile.readline()
            while line:
                self.data.append(_chomp(line))
                line = infile.readline()

    def residue_dict(self, index):
        """Return a dictionary of data lines indexed by residue number.

        ``index`` is the nucleus whose label column supplies the residue
        number.  It should be given in the same form as it appears in
        the xpk label line (H1, 15N for example).

        Each residue number, as a string, maps to the list of data lines
        for that residue.  The keys "maxres" and "minres" hold the
        largest and smallest residue number seen as integers, or -1 if
        there are no data lines.  The dictionary is also stored as
        ``self.dict``.
        """
        maxres = -1
        minres = -1

        self.dict = {}
        for line in self.data:
            # Cast the data line into the XpkEntry class to look up the
            # "<index>.L" label; the text before the first "." is the
            # residue number.
            label = XpkEntry(line, self.datalabels).fields[index + ".L"]
            res = int(label.split(".")[0])

            # -1 marks "not set yet"
            if maxres == -1:
                maxres = res
            if minres == -1:
                minres = res

            maxres = max(maxres, res)
            minres = min(minres, res)

            # Start a new list for a new residue, else append to it
            self.dict.setdefault(str(res), []).append(line)

        self.dict["maxres"] = maxres
        self.dict["minres"] = minres

        return self.dict

    def write_header(self, outfn):
        """Write the six header lines to the file named ``outfn``."""
        outfile = _try_open_write(outfn)
        try:
            for header_line in (
                self.firstline,
                self.axislabels,
                self.dataset,
                self.sw,
                self.sf,
                self.datalabels,
            ):
                outfile.write(header_line + "\n")
        finally:
            # Close the file even if a write fails part-way through.
            outfile.close()


def _try_open_read(fn):
    """Open ``fn`` for reading; print a message and exit on IOError."""
    try:
        infile = open(fn, "r")
    except IOError:
        print("file %s could not be opened for reading - quitting." % (fn,))
        sys.exit(0)
    return infile


def _try_open_write(fn):
    """Open ``fn`` for writing; print a message and exit on IOError."""
    try:
        outfile = open(fn, "w")
    except IOError:
        print("file %s could not be opened for writing - quitting." % (fn,))
        sys.exit(0)
    return outfile


def replace_entry(line, fieldn, newentry):
    """Return ``line`` with field number ``fieldn`` replaced by ``newentry``.

    Fields are counted starting with 1.
    No padding is implemented currently.  Spacing will change if
    the original field entry and the new field entry are of
    different lengths.
    """
    # This function depends on _find_start_entry
    start = _find_start_entry(line, fieldn)
    leng = len(line[start:].split()[0])
    return line[:start] + str(newentry) + line[(start + leng):]


def _find_start_entry(line, n):
    """Return the index of the first character of entry ``n`` in ``line``.

    ``line`` is space delimited and n is counted starting with 1.
    The n=1 field by definition begins at the first character.
    This function is used by replace_entry.
    """
    if n == 1:
        return 0  # Special case

    # Count the number of fields by counting spaces
    c = 1
    leng = len(line)

    # Initialize variables according to whether the first character
    # is a space or a character.  ``infield`` flags that the counter
    # is currently inside a field.
    if line[0] == " ":
        infield = False
        field = 0
    else:
        infield = True
        field = 1

    # NOTE(review): this loop and the return were garbled in the reviewed
    # copy of the file; they are reconstructed from the comments and the
    # initialisation above - confirm against the upstream source.
    while c < leng and field < n:
        if infield:
            if line[c] == " " and line[c - 1] != " ":
                infield = False
        elif line[c] != " ":
            infield = True
            field = field + 1
        c = c + 1

    return c - 1


# NOTE(review): the ``def`` line of data_table was missing from the reviewed
# copy of the file; the parameter order (fn_list, datalabel, keyatom) is
# reconstructed from the names used in the body - confirm against upstream.
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    ``fn_list`` is the list of xpk file names.  The data element
    reported is ``datalabel`` and the index for the data table is by
    the nucleus indicated by ``keyatom``.

    Returns a list of newline-terminated strings, one for each residue
    number from the smallest to the largest found in any file.  A line
    holds the residue number followed by one tab-separated value per
    file: the ``datalabel`` field of that file's first data line for the
    residue, or "*" if the file has no line for it.  An empty
    ``fn_list`` gives an empty list.
    """
    outlist = []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)
    if not dict_list:
        return outlist

    # Find global max and min residue numbers
    minr = min(res_dict["minres"] for res_dict in dict_list)
    maxr = max(res_dict["maxres"] for res_dict in dict_list)

    for res in range(minr, maxr + 1):  # s.t. res numbers
        line = str(res)
        for res_dict, label in zip(dict_list, label_line_list):  # s.t. dictionaries
            if str(res) in res_dict:
                line = line + "\t" + XpkEntry(res_dict[str(res)][0], label).fields[datalabel]
            else:
                line = line + "\t" + "*"
        outlist.append(line + "\n")

    return outlist


def _sort_keys(dict):
    """Return the keys of ``dict`` as a sorted list."""
    # list.sort() sorts in place and returns None, so build the sorted
    # list with sorted() instead.
    return sorted(dict)


def _read_dicts(fn_list, keyatom):
    """Read multiple files into a list of residue dictionaries.

    Returns [dict_list, datalabel_list]: for each file name in
    ``fn_list``, the residue dictionary indexed by ``keyatom`` and the
    data label line of that file.
    """
    dict_list = []
    datalabel_list = []
    for fn in fn_list:
        peaklist = Peaklist(fn)
        dict_list.append(peaklist.residue_dict(keyatom))
        datalabel_list.append(peaklist.datalabels)

    return [dict_list, datalabel_list]