# Stores data about the sequence

# NEEDS TO BE SYNCH WITH THE REST OF BIOPYTHON AND BIOPERL

import sys

# FormatIO is only imported on Python 2.1 or later; interpreters without
# ``sys.version_info`` are treated as version 1.5.
if getattr(sys, "version_info", (1, 5))[:2] >= (2, 1):
    from Bio import FormatIO

    # Should this be in the module namespace or the record namespace?
    # NOTE(review): the file's original indentation was lost; ``io`` is kept
    # inside the version guard because it depends on the guarded import --
    # confirm against the upstream layout.
    io = FormatIO.FormatIO("SeqRecord",
                           default_input_format="sequence",
                           default_output_format="fasta")


class SeqRecord:
    """A SeqRecord object holds a sequence and information about it.

    Main attributes:
    id          - Identifier such as a locus tag (string)
    seq         - The sequence itself (Seq object)

    Additional attributes:
    name        - Sequence name, e.g. gene name (string)
    description - Additional text (string)
    dbxrefs     - List of database cross references (list of strings)
    features    - Any (sub)features defined (list of SeqFeature objects)
    annotations - Further information about the whole sequence (dictionary)
    """

    def __init__(self, seq, id="", name="", description="",
                 dbxrefs=None, features=None):
        """Create a SeqRecord.

        Arguments:
        seq         - Sequence, required (Seq object)
        id          - Sequence identifier, recommended (string)
        name        - Sequence name, optional (string)
        description - Sequence description, optional (string)
        dbxrefs     - Database cross references, optional (list of strings)
        features    - Any (sub)features, optional (list of SeqFeature objects)

        Note that while an id is optional, we strongly recommend you supply a
        unique id string for each record.  This is especially important
        if you wish to write your sequences to a file.

        You can create a 'blank' SeqRecord object and then populate the
        attributes later.  Note that currently the annotations dictionary
        cannot be specified when creating the SeqRecord.
        """
        self.seq = seq
        self.id = id
        self.name = name
        self.description = description

        # A fresh list is created per instance when none is supplied, so
        # records never share a default list.
        if dbxrefs is None:
            dbxrefs = []
        self.dbxrefs = dbxrefs

        # annotations about the whole sequence
        self.annotations = {}

        # annotations about parts of the sequence
        if features is None:
            features = []
        self.features = features

    def __str__(self):
        """Return a multi-line, human readable summary of the record.

        Non-empty id, name, description and dbxrefs each contribute one
        labelled line, followed by one "/key=value" line per annotation and
        finally the sequence itself.
        """
        lines = []
        if self.id:
            lines.append("ID: %s" % self.id)
        if self.name:
            lines.append("Name: %s" % self.name)
        if self.description:
            # The label was previously misspelled as "Desription".
            lines.append("Description: %s" % self.description)
        if self.dbxrefs:
            lines.append("Database cross-references: "
                         + ", ".join(self.dbxrefs))
        for key in self.annotations:
            lines.append("/%s=%s" % (key, str(self.annotations[key])))
        lines.append(str(self.seq))
        return "\n".join(lines)

    def __repr__(self):
        """Return a constructor-style string built from the repr of each field.

        Only seq, id, name, description and dbxrefs are included; features
        and annotations are not shown.
        """
        return "SeqRecord(seq=%s, id=%s, name=%s, description=%s, dbxrefs=%s)" \
               % tuple(map(repr, (self.seq, self.id, self.name,
                                  self.description, self.dbxrefs)))


if __name__ == "__main__":
    # The following is a very quick example of how to create a SeqRecord object
    from Bio.Seq import Seq
    from Bio.Alphabet import generic_protein

    record = SeqRecord(Seq("MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKAT"
                           "GEMKEQTEWHRVVLFGKLAEVASEYLRKGSQVYIEGQLRTRKWTDQ"
                           "SGQDRYTTEVVVNVGGTMQMLGGRQGGGAPAGGNIGGGQPQGGWGQ"
                           "PQQPQGGNQFSGGAQSRPQQSAPAAPSNEPPMDFDDDIPF",
                           generic_protein),
                       id="NP_418483.1", name="b4059",
                       description="ssDNA-binding protein",
                       dbxrefs=["ASAP:13298", "GI:16131885", "GeneID:948570"])

    # Note that annotations must be added AFTER creating the record
    record.annotations["note"] = "This annotation was added later"

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(record)

    # One way to create a minimal record.
    record2 = SeqRecord(Seq(""))