"""Sequence and multi-sequence objects are defined here."""
from crops import __prog__, __description__, __author__
from crops import __date__, __version__, __copyright__
import os
import io
import copy
import logging
from crops.iomod.taggers import retrieve_id
from crops.iomod.taggers import makeheader
from crops.libs.rescodes import reslist
from crops.libs.rescodes import nuclist
from crops.elements.intervals import intinterval
[docs]def guess_type(inseq):
"""Return the biological type of the sequence as guessed from residue types.
:param inseq: Sequence to be evaluated.
:type inseq: str
:return: Sequence type ('Protein' or 'DNA' or 'RNA' or 'Unknown').
:rtype: str
"""
if not isinstance(inseq, str):
logging.critical("Sequence 'inseq' should be a string.")
raise TypeError
outtype = None
for char in inseq:
if (char == 'C' or char == 'A' or char == 'G' or char == 'I' or
char == 'X' or char == '-' or char == '+' or char == '*'):
pass
elif char not in nuclist.values():
if char in reslist.values():
outtype = 'Protein'
else:
outtype = 'Unknown'
else:
if char == 'T':
if outtype == 'DNA' or outtype == 'Protein':
pass
elif outtype is None:
outtype = 'DNA'
elif outtype == 'RNA':
outtype = 'Protein'
elif char == 'U':
if outtype == 'RNA' or outtype == 'Protein':
pass
elif outtype is None:
outtype = 'RNA'
elif outtype == 'DNA':
outtype = 'Protein'
if outtype is None:
outtype = 'DNA or RNA'
return outtype
[docs]class sequence:
"""A :class:`crops.elements.sequences.sequence` object representing a single chain sequence.
The :class:`crops.elements.sequences.sequence` class represents a data structure to hold all
sequence versions and other useful information characterising it.
It contains functions to store, manipulate and organise sequence versions.
:param seqid: Sequence identifier. Can be used alone or together with oligomer ID, defaults to None.
:type seqid: str
:param oligomer: Oligomer identifier. Sometimes as important as seqid, defaults to None.
:type oligomer: str, optional
:param seq: Sequence string, defaults to None.
:type seq: str, optional
:param chains: The names of chains having this sequence, defaults to None.
:type chains: set [str], optional
:param source: Source of the sequence, defaults to None
:type source: str, optional
:param header: Standard .fasta header, starting with ">", defaults to None.
:type header: str, optional
:param biotype: Type of molecule ('Protein', 'DNA', 'RNA'...), defaults to None.
:type biotype: str, optional
:param extrainfo: Other useful information about the sequence, defaults to None.
:type extrainfo: str, optional
:ivar name: Sequence identifier.
:vartype name: str
:ivar oligomer_id: Oligomer identifier.
:vartype oligomer_id: str
:ivar chains: The names of chains having this sequence.
:vartype chains: set [str]
:ivar seqs: The set of sequences, including default "mainseq".
:vartype seqs: dict [str, str]
:ivar source: Source of the sequence.
:vartype source: str
:ivar source_headers: A list of headers from input files.
:vartype source_headers: list [str]
:ivar crops_header: A new header containing the information from the object that will be used when printing sequence and cropmap.
:vartype crops_header: str
:ivar biotype: Type of molecule ('Protein', 'DNA', 'RNA'...).
:vartype biotype: str
:ivar infostring: Other useful information about the sequence.
:vartype infostring: str
:ivar cropmap: A dictionary mapping residue numbers from original sequence to cropped sequence.
:vartype cropmap: dict [int, int]
:ivar cropbackmap: A dictionary mapping residue numbers from cropped sequence to original sequence.
:vartype cropbackmap: dict [int, int]
:ivar msa: A free variable not used by CROPS itself.
:vartype msa: Any
:ivar cropmsa: A free variable not used by CROPS itself.
:vartype cropmsa: Any
:ivar intervals: The integer interval object containing the cropping information.
:vartype intervals: :class:`crops.elements.intervals.intinterval`
:raises `TypeError`: For wrong input formats.
:example:
>>> from crops.elements import sequences as ces
>>> myseq = ces.sequence(seqid='1', oligomer = 'exampleID')
>>> myseq.mainseq('GATTACA')
>>> myseq.mainseq()
'GATTACA'
>>> myseq.chains = {'A', 'B'}
>>> myseq.addseq('gapseq','GAT--C-')
>>> myseq.addseq('cobra','TACATACA')
>>> myseq.length()
7
>>> myseq.ngaps('gapseq')
3
>>> myseq.guess_biotype()
'DNA'
>>> print(myseq)
Sequence object >EXAMPLEID_1|Chains A,B (seq=GATTACA, type=DNA, length=7)
>>> myseq.source = 'Example'
>>> myseq.addseq('cropseq', '+A+T++')
>>> myseq.addseq('cropgapseq', '+A+-++')
>>> myseq.full_length()
7
>>> myseq.mainseq('AT')
'AT'
>>> myseq.ncrops()
4
>>> myseq.update_cropsheader()
>>> myseq.cropinfo()
'#Residues cropped: 4 (1 not from terminals) ; % cropped: 66.67 (16.67 not from terminal segments)'
>>> myseq.dump(out='string')
'>crops|exampleID_1|Chains A,B|Source: Example|#Residues cropped: 4 (1 not from terminal segments) ; % cropped: 66.67 (16.67 not from terminal segments)\\nAT\\n'
:example:
>>> from crops.elements import sequences as ces
>>> from crops.iomod import parsers as cip
>>> myseq = cip.parseseqfile('7M6C.fasta')
>>> myseq
Sequence object: (>7M6C_1|Chain A, seq=MRTLWIMAVL[...]KPLCKKADPC, type=Undefined, length=138)
>>> myseq.guess_biotype()
'Protein'
>>> myseq
Sequence object: (>7M6C_1|Chain A, seq=MRTLWIMAVL[...]KPLCKKADPC, type=Protein, length=138)
"""
_kind = 'Sequence'
__slots__ = ['oligomer_id', 'name', 'chains', 'source', 'seqs', 'biotype',
'source_headers', 'crops_header', 'cropmap', 'cropbackmap',
'infostring', 'msa', 'cropmsa', 'intervals']
def __init__(self, seqid=None, oligomer=None, seq=None, chains=None,
source=None, header=None, biotype=None, extrainfo=None):
self.oligomer_id = None
self.name = None
self.chains = set()
self.source = None
self.source_headers = []
self.crops_header = None
self.seqs = {}
self.biotype = None
self.infostring = None
self.cropmap = None
self.cropbackmap = None
self.msa = None
self.cropmsa = None
self.intervals = None
if header is not None:
if isinstance(header, str):
self.source_headers.append(header)
try:
header_info = retrieve_id(header)
except Exception:
logging.warning('Header format not recognised. Information not extracted.')
header_info = None
else:
logging.critical("Argument 'header' should be a string.")
raise TypeError
else:
header_info = None
if seqid is not None:
if isinstance(seqid, str):
self.name = seqid
elif isinstance(seqid, int):
self.name = str(seqid)
else:
logging.critical("Sequence ID 'seqid' should be a string.")
raise TypeError
else:
if header_info is not None:
if 'seqid' in header_info:
self.name = header_info['seqid']
else:
self.name = '1'
if seq is not None:
if isinstance(seq, str):
self.seqs['mainseq'] = seq
else:
logging.critical("Chain sequence 'seq' should be a string.")
raise TypeError
else:
self.seqs['mainseq'] = ''
if oligomer is not None:
if isinstance(oligomer, str):
self.oligomer_id = oligomer
else:
logging.critical("Oligomer ID 'oligomer' should be a string.")
raise TypeError
else:
if header_info is not None:
if 'mainid' in header_info:
self.oligomer_id = header_info['mainid']
if chains is not None:
if isinstance(chains, set):
for ch in chains:
if isinstance(ch, str):
self.chains.add(ch)
else:
logging.critical("Chain IDs in 'chains' set should be strings.")
raise TypeError
else:
logging.critical("Argument 'chains' should be a set of strings.")
raise TypeError
else:
if header_info is not None:
if 'chains' in header_info:
self.chains = header_info['chains']
if source is not None:
if isinstance(source, str):
self.source = source
else:
logging.critical("Argument 'source' should be a string.")
raise TypeError
else:
if header_info is not None:
if 'source' in header_info:
self.source = header_info['source']
if biotype is not None:
if biotype.lower() == 'guess':
self.biotype = guess_type(seq)
else:
self.biotype = biotype
else:
self.biotype = None
if extrainfo is not None:
if isinstance(extrainfo, str):
self.infostring = extrainfo
else:
logging.critical("Argument 'extrainfo' should be a string.")
raise TypeError
else:
if header_info is not None:
if 'comments' in header_info:
self.infostring = header_info['comments']
else:
self.infostring = ""
if oligomer is None:
self.crops_header = makeheader(mainid='NOID', seqid=self.name,
chains=self.chains, source=self.source,
extrainfo=self.infostring)
else:
self.crops_header = makeheader(mainid=self.oligomer_id, seqid=self.name,
chains=self.chains, source=self.source,
extrainfo=self.infostring)
def __repr__(self):
chtype = self.biotype if self.biotype is not None else 'Undefined'
if 'mainseq' not in self.seqs:
logging.critical("'mainseq' sequence not found.")
raise ValueError
if len(self.seqs['mainseq']) <= 20:
showseq = self.seqs['mainseq']
else:
showseq = (self.seqs['mainseq'][:10]+'[...]' +
self.seqs['mainseq'][len(self.seqs['mainseq'])-10:])
tempolig = self.oligomer_id if self.oligomer_id is not None else 'NOID'
shortid = makeheader(mainid=tempolig, seqid=self.name,
chains=self.chains, short=True)
string = (self._kind+" object "+shortid+" (seq="+str(showseq) +
", type=" + chtype + ", length=" +
str(len(self.seqs['mainseq']))+")")
return string
def __iter__(self):
return iter(self.seqs['mainseq'].values())
[docs] def copy(self):
return copy.copy(self)
[docs] def deepcopy(self):
return copy.deepcopy(self)
[docs] def addseq(self, newid, newseq):
"""Add sequence to `seqs` dictionary.
:param newid: New sequence's identifier.
:type newid: str
:param newseq: New sequence.
:type newseq: str
:raises `TypeError`: If newid is not a string.
:raises `KeyError`: If newseq is not a string.
"""
if not isinstance(newid, str):
logging.critical("New sequence ID 'newid' should be a string.")
raise TypeError
if not isinstance(newseq, str):
logging.critical("New sequence string 'newseq' should be a string.")
raise TypeError
if newid in self.seqs:
logging.critical("Key name 'newid' already exists.")
raise KeyError
self.seqs[newid] = newseq
[docs] def delseq(self, delid=None, wipeall=False):
"""Delete sequence(s) from the `seqs` dictionary.
:param delid: ID of sequence to be deleted, defaults to None.
:type delid: str, optional
:param wipeall: If True, all the sequences are deleted, defaults to False.
:type wipeall: bool, optional
:raises `TypeError`: If delid is not a string or wipeall is not a boolean.
"""
if not isinstance(delid, str) and delid is not None:
logging.critical("Sequence ID 'delid' should be a string.")
raise TypeError
if not isinstance(wipeall, bool):
logging.critical("Boolean switch 'wipeall' is neither True nor False.")
raise TypeError
if wipeall:
self.seqs = {}
self.seqs['mainseq'] = ''
return
if delid is None:
return
if delid == 'mainseq':
self.seqs['mainseq'] = ''
else:
self.seqs.pop(delid)
[docs] def mainseq(self, add=None):
"""Return or modifies the main sequence.
:param add: If given, the main sequence is replaced by 'add', defaults to None.
:type add: str, optional
:raises `TypeError`: If 'add' is given and is not a string.
:return: The (new) main sequence.
:rtype: str
"""
if not isinstance(add, str) and add is not None:
logging.critical("If included, sequence 'add' should be a string.")
raise TypeError
if add is not None:
self.seqs['mainseq'] = add
return self.seqs['mainseq']
[docs] def guess_biotype(self):
"""Save the guessed biotype and return it.
:return: Guessed biotype.
:rtype: str
"""
if self.seqs['mainseq'] is None:
self.biotype = None
else:
self.biotype = guess_type(self.seqs['mainseq'])
return self.biotype
[docs] def dump(self, out, split=False, oneline=False):
"""Write header and main sequence to a file. If the file exists, output is appended.
:param out: An output filepath (str), 'string', or an open file.
:type out: str, file
:param split: If True, identical sequences are dumped for every chain, defaults to False.
:type split: bool, optional
:param oneline: If True, sequences are not split in 80 residue-lines, defaults to False.
:type oneline: bool, optional
:raises `TypeError`: If `out` is neither a string nor an open file.
:raises `KeyError`: If object contains no chains.
:return: A string containing the output if and only if out=='string'.
:rtype: str
"""
if not isinstance(out, str) and not isinstance(out, io.IOBase):
logging.critical("Argument 'out' should be a string or a file.")
raise TypeError
if (self.chains is None or
(isinstance(self.chains, set) and len(self.chains) == 0)):
logging.critical('No chains defined in sequence.')
raise KeyError
outheader = []
if split:
chset = []
for ch in self.chains:
chset.append({ch})
else:
chset = [self.chains]
if self.oligomer_id is None:
tag1 = 'NoID'
else:
tag1 = self.oligomer_id
tag2 = self.infostring
if self.ncrops() == 0 and 'cropseq' not in self.seqs:
pass
else:
if self.infostring[-1] != "|":
tag2 += '|'
tag2 += self.cropinfo()
for ch in chset:
outheader.append(makeheader(mainid=tag1,
seqid=self.name,
chains=ch,
source=self.source,
extrainfo=tag2))
if not oneline:
lenseq = len(self.seqs['mainseq'])
nlines = int((lenseq-1)/80)+1
output = ''
for header in outheader:
if isinstance(out, io.IOBase) is True:
out.write(header+'\n')
if oneline:
out.write(self.seqs['mainseq']+'\n')
else:
for n in range(nlines):
out.write(self.seqs['mainseq'][n*80:(n+1)*80]+'\n')
else:
output += header + os.linesep
if oneline:
output += self.seqs['mainseq'] + os.linesep
else:
for n in range(nlines):
output += self.seqs['mainseq'][n*80:(n+1)*80] + os.linesep
if isinstance(out, io.IOBase) is False:
if out.lower() == 'string':
return output
else:
outpath = out
op = 'a' if os.path.isfile(outpath) else 'w'
with open(outpath, op) as out:
out.write(output)
return
[docs] def dumpmap(self, out, split=False):
"""Write header and cropmap to a file. If file exists, output is appended.
:param out: An output filepath (str) or an open file.
:type out: str, file
:param backmap: If True, the output will be self.cropbackmap, defaults to False.
:type backmap: bool, optional
:param split: If True, identical maps are dumped for every chain, defaults to False.
:type split: bool, optional
:raises `TypeError`: If `out` is neither a string nor an open file.
:raises `ValueError`: If one or both of `cropmap` and `cropbackmap` are empty.
:raises `KeyError`: If object contains no chains.
"""
if not isinstance(out, str) and not isinstance(out, io.IOBase):
logging.critical("Argument 'out' should be a string or a file.")
raise TypeError
if self.cropmap is None:
stringerr = "Cropmap not found in sequence."
logging.critical(stringerr)
raise ValueError
if (self.chains is None or
(isinstance(self.chains, set) and len(self.chains) == 0)):
logging.critical('No chains defined in sequence.')
raise KeyError
outheader = []
if split:
chset = []
for ch in self.chains:
chset.append({ch})
else:
chset = [self.chains]
if self.oligomer_id is None:
tag1 = 'NoID'
else:
tag1 = self.oligomer_id
tag2 = self.infostring
if self.ncrops() == 0 and 'cropseq' not in self.seqs:
pass
else:
if self.infostring[-1] != "|":
tag2 += '|'
tag2 += self.cropinfo()
for ch in chset:
outheader.append(makeheader(mainid=tag1,
seqid=self.name,
chains=ch,
source=self.source,
extrainfo=tag2))
output = ''
for header in outheader:
if isinstance(out, io.IOBase):
out.write(header+'\n')
for key, value in self.cropmap.items():
if value is not None:
out.write(str(key)+' '+str(value)+'\n')
else:
out.write(str(key)+' 0\n')
else:
output += header + os.linesep
for key, value in self.cropmap.items():
if value is not None:
output += str(key) + ' ' + str(value) + os.linesep
else:
output += str(key) + ' 0' + os.linesep
if isinstance(out, io.IOBase) is False:
if out.lower() == 'string':
return output
else:
outpath = out
op = 'a' if os.path.isfile(outpath) else 'w'
with open(outpath, op) as out:
out.write(output)
return
[docs] def length(self):
"""Return the length of the main sequence.
:return: Length of the main sequence.
:rtype: int
"""
return len(self.seqs['mainseq'])
[docs] def full_length(self):
"""Return the length of the full sequence. If not found, the main sequence
will be considered the full sequence, and will be saved as so.
:return: Length of the full sequence.
:rtype: int
"""
if 'fullseq' not in self.seqs:
self.seqs['fullseq'] = self.seqs['mainseq']
return len(self.seqs['fullseq'])
[docs] def ngaps(self, seqid='gapseq'):
"""Return the number of gaps ('-') in a sequence.
:param seqid: The ID of the sequence containing the gaps, defaults to 'gapseq'.
:type seqid: str, optional
:raises `TypeError`: If seqid is not a string.
:return: Number of gaps in `seqid`. If 'gapseq' is a list of several models, a list is returned. If `seqid` not found, 0 is returned.
:rtype: int or list [int]
"""
if not isinstance(seqid, str):
logging.critical("Sequence ID 'seqid' should be a string.")
raise TypeError
if seqid in self.seqs:
if isinstance(self.seqs[seqid], str):
nseqid = [self.seqs[seqid]]
else:
nseqid = self.seqs[seqid]
ng = []
for altseq in nseqid:
n = 0
for char in altseq:
if char == '-':
n += 1
ng.append(n)
if len(ng) == 1:
ng = ng[0]
else:
ng = 0
return ng
[docs] def ncrops(self, seqid='cropseq', offterminals=False, offmidseq=False):
"""Return the number of cropped elements ('+','*') in a sequence.
:param seqid: The ID of the sequence containing the cropped elements, defaults to 'cropseq'.
:type seqid: str, optional
:param offterminals: Count those removed from terminal segments only, defaults to False.
:type offterminals: bool, optional
:param offmidseq: Count those removed NOT from terminal segments only, defaults to False.
:type offmidseq: bool, optional
:raises `TypeError`: If `seqid` is not a string, or `offterminals`, `offmidseq` are not boolean.
:return: Number of cropped elements in `seqid` according to interval chosen. If `seqid` not found, 0 is returned.
:rtype: int
"""
if not isinstance(seqid, str):
logging.critical("Sequence ID 'seqid' should be a string.")
raise TypeError
n = 0
if seqid not in self.seqs:
return n
for char in self.seqs[seqid]:
if char == '+' or char == '*':
n += 1
if offterminals is offmidseq:
return n
else:
nterms = 0
for char in self.seqs[seqid]:
if char == '+' or char == '*':
nterms += 1
else:
break
for char in reversed(self.seqs[seqid]):
if char == '+' or char == '*':
nterms += 1
else:
break
if offterminals is False and offmidseq is True:
return n-nterms
elif offterminals is True and offmidseq is False:
return nterms
[docs] def cropinfo(self):
"""Return a string containing statistics about the cropped residues.
:return: Statistics on number of crops.
:rtype: str
"""
cropstr = ""
if 'cropseq' in self.seqs:
cropstr += '#Residues cropped: '
if self.ncrops() == 0:
cropstr += '0'
else:
cropstr += (str(self.ncrops()) + ' (' +
str(self.ncrops(offmidseq=True)) +
' not from terminal segments) ' +
'; % cropped: ' +
str(round(100*self.ncrops()/len(self.seqs['cropseq']), 2)) +
' (' + str(round(100*self.ncrops(offmidseq=True)/len(self.seqs['cropseq']), 2)) +
' not from terminal segments)')
else:
pass
return cropstr
[docs]class oligoseq:
"""An object grouping several :class:`crops.elements.sequences.sequence` objects pertaining to a common oligomer.
:param oligomer_id: Oligomer identifier (e.g. PDB id), defaults to None.
:type oligomer_id: str
:param imer: Container of several :class:`crops.elements.sequences.sequence` objects making up the oligomer, defaults to empty dict.
:type imer: dict [str, :class:`crops.elements.sequences.sequence`], optional
:ivar id: Oligomer sequence identifier (e.g. PDB id).
:vartype id: str
:ivar imer: Container of several :class:`crops.elements.sequence.monomer_sequence` making up the oligomer.
:vartype imer: dict [str, :class:`crops.elements.sequence.monomer_sequence`]
:raises `TypeError`: If the input formats are wrong.
:example:
>>> from crops.elements import sequences as ces
>>> my_oligoseq = ces.oligoseq(oligomer_id='exampleID')
>>> my_oligoseq.add_monomer
>>> my_sequence.add_monomer('header_example','GATTACA',nid='mychain')
>>> my_sequence.add_monomer('another_header','TACATACA')
>>> my_sequence.nchains()
2
>>> my_sequence.length('mychain')
7
>>> my_sequence.write('/path/to/output/dir/')
>>> print(my_sequence)
docs Protein/polynucleotide sequence object: (id='example_id', # chains = 2)
>>> my_sequence.purge()
>>> my_sequence.nchains()
0
"""
_kind = 'Multiple sequence'
__slots__ = ['id', 'imer']
def __init__(self, oligomer_id=None, imer=None):
if not isinstance(oligomer_id, str) and oligomer_id is not None:
logging.critical("'oligomer_id' should be a string.")
raise TypeError
if not isinstance(imer, dict) and imer is not None:
logging.critical("Sequence container 'imer' should be a dictionary.")
raise TypeError
elif isinstance(imer, dict):
for val in imer.values():
if not isinstance(val, sequence):
logging.critical("Sequence container 'imer' should only "
"contain :class:`~crops.elements.sequences.sequence` objects.")
raise TypeError
self.id = oligomer_id
self.imer = imer if imer is not None else {}
def __repr__(self):
string = self._kind+" object: (id="+ str(self.id) + ", sequences = "+str(self.imer)+")"
return string
def __getitem__(self, key):
return self.imer[key]
def __len__(self):
return len(self.imer)
def __iter__(self):
return iter(self.imer.values())
[docs] def copy(self):
return copy.copy(self)
[docs] def deepcopy(self):
return copy.deepcopy(self)
[docs] def purge(self):
"""Clear the object's content without deleting the object itself."""
self.id = None
self.imer.clear()
[docs] def add_sequence(self, newseq):
"""Add a new :class:`crops.elements.sequences.sequence` to the object.
:param newseq: Sequence object.
:type newseq: :class:`crops.elements.sequences.sequence`
:raises `TypeError`: If `newseq` is not a :class:`crops.elements.sequences.sequence` object.
:raises `Exception`: If sequence content is incompatible with that in oligoseq (oligomer id, other sequences, etc).
"""
addall = None
errormsg = ('Sequence content is incompatible with oligoseq ' +
self.id + '.')
if self.id is not None and newseq.oligomer_id is not None:
if self.id.upper() != newseq.oligomer_id.upper():
logging.critical(errormsg)
raise ValueError
if newseq.name is not None:
if newseq.name in self.imer:
if self[newseq.name].imer.seqs['mainseq'] == newseq.seqs['mainseq']:
addall = False
else:
logging.critical(errormsg)
raise ValueError
else:
for seq in self.imer.values():
if seq.seqs['mainseq'] == newseq.seqs['mainseq']:
logging.critical(errormsg)
raise ValueError
addall = True
else:
for seq in self.imer.values():
if seq.seqs['mainseq'] == newseq.seqs['mainseq']:
addall = False
newseq.name = seq.name
break
if addall is not False:
addall = True
if addall is True:
for ch in newseq.chains:
for seq in self.imer.values():
if ch in seq.chains:
logging.critical(errormsg)
raise ValueError
if newseq.name is None:
n = 1
while True:
if str(n) in self.imer:
n += 1
else:
newseq.name = str(n)
break
self.imer[newseq.name] = newseq
if self.id is None and newseq.oligomer_id is not None:
self.id = newseq.oligomer_id.upper()
for seq in self.imer.values():
seq.oligomer_id = newseq.oligomer_id.upper()
elif self.id is not None and newseq.oligomer_id is None:
self.imer[newseq.name].oligomer_id = self.id.upper()
else:
for ch in newseq.chains:
for seq in self.imer.values():
if ch in seq.chains and seq.name != newseq.name:
logging.critical(errormsg)
raise ValueError
self.imer[newseq.name].chains.add(ch)
for header in newseq.source_headers:
if header not in self.imer[newseq.name].source_headers:
self.imer[newseq.name].source_headers.append(header)
if newseq.source != self.imer[newseq.name].source:
self.imer[newseq.name].source = 'Diverse'
self.imer[newseq.name].update_cropsheader()
return
[docs] def del_sequence(self, seqid):
"""Remove the selected :class:`crops.elements.sequences.sequence` from the object.
:param seqid: Doomed sequence's identifier.
:type seqid: str
:raises `TypeError`: If `seqid` is not a string.
"""
if isinstance(seqid, int):
seqid = str(seqid)
if not isinstance(seqid, str):
raise TypeError("'seqid' should be a string.")
if seqid in self.imer:
self.imer.pop(seqid)
else:
logging.warning('Sequence named ' + seqid + ' not found in oligoseq.')
return
[docs] def set_cropmaps(self, mapdict, cropmain=False):
"""Sets the parsed cropmaps from :class:`crops.iomod.parsers.parsemapfile`.
:param mapdict: Parsed maps for this specific object.
:type mapdict: dict [str, dict [str, dict [int, int]]]
:param cropmain: If True, it will crop 'mainseq' and generate 'fullseq' and 'cropseq'. If 'mainseq' has been edited before this operation will yield wrong results, defaults to False.
:type cropmain: bool, optional
:raises `TypeError`: When `mapdict` has not the appropriate format.
"""
if not isinstance(mapdict, dict):
logging.critical("'mapdict' should be a dictionary.")
raise TypeError
for seqid in mapdict:
if not isinstance(seqid, str):
logging.critical("Values in 'mapdict' should be strings.")
raise TypeError
if seqid in self.imer:
if ('cropmap' not in mapdict[seqid] or
'cropbackmap' not in mapdict[seqid]):
logging.critical("'mapdict' is not a crop map.")
raise TypeError
self.imer[seqid].cropmap = copy.deepcopy(mapdict[seqid]['cropmap'])
self.imer[seqid].cropbackmap = copy.deepcopy(mapdict[seqid]['cropbackmap'])
self.imer[seqid].intervals = intinterval(description=self.id+'_'+str(seqid))
for resc, res0 in mapdict[seqid]['cropbackmap'].items():
self.imer[seqid].intervals = self.imer[seqid].intervals.union(other=res0)
if cropmain is True:
self.imer[seqid].seqs['fullseq'] = self.imer[seqid].seqs['mainseq']
self.imer[seqid].seqs['mainseq'] = ''
self.imer[seqid].seqs['cropseq'] = ''
for n in range(len(self.imer[seqid].seqs['fullseq'])):
if self.imer[seqid].cropmap[n+1] is None:
self.imer[seqid].seqs['cropseq'] += '+'
else:
self.imer[seqid].seqs['mainseq'] += self.imer[seqid].seqs['fullseq'][n]
self.imer[seqid].seqs['cropseq'] += self.imer[seqid].seqs['fullseq'][n]
self.imer[seqid].infostring += '|' + self.imer[seqid].cropinfo()
self.imer[seqid].update_cropsheader()
return
[docs] def write(self, outdir, infix="", split=False, oneline=False):
"""Write all :class:`crops.elements.sequences.sequence` objects to .fasta file or string.
:param outdir: Output directory or 'string'.
:type outdir: str
:param infix: Filename tag to distinguish from original input file, defaults to "".
:type infix: str, optional
:param split: If True, identical sequences are dumped for each chain, defaults to False.
:type split: bool, optional
:param oneline: If True, sequences are not split in 80 residue-lines, defaults to False.
:type oneline: bool, optional
:raises `FileNotFoundError`: Output directory not found.
"""
if not os.path.isdir(outdir) and outdir != 'string':
logging.critical(outdir + ' directory not found.')
raise FileNotFoundError
if outdir == 'string':
if infix != "":
logging.warning("Writing sequences to a string, 'infix' value ignored.")
outpath = 'string'
outstring = ""
for seq in self.imer.values():
outstring += seq.dump(outpath, split=split, oneline=oneline)
return outstring
else:
outpath = os.path.join(outdir, self.seq_id + infix + ".fasta")
for seq in self.imer.values():
seq.dump(outpath, split=split, oneline=oneline)
return
[docs] def length(self, seqid):
"""Return the length of a certain sequence.
:param seqid: ID of :class:`crops.elements.sequences.sequence`.
:type seqid: str
:raises `TypeError`: When 'seqid' is not a string.
:raises `KeyError`: Specific sequence not found in :class:`crops.elements.sequences.oligoseq`.
:return: Length of :class:`crops.elements.sequences.sequence`.
:rtype: int
"""
if isinstance(seqid, int):
seqid = str(seqid)
if not isinstance(seqid, str):
logging.critical('chain input must be a string.')
raise TypeError
if seqid in self.imer:
return self.imer[seqid].length()
else:
logging.critical(seqid+' monomer not found in sequence.')
raise KeyError
[docs] def nchains(self):
"""Return number of chains in object, counting all sequence objects contained.
:return: Number of chains in object, counting al :class:`crops.elements.sequences.sequence` contained.
:rtype: int
"""
n = 0
for seqid in self.imer:
n += len(self.imer[seqid].chains)
return n
[docs] def nseqs(self):
"""Return number of sequence objects in object.
:return: Number of :class:`crops.elements.sequences.sequence` objects in object.
:rtype: int
"""
return len(self.imer)
[docs] def chainlist(self):
"""Return a set with all the chain names in the object.
:return: Chain names in :class:`crops.elements.sequences.oligoseq`.
:rtype: set [str]
"""
newset = set()
for seqid in self.imer:
newset = newset.union(self.imer[seqid].chains)
return newset
[docs] def whatseq(self, chain):
"""Return the sequence number corresponding to a given chain.
:param chain: The chain ID.
:type chain: str
:return: The :class:`crops.elements.sequences.sequence` of that chain.
:rtype: str
"""
myseq = None
for seqid in self.imer:
if chain in self.imer[seqid].chains:
myseq = seqid
break
return myseq