Source code for crops.elements.sequences

"""Sequence and multi-sequence objects are defined here."""

from crops import __prog__, __description__, __author__
from crops import __date__, __version__, __copyright__

import os
import io
import copy
import logging

from crops.iomod.taggers import retrieve_id
from crops.iomod.taggers import makeheader
from crops.libs.rescodes import reslist
from crops.libs.rescodes import nuclist
from crops.elements.intervals import intinterval


def guess_type(inseq):
    """Guess the biological type of a sequence from the residues it contains.

    Characters shared by all alphabets (and the gap/crop symbols) carry no
    information and are skipped. 'T' and 'U' point towards DNA and RNA
    respectively; finding both of them turns the guess into 'Protein'.

    :param inseq: Sequence to be evaluated.
    :type inseq: str

    :raises `TypeError`: If `inseq` is not a string.

    :return: Sequence type ('Protein', 'DNA', 'RNA', 'Unknown', or
        'DNA or RNA' when no discriminating character was found).
    :rtype: str

    """
    if not isinstance(inseq, str):
        logging.critical("Sequence 'inseq' should be a string.")
        raise TypeError

    # Symbols that do not help to discriminate between molecule types.
    uninformative = ('C', 'A', 'G', 'I', 'X', '-', '+', '*')

    guess = None
    for residue in inseq:
        if residue in uninformative:
            continue

        if residue not in nuclist.values():
            # Not a nucleotide code: either a known residue or nothing known.
            guess = 'Protein' if residue in reslist.values() else 'Unknown'
            continue

        if residue == 'T':
            own, opposite = 'DNA', 'RNA'
        elif residue == 'U':
            own, opposite = 'RNA', 'DNA'
        else:
            continue

        if guess is None:
            guess = own
        elif guess == opposite:
            # Both 'T' and 'U' present: cannot be a pure nucleic acid.
            guess = 'Protein'

    return 'DNA or RNA' if guess is None else guess


class sequence:
    """A :class:`crops.elements.sequences.sequence` object representing a single chain sequence.

    The :class:`crops.elements.sequences.sequence` class represents a data structure to hold all
    sequence versions and other useful information characterising it.
    It contains functions to store, manipulate and organise sequence versions.

    :param seqid: Sequence identifier. Can be used alone or together with oligomer ID. If neither `seqid` nor `header` are given, the name is set to '1'.
    :type seqid: str or int, optional
    :param oligomer: Oligomer identifier. Sometimes as important as seqid, defaults to None.
    :type oligomer: str, optional
    :param seq: Sequence string, defaults to None (stored as an empty main sequence).
    :type seq: str, optional
    :param chains: The names of chains having this sequence, defaults to None.
    :type chains: set [str], optional
    :param source: Source of the sequence, defaults to None
    :type source: str, optional
    :param header: Standard .fasta header, starting with ">", defaults to None.
    :type header: str, optional
    :param biotype: Type of molecule ('Protein', 'DNA', 'RNA'...), or 'guess' to have it guessed from `seq`, defaults to None.
    :type biotype: str, optional
    :param extrainfo: Other useful information about the sequence, defaults to None.
    :type extrainfo: str, optional

    :ivar name: Sequence identifier.
    :vartype name: str
    :ivar oligomer_id: Oligomer identifier.
    :vartype oligomer_id: str
    :ivar chains: The names of chains having this sequence.
    :vartype chains: set [str]
    :ivar seqs: The set of sequences, including default "mainseq".
    :vartype seqs: dict [str, str]
    :ivar source: Source of the sequence.
    :vartype source: str
    :ivar source_headers: A list of headers from input files.
    :vartype source_headers: list [str]
    :ivar crops_header: A new header containing the information from the object that will be used when printing sequence and cropmap.
    :vartype crops_header: str
    :ivar biotype: Type of molecule ('Protein', 'DNA', 'RNA'...).
    :vartype biotype: str
    :ivar infostring: Other useful information about the sequence.
    :vartype infostring: str
    :ivar cropmap: A dictionary mapping residue numbers from original sequence to cropped sequence.
    :vartype cropmap: dict [int, int]
    :ivar cropbackmap: A dictionary mapping residue numbers from cropped sequence to original sequence.
    :vartype cropbackmap: dict [int, int]
    :ivar msa: A free variable not used by CROPS itself.
    :vartype msa: Any
    :ivar cropmsa: A free variable not used by CROPS itself.
    :vartype cropmsa: Any
    :ivar intervals: The integer interval object containing the cropping information.
    :vartype intervals: :class:`crops.elements.intervals.intinterval`

    :raises `TypeError`: For wrong input formats.

    :example:

    >>> from crops.elements import sequences as ces
    >>> myseq = ces.sequence(seqid='1', oligomer = 'exampleID')
    >>> myseq.mainseq('GATTACA')
    'GATTACA'
    >>> myseq.mainseq()
    'GATTACA'
    >>> myseq.chains = {'A', 'B'}
    >>> myseq.addseq('gapseq','GAT--C-')
    >>> myseq.addseq('cobra','TACATACA')
    >>> myseq.length()
    7
    >>> myseq.ngaps('gapseq')
    3
    >>> myseq.guess_biotype()
    'DNA'
    >>> print(myseq)
    Sequence object >EXAMPLEID_1|Chains A,B (seq=GATTACA, type=DNA, length=7)
    >>> myseq.source = 'Example'
    >>> myseq.addseq('cropseq', '+A+T++')
    >>> myseq.addseq('cropgapseq', '+A+-++')
    >>> myseq.full_length()
    7
    >>> myseq.mainseq('AT')
    'AT'
    >>> myseq.ncrops()
    4
    >>> myseq.update_cropsheader()
    >>> myseq.cropinfo()
    '#Residues cropped: 4 (1 not from terminal segments) ; % cropped: 66.67 (16.67 not from terminal segments)'
    >>> myseq.dump(out='string')
    '>crops|exampleID_1|Chains A,B|Source: Example|#Residues cropped: 4 (1 not from terminal segments) ; % cropped: 66.67 (16.67 not from terminal segments)\\nAT\\n'

    :example:

    >>> from crops.elements import sequences as ces
    >>> from crops.iomod import parsers as cip
    >>> myseq = cip.parseseqfile('7M6C.fasta')
    >>> myseq
    Sequence object >7M6C_1|Chain A (seq=MRTLWIMAVL[...]KPLCKKADPC, type=Undefined, length=138)
    >>> myseq.guess_biotype()
    'Protein'
    >>> myseq
    Sequence object >7M6C_1|Chain A (seq=MRTLWIMAVL[...]KPLCKKADPC, type=Protein, length=138)
    """

    _kind = 'Sequence'
    __slots__ = ['oligomer_id', 'name', 'chains', 'source', 'seqs', 'biotype',
                 'source_headers', 'crops_header', 'cropmap', 'cropbackmap',
                 'infostring', 'msa', 'cropmsa', 'intervals']

    def __init__(self, seqid=None, oligomer=None, seq=None, chains=None,
                 source=None, header=None, biotype=None, extrainfo=None):
        self.oligomer_id = None
        self.name = None
        self.chains = set()
        self.source = None
        self.source_headers = []
        self.crops_header = None
        self.seqs = {}
        self.biotype = None
        self.infostring = None
        self.cropmap = None
        self.cropbackmap = None
        self.msa = None
        self.cropmsa = None
        self.intervals = None

        # Information parsed from the header is only a fallback for the
        # arguments that were not explicitly given.
        header_info = None
        if header is not None:
            if not isinstance(header, str):
                logging.critical("Argument 'header' should be a string.")
                raise TypeError
            self.source_headers.append(header)
            try:
                header_info = retrieve_id(header)
            except Exception:
                # Best effort: an unparsable header is kept but not used.
                logging.warning('Header format not recognised. Information not extracted.')
                header_info = None

        if seqid is not None:
            if isinstance(seqid, str):
                self.name = seqid
            elif isinstance(seqid, int):
                self.name = str(seqid)
            else:
                logging.critical("Sequence ID 'seqid' should be a string.")
                raise TypeError
        elif header_info is None:
            self.name = '1'
        elif 'seqid' in header_info:
            self.name = header_info['seqid']

        if seq is None:
            self.seqs['mainseq'] = ''
        elif isinstance(seq, str):
            self.seqs['mainseq'] = seq
        else:
            logging.critical("Chain sequence 'seq' should be a string.")
            raise TypeError

        if oligomer is not None:
            if not isinstance(oligomer, str):
                logging.critical("Oligomer ID 'oligomer' should be a string.")
                raise TypeError
            self.oligomer_id = oligomer
        elif header_info is not None and 'mainid' in header_info:
            self.oligomer_id = header_info['mainid']

        if chains is not None:
            if not isinstance(chains, set):
                logging.critical("Argument 'chains' should be a set of strings.")
                raise TypeError
            for ch in chains:
                if not isinstance(ch, str):
                    logging.critical("Chain IDs in 'chains' set should be strings.")
                    raise TypeError
                self.chains.add(ch)
        elif header_info is not None and 'chains' in header_info:
            self.chains = header_info['chains']

        if source is not None:
            if not isinstance(source, str):
                logging.critical("Argument 'source' should be a string.")
                raise TypeError
            self.source = source
        elif header_info is not None and 'source' in header_info:
            self.source = header_info['source']

        if biotype is not None:
            if not isinstance(biotype, str):
                logging.critical("Argument 'biotype' should be a string.")
                raise TypeError
            if biotype.lower() == 'guess':
                self.biotype = guess_type(seq)
            else:
                self.biotype = biotype

        if extrainfo is not None:
            if not isinstance(extrainfo, str):
                logging.critical("Argument 'extrainfo' should be a string.")
                raise TypeError
            self.infostring = extrainfo
        elif header_info is None:
            self.infostring = ""
        elif 'comments' in header_info:
            self.infostring = header_info['comments']

        # Use the oligomer ID actually stored, which may come from the header
        # rather than from the 'oligomer' argument.
        mainid = 'NOID' if self.oligomer_id is None else self.oligomer_id
        self.crops_header = makeheader(mainid=mainid, seqid=self.name,
                                       chains=self.chains, source=self.source,
                                       extrainfo=self.infostring)

    def __repr__(self):
        chtype = self.biotype if self.biotype is not None else 'Undefined'
        if 'mainseq' not in self.seqs:
            logging.critical("'mainseq' sequence not found.")
            raise ValueError
        mainseq = self.seqs['mainseq']
        # Long sequences are abbreviated to their first and last 10 residues.
        if len(mainseq) <= 20:
            showseq = mainseq
        else:
            showseq = mainseq[:10] + '[...]' + mainseq[len(mainseq)-10:]
        tempolig = self.oligomer_id if self.oligomer_id is not None else 'NOID'
        shortid = makeheader(mainid=tempolig, seqid=self.name,
                             chains=self.chains, short=True)
        string = (self._kind+" object "+shortid+" (seq="+str(showseq) +
                  ", type=" + chtype + ", length=" +
                  str(len(mainseq))+")")
        return string

    def __iter__(self):
        """Iterate over the characters of the main sequence."""
        # 'mainseq' is a plain string, so it is iterated directly.
        return iter(self.seqs['mainseq'])

    def copy(self):
        """Return a shallow copy of the object."""
        return copy.copy(self)

    def deepcopy(self):
        """Return a deep copy of the object."""
        return copy.deepcopy(self)

    def addseq(self, newid, newseq):
        """Add sequence to `seqs` dictionary.

        :param newid: New sequence's identifier.
        :type newid: str
        :param newseq: New sequence.
        :type newseq: str

        :raises `TypeError`: If `newid` or `newseq` are not strings.
        :raises `KeyError`: If `newid` is already a key in `seqs`.

        """
        if not isinstance(newid, str):
            logging.critical("New sequence ID 'newid' should be a string.")
            raise TypeError
        if not isinstance(newseq, str):
            logging.critical("New sequence string 'newseq' should be a string.")
            raise TypeError
        if newid in self.seqs:
            logging.critical("Key name 'newid' already exists.")
            raise KeyError

        self.seqs[newid] = newseq

    def delseq(self, delid=None, wipeall=False):
        """Delete sequence(s) from the `seqs` dictionary.

        The 'mainseq' entry is never removed; it is reset to an empty string.

        :param delid: ID of sequence to be deleted, defaults to None.
        :type delid: str, optional
        :param wipeall: If True, all the sequences are deleted, defaults to False.
        :type wipeall: bool, optional

        :raises `TypeError`: If delid is not a string or wipeall is not a boolean.
        :raises `KeyError`: If `delid` is not found in `seqs`.

        """
        if not isinstance(delid, str) and delid is not None:
            logging.critical("Sequence ID 'delid' should be a string.")
            raise TypeError
        if not isinstance(wipeall, bool):
            logging.critical("Boolean switch 'wipeall' is neither True nor False.")
            raise TypeError

        if wipeall:
            self.seqs = {}
            self.seqs['mainseq'] = ''
            return
        if delid is None:
            return

        if delid == 'mainseq':
            self.seqs['mainseq'] = ''
        else:
            self.seqs.pop(delid)

    def mainseq(self, add=None):
        """Return or modify the main sequence.

        :param add: If given, the main sequence is replaced by 'add', defaults to None.
        :type add: str, optional

        :raises `TypeError`: If 'add' is given and is not a string.

        :return: The (new) main sequence.
        :rtype: str

        """
        if not isinstance(add, str) and add is not None:
            logging.critical("If included, sequence 'add' should be a string.")
            raise TypeError

        if add is not None:
            self.seqs['mainseq'] = add

        return self.seqs['mainseq']

    def guess_biotype(self):
        """Save the guessed biotype and return it.

        :return: Guessed biotype.
        :rtype: str

        """
        if self.seqs['mainseq'] is None:
            self.biotype = None
        else:
            self.biotype = guess_type(self.seqs['mainseq'])

        return self.biotype

    def _extrainfo_tag(self):
        """Return `infostring`, followed by the crop statistics when a 'cropseq' exists."""
        if 'cropseq' not in self.seqs:
            return self.infostring
        tag = self.infostring if self.infostring is not None else ''
        # A separator is only needed after a non-empty information string.
        if tag and not tag.endswith('|'):
            tag += '|'
        return tag + self.cropinfo()

    def _chain_headers(self, split):
        """Return the output headers: one per chain if `split`, otherwise a single one."""
        if (self.chains is None or
                (isinstance(self.chains, set) and len(self.chains) == 0)):
            logging.critical('No chains defined in sequence.')
            raise KeyError

        mainid = 'NoID' if self.oligomer_id is None else self.oligomer_id
        extrainfo = self._extrainfo_tag()
        chainsets = [{ch} for ch in self.chains] if split else [self.chains]

        return [makeheader(mainid=mainid, seqid=self.name, chains=chset,
                           source=self.source, extrainfo=extrainfo)
                for chset in chainsets]

    @staticmethod
    def _deliver(out, lines):
        """Write `lines` to an open file or a filepath, or return them as a string if out is 'string'."""
        if isinstance(out, io.IOBase):
            out.write(''.join(line + '\n' for line in lines))
            return None

        text = ''.join(line + os.linesep for line in lines)
        if out.lower() == 'string':
            return text

        mode = 'a' if os.path.isfile(out) else 'w'
        # newline='' prevents a second newline translation of os.linesep.
        with open(out, mode, newline='') as fout:
            fout.write(text)
        return None

    def dump(self, out, split=False, oneline=False):
        """Write header and main sequence to a file. If the file exists, output is appended.

        :param out: An output filepath (str), 'string', or an open file.
        :type out: str, file
        :param split: If True, identical sequences are dumped for every chain, defaults to False.
        :type split: bool, optional
        :param oneline: If True, sequences are not split in 80 residue-lines, defaults to False.
        :type oneline: bool, optional

        :raises `TypeError`: If `out` is neither a string nor an open file.
        :raises `KeyError`: If object contains no chains.

        :return: A string containing the output if and only if out=='string'.
        :rtype: str

        """
        if not isinstance(out, str) and not isinstance(out, io.IOBase):
            logging.critical("Argument 'out' should be a string or a file.")
            raise TypeError

        headers = self._chain_headers(split)

        mainseq = self.seqs['mainseq']
        if oneline:
            seqlines = [mainseq]
        else:
            # An empty sequence still produces one (empty) line.
            seqlines = [mainseq[start:start+80]
                        for start in range(0, max(len(mainseq), 1), 80)]

        lines = []
        for header in headers:
            lines.append(header)
            lines.extend(seqlines)

        return self._deliver(out, lines)

    def dumpmap(self, out, split=False):
        """Write header and cropmap to a file. If file exists, output is appended.

        Residues mapped to None in `cropmap` are written with value 0.

        :param out: An output filepath (str), 'string', or an open file.
        :type out: str, file
        :param split: If True, identical maps are dumped for every chain, defaults to False.
        :type split: bool, optional

        :raises `TypeError`: If `out` is neither a string nor an open file.
        :raises `ValueError`: If `cropmap` is None.
        :raises `KeyError`: If object contains no chains.

        :return: A string containing the output if and only if out=='string'.
        :rtype: str

        """
        if not isinstance(out, str) and not isinstance(out, io.IOBase):
            logging.critical("Argument 'out' should be a string or a file.")
            raise TypeError

        if self.cropmap is None:
            stringerr = "Cropmap not found in sequence."
            logging.critical(stringerr)
            raise ValueError

        headers = self._chain_headers(split)

        maplines = [str(key) + '  ' + (str(value) if value is not None else '0')
                    for key, value in self.cropmap.items()]

        lines = []
        for header in headers:
            lines.append(header)
            lines.extend(maplines)

        return self._deliver(out, lines)

    def length(self):
        """Return the length of the main sequence.

        :return: Length of the main sequence.
        :rtype: int

        """
        return len(self.seqs['mainseq'])

    def full_length(self):
        """Return the length of the full sequence. If not found, the main sequence
        will be considered the full sequence, and will be saved as so.

        :return: Length of the full sequence.
        :rtype: int

        """
        if 'fullseq' not in self.seqs:
            self.seqs['fullseq'] = self.seqs['mainseq']

        return len(self.seqs['fullseq'])

    def ngaps(self, seqid='gapseq'):
        """Return the number of gaps ('-') in a sequence.

        :param seqid: The ID of the sequence containing the gaps, defaults to 'gapseq'.
        :type seqid: str, optional

        :raises `TypeError`: If seqid is not a string.

        :return: Number of gaps in `seqid`. If 'gapseq' is a list of several models, a list is returned. If `seqid` not found, 0 is returned.
        :rtype: int or list [int]

        """
        if not isinstance(seqid, str):
            logging.critical("Sequence ID 'seqid' should be a string.")
            raise TypeError
        if seqid not in self.seqs:
            return 0

        stored = self.seqs[seqid]
        models = [stored] if isinstance(stored, str) else stored
        ng = [sum(1 for char in altseq if char == '-') for altseq in models]

        # A single model is reported as a bare integer.
        return ng[0] if len(ng) == 1 else ng

    def ncrops(self, seqid='cropseq', offterminals=False, offmidseq=False):
        """Return the number of cropped elements ('+','*') in a sequence.

        If `offterminals` and `offmidseq` are equal (both True or both False),
        all the cropped elements are counted.

        :param seqid: The ID of the sequence containing the cropped elements, defaults to 'cropseq'.
        :type seqid: str, optional
        :param offterminals: Count those removed from terminal segments only, defaults to False.
        :type offterminals: bool, optional
        :param offmidseq: Count those removed NOT from terminal segments only, defaults to False.
        :type offmidseq: bool, optional

        :raises `TypeError`: If `seqid` is not a string, or `offterminals`, `offmidseq` are not boolean.

        :return: Number of cropped elements in `seqid` according to interval chosen. If `seqid` not found, 0 is returned.
        :rtype: int

        """
        if not isinstance(seqid, str):
            logging.critical("Sequence ID 'seqid' should be a string.")
            raise TypeError
        if not isinstance(offterminals, bool) or not isinstance(offmidseq, bool):
            logging.critical("Boolean switches 'offterminals' and 'offmidseq' "
                             "should be either True or False.")
            raise TypeError

        if seqid not in self.seqs:
            return 0

        cropped = [char == '+' or char == '*' for char in self.seqs[seqid]]
        n = sum(cropped)

        if offterminals is offmidseq:
            return n

        leading = 0
        for flag in cropped:
            if not flag:
                break
            leading += 1

        if leading == len(cropped):
            # Fully cropped sequence: both ends are the same segment, so it
            # must not be counted twice.
            nterms = leading
        else:
            trailing = 0
            for flag in reversed(cropped):
                if not flag:
                    break
                trailing += 1
            nterms = leading + trailing

        return nterms if offterminals else n - nterms

    def update_cropsheader(self):
        """Update `crops_header`. Useful after updating any information from the sequence."""
        mainid = 'NoID' if self.oligomer_id is None else self.oligomer_id

        self.crops_header = makeheader(mainid=mainid,
                                       seqid=self.name,
                                       chains=self.chains,
                                       source=self.source,
                                       extrainfo=self._extrainfo_tag())

    def cropinfo(self):
        """Return a string containing statistics about the cropped residues.

        Percentages are relative to the length of the 'cropseq' sequence.

        :return: Statistics on number of crops. Empty if there is no 'cropseq'.
        :rtype: str

        """
        if 'cropseq' not in self.seqs:
            return ""

        cropstr = '#Residues cropped: '
        total = self.ncrops()
        if total == 0:
            return cropstr + '0'

        midseq = self.ncrops(offmidseq=True)
        seqlen = len(self.seqs['cropseq'])
        cropstr += (str(total) + ' (' + str(midseq) +
                    ' not from terminal segments) ' +
                    '; % cropped: ' +
                    str(round(100*total/seqlen, 2)) +
                    ' (' + str(round(100*midseq/seqlen, 2)) +
                    ' not from terminal segments)')

        return cropstr


[docs]class oligoseq:
    """An object grouping several :class:`crops.elements.sequences.sequence` objects pertaining to a common oligomer.

    :param oligomer_id: Oligomer identifier (e.g. PDB id), defaults to None.
    :type oligomer_id: str
    :param imer: Container of several :class:`crops.elements.sequences.sequence` objects making up the oligomer, defaults to empty dict.
    :type imer: dict [str, :class:`crops.elements.sequences.sequence`], optional

    :ivar id: Oligomer sequence identifier (e.g. PDB id).
    :vartype id: str
    :ivar imer: Container of several :class:`crops.elements.sequences.sequence` objects making up the oligomer.
    :vartype imer: dict [str, :class:`crops.elements.sequences.sequence`]

    :raises `TypeError`: If the input formats are wrong.

    :example:

    >>> from crops.elements import sequences as ces
    >>> my_oligoseq = ces.oligoseq(oligomer_id='exampleID')
    >>> my_oligoseq.add_monomer
    >>> my_sequence.add_monomer('header_example','GATTACA',nid='mychain')
    >>> my_sequence.add_monomer('another_header','TACATACA')
    >>> my_sequence.nchains()
    2
    >>> my_sequence.length('mychain')
    7
    >>> my_sequence.write('/path/to/output/dir/')
    >>> print(my_sequence)
    docs Protein/polynucleotide sequence object: (id='example_id', # chains = 2)
    >>> my_sequence.purge()
    >>> my_sequence.nchains()
    0

    """
    _kind = 'Multiple sequence'
    __slots__ = ['id', 'imer']

    def __init__(self, oligomer_id=None, imer=None):
        if not isinstance(oligomer_id, str) and oligomer_id is not None:
            logging.critical("'oligomer_id' should be a string.")
            raise TypeError
        if not isinstance(imer, dict) and imer is not None:
            logging.critical("Sequence container 'imer' should be a dictionary.")
            raise TypeError
        elif isinstance(imer, dict):
            for val in imer.values():
                if not isinstance(val, sequence):
                    logging.critical("Sequence container 'imer' should only "
                                    "contain :class:`~crops.elements.sequences.sequence` objects.")
                    raise TypeError
        self.id = oligomer_id
        self.imer = imer if imer is not None else {}

    def __repr__(self):
        string = self._kind+" object: (id="+ str(self.id) + ", sequences = "+str(self.imer)+")"
        return string

    def __getitem__(self, key):
        return self.imer[key]

    def __len__(self):
        return len(self.imer)

    def __iter__(self):
        return iter(self.imer.values())

[docs]    def copy(self):
        return copy.copy(self)

[docs]    def deepcopy(self):
        return copy.deepcopy(self)

[docs]    def purge(self):
        """Clear the object's content without deleting the object itself."""
        self.id = None
        self.imer.clear()

[docs]    def add_sequence(self, newseq):
        """Add a new :class:`crops.elements.sequences.sequence` to the object.

        :param newseq: Sequence object.
        :type newseq: :class:`crops.elements.sequences.sequence`

        :raises `TypeError`: If `newseq` is not a :class:`crops.elements.sequences.sequence` object.
        :raises `Exception`: If sequence content is incompatible with that in oligoseq (oligomer id, other sequences, etc).

        """
        addall = None
        errormsg = ('Sequence content is incompatible with oligoseq ' +
                    self.id + '.')
        if self.id is not None and newseq.oligomer_id is not None:
            if self.id.upper() != newseq.oligomer_id.upper():
                logging.critical(errormsg)
                raise ValueError

        if newseq.name is not None:
            if newseq.name in self.imer:
                if self[newseq.name].imer.seqs['mainseq'] == newseq.seqs['mainseq']:
                    addall = False
                else:
                    logging.critical(errormsg)
                    raise ValueError
            else:
                for seq in self.imer.values():
                    if seq.seqs['mainseq'] == newseq.seqs['mainseq']:
                        logging.critical(errormsg)
                        raise ValueError
                addall = True
        else:
            for seq in self.imer.values():
                if seq.seqs['mainseq'] == newseq.seqs['mainseq']:
                    addall = False
                    newseq.name = seq.name
                    break
            if addall is not False:
                addall = True

        if addall is True:
            for ch in newseq.chains:
                for seq in self.imer.values():
                    if ch in seq.chains:
                        logging.critical(errormsg)
                        raise ValueError
            if newseq.name is None:
                n = 1
                while True:
                    if str(n) in self.imer:
                        n += 1
                    else:
                        newseq.name = str(n)
                        break
            self.imer[newseq.name] = newseq
            if self.id is None and newseq.oligomer_id is not None:
                self.id = newseq.oligomer_id.upper()
                for seq in self.imer.values():
                    seq.oligomer_id = newseq.oligomer_id.upper()
            elif self.id is not None and newseq.oligomer_id is None:
                self.imer[newseq.name].oligomer_id = self.id.upper()
        else:
            for ch in newseq.chains:
                for seq in self.imer.values():
                    if ch in seq.chains and seq.name != newseq.name:
                        logging.critical(errormsg)
                        raise ValueError
                self.imer[newseq.name].chains.add(ch)

            for header in newseq.source_headers:
                if header not in self.imer[newseq.name].source_headers:
                    self.imer[newseq.name].source_headers.append(header)
            if newseq.source != self.imer[newseq.name].source:
                self.imer[newseq.name].source = 'Diverse'

        self.imer[newseq.name].update_cropsheader()

        return

[docs]    def del_sequence(self, seqid):
        """Remove the selected :class:`crops.elements.sequences.sequence` from the object.

        :param seqid: Doomed sequence's identifier.
        :type seqid: str

        :raises `TypeError`: If `seqid` is not a string.

        """
        if isinstance(seqid, int):
            seqid = str(seqid)
        if not isinstance(seqid, str):
            raise TypeError("'seqid' should be a string.")

        if seqid in self.imer:
            self.imer.pop(seqid)
        else:
            logging.warning('Sequence named ' + seqid + ' not found in oligoseq.')

        return

[docs]    def set_cropmaps(self, mapdict, cropmain=False):
        """Sets the parsed cropmaps from :class:`crops.iomod.parsers.parsemapfile`.

        :param mapdict: Parsed maps for this specific object.
        :type mapdict: dict [str, dict [str, dict [int, int]]]
        :param cropmain: If True, it will crop 'mainseq' and generate 'fullseq' and 'cropseq'. If 'mainseq' has been edited before this operation will yield wrong results, defaults to False.
        :type cropmain: bool, optional

        :raises `TypeError`: When `mapdict` has not the appropriate format.

        """
        if not isinstance(mapdict, dict):
            logging.critical("'mapdict' should be a dictionary.")
            raise TypeError

        for seqid in mapdict:
            if not isinstance(seqid, str):
                logging.critical("Values in 'mapdict' should be strings.")
                raise TypeError
            if seqid in self.imer:
                if ('cropmap' not in mapdict[seqid] or
                        'cropbackmap' not in mapdict[seqid]):
                    logging.critical("'mapdict' is not a crop map.")
                    raise TypeError
                self.imer[seqid].cropmap = copy.deepcopy(mapdict[seqid]['cropmap'])
                self.imer[seqid].cropbackmap = copy.deepcopy(mapdict[seqid]['cropbackmap'])
                self.imer[seqid].intervals = intinterval(description=self.id+'_'+str(seqid))
                for resc, res0 in mapdict[seqid]['cropbackmap'].items():
                    self.imer[seqid].intervals = self.imer[seqid].intervals.union(other=res0)
                if cropmain is True:
                    self.imer[seqid].seqs['fullseq'] = self.imer[seqid].seqs['mainseq']
                    self.imer[seqid].seqs['mainseq'] = ''
                    self.imer[seqid].seqs['cropseq'] = ''
                    for n in range(len(self.imer[seqid].seqs['fullseq'])):
                        if self.imer[seqid].cropmap[n+1] is None:
                            self.imer[seqid].seqs['cropseq'] += '+'
                        else:
                            self.imer[seqid].seqs['mainseq'] += self.imer[seqid].seqs['fullseq'][n]
                            self.imer[seqid].seqs['cropseq'] += self.imer[seqid].seqs['fullseq'][n]
                self.imer[seqid].infostring += '|' + self.imer[seqid].cropinfo()
                self.imer[seqid].update_cropsheader()

        return

[docs]    def write(self, outdir, infix="", split=False, oneline=False):
        """Write all :class:`crops.elements.sequences.sequence` objects to .fasta file or string.

        :param outdir: Output directory or 'string'.
        :type outdir: str
        :param infix: Filename tag to distinguish from original input file, defaults to "".
        :type infix: str, optional
        :param split: If True, identical sequences are dumped for each chain, defaults to False.
        :type split: bool, optional
        :param oneline: If True, sequences are not split in 80 residue-lines, defaults to False.
        :type oneline: bool, optional

        :raises `FileNotFoundError`: Output directory not found.

        """
        if not os.path.isdir(outdir) and outdir != 'string':
            logging.critical(outdir + ' directory not found.')
            raise FileNotFoundError

        if outdir == 'string':
            if infix != "":
                logging.warning("Writing sequences to a string, 'infix' value ignored.")
            outpath = 'string'
            outstring = ""
            for seq in self.imer.values():
                outstring += seq.dump(outpath, split=split, oneline=oneline)

            return outstring
        else:
            outpath = os.path.join(outdir, self.seq_id + infix + ".fasta")
            for seq in self.imer.values():
                seq.dump(outpath, split=split, oneline=oneline)

            return

[docs]    def length(self, seqid):
        """Return the length of a certain sequence.

        :param seqid: ID of :class:`crops.elements.sequences.sequence`.
        :type seqid: str

        :raises `TypeError`: When 'seqid' is not a string.
        :raises `KeyError`: Specific sequence not found in :class:`crops.elements.sequences.oligoseq`.

        :return: Length of :class:`crops.elements.sequences.sequence`.
        :rtype: int

        """
        if isinstance(seqid, int):
            seqid = str(seqid)
        if not isinstance(seqid, str):
            logging.critical('chain input must be a string.')
            raise TypeError
        if seqid in self.imer:
            return self.imer[seqid].length()
        else:
            logging.critical(seqid+' monomer not found in sequence.')
            raise KeyError

[docs]    def nchains(self):
        """Return number of chains in object, counting all sequence objects contained.

        :return: Number of chains in object, counting al :class:`crops.elements.sequences.sequence` contained.
        :rtype: int
        """
        n = 0
        for seqid in self.imer:
            n += len(self.imer[seqid].chains)

        return n

[docs]    def nseqs(self):
        """Return number of sequence objects in object.

        :return: Number of :class:`crops.elements.sequences.sequence` objects in object.
        :rtype: int
        """
        return len(self.imer)

[docs]    def chainlist(self):
        """Return a set with all the chain names in the object.

        :return: Chain names in :class:`crops.elements.sequences.oligoseq`.
        :rtype: set [str]

        """
        newset = set()
        for seqid in self.imer:
            newset = newset.union(self.imer[seqid].chains)

        return newset

[docs]    def whatseq(self, chain):
        """Return the sequence number corresponding to a given chain.

        :param chain: The chain ID.
        :type chain: str

        :return: The :class:`crops.elements.sequences.sequence` of that chain.
        :rtype: str

        """
        myseq = None
        for seqid in self.imer:
            if chain in self.imer[seqid].chains:
                myseq = seqid
                break

        return myseq