# Source code for crops.command_line.crops_splitseqs
"""This script will take a sequence (fasta) file as an input and produce several different fasta files, one per sequence/chain."""
from crops import __prog__, __description__, __author__
from crops import __date__, __version__, __copyright__
import argparse
import os
from crops.iomod import check_path
from crops.iomod import outpathgen
from crops.iomod import parsers as cin
from crops import command_line as ccl
# Module-level logger handle. It stays None until main() rebinds it (via
# ``global logger``) to the object returned by ccl.crops_logger().
logger = None
def create_argument_parser():
    """Build the command line parser for crops-splitseqs.

    The parser accepts one positional sequence file path plus the optional
    ``-i/--individual``, ``-p/--preselect``, ``-o/--outdir`` and ``--version``
    switches. Nothing is parsed here; the configured
    ``argparse.ArgumentParser`` is returned to the caller.
    """
    # Help banner: package description, program name and version, followed
    # by this module's docstring on a new line.
    banner = (__description__ + ' (' + __prog__ + ') v.' + __version__
              + os.linesep + __doc__)

    cli = argparse.ArgumentParser(
        prog=__prog__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner,
    )

    # (flags, keyword options) pairs, registered in this order so the help
    # listing keeps the same layout.
    option_table = (
        (("input_seqpath",),
         dict(nargs=1, metavar="Sequence_filepath",
              help="Input sequence filepath.")),
        (("-i", "--individual"),
         dict(action='store_true', default=False,
              help="One separated output fasta file per each sequence.")),
        (("-p", "--preselect"),
         dict(nargs='+', metavar="Oligoseq_ids", default=None,
              help="From all the sequences in the input sequence file, just print out this preselected subset.")),
        (("-o", "--outdir"),
         dict(nargs=1, metavar="Output_Directory",
              help="Set output directory path. If not supplied, default is the one containing the input sequence.")),
        (('--version',),
         dict(action='version', version='%(prog)s ' + __version__)),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli
def main():
    """Split a sequence (fasta) file into separate fasta output files.

    Steps, as performed below:

    1. Parse the command line and set up the module-level ``logger``.
    2. Validate the input file path and the output directory (the directory
       of the input file when ``--outdir`` is not given).
    3. Parse the sequence file, optionally restricted to the ``--preselect``
       identifiers.
    4. For every monomer of every parsed sequence, call its ``dump`` method
       with an output path named ``<sequence id>.fasta``, or
       ``<sequence id>_<monomer id>.fasta`` when ``--individual`` is set.
    """
    global logger

    # INITIALISE AND PARSE ARGUMENTS FROM COMMAND LINE
    args = create_argument_parser().parse_args()

    logger = ccl.crops_logger(level="info")
    logger.info(ccl._welcome())

    inseq = check_path(args.input_seqpath[0], 'file')
    # Without an explicit --outdir, write next to the input file.
    outdir = check_path(
        os.path.dirname(inseq) if args.outdir is None else args.outdir[0],
        'dir')

    # PARSE INPUT FILES
    logger.info('Parsing sequence file '+inseq)
    subset = None if args.preselect is None else set(args.preselect)
    seqset = cin.parseseqfile(seq_input=inseq, inset=subset)
    logger.info('Done'+os.linesep)

    # MAIN OPERATION / PRINT OUT RESULTS WITHIN
    logger.info('Printing sequences out...')
    for seq_id, sequence in seqset.items():
        for chain_id, monomer in sequence.imer.items():
            # In non-individual mode every monomer of a sequence is dumped
            # to the same path, named after the sequence id only.
            if args.individual is True:
                stem = seq_id + '_' + chain_id
            else:
                stem = seq_id
            target = outpathgen(outdir, filename=stem + os.extsep + 'fasta')
            monomer.dump(target)

    # FINISH
    logger.info('Done' + os.linesep)
    return
# Script entry point: run main(), log the closing message and exit with
# status 0; on any ordinary exception report the traceback and exit with 1.
if __name__ == "__main__":
    import sys
    import traceback

    try:
        main()
        logger.info(ccl._ok())
        sys.exit(0)
    except Exception:
        # SystemExit derives from BaseException, not Exception, so the
        # sys.exit(0) above and argparse's own exits are never caught here;
        # no isinstance check is needed.
        msg = traceback.format_exc()
        if logger is not None:
            logger.critical(msg)
        else:
            # main() failed before it assigned the module-level logger;
            # fall back to stderr so the original traceback is still
            # reported instead of an AttributeError on None.
            sys.stderr.write(msg)
        sys.exit(1)