Mercurial > repos > kkonganti > lexmapr2_from_cfsan
view lexmapr2.py @ 21:a86bcf3ea8a6
"planemo upload"
author | kkonganti |
---|---|
date | Wed, 09 Nov 2022 09:08:00 -0500 |
parents | 0be9a7117ba5 |
children |
line wrap: on
line source
"""Entry script""" __version__ = "1.1.0" import argparse, datetime, json, logging, os, pandas, sys import lexmapr.pipeline, lexmapr.run_summary from lexmapr.definitions import arg_bins def valid_input(file_path): """Exits if input file is invalid""" _, file_ext = os.path.splitext(file_path) if file_ext.lower() != ".csv" and file_ext.lower() != ".tsv": sys.exit("Please supply a CSV or TSV input file with the correct file extension") if not os.path.exists(file_path): sys.exit(f'Input file named "{file_path}" not found') return file_path.strip() def valid_json(file_path): """Outputs read JSON file and exits if file is invalid""" try: with open(file_path, "r") as JSON_file: try: return json.load(JSON_file) except (json.decoder.JSONDecodeError): sys.exit(f"User-defined bins not in readable JSON format") except (FileNotFoundError): sys.exit(f'File named "{file_path}" not found') def valid_list(list_str): """Return list of user-defined ontologies""" return [x.strip().upper() for x in list_str.split(",")] if __name__ == "__main__": # Parse arguments, initiate log file and start run arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input) arg_parser.add_argument( "-o", "--output", metavar="\b", help=" output TSV file path; default is stdout" ) arg_parser.add_argument( "-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output" ) arg_parser.add_argument( "-b", "--bin", action="store_true", help="classify samples into default bins" ) arg_parser.add_argument( "-e", "--embl_ontol", metavar="\b", type=valid_list, help=" user-defined comma-separated ontology short names", ) arg_parser.add_argument("-f", "--full", action="store_true", help="full output format") arg_parser.add_argument( "-g", "--graph", action="store_true", help="visualize summaries of mapping and binning" ) arg_parser.add_argument( "-j", "--graph_only", action="store_true", help="only perform visualization with LexMapr output", ) arg_parser.add_argument( "-r", "--remake_cache", action="store_true", help="remake cached resources" ) arg_parser.add_argument( "-u", "--user_bin", metavar="\b", type=valid_json, help=" path to JSON file with user-defined bins", ) arg_parser.add_argument( "-w", "--num_words", metavar="\b", default=3, help=" number of word combinations to sample", ) arg_parser.add_argument( "-p", "--cpus", metavar="\b", default=8, help=" number of CPUs to try and parallelize permuations on", ) arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__) # TODO: encoding argument addded to logging.basicConfig in Python 3.9; now defaults to open() run_args = arg_parser.parse_args() if run_args.user_bin is not None: run_args.bin = True arg_bins = run_args.user_bin logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG) if run_args.graph_only: try: mapping_results = pandas.read_csv(run_args.input, delimiter="\t") except: sys.exit("Input file not readable or not in expected format") needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list( arg_bins.keys() ) missing_columns = set(needed_columns).difference(set(mapping_results.columns)) if missing_columns: sys.exit(f"Missing column(s) {missing_columns} from input file") t0 = datetime.datetime.now() logging.info(f"Run start: {t0}") logging.info("Graphing only") print("\nGraphing only...") lexmapr.run_summary.figure_folder() lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys())) lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys())) print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n") else: logging.info(f"Run start: {datetime.datetime.now()}") lexmapr.pipeline.run(run_args) logging.info(f"Run end: {datetime.datetime.now()}\n")