"""Entry script"""

__version__ = "1.1.0"

import argparse
import datetime
import json
import logging
import os
import sys

import pandas

import lexmapr.pipeline
import lexmapr.run_summary
from lexmapr.definitions import arg_bins


def valid_input(file_path):
    """Return the stripped input path; exit if it is not an existing CSV/TSV file.

    Used as an argparse ``type`` callable, so a SystemExit here surfaces as a
    clean CLI error.
    """
    # Strip first so the existence check and the returned value agree
    # (previously the raw path was checked but the stripped path returned).
    file_path = file_path.strip()
    _, file_ext = os.path.splitext(file_path)
    if file_ext.lower() not in (".csv", ".tsv"):
        sys.exit("Please supply a CSV or TSV input file with the correct file extension")
    if not os.path.exists(file_path):
        sys.exit(f'Input file named "{file_path}" not found')
    return file_path


def valid_json(file_path):
    """Return the parsed contents of a JSON file; exit if missing or malformed."""
    try:
        with open(file_path, "r") as JSON_file:
            try:
                return json.load(JSON_file)
            except json.decoder.JSONDecodeError:
                sys.exit("User-defined bins not in readable JSON format")
    except FileNotFoundError:
        sys.exit(f'File named "{file_path}" not found')


def valid_list(list_str):
    """Return a list of upper-cased ontology short names from a comma-separated string."""
    return [x.strip().upper() for x in list_str.split(",")]


if __name__ == "__main__":
    # Parse arguments, initiate log file and start run
    arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input)
    arg_parser.add_argument(
        "-o", "--output", metavar="\b", help=" output TSV file path; default is stdout"
    )
    arg_parser.add_argument(
        "-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output"
    )
    arg_parser.add_argument(
        "-b", "--bin", action="store_true", help="classify samples into default bins"
    )
    arg_parser.add_argument(
        "-e",
        "--embl_ontol",
        metavar="\b",
        type=valid_list,
        help=" user-defined comma-separated ontology short names",
    )
    arg_parser.add_argument("-f", "--full", action="store_true", help="full output format")
    arg_parser.add_argument(
        "-g", "--graph", action="store_true", help="visualize summaries of mapping and binning"
    )
    arg_parser.add_argument(
        "-j",
        "--graph_only",
        action="store_true",
        help="only perform visualization with LexMapr output",
    )
    arg_parser.add_argument(
        "-r", "--remake_cache", action="store_true", help="remake cached resources"
    )
    arg_parser.add_argument(
        "-u",
        "--user_bin",
        metavar="\b",
        type=valid_json,
        help=" path to JSON file with user-defined bins",
    )
    arg_parser.add_argument(
        "-w",
        "--num_words",
        metavar="\b",
        type=int,  # without an explicit type, CLI values arrived as str while the default was int
        default=3,
        help=" number of word combinations to sample",
    )
    arg_parser.add_argument(
        "-p",
        "--cpus",
        metavar="\b",
        type=int,  # ensure downstream parallelization code always sees an int
        default=8,
        help=" number of CPUs to try and parallelize permutations on",
    )
    arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)

    # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open()
    run_args = arg_parser.parse_args()
    if run_args.user_bin is not None:
        # A user-supplied bin file implies binning; replace the default bins.
        run_args.bin = True
        arg_bins = run_args.user_bin

    logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG)

    if run_args.graph_only:
        # Graph-only mode: input must already be LexMapr TSV output.
        try:
            mapping_results = pandas.read_csv(run_args.input, delimiter="\t")
        except Exception:  # narrowed from a bare except: do not swallow SystemExit/KeyboardInterrupt
            sys.exit("Input file not readable or not in expected format")
        needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list(
            arg_bins.keys()
        )
        missing_columns = set(needed_columns).difference(set(mapping_results.columns))
        if missing_columns:
            sys.exit(f"Missing column(s) {missing_columns} from input file")
        t0 = datetime.datetime.now()
        logging.info(f"Run start: {t0}")
        logging.info("Graphing only")
        print("\nGraphing only...")
        lexmapr.run_summary.figure_folder()
        lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
        lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
        print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n")
    else:
        logging.info(f"Run start: {datetime.datetime.now()}")
        lexmapr.pipeline.run(run_args)

    logging.info(f"Run end: {datetime.datetime.now()}\n")