annotate lexmapr2.py @ 23:e4215818a909 tip

"planemo upload"
author kkonganti
date Wed, 09 Nov 2022 10:29:25 -0500
parents 0be9a7117ba5
children
rev   line source
kkonganti@0 1 """Entry script"""
kkonganti@0 2
kkonganti@20 3 __version__ = "1.1.0"
kkonganti@0 4 import argparse, datetime, json, logging, os, pandas, sys
kkonganti@0 5 import lexmapr.pipeline, lexmapr.run_summary
kkonganti@0 6 from lexmapr.definitions import arg_bins
kkonganti@0 7
kkonganti@0 8
def valid_input(file_path):
    """Validate the input file path and return it without surrounding whitespace.

    Used as the argparse ``type=`` callable for the positional input argument.

    Args:
        file_path: Path to the input file as supplied on the command line.

    Returns:
        The path with leading and trailing whitespace removed.

    Raises:
        SystemExit: If the extension is not ``.csv`` or ``.tsv`` (compared
            case-insensitively) or if no file exists at the path.
    """
    # Strip before validating so that the checks run against the very same
    # path that is handed back to the caller.
    file_path = file_path.strip()
    _, file_ext = os.path.splitext(file_path)
    if file_ext.lower() not in (".csv", ".tsv"):
        sys.exit("Please supply a CSV or TSV input file with the correct file extension")
    if not os.path.exists(file_path):
        sys.exit(f'Input file named "{file_path}" not found')
    return file_path
kkonganti@20 17
kkonganti@0 18
def valid_json(file_path):
    """Read a JSON file and return its parsed content.

    Used as the argparse ``type=`` callable for the user-defined bins option.

    Args:
        file_path: Path to the JSON file.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        SystemExit: If the file does not exist, or if its content cannot be
            decoded as UTF-8 text or parsed as JSON.
    """
    try:
        # Decode as UTF-8 explicitly rather than relying on the platform's
        # locale encoding, which may not be UTF-8.
        with open(file_path, "r", encoding="utf-8") as json_file:
            return json.load(json_file)
    except FileNotFoundError:
        sys.exit(f'File named "{file_path}" not found')
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Undecodable bytes are reported the same way as malformed JSON.
        sys.exit("User-defined bins not in readable JSON format")
kkonganti@20 29
kkonganti@0 30
def valid_list(list_str):
    """Split a comma-separated string into trimmed, upper-cased entries.

    Every comma-delimited field is kept, including empty ones, so an empty
    input string yields a list holding one empty string.
    """
    raw_fields = list_str.split(",")
    trimmed_fields = map(str.strip, raw_fields)
    return list(map(str.upper, trimmed_fields))
kkonganti@20 34
kkonganti@0 35
if __name__ == "__main__":
    # Command-line entry point: parse arguments, initiate the log file and
    # either graph existing results (--graph_only) or run the pipeline.
    arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input)
    # NOTE(review): metavar="\b" (a backspace character) on the value-taking
    # options presumably hides the placeholder in --help output; confirm.
    arg_parser.add_argument(
        "-o", "--output", metavar="\b", help=" output TSV file path; default is stdout"
    )
    arg_parser.add_argument(
        "-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output"
    )
    arg_parser.add_argument(
        "-b", "--bin", action="store_true", help="classify samples into default bins"
    )
    arg_parser.add_argument(
        "-e",
        "--embl_ontol",
        metavar="\b",
        type=valid_list,
        help=" user-defined comma-separated ontology short names",
    )
    arg_parser.add_argument("-f", "--full", action="store_true", help="full output format")
    arg_parser.add_argument(
        "-g", "--graph", action="store_true", help="visualize summaries of mapping and binning"
    )
    arg_parser.add_argument(
        "-j",
        "--graph_only",
        action="store_true",
        help="only perform visualization with LexMapr output",
    )
    arg_parser.add_argument(
        "-r", "--remake_cache", action="store_true", help="remake cached resources"
    )
    arg_parser.add_argument(
        "-u",
        "--user_bin",
        metavar="\b",
        type=valid_json,
        help=" path to JSON file with user-defined bins",
    )
    # type=int keeps user-supplied values the same type as the integer
    # defaults; without it argparse would hand back strings.
    arg_parser.add_argument(
        "-w",
        "--num_words",
        metavar="\b",
        type=int,
        default=3,
        help=" number of word combinations to sample",
    )
    arg_parser.add_argument(
        "-p",
        "--cpus",
        metavar="\b",
        type=int,
        default=8,
        help=" number of CPUs to try and parallelize permuations on",
    )
    arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)

    # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open()
    run_args = arg_parser.parse_args()
    if run_args.user_bin is not None:
        # Supplying user-defined bins implies binning, and those bins replace
        # the imported defaults for the rest of this script.
        run_args.bin = True
        arg_bins = run_args.user_bin

    logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG)

    if run_args.graph_only:
        # The input is treated as tab-delimited output to be summarized.
        try:
            mapping_results = pandas.read_csv(run_args.input, delimiter="\t")
        except Exception:
            # Any read/parse failure gets the same friendly message, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            sys.exit("Input file not readable or not in expected format")
        needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list(
            arg_bins.keys()
        )
        missing_columns = set(needed_columns).difference(set(mapping_results.columns))
        if missing_columns:
            sys.exit(f"Missing column(s) {missing_columns} from input file")
        t0 = datetime.datetime.now()
        logging.info(f"Run start: {t0}")
        logging.info("Graphing only")
        print("\nGraphing only...")
        lexmapr.run_summary.figure_folder()
        lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
        lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
        print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n")
    else:
        logging.info(f"Run start: {datetime.datetime.now()}")
        lexmapr.pipeline.run(run_args)

    logging.info(f"Run end: {datetime.datetime.now()}\n")