kkonganti@0
|
1 """Entry script"""
|
kkonganti@0
|
2
|
kkonganti@20
|
3 __version__ = "1.1.0"
|
kkonganti@0
|
4 import argparse, datetime, json, logging, os, pandas, sys
|
kkonganti@0
|
5 import lexmapr.pipeline, lexmapr.run_summary
|
kkonganti@0
|
6 from lexmapr.definitions import arg_bins
|
kkonganti@0
|
7
|
kkonganti@0
|
8
|
kkonganti@0
|
def valid_input(file_path):
    """Validate the input file path and return it without surrounding whitespace.

    Used as the ``type=`` callable of the positional ``input`` argument.

    Args:
        file_path: Path to the input file as given on the command line.

    Returns:
        The path with leading and trailing whitespace removed.

    Raises:
        SystemExit: If the extension is not ``.csv`` or ``.tsv``
            (case-insensitive), or if the path is not an existing file.
    """
    # Strip first so the path that gets validated is the path that is returned.
    file_path = file_path.strip()
    _, file_ext = os.path.splitext(file_path)
    if file_ext.lower() not in (".csv", ".tsv"):
        sys.exit("Please supply a CSV or TSV input file with the correct file extension")
    # isfile() rather than exists(): a directory named like "x.csv" is not usable input.
    if not os.path.isfile(file_path):
        sys.exit(f'Input file named "{file_path}" not found')
    return file_path
|
kkonganti@20
|
17
|
kkonganti@0
|
18
|
kkonganti@0
|
def valid_json(file_path):
    """Read a JSON file and return its parsed content.

    Used as the ``type=`` callable of the ``--user_bin`` argument.

    Args:
        file_path: Path to the JSON file.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        SystemExit: If the file does not exist, or if its content cannot be
            decoded as UTF-8 or parsed as JSON.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(file_path, "r", encoding="utf-8") as json_file:
            return json.load(json_file)
    except FileNotFoundError:
        sys.exit(f'File named "{file_path}" not found')
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Undecodable bytes are reported the same way as malformed JSON
        # instead of surfacing as a traceback.
        sys.exit("User-defined bins not in readable JSON format")
|
kkonganti@20
|
29
|
kkonganti@0
|
30
|
kkonganti@0
|
def valid_list(list_str):
    """Split a comma-separated string into trimmed, upper-cased names.

    Used as the ``type=`` callable of the ``--embl_ontol`` argument.

    Args:
        list_str: Comma-separated ontology short names.

    Returns:
        A list with one entry per comma-separated field, each stripped of
        surrounding whitespace and converted to upper case.
    """
    ontology_names = []
    for raw_name in list_str.split(","):
        trimmed = raw_name.strip()
        ontology_names.append(trimmed.upper())
    return ontology_names
|
kkonganti@20
|
34
|
kkonganti@0
|
35
|
kkonganti@0
|
if __name__ == "__main__":
    # Parse arguments, initiate log file and start run
    arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input)
    arg_parser.add_argument(
        "-o", "--output", metavar="\b", help=" output TSV file path; default is stdout"
    )
    arg_parser.add_argument(
        "-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output"
    )
    arg_parser.add_argument(
        "-b", "--bin", action="store_true", help="classify samples into default bins"
    )
    arg_parser.add_argument(
        "-e",
        "--embl_ontol",
        metavar="\b",
        type=valid_list,
        help=" user-defined comma-separated ontology short names",
    )
    arg_parser.add_argument("-f", "--full", action="store_true", help="full output format")
    arg_parser.add_argument(
        "-g", "--graph", action="store_true", help="visualize summaries of mapping and binning"
    )
    arg_parser.add_argument(
        "-j",
        "--graph_only",
        action="store_true",
        help="only perform visualization with LexMapr output",
    )
    arg_parser.add_argument(
        "-r", "--remake_cache", action="store_true", help="remake cached resources"
    )
    arg_parser.add_argument(
        "-u",
        "--user_bin",
        metavar="\b",
        type=valid_json,
        help=" path to JSON file with user-defined bins",
    )
    # type=int keeps user-supplied values the same type as the integer
    # defaults; without it, command-line values would arrive as strings.
    arg_parser.add_argument(
        "-w",
        "--num_words",
        metavar="\b",
        type=int,
        default=3,
        help=" number of word combinations to sample",
    )
    arg_parser.add_argument(
        "-p",
        "--cpus",
        metavar="\b",
        type=int,
        default=8,
        help=" number of CPUs to try and parallelize permuations on",
    )
    arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)

    # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open()
    run_args = arg_parser.parse_args()
    if run_args.user_bin is not None:
        # Supplying user-defined bins implies binning.
        run_args.bin = True
        # Rebinds the name imported from lexmapr.definitions in this script
        # only; the graph-only branch below reads it.
        # NOTE(review): lexmapr.pipeline.run receives run_args, not this name --
        # confirm it picks up user bins via run_args.user_bin.
        arg_bins = run_args.user_bin

    logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG)

    if run_args.graph_only:
        # Graph-only mode: the input is read as tab-delimited LexMapr output.
        try:
            mapping_results = pandas.read_csv(run_args.input, delimiter="\t")
        except Exception:
            # Exception (not a bare except) so Ctrl-C and SystemExit propagate.
            sys.exit("Input file not readable or not in expected format")
        needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list(
            arg_bins.keys()
        )
        missing_columns = set(needed_columns).difference(set(mapping_results.columns))
        if missing_columns:
            sys.exit(f"Missing column(s) {missing_columns} from input file")
        t0 = datetime.datetime.now()
        logging.info(f"Run start: {t0}")
        logging.info("Graphing only")
        print("\nGraphing only...")
        lexmapr.run_summary.figure_folder()
        lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
        lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
        print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n")
    else:
        logging.info(f"Run start: {datetime.datetime.now()}")
        lexmapr.pipeline.run(run_args)

    logging.info(f"Run end: {datetime.datetime.now()}\n")
|