kkonganti@0
|
1 """Entry script"""
|
kkonganti@0
|
2
|
kkonganti@0
|
3 __version__ = '1.0.0'
|
kkonganti@0
|
4 import argparse, datetime, json, logging, os, pandas, sys
|
kkonganti@0
|
5 import lexmapr.pipeline, lexmapr.run_summary
|
kkonganti@0
|
6 from lexmapr.definitions import arg_bins
|
kkonganti@0
|
7
|
kkonganti@0
|
8
|
kkonganti@0
|
def valid_input(file_path):
    '''Validate the input file path and return it without surrounding whitespace.

    The path is stripped before any check so that the value validated is the
    same value that is returned.

    Exits (SystemExit) if the extension is not .csv/.tsv (case-insensitive)
    or if nothing exists at the path.
    '''
    file_path = file_path.strip()
    _, file_ext = os.path.splitext(file_path)
    if file_ext.lower() not in ('.csv', '.tsv'):
        sys.exit('Please supply a CSV or TSV input file with the correct file extension')
    if not os.path.exists(file_path):
        sys.exit(f'Input file named \"{file_path}\" not found')
    return file_path
|
kkonganti@0
|
17
|
kkonganti@0
|
def valid_json(file_path):
    '''Read a JSON file of user-defined bins and return the parsed object.

    Exits (SystemExit) with a message if the file is not found, cannot be
    opened, is not decodable/parsable as UTF-8 JSON, or does not contain a
    JSON object at the top level.
    '''
    try:
        # JSON is exchanged as UTF-8; do not depend on the locale default
        with open(file_path, 'r', encoding='utf-8') as JSON_file:
            user_bins = json.load(JSON_file)
    except FileNotFoundError:
        sys.exit(f'File named \"{file_path}\" not found')
    except OSError:
        # e.g. the path is a directory or is not readable
        sys.exit(f'File named \"{file_path}\" could not be read')
    except (json.JSONDecodeError, UnicodeDecodeError):
        sys.exit('User-defined bins not in readable JSON format')
    # The entry script calls .keys() on the bins, so a list/string/number
    # would only fail later with a traceback; reject it here instead
    if not isinstance(user_bins, dict):
        sys.exit('User-defined bins must be a JSON object')
    return user_bins
|
kkonganti@0
|
28
|
kkonganti@0
|
def valid_list(list_str):
    '''Return the list of user-defined ontology short names.

    The comma-separated string is split, each entry is stripped and
    upper-cased, and empty entries (from doubled or trailing commas) are
    dropped so no blank ontology name is returned.
    '''
    cleaned = (entry.strip().upper() for entry in list_str.split(','))
    return [name for name in cleaned if name]
|
kkonganti@0
|
32
|
kkonganti@0
|
if __name__ == "__main__":
    # Parse arguments, initiate log file and start run
    arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input)
    arg_parser.add_argument('-o', '--output', metavar='\b',
                            help=' output TSV file path; default is stdout')
    arg_parser.add_argument('-a', '--no_ancestors', action='store_true',
                            help='remove ancestral terms from output')
    arg_parser.add_argument('-b', '--bin', action='store_true',
                            help='classify samples into default bins')
    arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list,
                            help=' user-defined comma-separated ontology short names')
    arg_parser.add_argument('-f', '--full', action='store_true', help='full output format')
    arg_parser.add_argument('-g', '--graph', action='store_true',
                            help='visualize summaries of mapping and binning')
    arg_parser.add_argument('-j', '--graph_only', action='store_true',
                            help='only perform visualization with LexMapr output')
    arg_parser.add_argument('-r', '--remake_cache', action='store_true',
                            help='remake cached resources')
    arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json,
                            help=' path to JSON file with user-defined bins')
    arg_parser.add_argument('-v', '--version', action='version',
                            version='%(prog)s '+__version__)

    # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open()
    run_args = arg_parser.parse_args()
    if run_args.user_bin is not None:
        # Supplying user-defined bins implies binning and replaces the
        # imported default bins for the rest of this script
        run_args.bin = True
        arg_bins = run_args.user_bin

    logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG)

    if run_args.graph_only:
        # Graph-only mode: the input is read as tab-delimited output of a previous run
        try:
            mapping_results = pandas.read_csv(run_args.input, delimiter='\t')
        except Exception:
            # Not a bare except: Ctrl-C and SystemExit must not be reported
            # as an unreadable input file
            sys.exit('Input file not readable or not in expected format')
        # Columns required here: the two match columns plus one column per bin
        needed_columns = ['Matched_Components','Match_Status (Macro Level)']+list(arg_bins.keys())
        missing_columns = set(needed_columns).difference(set(mapping_results.columns))
        if missing_columns:
            sys.exit(f'Missing column(s) {missing_columns} from input file')
        t0 = datetime.datetime.now()
        logging.info(f'Run start: {t0}')
        logging.info('Graphing only')
        print('\nGraphing only...')
        lexmapr.run_summary.figure_folder()
        lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
        lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
        print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n')
    else:
        logging.info(f'Run start: {datetime.datetime.now()}')
        lexmapr.pipeline.run(run_args)

    logging.info(f'Run end: {datetime.datetime.now()}\n')
|