Mercurial > repos > kkonganti > cfsan_lexmapr2
comparison lexmapr2.py @ 0:f5c39d0447be
"planemo upload"
author | kkonganti |
---|---|
date | Wed, 31 Aug 2022 14:32:07 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f5c39d0447be |
---|---|
1 """Entry script""" | |
2 | |
3 __version__ = '1.0.0' | |
4 import argparse, datetime, json, logging, os, pandas, sys | |
5 import lexmapr.pipeline, lexmapr.run_summary | |
6 from lexmapr.definitions import arg_bins | |
7 | |
8 | |
def valid_input(file_path):
    '''Validate the input file path given on the command line.

    Used as the argparse ``type`` callable for the positional ``input``
    argument. Surrounding whitespace is removed *before* any check so that
    the path that gets validated is the same path that gets returned.

    :param file_path: path to a CSV or TSV file
    :return: the whitespace-stripped path
    :raises SystemExit: if the extension is not .csv/.tsv (case-insensitive)
        or the path does not exist
    '''
    file_path = file_path.strip()
    _, file_ext = os.path.splitext(file_path)
    if file_ext.lower() not in ('.csv', '.tsv'):
        sys.exit('Please supply a CSV or TSV input file with the correct file extension')
    if not os.path.exists(file_path):
        sys.exit(f'Input file named \"{file_path}\" not found')
    return file_path
17 | |
def valid_json(file_path):
    '''Read a JSON file of user-defined bins and return its parsed content.

    Used as the argparse ``type`` callable for ``--user_bin``.

    :param file_path: path to the JSON file
    :return: the object decoded from the file
    :raises SystemExit: if the file does not exist or cannot be decoded as JSON
    '''
    try:
        # Binary mode lets the json module detect UTF-8/16/32 (with or without
        # a BOM) itself instead of depending on the platform's locale encoding.
        with open(file_path, 'rb') as json_file:
            return json.load(json_file)
    except FileNotFoundError:
        sys.exit(f'File named \"{file_path}\" not found')
    except ValueError:
        # Covers json.JSONDecodeError (malformed JSON) and UnicodeDecodeError
        # (bytes that are not valid text); both are ValueError subclasses.
        sys.exit('User-defined bins not in readable JSON format')
28 | |
def valid_list(list_str):
    '''Return the list of user-defined ontology short names.

    Used as the argparse ``type`` callable for ``--embl_ontol``. Each
    comma-separated entry is stripped of whitespace and upper-cased; blank
    entries (from doubled or trailing commas, or an empty string) are dropped
    so no empty ontology name is passed on.

    :param list_str: comma-separated ontology short names
    :return: list of upper-cased, non-empty names in their original order
    '''
    stripped_names = (name.strip() for name in list_str.split(','))
    return [name.upper() for name in stripped_names if name]
32 | |
if __name__ == "__main__":
    # Parse arguments, initiate log file and start run
    arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input)
    arg_parser.add_argument('-o', '--output', metavar='\b',
                            help=' output TSV file path; default is stdout')
    arg_parser.add_argument('-a', '--no_ancestors', action='store_true',
                            help='remove ancestral terms from output')
    arg_parser.add_argument('-b', '--bin', action='store_true',
                            help='classify samples into default bins')
    arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list,
                            help=' user-defined comma-separated ontology short names')
    arg_parser.add_argument('-f', '--full', action='store_true', help='full output format')
    arg_parser.add_argument('-g', '--graph', action='store_true',
                            help='visualize summaries of mapping and binning')
    arg_parser.add_argument('-j', '--graph_only', action='store_true',
                            help='only perform visualization with LexMapr output')
    arg_parser.add_argument('-r', '--remake_cache', action='store_true',
                            help='remake cached resources')
    arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json,
                            help=' path to JSON file with user-defined bins')
    arg_parser.add_argument('-v', '--version', action='version',
                            version='%(prog)s '+__version__)

    # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open()
    run_args = arg_parser.parse_args()
    if run_args.user_bin is not None:
        # Supplying user-defined bins implies binning; the parsed JSON replaces
        # the default bins imported from lexmapr.definitions in this module.
        run_args.bin = True
        arg_bins = run_args.user_bin

    logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG)

    if run_args.graph_only:
        # Graph-only mode: the input is read as tab-delimited text and must
        # already contain the match columns plus one column per bin.
        try:
            mapping_results = pandas.read_csv(run_args.input, delimiter='\t')
        except Exception:
            # Narrowed from a bare except so Ctrl-C (KeyboardInterrupt) and
            # SystemExit are not reported as an unreadable input file.
            sys.exit('Input file not readable or not in expected format')
        needed_columns = ['Matched_Components','Match_Status (Macro Level)']+list(arg_bins.keys())
        missing_columns = set(needed_columns).difference(set(mapping_results.columns))
        if missing_columns:
            sys.exit(f'Missing column(s) {missing_columns} from input file')
        t0 = datetime.datetime.now()
        logging.info(f'Run start: {t0}')
        logging.info('Graphing only')
        print('\nGraphing only...')
        lexmapr.run_summary.figure_folder()
        lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
        lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
        print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n')
    else:
        logging.info(f'Run start: {datetime.datetime.now()}')
        lexmapr.pipeline.run(run_args)

    logging.info(f'Run end: {datetime.datetime.now()}\n')