Mercurial > repos > kkonganti > lexmapr2_from_cfsan
comparison lexmapr2.py @ 20:0be9a7117ba5
"planemo upload"
author | kkonganti |
---|---|
date | Wed, 09 Nov 2022 09:05:28 -0500 |
parents | 91438d32ed58 |
children |
comparison
equal
deleted
inserted
replaced
19:a2bf7a8b7bc9 | 20:0be9a7117ba5 |
---|---|
1 """Entry script""" | 1 """Entry script""" |
2 | 2 |
3 __version__ = '1.0.0' | 3 __version__ = "1.1.0" |
4 import argparse, datetime, json, logging, os, pandas, sys | 4 import argparse, datetime, json, logging, os, pandas, sys |
5 import lexmapr.pipeline, lexmapr.run_summary | 5 import lexmapr.pipeline, lexmapr.run_summary |
6 from lexmapr.definitions import arg_bins | 6 from lexmapr.definitions import arg_bins |
7 | 7 |
8 | 8 |
9 def valid_input(file_path): | 9 def valid_input(file_path): |
10 '''Exits if input file is invalid''' | 10 """Exits if input file is invalid""" |
11 _, file_ext = os.path.splitext(file_path) | 11 _, file_ext = os.path.splitext(file_path) |
12 if file_ext.lower() != '.csv' and file_ext.lower() != '.tsv': | 12 if file_ext.lower() != ".csv" and file_ext.lower() != ".tsv": |
13 sys.exit('Please supply a CSV or TSV input file with the correct file extension') | 13 sys.exit("Please supply a CSV or TSV input file with the correct file extension") |
14 if not os.path.exists(file_path): | 14 if not os.path.exists(file_path): |
15 sys.exit(f'Input file named \"{file_path}\" not found') | 15 sys.exit(f'Input file named "{file_path}" not found') |
16 return(file_path.strip()) | 16 return file_path.strip() |
17 | |
17 | 18 |
18 def valid_json(file_path): | 19 def valid_json(file_path): |
19 '''Outputs read JSON file and exits if file is invalid''' | 20 """Outputs read JSON file and exits if file is invalid""" |
20 try: | 21 try: |
21 with open(file_path, 'r') as JSON_file: | 22 with open(file_path, "r") as JSON_file: |
22 try: | 23 try: |
23 return(json.load(JSON_file)) | 24 return json.load(JSON_file) |
24 except(json.decoder.JSONDecodeError): | 25 except (json.decoder.JSONDecodeError): |
25 sys.exit(f'User-defined bins not in readable JSON format') | 26 sys.exit(f"User-defined bins not in readable JSON format") |
26 except(FileNotFoundError): | 27 except (FileNotFoundError): |
27 sys.exit(f'File named \"{file_path}\" not found') | 28 sys.exit(f'File named "{file_path}" not found') |
29 | |
28 | 30 |
29 def valid_list(list_str): | 31 def valid_list(list_str): |
30 '''Return list of user-defined ontologies''' | 32 """Return list of user-defined ontologies""" |
31 return([x.strip().upper() for x in list_str.split(',')]) | 33 return [x.strip().upper() for x in list_str.split(",")] |
34 | |
32 | 35 |
33 if __name__ == "__main__": | 36 if __name__ == "__main__": |
34 # Parse arguments, initiate log file and start run | 37 # Parse arguments, initiate log file and start run |
35 arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) | 38 arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) |
36 arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input) | 39 arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input) |
37 arg_parser.add_argument('-o', '--output', metavar='\b', | 40 arg_parser.add_argument( |
38 help=' output TSV file path; default is stdout') | 41 "-o", "--output", metavar="\b", help=" output TSV file path; default is stdout" |
39 arg_parser.add_argument('-a', '--no_ancestors', action='store_true', | 42 ) |
40 help='remove ancestral terms from output') | 43 arg_parser.add_argument( |
41 arg_parser.add_argument('-b', '--bin', action='store_true', | 44 "-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output" |
42 help='classify samples into default bins') | 45 ) |
43 arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list, | 46 arg_parser.add_argument( |
44 help=' user-defined comma-separated ontology short names') | 47 "-b", "--bin", action="store_true", help="classify samples into default bins" |
45 arg_parser.add_argument('-f', '--full', action='store_true', help='full output format') | 48 ) |
46 arg_parser.add_argument('-g', '--graph', action='store_true', | 49 arg_parser.add_argument( |
47 help='visualize summaries of mapping and binning') | 50 "-e", |
48 arg_parser.add_argument('-j', '--graph_only', action='store_true', | 51 "--embl_ontol", |
49 help='only perform visualization with LexMapr output') | 52 metavar="\b", |
50 arg_parser.add_argument('-r', '--remake_cache', action='store_true', | 53 type=valid_list, |
51 help='remake cached resources') | 54 help=" user-defined comma-separated ontology short names", |
52 arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json, | 55 ) |
53 help=' path to JSON file with user-defined bins') | 56 arg_parser.add_argument("-f", "--full", action="store_true", help="full output format") |
54 arg_parser.add_argument('-v', '--version', action='version', | 57 arg_parser.add_argument( |
55 version='%(prog)s '+__version__) | 58 "-g", "--graph", action="store_true", help="visualize summaries of mapping and binning" |
59 ) | |
60 arg_parser.add_argument( | |
61 "-j", | |
62 "--graph_only", | |
63 action="store_true", | |
64 help="only perform visualization with LexMapr output", | |
65 ) | |
66 arg_parser.add_argument( | |
67 "-r", "--remake_cache", action="store_true", help="remake cached resources" | |
68 ) | |
69 arg_parser.add_argument( | |
70 "-u", | |
71 "--user_bin", | |
72 metavar="\b", | |
73 type=valid_json, | |
74 help=" path to JSON file with user-defined bins", | |
75 ) | |
76 arg_parser.add_argument( | |
77 "-w", | |
78 "--num_words", | |
79 metavar="\b", | |
80 default=3, | |
81 help=" number of word combinations to sample", | |
82 ) | |
83 arg_parser.add_argument( | |
84 "-p", | |
85 "--cpus", | |
86 metavar="\b", | |
87 default=8, | |
88 help=" number of CPUs to try and parallelize permutations on", | |
89 ) | |
90 arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__) | |
56 | 91 |
57 # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open() | 92 # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open() |
58 run_args = arg_parser.parse_args() | 93 run_args = arg_parser.parse_args() |
59 if run_args.user_bin is not None: | 94 if run_args.user_bin is not None: |
60 run_args.bin = True | 95 run_args.bin = True |
61 arg_bins = run_args.user_bin | 96 arg_bins = run_args.user_bin |
62 | 97 |
63 logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG) | 98 logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG) |
64 | 99 |
65 if run_args.graph_only: | 100 if run_args.graph_only: |
66 try: | 101 try: |
67 mapping_results = pandas.read_csv(run_args.input, delimiter='\t') | 102 mapping_results = pandas.read_csv(run_args.input, delimiter="\t") |
68 except: | 103 except: |
69 sys.exit('Input file not readable or not in expected format') | 104 sys.exit("Input file not readable or not in expected format") |
70 needed_columns = ['Matched_Components','Match_Status (Macro Level)']+list(arg_bins.keys()) | 105 needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list( |
106 arg_bins.keys() | |
107 ) | |
71 missing_columns = set(needed_columns).difference(set(mapping_results.columns)) | 108 missing_columns = set(needed_columns).difference(set(mapping_results.columns)) |
72 if missing_columns: | 109 if missing_columns: |
73 sys.exit(f'Missing column(s) {missing_columns} from input file') | 110 sys.exit(f"Missing column(s) {missing_columns} from input file") |
74 t0 = datetime.datetime.now() | 111 t0 = datetime.datetime.now() |
75 logging.info(f'Run start: {t0}') | 112 logging.info(f"Run start: {t0}") |
76 logging.info('Graphing only') | 113 logging.info("Graphing only") |
77 print('\nGraphing only...') | 114 print("\nGraphing only...") |
78 lexmapr.run_summary.figure_folder() | 115 lexmapr.run_summary.figure_folder() |
79 lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys())) | 116 lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys())) |
80 lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys())) | 117 lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys())) |
81 print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n') | 118 print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n") |
82 else: | 119 else: |
83 logging.info(f'Run start: {datetime.datetime.now()}') | 120 logging.info(f"Run start: {datetime.datetime.now()}") |
84 lexmapr.pipeline.run(run_args) | 121 lexmapr.pipeline.run(run_args) |
85 | 122 |
86 logging.info(f'Run end: {datetime.datetime.now()}\n') | 123 logging.info(f"Run end: {datetime.datetime.now()}\n") |