comparison: lexmapr2.py @ 20:0be9a7117ba5

description: "planemo upload"
author:      kkonganti
date:        Wed, 09 Nov 2022 09:05:28 -0500
parents:     91438d32ed58
children:    (none)
comparing 19:a2bf7a8b7bc9 with 20:0be9a7117ba5:

--- a/lexmapr2.py	19:a2bf7a8b7bc9
+++ b/lexmapr2.py	20:0be9a7117ba5
@@ -1,86 +1,123 @@
1 """Entry script""" 1 """Entry script"""
2 2
3 __version__ = '1.0.0' 3 __version__ = "1.1.0"
4 import argparse, datetime, json, logging, os, pandas, sys 4 import argparse, datetime, json, logging, os, pandas, sys
5 import lexmapr.pipeline, lexmapr.run_summary 5 import lexmapr.pipeline, lexmapr.run_summary
6 from lexmapr.definitions import arg_bins 6 from lexmapr.definitions import arg_bins
7 7
8 8
9 def valid_input(file_path): 9 def valid_input(file_path):
10 '''Exits if input file is invalid''' 10 """Exits if input file is invalid"""
11 _, file_ext = os.path.splitext(file_path) 11 _, file_ext = os.path.splitext(file_path)
12 if file_ext.lower() != '.csv' and file_ext.lower() != '.tsv': 12 if file_ext.lower() != ".csv" and file_ext.lower() != ".tsv":
13 sys.exit('Please supply a CSV or TSV input file with the correct file extension') 13 sys.exit("Please supply a CSV or TSV input file with the correct file extension")
14 if not os.path.exists(file_path): 14 if not os.path.exists(file_path):
15 sys.exit(f'Input file named \"{file_path}\" not found') 15 sys.exit(f'Input file named "{file_path}" not found')
16 return(file_path.strip()) 16 return file_path.strip()
17
17 18
18 def valid_json(file_path): 19 def valid_json(file_path):
19 '''Outputs read JSON file and exits if file is invalid''' 20 """Outputs read JSON file and exits if file is invalid"""
20 try: 21 try:
21 with open(file_path, 'r') as JSON_file: 22 with open(file_path, "r") as JSON_file:
22 try: 23 try:
23 return(json.load(JSON_file)) 24 return json.load(JSON_file)
24 except(json.decoder.JSONDecodeError): 25 except (json.decoder.JSONDecodeError):
25 sys.exit(f'User-defined bins not in readable JSON format') 26 sys.exit(f"User-defined bins not in readable JSON format")
26 except(FileNotFoundError): 27 except (FileNotFoundError):
27 sys.exit(f'File named \"{file_path}\" not found') 28 sys.exit(f'File named "{file_path}" not found')
29
28 30
29 def valid_list(list_str): 31 def valid_list(list_str):
30 '''Return list of user-defined ontologies''' 32 """Return list of user-defined ontologies"""
31 return([x.strip().upper() for x in list_str.split(',')]) 33 return [x.strip().upper() for x in list_str.split(",")]
34
32 35
 if __name__ == "__main__":
     # Parse arguments, initiate log file and start run
     arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
-    arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input)
-    arg_parser.add_argument('-o', '--output', metavar='\b',
-                            help=' output TSV file path; default is stdout')
-    arg_parser.add_argument('-a', '--no_ancestors', action='store_true',
-                            help='remove ancestral terms from output')
-    arg_parser.add_argument('-b', '--bin', action='store_true',
-                            help='classify samples into default bins')
-    arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list,
-                            help=' user-defined comma-separated ontology short names')
-    arg_parser.add_argument('-f', '--full', action='store_true', help='full output format')
-    arg_parser.add_argument('-g', '--graph', action='store_true',
-                            help='visualize summaries of mapping and binning')
-    arg_parser.add_argument('-j', '--graph_only', action='store_true',
-                            help='only perform visualization with LexMapr output')
-    arg_parser.add_argument('-r', '--remake_cache', action='store_true',
-                            help='remake cached resources')
-    arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json,
-                            help=' path to JSON file with user-defined bins')
-    arg_parser.add_argument('-v', '--version', action='version',
-                            version='%(prog)s '+__version__)
+    arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input)
+    arg_parser.add_argument(
+        "-o", "--output", metavar="\b", help=" output TSV file path; default is stdout"
+    )
+    arg_parser.add_argument(
+        "-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output"
+    )
+    arg_parser.add_argument(
+        "-b", "--bin", action="store_true", help="classify samples into default bins"
+    )
+    arg_parser.add_argument(
+        "-e",
+        "--embl_ontol",
+        metavar="\b",
+        type=valid_list,
+        help=" user-defined comma-separated ontology short names",
+    )
+    arg_parser.add_argument("-f", "--full", action="store_true", help="full output format")
+    arg_parser.add_argument(
+        "-g", "--graph", action="store_true", help="visualize summaries of mapping and binning"
+    )
+    arg_parser.add_argument(
+        "-j",
+        "--graph_only",
+        action="store_true",
+        help="only perform visualization with LexMapr output",
+    )
+    arg_parser.add_argument(
+        "-r", "--remake_cache", action="store_true", help="remake cached resources"
+    )
+    arg_parser.add_argument(
+        "-u",
+        "--user_bin",
+        metavar="\b",
+        type=valid_json,
+        help=" path to JSON file with user-defined bins",
+    )
+    arg_parser.add_argument(
+        "-w",
+        "--num_words",
+        metavar="\b",
+        default=3,
+        help=" number of word combinations to sample",
+    )
+    arg_parser.add_argument(
+        "-p",
+        "--cpus",
+        metavar="\b",
+        default=8,
+        help=" number of CPUs to try and parallelize permutations on",
+    )
+    arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)
 
     # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open()
     run_args = arg_parser.parse_args()
     if run_args.user_bin is not None:
         run_args.bin = True
         arg_bins = run_args.user_bin
 
-    logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG)
+    logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG)
 
     if run_args.graph_only:
         try:
-            mapping_results = pandas.read_csv(run_args.input, delimiter='\t')
+            mapping_results = pandas.read_csv(run_args.input, delimiter="\t")
         except:
-            sys.exit('Input file not readable or not in expected format')
-        needed_columns = ['Matched_Components','Match_Status (Macro Level)']+list(arg_bins.keys())
+            sys.exit("Input file not readable or not in expected format")
+        needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list(
+            arg_bins.keys()
+        )
         missing_columns = set(needed_columns).difference(set(mapping_results.columns))
         if missing_columns:
-            sys.exit(f'Missing column(s) {missing_columns} from input file')
+            sys.exit(f"Missing column(s) {missing_columns} from input file")
         t0 = datetime.datetime.now()
-        logging.info(f'Run start: {t0}')
-        logging.info('Graphing only')
-        print('\nGraphing only...')
+        logging.info(f"Run start: {t0}")
+        logging.info("Graphing only")
+        print("\nGraphing only...")
         lexmapr.run_summary.figure_folder()
         lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
         lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
-        print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n')
+        print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n")
     else:
-        logging.info(f'Run start: {datetime.datetime.now()}')
+        logging.info(f"Run start: {datetime.datetime.now()}")
         lexmapr.pipeline.run(run_args)
 
-    logging.info(f'Run end: {datetime.datetime.now()}\n')
+    logging.info(f"Run end: {datetime.datetime.now()}\n")
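
Apart from the mechanical quoting and line-wrapping changes, the functional changes in this revision are the version bump to 1.1.0 and the two new options, -w/--num_words and -p/--cpus. Neither add_argument() call sets type=int, so values supplied on the command line reach run_args as strings, while the untouched defaults (3 and 8) stay integers; lexmapr.pipeline.run() receives whichever form was produced. The pipeline itself is not part of this comparison, so the sketch below is only an illustration, under that assumption, of how the new values might be normalized and used to spread permutation work over several CPUs; run_permutations() and score_permutation() are hypothetical stand-ins, not LexMapr functions.

# Illustration only: lexmapr.pipeline is not shown in this changeset, so the
# function names and the sample phrase below are hypothetical.
import itertools
import multiprocessing


def score_permutation(words):
    # Stand-in for whatever per-permutation matching the real pipeline performs.
    return " ".join(words)


def run_permutations(run_args):
    # -w/-p arrive as str when given on the command line (no type=int is declared)
    # but as int when the defaults apply, so coerce before using them as numbers.
    num_words = int(run_args.num_words)
    cpus = int(run_args.cpus)

    tokens = "chicken breast with rib meat".lower().split()
    combos = list(itertools.permutations(tokens, min(num_words, len(tokens))))

    # Try to parallelize the permutation scoring over the requested number of CPUs.
    with multiprocessing.Pool(processes=cpus) as pool:
        return pool.map(score_permutation, combos)

Declaring type=int on the two new add_argument() calls would remove the str/int ambiguity at the parser level; as the changeset stands, any consumer has to tolerate both forms.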