diff lexmapr2.py @ 0:f5c39d0447be

"planemo upload"
author kkonganti
date Wed, 31 Aug 2022 14:32:07 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lexmapr2.py	Wed Aug 31 14:32:07 2022 -0400
@@ -0,0 +1,86 @@
+"""Entry script"""
+
+__version__ = '1.0.0'
+import argparse, datetime, json, logging, os, pandas, sys
+import lexmapr.pipeline, lexmapr.run_summary
+from lexmapr.definitions import arg_bins
+
+
+def valid_input(file_path):
+    '''Return the stripped input path; exit unless it ends in .csv/.tsv and exists'''
+    file_path = file_path.strip()  # validate exactly the path that is returned
+    if os.path.splitext(file_path)[1].lower() not in ('.csv', '.tsv'):
+        sys.exit('Please supply a CSV or TSV input file with the correct file extension')
+    if not os.path.exists(file_path):
+        sys.exit(f'Input file named \"{file_path}\" not found')
+    return file_path
+
+def valid_json(file_path):
+    '''Return the parsed contents of a JSON file; exit if it is missing or malformed'''
+    # One flat try: the handlers run after the with-block has already closed the file
+    try:
+        with open(file_path, 'r') as bins_file:
+            return json.load(bins_file)
+    except json.decoder.JSONDecodeError:
+        sys.exit('User-defined bins not in readable JSON format')
+    except FileNotFoundError:
+        sys.exit(f'File named \"{file_path}\" not found')
+
+def valid_list(list_str):
+    '''Return upper-cased, stripped names from a comma-separated string, skipping blank entries'''
+    return [name.strip().upper() for name in list_str.split(',') if name.strip()]
+
+if __name__ == "__main__":
+    # Parse arguments, initiate log file and start run
+    arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+    arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input)
+    arg_parser.add_argument('-o', '--output', metavar='\b',
+                            help='    output TSV file path; default is stdout')
+    arg_parser.add_argument('-a', '--no_ancestors', action='store_true',
+                            help='remove ancestral terms from output')
+    arg_parser.add_argument('-b', '--bin', action='store_true',
+                            help='classify samples into default bins')
+    arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list,
+                            help='    user-defined comma-separated ontology short names')
+    arg_parser.add_argument('-f', '--full', action='store_true', help='full output format')
+    arg_parser.add_argument('-g', '--graph', action='store_true',
+                            help='visualize summaries of mapping and binning')
+    arg_parser.add_argument('-j', '--graph_only', action='store_true',
+                            help='only perform visualization with LexMapr output')
+    arg_parser.add_argument('-r', '--remake_cache', action='store_true',
+                            help='remake cached resources')
+    arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json,
+                            help='    path to JSON file with user-defined bins')
+    arg_parser.add_argument('-v', '--version', action='version',
+                            version='%(prog)s '+__version__)
+
+    run_args = arg_parser.parse_args()
+    if run_args.user_bin is not None:  # user bins turn binning on and replace the imported arg_bins
+        run_args.bin = True
+        arg_bins = run_args.user_bin
+
+    # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open()
+    logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG)
+
+    if run_args.graph_only:  # input is read as tab-delimited output from an earlier run
+        try:
+            mapping_results = pandas.read_csv(run_args.input, delimiter='\t')
+        except Exception:  # not a bare except: Ctrl-C/SystemExit must not be reported as bad input
+            sys.exit('Input file not readable or not in expected format')
+        needed_columns = ['Matched_Components','Match_Status (Macro Level)']+list(arg_bins.keys())
+        missing_columns = set(needed_columns).difference(set(mapping_results.columns))
+        if missing_columns:  # stop before reporting/graphing if any required column is absent
+            sys.exit(f'Missing column(s) {missing_columns} from input file')
+        t0 = datetime.datetime.now()
+        logging.info(f'Run start: {t0}')
+        logging.info('Graphing only')
+        print('\nGraphing only...')
+        lexmapr.run_summary.figure_folder()
+        lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
+        lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
+        print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n')
+    else:  # normal run: hand the parsed arguments to the pipeline
+        logging.info(f'Run start: {datetime.datetime.now()}')
+        lexmapr.pipeline.run(run_args)
+
+    logging.info(f'Run end: {datetime.datetime.now()}\n')