Mercurial repository: kkonganti / lexmapr2_from_cfsan
diff lexmapr2.py @ 20:0be9a7117ba5
"planemo upload"
author      kkonganti
date        Wed, 09 Nov 2022 09:05:28 -0500
parents     91438d32ed58
children    (none)
--- a/lexmapr2.py	Thu Sep 15 14:50:34 2022 -0400
+++ b/lexmapr2.py	Wed Nov 09 09:05:28 2022 -0500
@@ -1,58 +1,93 @@
 """Entry script"""
-__version__ = '1.0.0'
+__version__ = "1.1.0"
 
 import argparse, datetime, json, logging, os, pandas, sys
 import lexmapr.pipeline, lexmapr.run_summary
 from lexmapr.definitions import arg_bins
 
 
 def valid_input(file_path):
-    '''Exits if input file is invalid'''
+    """Exits if input file is invalid"""
     _, file_ext = os.path.splitext(file_path)
-    if file_ext.lower() != '.csv' and file_ext.lower() != '.tsv':
-        sys.exit('Please supply a CSV or TSV input file with the correct file extension')
+    if file_ext.lower() != ".csv" and file_ext.lower() != ".tsv":
+        sys.exit("Please supply a CSV or TSV input file with the correct file extension")
     if not os.path.exists(file_path):
-        sys.exit(f'Input file named \"{file_path}\" not found')
-    return(file_path.strip())
+        sys.exit(f'Input file named "{file_path}" not found')
+    return file_path.strip()
+
 
 def valid_json(file_path):
-    '''Outputs read JSON file and exits if file is invalid'''
+    """Outputs read JSON file and exits if file is invalid"""
     try:
-        with open(file_path, 'r') as JSON_file:
+        with open(file_path, "r") as JSON_file:
             try:
-                return(json.load(JSON_file))
-            except(json.decoder.JSONDecodeError):
-                sys.exit(f'User-defined bins not in readable JSON format')
-    except(FileNotFoundError):
-        sys.exit(f'File named \"{file_path}\" not found')
+                return json.load(JSON_file)
+            except (json.decoder.JSONDecodeError):
+                sys.exit(f"User-defined bins not in readable JSON format")
+    except (FileNotFoundError):
+        sys.exit(f'File named "{file_path}" not found')
+
 
 def valid_list(list_str):
-    '''Return list of user-defined ontologies'''
-    return([x.strip().upper() for x in list_str.split(',')])
+    """Return list of user-defined ontologies"""
+    return [x.strip().upper() for x in list_str.split(",")]
+
 
 if __name__ == "__main__":
     # Parse arguments, initiate log file and start run
     arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
-    arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input)
-    arg_parser.add_argument('-o', '--output', metavar='\b',
-                            help=' output TSV file path; default is stdout')
-    arg_parser.add_argument('-a', '--no_ancestors', action='store_true',
-                            help='remove ancestral terms from output')
-    arg_parser.add_argument('-b', '--bin', action='store_true',
-                            help='classify samples into default bins')
-    arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list,
-                            help=' user-defined comma-separated ontology short names')
-    arg_parser.add_argument('-f', '--full', action='store_true', help='full output format')
-    arg_parser.add_argument('-g', '--graph', action='store_true',
-                            help='visualize summaries of mapping and binning')
-    arg_parser.add_argument('-j', '--graph_only', action='store_true',
-                            help='only perform visualization with LexMapr output')
-    arg_parser.add_argument('-r', '--remake_cache', action='store_true',
-                            help='remake cached resources')
-    arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json,
-                            help=' path to JSON file with user-defined bins')
-    arg_parser.add_argument('-v', '--version', action='version',
-                            version='%(prog)s '+__version__)
+    arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input)
+    arg_parser.add_argument(
+        "-o", "--output", metavar="\b", help=" output TSV file path; default is stdout"
+    )
+    arg_parser.add_argument(
+        "-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output"
+    )
+    arg_parser.add_argument(
+        "-b", "--bin", action="store_true", help="classify samples into default bins"
+    )
+    arg_parser.add_argument(
+        "-e",
+        "--embl_ontol",
+        metavar="\b",
+        type=valid_list,
+        help=" user-defined comma-separated ontology short names",
+    )
+    arg_parser.add_argument("-f", "--full", action="store_true", help="full output format")
+    arg_parser.add_argument(
+        "-g", "--graph", action="store_true", help="visualize summaries of mapping and binning"
+    )
+    arg_parser.add_argument(
+        "-j",
+        "--graph_only",
+        action="store_true",
+        help="only perform visualization with LexMapr output",
+    )
+    arg_parser.add_argument(
+        "-r", "--remake_cache", action="store_true", help="remake cached resources"
+    )
+    arg_parser.add_argument(
+        "-u",
+        "--user_bin",
+        metavar="\b",
+        type=valid_json,
+        help=" path to JSON file with user-defined bins",
+    )
+    arg_parser.add_argument(
+        "-w",
+        "--num_words",
+        metavar="\b",
+        default=3,
+        help=" number of word combinations to sample",
+    )
+    arg_parser.add_argument(
+        "-p",
+        "--cpus",
+        metavar="\b",
+        default=8,
+        help=" number of CPUs to try and parallelize permuations on",
+    )
+    arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)
 
     # TODO: encoding argument addded to logging.basicConfig in Python 3.9; now defaults to open()
     run_args = arg_parser.parse_args()
@@ -60,27 +95,29 @@
         run_args.bin = True
         arg_bins = run_args.user_bin
 
-    logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG)
+    logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG)
 
     if run_args.graph_only:
        try:
-            mapping_results = pandas.read_csv(run_args.input, delimiter='\t')
+            mapping_results = pandas.read_csv(run_args.input, delimiter="\t")
        except:
-            sys.exit('Input file not readable or not in expected format')
-        needed_columns = ['Matched_Components','Match_Status (Macro Level)']+list(arg_bins.keys())
+            sys.exit("Input file not readable or not in expected format")
+        needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list(
+            arg_bins.keys()
+        )
         missing_columns = set(needed_columns).difference(set(mapping_results.columns))
         if missing_columns:
-            sys.exit(f'Missing column(s) {missing_columns} from input file')
+            sys.exit(f"Missing column(s) {missing_columns} from input file")
 
         t0 = datetime.datetime.now()
-        logging.info(f'Run start: {t0}')
-        logging.info('Graphing only')
-        print('\nGraphing only...')
+        logging.info(f"Run start: {t0}")
+        logging.info("Graphing only")
+        print("\nGraphing only...")
         lexmapr.run_summary.figure_folder()
         lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
         lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
-        print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n')
+        print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n")
 
     else:
-        logging.info(f'Run start: {datetime.datetime.now()}')
+        logging.info(f"Run start: {datetime.datetime.now()}")
         lexmapr.pipeline.run(run_args)
-        logging.info(f'Run end: {datetime.datetime.now()}\n')
+        logging.info(f"Run end: {datetime.datetime.now()}\n")