lexmapr2_from_cfsan: lexmapr2.py comparison

comparison lexmapr2.py @ 20:0be9a7117ba5

"planemo upload"

author	kkonganti
date	Wed, 09 Nov 2022 09:05:28 -0500
parents	91438d32ed58
children

comparison

equal deleted inserted replaced

-:a2bf7a8b7bc9
+:0be9a7117ba5
 """Entry script"""
-__version__ = '1.0.0'
+__version__ = "1.1.0"
 import argparse, datetime, json, logging, os, pandas, sys
 import lexmapr.pipeline, lexmapr.run_summary
 from lexmapr.definitions import arg_bins
 def valid_input(file_path):
-'''Exits if input file is invalid'''
+"""Exits if input file is invalid"""
 _, file_ext = os.path.splitext(file_path)
-if file_ext.lower() != '.csv' and file_ext.lower() != '.tsv':
+if file_ext.lower() != ".csv" and file_ext.lower() != ".tsv":
-sys.exit('Please supply a CSV or TSV input file with the correct file extension')
+sys.exit("Please supply a CSV or TSV input file with the correct file extension")
 if not os.path.exists(file_path):
-sys.exit(f'Input file named \"{file_path}\" not found')
+sys.exit(f'Input file named "{file_path}" not found')
-return(file_path.strip())
+return file_path.strip()
 def valid_json(file_path):
-'''Outputs read JSON file and exits if file is invalid'''
+"""Outputs read JSON file and exits if file is invalid"""
 try:
-with open(file_path, 'r') as JSON_file:
+with open(file_path, "r") as JSON_file:
 try:
-return(json.load(JSON_file))
+return json.load(JSON_file)
-except(json.decoder.JSONDecodeError):
+except (json.decoder.JSONDecodeError):
-sys.exit(f'User-defined bins not in readable JSON format')
+sys.exit(f"User-defined bins not in readable JSON format")
-except(FileNotFoundError):
+except (FileNotFoundError):
-sys.exit(f'File named \"{file_path}\" not found')
+sys.exit(f'File named "{file_path}" not found')
 def valid_list(list_str):
-'''Return list of user-defined ontologies'''
+"""Return list of user-defined ontologies"""
-return([x.strip().upper() for x in list_str.split(',')])
+return [x.strip().upper() for x in list_str.split(",")]
 if __name__ == "__main__":
 # Parse arguments, initiate log file and start run
 arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
-arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input)
+arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input)
-arg_parser.add_argument('-o', '--output', metavar='\b',
+arg_parser.add_argument(
-help='    output TSV file path; default is stdout')
+"-o", "--output", metavar="\b", help="    output TSV file path; default is stdout"
-arg_parser.add_argument('-a', '--no_ancestors', action='store_true',
+)
-help='remove ancestral terms from output')
+arg_parser.add_argument(
-arg_parser.add_argument('-b', '--bin', action='store_true',
+"-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output"
-help='classify samples into default bins')
+)
-arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list,
+arg_parser.add_argument(
-help='    user-defined comma-separated ontology short names')
+"-b", "--bin", action="store_true", help="classify samples into default bins"
-arg_parser.add_argument('-f', '--full', action='store_true', help='full output format')
+)
-arg_parser.add_argument('-g', '--graph', action='store_true',
+arg_parser.add_argument(
-help='visualize summaries of mapping and binning')
+"-e",
-arg_parser.add_argument('-j', '--graph_only', action='store_true',
+"--embl_ontol",
-help='only perform visualization with LexMapr output')
+metavar="\b",
-arg_parser.add_argument('-r', '--remake_cache', action='store_true',
+type=valid_list,
-help='remake cached resources')
+help="    user-defined comma-separated ontology short names",
-arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json,
+)
-help='    path to JSON file with user-defined bins')
+arg_parser.add_argument("-f", "--full", action="store_true", help="full output format")
-arg_parser.add_argument('-v', '--version', action='version',
+arg_parser.add_argument(
-version='%(prog)s '+__version__)
+"-g", "--graph", action="store_true", help="visualize summaries of mapping and binning"
+)
+arg_parser.add_argument(
+"-j",
+"--graph_only",
+action="store_true",
+help="only perform visualization with LexMapr output",
+)
+arg_parser.add_argument(
+"-r", "--remake_cache", action="store_true", help="remake cached resources"
+)
+arg_parser.add_argument(
+"-u",
+"--user_bin",
+metavar="\b",
+type=valid_json,
+help="    path to JSON file with user-defined bins",
+)
+arg_parser.add_argument(
+"-w",
+"--num_words",
+metavar="\b",
+default=3,
+help="    number of word combinations to sample",
+)
+arg_parser.add_argument(
+"-p",
+"--cpus",
+metavar="\b",
+default=8,
+help="    number of CPUs to try and parallelize permuations on",
+)
+arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)
 # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open()
 run_args = arg_parser.parse_args()
 if run_args.user_bin is not None:
 run_args.bin = True
 arg_bins = run_args.user_bin
-logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG)
+logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG)
 if run_args.graph_only:
 try:
-mapping_results = pandas.read_csv(run_args.input, delimiter='\t')
+mapping_results = pandas.read_csv(run_args.input, delimiter="\t")
 except:
-sys.exit('Input file not readable or not in expected format')
+sys.exit("Input file not readable or not in expected format")
-needed_columns = ['Matched_Components','Match_Status (Macro Level)']+list(arg_bins.keys())
+needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list(
+arg_bins.keys()
+)
 missing_columns = set(needed_columns).difference(set(mapping_results.columns))
 if missing_columns:
-sys.exit(f'Missing column(s) {missing_columns} from input file')
+sys.exit(f"Missing column(s) {missing_columns} from input file")
 t0 = datetime.datetime.now()
-logging.info(f'Run start: {t0}')
+logging.info(f"Run start: {t0}")
-logging.info('Graphing only')
+logging.info("Graphing only")
-print('\nGraphing only...')
+print("\nGraphing only...")
 lexmapr.run_summary.figure_folder()
 lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
 lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
-print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n')
+print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n")
 else:
-logging.info(f'Run start: {datetime.datetime.now()}')
+logging.info(f"Run start: {datetime.datetime.now()}")
 lexmapr.pipeline.run(run_args)
-logging.info(f'Run end: {datetime.datetime.now()}\n')
+logging.info(f"Run end: {datetime.datetime.now()}\n")

Mercurial > repos > kkonganti > lexmapr2_from_cfsan

comparison lexmapr2.py @ 20:0be9a7117ba5