Mercurial repository: kkonganti / lexmapr2_from_cfsan
diff lexmapr2.py @ 20:0be9a7117ba5
"planemo upload"
author      kkonganti
date        Wed, 09 Nov 2022 09:05:28 -0500
parents     91438d32ed58
children    (none)
--- a/lexmapr2.py	Thu Sep 15 14:50:34 2022 -0400
+++ b/lexmapr2.py	Wed Nov 09 09:05:28 2022 -0500
@@ -1,58 +1,93 @@
 """Entry script"""
-__version__ = '1.0.0'
+__version__ = "1.1.0"
 
 import argparse, datetime, json, logging, os, pandas, sys
 import lexmapr.pipeline, lexmapr.run_summary
 from lexmapr.definitions import arg_bins
 
 
 def valid_input(file_path):
-    '''Exits if input file is invalid'''
+    """Exits if input file is invalid"""
     _, file_ext = os.path.splitext(file_path)
-    if file_ext.lower() != '.csv' and file_ext.lower() != '.tsv':
-        sys.exit('Please supply a CSV or TSV input file with the correct file extension')
+    if file_ext.lower() != ".csv" and file_ext.lower() != ".tsv":
+        sys.exit("Please supply a CSV or TSV input file with the correct file extension")
     if not os.path.exists(file_path):
-        sys.exit(f'Input file named \"{file_path}\" not found')
-    return(file_path.strip())
+        sys.exit(f'Input file named "{file_path}" not found')
+    return file_path.strip()
+
 
 def valid_json(file_path):
-    '''Outputs read JSON file and exits if file is invalid'''
+    """Outputs read JSON file and exits if file is invalid"""
     try:
-        with open(file_path, 'r') as JSON_file:
+        with open(file_path, "r") as JSON_file:
             try:
-                return(json.load(JSON_file))
-            except(json.decoder.JSONDecodeError):
-                sys.exit(f'User-defined bins not in readable JSON format')
-    except(FileNotFoundError):
-        sys.exit(f'File named \"{file_path}\" not found')
+                return json.load(JSON_file)
+            except (json.decoder.JSONDecodeError):
+                sys.exit(f"User-defined bins not in readable JSON format")
+    except (FileNotFoundError):
+        sys.exit(f'File named "{file_path}" not found')
+
 
 def valid_list(list_str):
-    '''Return list of user-defined ontologies'''
-    return([x.strip().upper() for x in list_str.split(',')])
+    """Return list of user-defined ontologies"""
+    return [x.strip().upper() for x in list_str.split(",")]
+
 
 if __name__ == "__main__":
     # Parse arguments, initiate log file and start run
     arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
-    arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input)
-    arg_parser.add_argument('-o', '--output', metavar='\b',
-                            help=' output TSV file path; default is stdout')
-    arg_parser.add_argument('-a', '--no_ancestors', action='store_true',
-                            help='remove ancestral terms from output')
-    arg_parser.add_argument('-b', '--bin', action='store_true',
-                            help='classify samples into default bins')
-    arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list,
-                            help=' user-defined comma-separated ontology short names')
-    arg_parser.add_argument('-f', '--full', action='store_true', help='full output format')
-    arg_parser.add_argument('-g', '--graph', action='store_true',
-                            help='visualize summaries of mapping and binning')
-    arg_parser.add_argument('-j', '--graph_only', action='store_true',
-                            help='only perform visualization with LexMapr output')
-    arg_parser.add_argument('-r', '--remake_cache', action='store_true',
-                            help='remake cached resources')
-    arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json,
-                            help=' path to JSON file with user-defined bins')
-    arg_parser.add_argument('-v', '--version', action='version',
-                            version='%(prog)s '+__version__)
+    arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input)
+    arg_parser.add_argument(
+        "-o", "--output", metavar="\b", help=" output TSV file path; default is stdout"
+    )
+    arg_parser.add_argument(
+        "-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output"
+    )
+    arg_parser.add_argument(
+        "-b", "--bin", action="store_true", help="classify samples into default bins"
+    )
+    arg_parser.add_argument(
+        "-e",
+        "--embl_ontol",
+        metavar="\b",
+        type=valid_list,
+        help=" user-defined comma-separated ontology short names",
+    )
+    arg_parser.add_argument("-f", "--full", action="store_true", help="full output format")
+    arg_parser.add_argument(
+        "-g", "--graph", action="store_true", help="visualize summaries of mapping and binning"
+    )
+    arg_parser.add_argument(
+        "-j",
+        "--graph_only",
+        action="store_true",
+        help="only perform visualization with LexMapr output",
+    )
+    arg_parser.add_argument(
+        "-r", "--remake_cache", action="store_true", help="remake cached resources"
+    )
+    arg_parser.add_argument(
+        "-u",
+        "--user_bin",
+        metavar="\b",
+        type=valid_json,
+        help=" path to JSON file with user-defined bins",
+    )
+    arg_parser.add_argument(
+        "-w",
+        "--num_words",
+        metavar="\b",
+        default=3,
+        help=" number of word combinations to sample",
+    )
+    arg_parser.add_argument(
+        "-p",
+        "--cpus",
+        metavar="\b",
+        default=8,
+        help=" number of CPUs to try and parallelize permuations on",
+    )
+    arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)
 
     # TODO: encoding argument addded to logging.basicConfig in Python 3.9; now defaults to open()
     run_args = arg_parser.parse_args()
@@ -60,27 +95,29 @@
         run_args.bin = True
         arg_bins = run_args.user_bin
 
-    logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG)
+    logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG)
 
     if run_args.graph_only:
        try:
-            mapping_results = pandas.read_csv(run_args.input, delimiter='\t')
+            mapping_results = pandas.read_csv(run_args.input, delimiter="\t")
        except:
-            sys.exit('Input file not readable or not in expected format')
-        needed_columns = ['Matched_Components','Match_Status (Macro Level)']+list(arg_bins.keys())
+            sys.exit("Input file not readable or not in expected format")
+        needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list(
+            arg_bins.keys()
+        )
         missing_columns = set(needed_columns).difference(set(mapping_results.columns))
         if missing_columns:
-            sys.exit(f'Missing column(s) {missing_columns} from input file')
+            sys.exit(f"Missing column(s) {missing_columns} from input file")
 
         t0 = datetime.datetime.now()
-        logging.info(f'Run start: {t0}')
-        logging.info('Graphing only')
-        print('\nGraphing only...')
+        logging.info(f"Run start: {t0}")
+        logging.info("Graphing only")
+        print("\nGraphing only...")
         lexmapr.run_summary.figure_folder()
         lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys()))
         lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys()))
-        print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n')
+        print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n")
 
     else:
-        logging.info(f'Run start: {datetime.datetime.now()}')
+        logging.info(f"Run start: {datetime.datetime.now()}")
         lexmapr.pipeline.run(run_args)
-        logging.info(f'Run end: {datetime.datetime.now()}\n')
+        logging.info(f"Run end: {datetime.datetime.now()}\n")