Mercurial > repos > kkonganti > lexmapr2_from_cfsan
changeset 20:0be9a7117ba5
"planemo upload"
author | kkonganti |
---|---|
date | Wed, 09 Nov 2022 09:05:28 -0500 |
parents | a2bf7a8b7bc9 |
children | a86bcf3ea8a6 |
files | cfsan_lexmapr2.xml lexmapr2.py lexmapr_run.log o.tsv t.csv t.json |
diffstat | 6 files changed, 87 insertions(+), 77 deletions(-) [+] |
line wrap: on
line diff
--- a/cfsan_lexmapr2.xml Thu Sep 15 14:50:34 2022 -0400 +++ b/cfsan_lexmapr2.xml Wed Nov 09 09:05:28 2022 -0500 @@ -56,10 +56,10 @@ <when value="false"> </when> </conditional> - <param name="bins" truevalue="-b" type="boolean" checked="true" - label="Classify samples into default bins" help="Default: true"/> - <param name="graph" truevalue="-g" type="boolean" checked="true" - label="Visualize summary of mapping and binning" help="Default: true"/> + <param name="bins" truevalue="-b" type="boolean" checked="false" + label="Classify samples into default bins" help="Default: false. Turning on this option may increase run time."/> + <param name="graph" truevalue="-g" type="boolean" checked="false" + label="Visualize summary of mapping and binning" help="Default: false. Turning on this option may increase run time."/> <param name="full" truevalue="-f" type="boolean" checked="true" label="Generate full output format" help="Default: true"/> <param name="no_ancestors" truevalue="-a" type="boolean" checked="false"
--- a/lexmapr2.py Thu Sep 15 14:50:34 2022 -0400 +++ b/lexmapr2.py Wed Nov 09 09:05:28 2022 -0500 @@ -1,58 +1,93 @@ """Entry script""" -__version__ = '1.0.0' +__version__ = "1.1.0" import argparse, datetime, json, logging, os, pandas, sys import lexmapr.pipeline, lexmapr.run_summary from lexmapr.definitions import arg_bins def valid_input(file_path): - '''Exits if input file is invalid''' + """Exits if input file is invalid""" _, file_ext = os.path.splitext(file_path) - if file_ext.lower() != '.csv' and file_ext.lower() != '.tsv': - sys.exit('Please supply a CSV or TSV input file with the correct file extension') + if file_ext.lower() != ".csv" and file_ext.lower() != ".tsv": + sys.exit("Please supply a CSV or TSV input file with the correct file extension") if not os.path.exists(file_path): - sys.exit(f'Input file named \"{file_path}\" not found') - return(file_path.strip()) + sys.exit(f'Input file named "{file_path}" not found') + return file_path.strip() + def valid_json(file_path): - '''Outputs read JSON file and exits if file is invalid''' + """Outputs read JSON file and exits if file is invalid""" try: - with open(file_path, 'r') as JSON_file: + with open(file_path, "r") as JSON_file: try: - return(json.load(JSON_file)) - except(json.decoder.JSONDecodeError): - sys.exit(f'User-defined bins not in readable JSON format') - except(FileNotFoundError): - sys.exit(f'File named \"{file_path}\" not found') + return json.load(JSON_file) + except (json.decoder.JSONDecodeError): + sys.exit(f"User-defined bins not in readable JSON format") + except (FileNotFoundError): + sys.exit(f'File named "{file_path}" not found') + def valid_list(list_str): - '''Return list of user-defined ontologies''' - return([x.strip().upper() for x in list_str.split(',')]) + """Return list of user-defined ontologies""" + return [x.strip().upper() for x in list_str.split(",")] + if __name__ == "__main__": # Parse arguments, initiate log file and start run arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) - arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input) - arg_parser.add_argument('-o', '--output', metavar='\b', - help=' output TSV file path; default is stdout') - arg_parser.add_argument('-a', '--no_ancestors', action='store_true', - help='remove ancestral terms from output') - arg_parser.add_argument('-b', '--bin', action='store_true', - help='classify samples into default bins') - arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list, - help=' user-defined comma-separated ontology short names') - arg_parser.add_argument('-f', '--full', action='store_true', help='full output format') - arg_parser.add_argument('-g', '--graph', action='store_true', - help='visualize summaries of mapping and binning') - arg_parser.add_argument('-j', '--graph_only', action='store_true', - help='only perform visualization with LexMapr output') - arg_parser.add_argument('-r', '--remake_cache', action='store_true', - help='remake cached resources') - arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json, - help=' path to JSON file with user-defined bins') - arg_parser.add_argument('-v', '--version', action='version', - version='%(prog)s '+__version__) + arg_parser.add_argument("input", help="input CSV or TSV file; required", type=valid_input) + arg_parser.add_argument( + "-o", "--output", metavar="\b", help=" output TSV file path; default is stdout" + ) + arg_parser.add_argument( + "-a", "--no_ancestors", action="store_true", help="remove ancestral terms from output" + ) + arg_parser.add_argument( + "-b", "--bin", action="store_true", help="classify samples into default bins" + ) + arg_parser.add_argument( + "-e", + "--embl_ontol", + metavar="\b", + type=valid_list, + help=" user-defined comma-separated ontology short names", + ) + arg_parser.add_argument("-f", "--full", action="store_true", help="full output format") + arg_parser.add_argument( + "-g", "--graph", action="store_true", help="visualize summaries of mapping and binning" + ) + arg_parser.add_argument( + "-j", + "--graph_only", + action="store_true", + help="only perform visualization with LexMapr output", + ) + arg_parser.add_argument( + "-r", "--remake_cache", action="store_true", help="remake cached resources" + ) + arg_parser.add_argument( + "-u", + "--user_bin", + metavar="\b", + type=valid_json, + help=" path to JSON file with user-defined bins", + ) + arg_parser.add_argument( + "-w", + "--num_words", + metavar="\b", + default=3, + help=" number of word combinations to sample", + ) + arg_parser.add_argument( + "-p", + "--cpus", + metavar="\b", + default=8, + help=" number of CPUs to try and parallelize permuations on", + ) + arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__) # TODO: encoding argument addded to logging.basicConfig in Python 3.9; now defaults to open() run_args = arg_parser.parse_args() @@ -60,27 +95,29 @@ run_args.bin = True arg_bins = run_args.user_bin - logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG) + logging.basicConfig(filename="lexmapr_run.log", level=logging.DEBUG) if run_args.graph_only: try: - mapping_results = pandas.read_csv(run_args.input, delimiter='\t') + mapping_results = pandas.read_csv(run_args.input, delimiter="\t") except: - sys.exit('Input file not readable or not in expected format') - needed_columns = ['Matched_Components','Match_Status (Macro Level)']+list(arg_bins.keys()) + sys.exit("Input file not readable or not in expected format") + needed_columns = ["Matched_Components", "Match_Status (Macro Level)"] + list( + arg_bins.keys() + ) missing_columns = set(needed_columns).difference(set(mapping_results.columns)) if missing_columns: - sys.exit(f'Missing column(s) {missing_columns} from input file') + sys.exit(f"Missing column(s) {missing_columns} from input file") t0 = datetime.datetime.now() - logging.info(f'Run start: {t0}') - logging.info('Graphing only') - print('\nGraphing only...') + logging.info(f"Run start: {t0}") + logging.info("Graphing only") + print("\nGraphing only...") lexmapr.run_summary.figure_folder() lexmapr.run_summary.report_results(run_args.input, list(arg_bins.keys())) lexmapr.run_summary.visualize_results(run_args.input, list(arg_bins.keys())) - print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n') + print("\t" + f"Done! {datetime.datetime.now()-t0} passed".ljust(60) + "\n") else: - logging.info(f'Run start: {datetime.datetime.now()}') + logging.info(f"Run start: {datetime.datetime.now()}") lexmapr.pipeline.run(run_args) - logging.info(f'Run end: {datetime.datetime.now()}\n') + logging.info(f"Run end: {datetime.datetime.now()}\n")
--- a/lexmapr_run.log Thu Sep 15 14:50:34 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -INFO:root:Run start: 2022-09-15 14:05:40.236514 -INFO:root:Run start: 2022-09-15 14:07:13.815176 -INFO:root:Run start: 2022-09-15 14:08:11.894380 -INFO:root:Database build/confirm: 2022-09-15 14:08:11.908013 -WARNING:root:Did not retrieve URL for https://www.ebi.ac.uk/ols/api/ontologies/foodon/ancestors?id=FOODON:00002703&page=1&size=20 during API search
--- a/o.tsv Thu Sep 15 14:50:34 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -Sample_Id Sample_Desc Processed_Sample Annotated_Sample Matched_Components Match_Status (Macro Level) Match_Status (Micro Level) Sample_Transformations ncbi_taxon -small_simple1 Chicken Breast Not annotated chicken breast:FOODON_00002703 Full Term Match [] {} \ No newline at end of file
--- a/t.csv Thu Sep 15 14:50:34 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -SampleId,Sample -small_simple1,Chicken Breast -small_simple2,Baked Potato -small_simple3,Canned Corn -small_simple4,Frozen Yogurt -small_simple5,Apple Pie -small_simple5,Apple cider -small_simple6,Chicken egg \ No newline at end of file
--- a/t.json Thu Sep 15 14:50:34 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -{ - "ncbi_taxon":{ - "Actinopterygii":"NCBITaxon_7898", - "Ecdysozoa":"NCBITaxon_1206794", - "Echinodermata":"NCBITaxon_7586", - "Fungi":"NCBITaxon_4751", - "Mammalia":"NCBITaxon_40674", - "Sauropsida":"NCBITaxon_8457", - "Spiralia":"NCBITaxon_2697495", - "Viridiplantae":"NCBITaxon_33090" - } -} \ No newline at end of file