"""Entry script"""

__version__ = '1.0.0'

import argparse
import datetime
import json
import logging
import os
import sys

import pandas

import lexmapr.pipeline
import lexmapr.run_summary
from lexmapr.definitions import arg_bins


def valid_input(file_path):
    """Return the whitespace-stripped input path, exiting if it is unusable.

    Used as an argparse ``type=`` callable. The path is stripped *before* it
    is checked so that the path returned is the same one that was validated.

    Exits via ``sys.exit`` when the extension is not ``.csv``/``.tsv``
    (case-insensitive) or when the path does not exist.
    """
    file_path = file_path.strip()
    _, file_ext = os.path.splitext(file_path)
    if file_ext.lower() not in ('.csv', '.tsv'):
        sys.exit('Please supply a CSV or TSV input file with the correct file extension')
    if not os.path.exists(file_path):
        sys.exit(f'Input file named "{file_path}" not found')
    return file_path


def valid_json(file_path):
    """Return the parsed content of a JSON file, exiting if it is unusable.

    Used as an argparse ``type=`` callable for the user-defined bins.

    Exits via ``sys.exit`` when the file does not exist or when its content
    cannot be decoded/parsed as JSON.
    """
    try:
        with open(file_path, 'r') as json_file:
            return json.load(json_file)
    except FileNotFoundError:
        sys.exit(f'File named "{file_path}" not found')
    except (json.JSONDecodeError, UnicodeDecodeError):
        # UnicodeDecodeError: the file is not readable text at all, which is
        # reported the same way as malformed JSON instead of a traceback.
        sys.exit('User-defined bins not in readable JSON format')


def valid_list(list_str):
    """Return the comma-separated items of ``list_str``, stripped and upper-cased."""
    return [item.strip().upper() for item in list_str.split(',')]


if __name__ == "__main__":
    # Parse arguments, initiate log file and start run
    arg_parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    arg_parser.add_argument('input', help='input CSV or TSV file; required', type=valid_input)
    arg_parser.add_argument('-o', '--output', metavar='\b',
                            help=' output TSV file path; default is stdout')
    arg_parser.add_argument('-a', '--no_ancestors', action='store_true',
                            help='remove ancestral terms from output')
    arg_parser.add_argument('-b', '--bin', action='store_true',
                            help='classify samples into default bins')
    arg_parser.add_argument('-e', '--embl_ontol', metavar='\b', type=valid_list,
                            help=' user-defined comma-separated ontology short names')
    arg_parser.add_argument('-f', '--full', action='store_true', help='full output format')
    arg_parser.add_argument('-g', '--graph', action='store_true',
                            help='visualize summaries of mapping and binning')
    arg_parser.add_argument('-j', '--graph_only', action='store_true',
                            help='only perform visualization with LexMapr output')
    arg_parser.add_argument('-r', '--remake_cache', action='store_true',
                            help='remake cached resources')
    arg_parser.add_argument('-u', '--user_bin', metavar='\b', type=valid_json,
                            help=' path to JSON file with user-defined bins')
    arg_parser.add_argument('-v', '--version', action='version',
                            version='%(prog)s '+__version__)

    run_args = arg_parser.parse_args()
    if run_args.user_bin is not None:
        # Supplying user-defined bins implies binning and replaces the
        # default bins imported from lexmapr.definitions.
        run_args.bin = True
        arg_bins = run_args.user_bin

    # TODO: encoding argument added to logging.basicConfig in Python 3.9; now defaults to open()
    logging.basicConfig(filename='lexmapr_run.log', level=logging.DEBUG)

    if run_args.graph_only:
        try:
            mapping_results = pandas.read_csv(run_args.input, delimiter='\t')
        except Exception:
            # Not a bare ``except``: Ctrl-C and SystemExit must not be
            # reported as an unreadable input file.
            sys.exit('Input file not readable or not in expected format')

        bin_names = list(arg_bins.keys())
        needed_columns = ['Matched_Components', 'Match_Status (Macro Level)'] + bin_names
        missing_columns = set(needed_columns).difference(set(mapping_results.columns))
        if missing_columns:
            sys.exit(f'Missing column(s) {missing_columns} from input file')

        t0 = datetime.datetime.now()
        logging.info(f'Run start: {t0}')
        logging.info('Graphing only')
        print('\nGraphing only...')
        lexmapr.run_summary.figure_folder()
        lexmapr.run_summary.report_results(run_args.input, bin_names)
        lexmapr.run_summary.visualize_results(run_args.input, bin_names)
        print('\t'+f'Done! {datetime.datetime.now()-t0} passed'.ljust(60)+'\n')
    else:
        logging.info(f'Run start: {datetime.datetime.now()}')
        lexmapr.pipeline.run(run_args)

    logging.info(f'Run end: {datetime.datetime.now()}\n')