#!/usr/bin/env python3
"""Create soft links to Nanopore 'fastq_pass' directories for listed flowcells."""

import os
import re
import glob
import argparse
import logging

# Default location and per-machine sub-directories searched when -i is not given.
_DEFAULT_SEARCH_ROOTS = ['/projects/nanopore/raw']
_NANOPORE_MACHINES = ['RazorCrest', 'Revolution', 'ObiWan', 'MinIT',
                      'Mayhem', 'CaptainMarvel', 'MinION', 'MinION_Padmini', 'RogueOne']


def _read_flowcells(flowcells):
    """Return the stripped, non-blank lines of the file at *flowcells*.

    An empty list is returned when the file does not exist, has zero size,
    or contains only whitespace.
    """
    if not (os.path.exists(flowcells) and os.path.getsize(flowcells) > 0):
        return []
    with open(flowcells, 'r') as fcells:
        return [line.strip() for line in fcells if line.strip()]


def _search_paths(inputdir):
    """Return the list of directories to search for flowcells.

    *inputdir* is the raw argparse value of ``-i``. Because the option uses
    ``action='append'`` together with ``nargs='*'`` it is a list of lists
    (one inner list per ``-i`` occurrence), or ``None`` when ``-i`` was not
    given. Every supplied path is searched individually; when no path was
    supplied the default root is combined with each known machine name.
    """
    custom = [path for group in (inputdir or []) for path in group]
    if custom:
        return custom

    logging.info("Searching default path(s). Use -i option if custom path should be searched.")
    return [os.path.join(root, machine)
            for root in _DEFAULT_SEARCH_ROOTS
            for machine in _NANOPORE_MACHINES]


def _find_fastq_pass(search_path, flowcell):
    """Return the sorted 'fastq_pass' paths found for *flowcell* under *search_path*.

    NOTE: glob is called without ``recursive=True``, so ``**`` matches exactly
    one directory level (like ``*``), and ``[!fast5]`` is a character class,
    i.e. ``*[!fast5]*`` matches any name containing at least one character
    other than 'f', 'a', 's', 't' or '5'. Both patterns are kept as they were.
    """
    matches = glob.glob(os.path.join(search_path, flowcell, "**", "*[!fast5]*", 'fastq_pass'))
    if not matches:
        # Try one more time since the flowcell the user is trying to query
        # may be the parent directory of fastq_pass.
        matches = glob.glob(os.path.join(search_path, "**", "*[!fast5]*", flowcell, 'fastq_pass'))
    return sorted(matches)


def _link_fastq_pass(matches, output, flowcell):
    """Create ``<output>/<flowcell>/fastq_pass`` as a soft link to ``matches[0]``.

    Nothing is done when the link path already exists. When several
    candidates were found only the first (sorted) one is linked and a
    warning lists the situation, since a single link cannot point to all.
    """
    sym_link_dir = os.path.join(output, flowcell)
    sym_link_dir_dest = os.path.join(sym_link_dir, 'fastq_pass')

    # lexists() also reports dangling links, which os.symlink would refuse
    # to overwrite.
    if os.path.lexists(sym_link_dir_dest):
        logging.info(f"Soft link {sym_link_dir_dest} already exists! Skipped!!")
        return

    if len(matches) > 1:
        logging.warning(f"Found {len(matches)} fastq_pass directories for flowcell {flowcell}. "
                        f"Using {matches[0]}")

    os.makedirs(sym_link_dir, exist_ok=True)
    # A relative target would be resolved relative to the link's own
    # directory, so always store an absolute target.
    os.symlink(os.path.abspath(matches[0]), sym_link_dir_dest, target_is_directory=True)
    logging.info(f"New soft link created: {sym_link_dir_dest}")


def main():
    """Parse command-line arguments and soft link 'fastq_pass' per flowcell.

    Reads flowcell IDs from the file given with ``-f``, searches each search
    path (``-i``, or the default paths) for a matching 'fastq_pass' directory
    and creates ``<outputdir>/<flowcell>/fastq_pass`` soft links. Flowcells
    that were not found in any search path are reported in one warning at
    the end. Returns ``None``; problems are reported through logging.
    """
    # READ IN ARGUMENTS
    desc = """
    Takes in a file with flowcell ID, one per line and creates soft links
    to 'fastq_pass' directory at target location.

    Ex:

    prepare_nanopore_fastq_dir.py \
    -o /hpc/scratch/Kranti.Konganti/np_test \
    -f flowcells.txt

    where flowcells.txt contains the following lines:

    FAL11127
    FAL11151

    """
    parser = argparse.ArgumentParser(prog='prepare_nanopore_fastq_dir.py',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                     description=desc)
    required = parser.add_argument_group('required arguments')

    required.add_argument("-f", dest='flowcells', required=True,
                          help="Path to a text file containing Nanopore flowcell IDs, one per line")
    required.add_argument("-i", dest='inputdir',
                          required=False, action='append', nargs='*',
                          help="Path to search directory. This directory location is where" +
                          " the presence of 'fastq_pass' will be searched for each flowcell.")
    required.add_argument("-o", dest='outputdir',
                          required=True,
                          help="Path to output directory. This directory is created by the script" +
                          " and new soft links (symlinks) are created in this directory.")

    args = parser.parse_args()
    flowcells = args.flowcells
    output = args.outputdir

    logging.basicConfig(format='%(asctime)s - %(levelname)s => %(message)s', level=logging.DEBUG)

    # Read the flowcell list once instead of once per search path, and stop
    # early so that no misleading success note is printed for a bad file.
    flowcell_ids = _read_flowcells(flowcells)
    if not flowcell_ids:
        logging.error(f"File {flowcells} is empty or does not exist!\n")
        return

    # Maps flowcell ID -> True once a fastq_pass directory was found in any
    # search path; a later miss never resets an earlier hit.
    fastq_pass_found = {}

    for search_path in _search_paths(args.inputdir):
        logging.info(f"Searching path: {search_path}")

        for flowcell in flowcell_ids:
            matches = _find_fastq_pass(search_path, flowcell)
            if not matches:
                fastq_pass_found.setdefault(flowcell, False)
                continue
            fastq_pass_found[flowcell] = True
            _link_fastq_pass(matches, output, flowcell)

    not_found = [flowcell for flowcell, found in fastq_pass_found.items() if not found]

    if not_found:
        logging.warning("Did not find fastq_pass folder for the supplied flowcells: " +
                        ', '.join(not_found))

    if not_found and len(not_found) == len(fastq_pass_found):
        logging.error(f"None of the supplied flowcells were found! The output directory, {output} may not have been created!")
    else:
        logging.info(f"NOTE: Now you can use {output} directory as --input to cpipes.\n")


if __name__ == "__main__":
    main()