#!/usr/bin/env python3
"""Create soft links to Nanopore 'fastq_pass' directories for a list of flowcells."""

import os
import re
import sys
import glob
import argparse
import logging
import textwrap

# Root directory searched when no -i option is supplied; each machine name
# below is appended to it to form one search directory.
DEFAULT_SEARCH_ROOT = '/projects/nanopore/raw'
NANOPORE_MACHINES = ('RazorCrest', 'Revolution', 'ObiWan', 'MinIT',
                     'Mayhem', 'CaptainMarvel', 'MinION', 'MinION_Padmini', 'RogueOne')


class _HelpFormatter(argparse.ArgumentDefaultsHelpFormatter,
                     argparse.RawDescriptionHelpFormatter):
    """Show argument defaults while keeping the description's own line breaks."""


def _parse_args(argv=None):
    """Build the command-line parser and parse *argv* (``sys.argv[1:]`` when None)."""
    # Raw string: the trailing backslashes of the shell example must be shown
    # literally instead of being consumed as line continuations.
    desc = textwrap.dedent(r"""
        Takes in a file with flowcell ID, one per line and creates soft links
        to 'fastq_pass' directory at target location.

        Ex:

        prepare_nanopore_fastq_dir.py \
            -o /hpc/scratch/Kranti.Konganti/np_test \
            -f flowcells.txt

        where flowcells.txt contains the following lines:

        FAL11127
        FAL11151

    """)
    parser = argparse.ArgumentParser(prog='prepare_nanopore_fastq_dir.py',
                                     formatter_class=_HelpFormatter,
                                     description=desc)
    required = parser.add_argument_group('required arguments')

    required.add_argument("-f", dest='flowcells', required=True,
                          help="Path to a text file containing Nanopore flowcell IDs, one per line")
    # -i is optional, so it is registered outside the 'required arguments' group.
    parser.add_argument("-i", dest='inputdir',
                        required=False, action='append', nargs='*',
                        help="Path to search directory. This directory location is where" +
                        " the presence of 'fastq_pass' will be searched for each flowcell.")
    required.add_argument("-o", dest='outputdir',
                          required=True,
                          help="Path to output directory. This directory is created by the script" +
                          " and new soft links (symlinks) are created in this directory.")
    return parser.parse_args(argv)


def _read_flowcells(path):
    """Return the unique, stripped, non-blank lines of *path* in file order.

    An empty list is returned when the file does not exist or holds no
    flowcell IDs. Other I/O errors (e.g. permission denied) propagate.
    """
    try:
        with open(path, 'r') as handle:
            ids = [line.strip() for line in handle if line.strip()]
    except FileNotFoundError:
        return []
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(ids))


def _find_fastq_pass(search_dir, flowcell):
    """Return the sorted 'fastq_pass' paths found for *flowcell* under *search_dir*.

    NOTE: glob is called without recursive=True, so '**' behaves like a single
    '*' path component. '[!fast5]' is a character class (any one character
    other than f, a, s, t or 5), so '*[!fast5]*' matches any directory name
    containing at least one such character; it does not exclude names that
    merely contain the substring 'fast5'.
    """
    matches = glob.glob(os.path.join(search_dir, flowcell, '**', '*[!fast5]*', 'fastq_pass'))
    if not matches:
        # Try one more time since the flowcell the user is trying to query
        # may be the parent directory of fastq_pass.
        matches = glob.glob(os.path.join(search_dir, '**', '*[!fast5]*', flowcell, 'fastq_pass'))
    return sorted(matches)


def _link_fastq_pass(target, output, flowcell):
    """Create ``<output>/<flowcell>/fastq_pass`` as a symlink to *target*.

    Nothing is changed when the link path already exists.
    """
    sym_link_dir = os.path.join(output, flowcell)
    sym_link_dir_dest = os.path.join(sym_link_dir, 'fastq_pass')
    # lexists() also detects dangling symlinks, which exists() would miss.
    if os.path.lexists(sym_link_dir_dest):
        logging.info("Soft link %s already exists! Skipped!!", sym_link_dir_dest)
        return
    os.makedirs(sym_link_dir, exist_ok=True)
    # An absolute target keeps the link valid even when the search directory
    # was given relative to the current working directory.
    os.symlink(os.path.abspath(target), sym_link_dir_dest, target_is_directory=True)
    logging.info("New soft link created: %s", sym_link_dir_dest)


def main(argv=None):
    """Link the 'fastq_pass' directory of every listed flowcell into the output directory.

    Args:
        argv: Command-line arguments without the program name. When None,
            argparse reads ``sys.argv[1:]``.

    Returns:
        0 when at least one flowcell's 'fastq_pass' directory was found,
        1 when the flowcell file is missing/empty or no flowcell was found.
    """
    args = _parse_args(argv)
    output = args.outputdir

    logging.basicConfig(format='%(asctime)s - %(levelname)s => %(message)s', level=logging.DEBUG)

    # action='append' combined with nargs='*' yields a list of lists
    # (one inner list per -i occurrence); flatten it to individual paths.
    custom_dirs = [path for group in (args.inputdir or []) for path in group]
    if custom_dirs:
        search_dirs = custom_dirs
    else:
        search_dirs = [os.path.join(DEFAULT_SEARCH_ROOT, machine) for machine in NANOPORE_MACHINES]
        logging.info("Searching default path(s). Use -i option if custom path should be searched.")

    # Read the flowcell list once instead of once per search directory.
    flowcell_ids = _read_flowcells(args.flowcells)
    if not flowcell_ids:
        logging.error("File %s is empty or does not exist!\n", args.flowcells)
        return 1

    found = dict.fromkeys(flowcell_ids, False)
    for search_dir in search_dirs:
        logging.info("Searching path: %s", search_dir)
        for flowcell in flowcell_ids:
            matches = _find_fastq_pass(search_dir, flowcell)
            if not matches:
                continue
            if len(matches) > 1:
                logging.warning("Multiple fastq_pass folders found for flowcell %s; using %s and ignoring: %s",
                                flowcell, matches[0], ', '.join(matches[1:]))
            found[flowcell] = True
            _link_fastq_pass(matches[0], output, flowcell)

    missing = [flowcell for flowcell, was_found in found.items() if not was_found]
    if missing:
        logging.warning("Did not find fastq_pass folder for the supplied flowcells: " +
                        ', '.join(missing))

    if len(missing) == len(found):
        logging.error("None of the supplied flowcells were found! The output directory, %s may not have been created!",
                      output)
        return 1

    logging.info("NOTE: Now you can use %s directory as --input to cpipes.\n", output)
    return 0


if __name__ == "__main__":
    sys.exit(main())