# HG changeset patch # User kkonganti # Date 1719512246 14400 # Node ID 52045ea4679d9be5bdc5e68685117ac2eb9d3b92 # Parent 17890124001d4c140a6eed9742e3b4fec0dbc225 "planemo upload" diff -r 17890124001d -r 52045ea4679d 0.4.0/LICENSE.md --- a/0.4.0/LICENSE.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ -# CPIPES (CFSAN PIPELINES) - -## The modular pipeline repository at CFSAN, FDA - -**CPIPES** (CFSAN PIPELINES) is a collection of modular pipelines based on **NEXTFLOW**, -mostly for bioinformatics data analysis at **CFSAN, FDA.** - ---- - -### **LICENSES** - -\ -  - -**CPIPES** is licensed under: - -```text -MIT License - -In the U.S.A. Public Domain; elsewhere Copyright (c) 2022 U.S. Food and Drug Administration - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-``` - -\ -  - -Portions of **CPIPES** are built on modified versions of many tools, scripts and libraries from [nf-core/modules](https://github.com/nf-core/modules) and [nf-core/rnaseq](https://github.com/nf-core/rna-seq) which are originally licensed under: - -```text -MIT License - -Copyright (c) Philip Ewels -Copyright (c) Phil Ewels, Rickard Hammarén - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -``` - -\ -  - -The **MultiQC** report, in addition uses [DataTables](https://datatables.net), which is licensed under: - -```text -MIT License - -Copyright (C) 2008-2022, SpryMedia Ltd. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -``` diff -r 17890124001d -r 52045ea4679d 0.4.0/README.md --- a/0.4.0/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -# CPIPES (CFSAN PIPELINES) - -## The modular pipeline repository at CFSAN, FDA - -**CPIPES** (CFSAN PIPELINES) is a collection of modular pipelines based on **NEXTFLOW**, -mostly for bioinformatics data analysis at **CFSAN, FDA.** - ---- - -### **Pipelines** - ---- -**CPIPES**: - - 1. `centriflaken` : [README](./readme/centriflaken.md). - 2. `centriflaken_hy` : [README](./readme/centriflaken_hy.md). - -#### Workflow Usage - -Following is the example of how to run the `centriflaken` pipeline on the **CFSAN** raven cluster. 
- -```bash -module load cpipes/0.4.0 - -cpipes --pipeline centriflaken [options] -``` - -Example: - -```bash -cd /hpc/scratch/$USER -mkdir nf-cpipes -cd nf-cpipes -cpipes \ - --pipeline centriflaken \ - --input /path/to/fastq_pass_dir \ - --output /path/to/where/output/should/go \ - --user_email First.Last@fda.hhs.gov \ - -profile raven -``` - -The above command would run the pipeline and store the output wherever the author of the workflow decided it to be and the **NEXTFLOW** reports are always stored in the current working directory from where `cpipes` is run. For example, for the above command, a directory called `CPIPES-centriflaken` would hold all the **NEXTFLOW** -related logs, reports and trace files. - -### **BETA** - ---- -The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations. diff -r 17890124001d -r 52045ea4679d 0.4.0/assets/FDa-Logo-Blue---medium-01.png Binary file 0.4.0/assets/FDa-Logo-Blue---medium-01.png has changed diff -r 17890124001d -r 52045ea4679d 0.4.0/assets/FDa-Logo-replace-Blue-small-01.png Binary file 0.4.0/assets/FDa-Logo-replace-Blue-small-01.png has changed diff -r 17890124001d -r 52045ea4679d 0.4.0/assets/dummy_file.txt --- a/0.4.0/assets/dummy_file.txt Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -DuMmY diff -r 17890124001d -r 52045ea4679d 0.4.0/assets/dummy_file2.txt --- a/0.4.0/assets/dummy_file2.txt Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -DuMmY diff -r 17890124001d -r 52045ea4679d 0.4.0/bin/check_samplesheet.py --- a/0.4.0/bin/check_samplesheet.py Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,185 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import errno -import argparse - - -def parse_args(args=None): - Description = "Reformat samplesheet file and check its contents." 
- Epilog = "Example usage: python check_samplesheet.py " - - parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument("FILE_IN", help="Input samplesheet file.") - parser.add_argument("FILE_OUT", help="Output file.") - return parser.parse_args(args) - - -def make_dir(path): - if len(path) > 0: - try: - os.makedirs(path) - except OSError as exception: - if exception.errno != errno.EEXIST: - raise exception - - -def print_error(error, context="Line", context_str=""): - error_str = f"ERROR: Please check samplesheet -> {error}" - if context != "" and context_str != "": - error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'" - print(error_str) - sys.exit(1) - - -def check_samplesheet(file_in, file_out): - """ - This function checks that the samplesheet follows the following structure: - - sample,fq1,fq2,strandedness - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz,forward - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz,forward - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq,,forward - SAMPLE_SE,SAMPLE_SE_RUN1_2.fastq.gz,,forward - - For an example see: - https://github.com/nf-core/test-datasets/blob/rnaseq/samplesheet/v3.1/samplesheet_test.csv - """ - - sample_mapping_dict = {} - with open(file_in, "r", encoding='utf-8-sig') as fin: - - ## Check header - MIN_COLS = 3 - HEADER = ["sample", "fq1", "fq2", "strandedness"] - header = [x.strip('"') for x in fin.readline().strip().split(",")] - if header[: len(HEADER)] != HEADER: - print( - f"ERROR: Please check samplesheet header -> {','.join(header)} != {','.join(HEADER)}" - ) - sys.exit(1) - - ## Check sample entries - for line in fin: - if line.strip(): - lspl = [x.strip().strip('"') for x in line.strip().split(",")] - - ## Check valid number of columns per row - if len(lspl) < len(HEADER): - print_error( - f"Invalid number of columns (minimum = {len(HEADER)})!", - "Line", - line, - ) - - num_cols = len([x for x in lspl 
if x]) - if num_cols < MIN_COLS: - print_error( - f"Invalid number of populated columns (minimum = {MIN_COLS})!", - "Line", - line, - ) - - ## Check sample name entries - sample, fq1, fq2, strandedness = lspl[: len(HEADER)] - if sample.find(" ") != -1: - print( - f"WARNING: Spaces have been replaced by underscores for sample: {sample}" - ) - sample = sample.replace(" ", "_") - if not sample: - print_error("Sample entry has not been specified!", "Line", line) - - ## Check FastQ file extension - for fastq in [fq1, fq2]: - if fastq: - if fastq.find(" ") != -1: - print_error("FastQ file contains spaces!", "Line", line) - # if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): - # print_error( - # "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", - # "Line", - # line, - # ) - - ## Check strandedness - strandednesses = ["unstranded", "forward", "reverse"] - if strandedness: - if strandedness not in strandednesses: - print_error( - f"Strandedness must be one of '{', '.join(strandednesses)}'!", - "Line", - line, - ) - else: - print_error( - f"Strandedness has not been specified! 
Must be one of {', '.join(strandednesses)}.", - "Line", - line, - ) - - ## Auto-detect paired-end/single-end - sample_info = [] ## [single_end, fq1, fq2, strandedness] - if sample and fq1 and fq2: ## Paired-end short reads - sample_info = ["0", fq1, fq2, strandedness] - elif sample and fq1 and not fq2: ## Single-end short reads - sample_info = ["1", fq1, fq2, strandedness] - else: - print_error("Invalid combination of columns provided!", "Line", line) - - ## Create sample mapping dictionary = {sample: [[ single_end, fq1, fq2, strandedness ]]} - if sample not in sample_mapping_dict: - sample_mapping_dict[sample] = [sample_info] - else: - if sample_info in sample_mapping_dict[sample]: - print_error("Samplesheet contains duplicate rows!", "Line", line) - else: - sample_mapping_dict[sample].append(sample_info) - - ## Write validated samplesheet with appropriate columns - if len(sample_mapping_dict) > 0: - out_dir = os.path.dirname(file_out) - make_dir(out_dir) - with open(file_out, "w") as fout: - fout.write( - ",".join(["sample", "single_end", "fq1", "fq2", "strandedness"]) - + "\n" - ) - for sample in sorted(sample_mapping_dict.keys()): - - ## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - if not all( - x[0] == sample_mapping_dict[sample][0][0] - for x in sample_mapping_dict[sample] - ): - print_error( - f"Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end!", - "Sample", - sample, - ) - - ## Check that multiple runs of the same sample are of the same strandedness - if not all( - x[-1] == sample_mapping_dict[sample][0][-1] - for x in sample_mapping_dict[sample] - ): - print_error( - f"Multiple runs of a sample must have the same strandedness!", - "Sample", - sample, - ) - - for idx, val in enumerate(sample_mapping_dict[sample]): - fout.write(",".join([f"{sample}_T{idx+1}"] + val) + "\n") - else: - print_error(f"No entries to process!", "Samplesheet: {file_in}") - - -def main(args=None): - args = parse_args(args) - check_samplesheet(args.FILE_IN, args.FILE_OUT) - - -if __name__ == "__main__": - sys.exit(main()) diff -r 17890124001d -r 52045ea4679d 0.4.0/bin/create_mqc_data_table.py --- a/0.4.0/bin/create_mqc_data_table.py Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,124 +0,0 @@ -#!/usr/bin/env python - -import sys -import yaml -from textwrap import dedent - -def main() : - """ - Takes a tab-delimited text file with a mandatory header - column and generates an HTML table. - """ - - args = sys.argv - if (len(args) < 2 or len(args) > 3): - print(f"\nTwo CL arguments are required!\n") - exit(1) - - table_sum_on = args[1].lower() - workflow_name = args[2].lower() - - with open(f"{table_sum_on}.tblsum.txt", "r") as tbl: - header = tbl.readline() - header_cols = header.strip().split('\t') - - html = [ - dedent( - f""" -
- - - - """ - ) - ] - - for header_col in header_cols: - html.append( - dedent( - f""" - """ - ) - ) - - html.append( - dedent( - """ - - - """ - ) - ) - - for row in tbl: - html.append("\n") - data_cols = row.strip().split('\t') - if ( len(header_cols) != len(data_cols) ): - print(f"\nWARN: Number of header columns ({len(header_cols)}) and data " + - f"columns ({len(data_cols)}) are not equal!\nWill append empty columns!\n") - if ( len(header_cols) > len(data_cols) ): - data_cols += (( len(header_cols) - len(data_cols) ) * ' ' ) - print(len(data_cols)) - else: - header_cols += (( len(data_cols) - len(header_cols) ) * ' ') - - html.append( - dedent( - f""" - - """ - ) - ) - - for data_col in data_cols[1:]: - html.append( - dedent( - f""" - """ - ) - ) - html.append("\n") - html.append("\n") - html.append("
{header_col}
{data_cols[0]}{data_col}
\n") - html.append("
\n") - - mqc_yaml = { - "id": f"{table_sum_on.upper()}_collated_table", - "section_name": f"{table_sum_on.upper()}", - "section_href": f"https://cfsan-git.fda.gov/Kranti.Konganti/{workflow_name}", - "plot_type": "html", - "description": "The results table shown here is a collection from all samples.", - "data": ('').join(html), - } - - with open(f"{table_sum_on.lower()}_mqc.yml", "w") as html_mqc: - yaml.dump(mqc_yaml, html_mqc, default_flow_style=False) - -if __name__ == "__main__": - main() \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/bin/extract_assembled_filtered_contigs.py --- a/0.4.0/bin/extract_assembled_filtered_contigs.py Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -#!/usr/bin/env python - -import os -import argparse -import logging as log -import pandas as pd -import numpy as np -from Bio import SeqIO - - -def main(): - # READ IN ARGUMENTS - desc = """This script is part of the centriflaken pipeline: - - accepts assembled contigs (assembly.fasta from flye) and kraken classification (kraken_output.txt from kraken2) output - - filters the assembled contigs based on taxa specified - - outputs an assembled and filtered fasta (assembled_filtered_contigs.fasta) """ - parser = argparse.ArgumentParser(prog='extract_assembled_filtered_contigs.py', description=desc) - parser.add_argument("-v", dest='verbose', action="store_true", help="for more verbose output") - parser.add_argument("-i", dest='input_fasta', required=True, help="Path to input fasta file (assembled output from flye)") - parser.add_argument("-o", dest='assembled_filtered_contigs', required=True, help="Path to output fasta file filtered by taxa specified") - parser.add_argument("-k", dest='kraken_output', required=True, help="Path to kraken output file") - parser.add_argument("-b", dest='bug', required=True, help="name or fragment of name of bug") - args = parser.parse_args() - - # MORE INFO IF VERBOSE - if args.verbose: - 
log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG) - else: - log.basicConfig(format="%(levelname)s: %(message)s") - - # ASSIGN VARIABLES - input_fasta = args.input_fasta - assembled_filtered_contigs = args.assembled_filtered_contigs - kraken_output = args.kraken_output - bug = args.bug - - # Match and filter taxa names and ids from kraken output file - report_df = pd.read_csv(kraken_output, delimiter="\t", usecols=[1,2], header=None) - report_df.columns = ["contig", "name"] - report_df['name'] = report_df['name'].str.lower() - filt_report_df = report_df[report_df['name'].str.contains(bug.lower())] - print("\nMatching taxa names and ids:\n",filt_report_df) - filtered_contig_list = filt_report_df['contig'] - - # Extract filtered reads from assembled input fasta and write to output fasta - print ("Indexing reads..") - rec = SeqIO.index(input_fasta,"fasta") - TF=open(assembled_filtered_contigs, "w") - for i in filtered_contig_list: - if i in rec: - SeqIO.write(rec[i], TF, "fasta") - TF.close() - - -if __name__ == "__main__": - main() \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/bin/fastq_dir_to_samplesheet.py --- a/0.4.0/bin/fastq_dir_to_samplesheet.py Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,177 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import glob -import argparse -import re - - -def parse_args(args=None): - Description = "Generate samplesheet from a directory of FastQ files." - Epilog = "Example usage: python fastq_dir_to_samplesheet.py " - - parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument("FASTQ_DIR", help="Folder containing raw FastQ files.") - parser.add_argument("SAMPLESHEET_FILE", help="Output samplesheet file.") - parser.add_argument( - "-st", - "--strandedness", - type=str, - dest="STRANDEDNESS", - default="unstranded", - help="Value for 'strandedness' in samplesheet. 
Must be one of 'unstranded', 'forward', 'reverse'.", - ) - parser.add_argument( - "-r1", - "--read1_extension", - type=str, - dest="READ1_EXTENSION", - default="_R1_001.fastq.gz", - help="File extension for read 1.", - ) - parser.add_argument( - "-r2", - "--read2_extension", - type=str, - dest="READ2_EXTENSION", - default="_R2_001.fastq.gz", - help="File extension for read 2.", - ) - parser.add_argument( - "-se", - "--single_end", - dest="SINGLE_END", - action="store_true", - help="Single-end information will be auto-detected but this option forces paired-end FastQ files to be treated as single-end so only read 1 information is included in the samplesheet.", - ) - parser.add_argument( - "-sn", - "--sanitise_name", - dest="SANITISE_NAME", - action="store_true", - help="Whether to further sanitise FastQ file name to get sample id. Used in conjunction with --sanitise_name_delimiter and --sanitise_name_index.", - ) - parser.add_argument( - "-sd", - "--sanitise_name_delimiter", - type=str, - dest="SANITISE_NAME_DELIMITER", - default="_", - help="Delimiter to use to sanitise sample name.", - ) - parser.add_argument( - "-si", - "--sanitise_name_index", - type=int, - dest="SANITISE_NAME_INDEX", - default=1, - help="After splitting FastQ file name by --sanitise_name_delimiter all elements before this index (1-based) will be joined to create final sample name.", - ) - return parser.parse_args(args) - - -def fastq_dir_to_samplesheet( - fastq_dir, - samplesheet_file, - strandedness="unstranded", - read1_extension="_R1_001.fastq.gz", - read2_extension="_R2_001.fastq.gz", - single_end=False, - sanitise_name=False, - sanitise_name_delimiter="_", - sanitise_name_index=1, -): - def sanitize_sample(path, extension): - """Retrieve sample id from filename""" - sample = os.path.basename(path).replace(extension, "") - if sanitise_name: - if sanitise_name_index > 0: - sample = sanitise_name_delimiter.join( - os.path.basename(path).split(sanitise_name_delimiter)[ - :sanitise_name_index - 
] - ) - # elif sanitise_name_index == -1: - # sample = os.path.basename(path)[ :os.path.basename(path).index('.') ] - return sample - - def get_fastqs(extension): - """ - Needs to be sorted to ensure R1 and R2 are in the same order - when merging technical replicates. Glob is not guaranteed to produce - sorted results. - See also https://stackoverflow.com/questions/6773584/how-is-pythons-glob-glob-ordered - """ - abs_fq_files = glob.glob(os.path.join(fastq_dir, f"**", f"*{extension}"), recursive=True) - return sorted( - [ - fq for _, fq in enumerate(abs_fq_files) if re.match('^((?!undetermined|unclassified|downloads).)*$', fq, flags=re.IGNORECASE) - ] - ) - - read_dict = {} - - ## Get read 1 files - for read1_file in get_fastqs(read1_extension): - sample = sanitize_sample(read1_file, read1_extension) - if sample not in read_dict: - read_dict[sample] = {"R1": [], "R2": []} - read_dict[sample]["R1"].append(read1_file) - - ## Get read 2 files - if not single_end: - for read2_file in get_fastqs(read2_extension): - sample = sanitize_sample(read2_file, read2_extension) - read_dict[sample]["R2"].append(read2_file) - - ## Write to file - if len(read_dict) > 0: - out_dir = os.path.dirname(samplesheet_file) - if out_dir and not os.path.exists(out_dir): - os.makedirs(out_dir) - - with open(samplesheet_file, "w") as fout: - header = ["sample", "fq1", "fq2", "strandedness"] - fout.write(",".join(header) + "\n") - for sample, reads in sorted(read_dict.items()): - for idx, read_1 in enumerate(reads["R1"]): - read_2 = "" - if idx < len(reads["R2"]): - read_2 = reads["R2"][idx] - sample_info = ",".join([sample, read_1, read_2, strandedness]) - fout.write(f"{sample_info}\n") - else: - error_str = ( - "\nWARNING: No FastQ files found so samplesheet has not been created!\n\n" - ) - error_str += "Please check the values provided for the:\n" - error_str += " - Path to the directory containing the FastQ files\n" - error_str += " - '--read1_extension' parameter\n" - error_str += " - 
'--read2_extension' parameter\n" - print(error_str) - sys.exit(1) - - -def main(args=None): - args = parse_args(args) - - strandedness = "unstranded" - if args.STRANDEDNESS in ["unstranded", "forward", "reverse"]: - strandedness = args.STRANDEDNESS - - fastq_dir_to_samplesheet( - fastq_dir=args.FASTQ_DIR, - samplesheet_file=args.SAMPLESHEET_FILE, - strandedness=strandedness, - read1_extension=args.READ1_EXTENSION, - read2_extension=args.READ2_EXTENSION, - single_end=args.SINGLE_END, - sanitise_name=args.SANITISE_NAME, - sanitise_name_delimiter=args.SANITISE_NAME_DELIMITER, - sanitise_name_index=args.SANITISE_NAME_INDEX, - ) - - -if __name__ == "__main__": - sys.exit(main()) diff -r 17890124001d -r 52045ea4679d 0.4.0/bin/prepare_nanopore_fastq_dir.py --- a/0.4.0/bin/prepare_nanopore_fastq_dir.py Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,117 +0,0 @@ -#!/usr/bin/env python3 - -import os -import re -import glob -import argparse -import logging - -def main(): - # READ IN ARGUMENTS - desc = """ - Takes in a file with flowcell ID, one per line and creates soft links - to 'fastq_pass' directory at target location. - - Ex: - - prepare_nanopore_fastq_dir.py \ - -o /hpc/scratch/Kranti.Konganti/np_test \ - -f flowcells.txt - - where flowcells.txt contains the following lines: - - FAL11127 - FAL11151 - - """ - parser = argparse.ArgumentParser(prog='prepare_nanopore_fastq_dir.py', - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description=desc) - required = parser.add_argument_group('required arguments') - - required.add_argument("-f", dest='flowcells', required=True, - help="Path to a text file containing Nanopore flowcell IDs, one per line") - required.add_argument("-i", dest='inputdir', - required=False, action='append', nargs='*', - help="Path to search directory. 
This directory location is where" + - " the presence of 'fastq_pass' will be searched for each flowcell.") - required.add_argument("-o", dest='outputdir', - required=True, - help="Path to output directory. This directory is created by the script" + - " and new soft links (symlinks) are created in this directory.") - - args = parser.parse_args() - flowcells = args.flowcells - output = args.outputdir - inputs = args.inputdir - - logging.basicConfig(format='%(asctime)s - %(levelname)s => %(message)s', level=logging.DEBUG) - - if not inputs: - inputs = ['/projects/nanopore/raw'] - nanopore_machines = ['RazorCrest', 'Revolution', 'ObiWan', 'MinIT', - 'Mayhem', 'CaptainMarvel', 'MinION', 'MinION_Padmini', 'RogueOne'] - logging.info(f"Searching default path(s). Use -i option if custom path should be searched.") - else: - nanopore_machines = ['custom'] - - fastq_pass_found = {} - was_fastq_pass_found = [] - - for each_input in inputs: - for machine in nanopore_machines: - if ''.join(nanopore_machines) != 'custom': - input = os.path.join(each_input, machine) - else: - input = ''.join(each_input) - - logging.info(f"Searching path: {input}") - - if (os.path.exists(flowcells) and os.path.getsize(flowcells) > 0): - with open(flowcells, 'r') as fcells: - for flowcell in fcells: - if re.match('^\s*$', flowcell): - continue - flowcell = flowcell.strip() - fastq_pass_path = glob.glob(os.path.join(input, flowcell, f"**", f"*[!fast5]*", 'fastq_pass')) - # Try one more time since the flowcell user is trying to query may be the parent directory - # of fastq_pass - fastq_pass = fastq_pass_path if fastq_pass_path else glob.glob(os.path.join(input, f"**", f"*[!fast5]*", flowcell, 'fastq_pass')) - if not fastq_pass: - # logging.warning(f"Flowcell " + - # os.path.join(input, flowcell).strip() + - # f" does not seem to have a fastq_pass directory! 
Skipped!!") - if not flowcell in fastq_pass_found.keys(): - fastq_pass_found[flowcell] = 0 - else: - fastq_pass_found[flowcell] = 1 - sym_link_dir = os.path.join(output, flowcell) - sym_link_dir_dest = os.path.join(sym_link_dir, 'fastq_pass') - if not os.path.exists(sym_link_dir): - os.makedirs(sym_link_dir) - os.symlink( - ''.join(fastq_pass), - sym_link_dir_dest, target_is_directory=True - ) - logging.info(f"New soft link created: {sym_link_dir_dest}") - else: - logging.info(f"Soft link {sym_link_dir_dest} already exists! Skipped!!") - fcells.close() - else: - logging.error(f"File {flowcells} is empty or does not exist!\n") - - for k,v in fastq_pass_found.items(): - if not v: - was_fastq_pass_found.append(k) - - if was_fastq_pass_found: - logging.warning("Did not find fastq_pass folder for the supplied flowcells: " + - ', '.join(was_fastq_pass_found)) - - if was_fastq_pass_found and len(was_fastq_pass_found) == len(fastq_pass_found): - logging.error(f"None of the supplied flowcells were found! 
The output directory, {output} may not have been created!") - else: - logging.info(f"NOTE: Now you can use {output} directory as --input to cpipes.\n") - -if __name__ == "__main__": - main() \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/bin/process_centrifuge_output.py --- a/0.4.0/bin/process_centrifuge_output.py Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -#!/usr/bin/env python - -import os -import argparse -import logging as log -import pandas as pd -import numpy as np -from Bio import SeqIO - - -def main(): - # READ IN ARGUMENTS - desc = """ - This script is part of the centriflaken pipeline: It processes centrifuge - output and produces either a filtered FASTQ or a text file of FASTQ IDs based - on the supplied taxa/bug - """ - parser = argparse.ArgumentParser(prog='process_centrifuge_output.py', description=desc) - parser.add_argument("-v", dest='verbose', action="store_true", help="For more verbose output") - parser.add_argument("-i", dest='input_fastq', required=False, - help="Path to input FASTQ file (same as input to centrifuge). 
If not mentioned, \ - a text file of sequence IDs are produced instead of a FASTQ file") - parser.add_argument("-t", dest='taxa_filtered_fastq_file', required=True, - help="Path to output FASTQ or output text file filtered by the taxa specified") - parser.add_argument("-r", dest='cent_report', required=True, help="Path to centrifuge report") - parser.add_argument("-o", dest='cent_output', required=True, help="Path to centrifuge output") - parser.add_argument("-b", dest='bug', required=True, - help="Name or fragment of name of the bug by which reads are extracted") - args = parser.parse_args() - - # MORE INFO IF VERBOSE - if args.verbose: - log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG) - else: - log.basicConfig(format="%(levelname)s: %(message)s") - - # ASSIGN VARIABLES - input_fastq = args.input_fastq - taxa_filtered_fastq_file = args.taxa_filtered_fastq_file - cent_report = args.cent_report - cent_output = args.cent_output - bug = args.bug - report_col_list = ["name", "taxID"] - output_col_list = ["taxID", "readID"] - - # Match and filter taxa names and ids from centrifuge report file - report_df = pd.read_csv(cent_report, delimiter="\t", usecols=report_col_list) - report_df['name'] = report_df['name'].str.lower() - filt_report_df = report_df[report_df['name'].str.contains(bug.lower())] - #print("\nMatching taxa names and ids:\n",filt_report_df) - taxID_list = filt_report_df['taxID'] - - # Match the above tax ids to read ids from centrifuge output file and deduplicate - output_df = pd.read_csv(cent_output, delimiter="\t", usecols=output_col_list) - filt_output_df = output_df.loc[output_df['taxID'].isin(taxID_list)] - readID_list = filt_output_df['readID'] - readID_dedup_list = np.unique(readID_list) - TF=open(taxa_filtered_fastq_file, "w") - - if (not input_fastq): - # print("\nFILTERED READ ID LIST:\n", readID_dedup_list) - for ID in readID_dedup_list: - TF.write(f"{ID}\n") - else: - # Extract filtered reads from input fastq and write to 
output fastq - print ("Indexing reads..") - rec = SeqIO.index(input_fastq,"fastq") - for i in readID_dedup_list: - if i in rec: - SeqIO.write(rec[i], TF, "fastq") - - TF.close() - -if __name__ == "__main__": - main() \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/conf/base.config --- a/0.4.0/conf/base.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -params { - fs = File.separator - cfsanpipename = 'CPIPES' - center = 'CFSAN, FDA.' - libs = "${projectDir}${params.fs}lib" - modules = "${projectDir}${params.fs}modules" - projectconf = "${projectDir}${params.fs}conf" - assetsdir = "${projectDir}${params.fs}assets" - subworkflows = "${projectDir}${params.fs}subworkflows" - workflows = "${projectDir}${params.fs}workflows" - workflowsconf = "${workflows}${params.fs}conf" - routines = "${libs}${params.fs}routines" - toolshelp = "${libs}${params.fs}help" - swmodulepath = "${params.fs}nfs${params.fs}software${params.fs}modules" - tracereportsdir = "${launchDir}${params.fs}${cfsanpipename}-${params.pipeline}${params.fs}nextflow-reports" - dummyfile = "${projectDir}${params.fs}assets${params.fs}dummy_file.txt" - dummyfile2 = "${projectDir}${params.fs}assets${params.fs}dummy_file2.txt" - linewidth = 80 - pad = 32 - pipeline = null - help = null - input = null - output = null - metadata = null - publish_dir_mode = "copy" - publish_dir_overwrite = true - user_email = null -} - -dag { - enabled = true - file = "${params.tracereportsdir}${params.fs}${params.pipeline}_dag.html" -} - -report { - enabled = true - file = "${params.tracereportsdir}${params.fs}${params.pipeline}_exec_report.html" -} - -trace { - enabled = true - file = "${params.tracereportsdir}${params.fs}${params.pipeline}_exec_trace.txt" -} - -timeline { - enabled = true - file = "${params.tracereportsdir}${params.fs}${params.pipeline}_exec_timeline.html" -} - diff -r 17890124001d -r 52045ea4679d 0.4.0/conf/fastq.config --- 
a/0.4.0/conf/fastq.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -params { - fq_filter_by_len = "4000" - fq_suffix = ".fastq.gz" - fq2_suffix = false - fq_strandedness = "unstranded" - fq_single_end = false - fq_filename_delim = "_" - fq_filename_delim_idx = "1" -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/conf/logtheseparams.config --- a/0.4.0/conf/logtheseparams.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -params { - logtheseparams = [ - "${params.metadata}" ? 'metadata' : null, - "${params.input}" ? 'input' : null, - "${params.output}" ? 'output' : null, - "${params.fq_suffix}" ? 'fq_suffix' : null, - "${params.fq2_suffix}" ? 'fq2_suffix' : null, - "${params.fq_strandedness}" ? 'fq_strandedness' : null, - "${params.fq_single_end}" ? 'fq_single_end' : null, - "${params.fq_filter_by_len}" ? 'fq_filter_by_len' : null, - "${params.fq_filename_delim}" ? 'fq_filename_delim' : null, - "${params.fq_filename_delim_idx}" ? 'fq_filename_delim_idx' : null, - 'enable_conda', - 'enable_module', - ] -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/conf/manifest.config --- a/0.4.0/conf/manifest.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -manifest { - author = 'Kranti.Konganti@fda.hhs.gov' - homePage = 'https://cfsan-git.fda.gov/cfsan-dev/cpipes' - name = 'CPIPES' - version = '0.4.0' - nextflowVersion = '>=21.12' - description = 'Modular Nextflow pipelines at CFSAN, FDA.' -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/conf/modules.config --- a/0.4.0/conf/modules.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -process { - publishDir = [ - path: { - "${task.process.tokenize(':')[-1].toLowerCase()}" == "multiqc" ? 
- "${params.output}${params.fs}${params.pipeline.toLowerCase()}-${task.process.tokenize(':')[-1].toLowerCase()}" : - "${params.output}${params.fs}${task.process.tokenize(':')[-1].toLowerCase()}" - }, - mode: params.publish_dir_mode, - overwrite: params.publish_dir_overwrite, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - - errorStrategy = { - ![0].contains(task.exitStatus) ? dynamic_retry(task.attempt, 10) : 'finish' - } - - maxRetries = 80 - - withLabel: 'process_femto' { - cpus = 2 - } - - withLabel: 'process_pico' { - cpus = 2 - } - - withLabel: 'process_nano' { - cpus = 4 - } - - withLabel: 'process_micro' { - cpus = 8 - } - - withLabel: 'process_only_mem_low' { - cpus = 2 - } - - withLabel: 'process_only_mem_medium' { - cpus = 2 - } - - withLabel: 'process_only_mem_high' { - cpus = 2 - } - - withLabel: 'process_low' { - cpus = 8 - } - - withLabel: 'process_medium' { - cpus = 8 - } - - withLabel: 'process_high' { - cpus = 8 - } - - withLabel: 'process_higher' { - cpus = 8 - } - - withLabel: 'process_gigantic' { - cpus = 8 - } -} - -if ( (params.input || params.metadata ) && params.pipeline ) { - try { - includeConfig "${params.workflowsconf}${params.fs}process${params.fs}${params.pipeline}.process.config" - } catch (Exception e) { - System.err.println('-'.multiply(params.linewidth) + "\n" + - "\033[0;31m${params.cfsanpipename} - ERROR\033[0m\n" + - '-'.multiply(params.linewidth) + "\n" + "\033[0;31mCould not load " + - "default pipeline's process configuration. Please provide a pipeline \n" + - "name using the --pipeline option.\n\033[0m" + '-'.multiply(params.linewidth) + "\n") - System.exit(1) - } -} - -// Function will return after sleeping for some time. -// Sleep time increases exponentially by task attempt. 
-def dynamic_retry(task_retry_num, factor_by) { - // sleep(Math.pow(2, task_retry_num.toInteger()) * factor_by.toInteger() as long) - sleep(Math.pow(1.27, task_retry_num.toInteger()) as long) - return 'retry' -} diff -r 17890124001d -r 52045ea4679d 0.4.0/conf/multiqc/centriflaken_hy_mqc.yml --- a/0.4.0/conf/multiqc/centriflaken_hy_mqc.yml Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -custom_logo: "FDa-Logo-Blue---medium-01.png" -custom_logo_url: "https://www.fda.gov/about-fda/fda-organization/center-food-safety-and-applied-nutrition-cfsan" -custom_logo_title: "CFSAN, FDA" -title: CPIPES Report -intro_text: > - CPIPES (CFSAN PIPELINES) is a modular bioinformatics data analysis project at CFSAN, FDA based on NEXTFLOW DSL2. -report_comment: > - This report has been generated by the CPIPES - Workflow_Name_Placeholder - analysis pipeline. Only certain tables and plots are reported here. For complete results, please refer to the analysis pipeline output directory. 
-report_header_info: - - CPIPES Version: CPIPES_Version_Placeholder - - Workflow: Workflow_Name_Placeholder - - Workflow Version: Workflow_Version_Placeholder - - Input Directory: Workflow_Input_Placeholder - - Output Directory: Workflow_Output_Placeholder - - Developer E-mail: 'Kranti.Konganti@fda.hhs.gov' - - Stakeholder E-mail: 'Narjol.Gonzalez-Escalona@fda.hhs.gov' -show_analysis_paths: False -show_analysis_time: False -report_section_order: - MLST_collated_table: - order: -989 - ECTYPER_collated_table: - order: -990 - SEROTYPEFINDER_collated_table: - order: -991 - SEQSERO2_collated_table: - order: -992 - ABRICATE_ECOLI_VF_collated_table: - order: -993 - ABRICATE_NCBI_collated_table: - order: -994 - ABRICATE_NCBIAMRPLUS_collated_table: - order: -995 - ABRICATE_MEGARES_collated_table: - order: -996 - ABRICATE_RESFINDER_collated_table: - order: -997 - ABRICATE_ARGANNOT_collated_table: - order: -998 - software_versions: - order: -999 - -export_plots: true - -# Run only these modules -run_modules: - - fastqc - - kraken - - custom_content - -module_order: - - fastqc: - name: 'FastQC' - info: 'section of the report shows FastQC results before adapter trimming.' - path_filters: - - '*_fastqc.zip' - - kraken: - name: 'Centrifuge' - href: 'https://ccb.jhu.edu/software/centrifuge' - doi: '10.1101/gr.210641.116' - info: > - section of the report shows how reads are classified. - Please note that the plot title below is shown as - Kraken2: Top taxa since centrifuge-kreport was used - to create Kraken-style reports from centrifuge output files. - path_filters: - - '*.kreport.txt' - - kraken: - name: 'Kraken2' - info: 'section of the report shows how assembled contigs are classified.' 
- path_filters: - - '*.report.txt' - -extra_fn_clean_exts: - - '.centrifuge.kreport' - - '.report' - -table_columns_visible: - Kraken: False - Kraken2: False - Centrifuge: False \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/conf/multiqc/centriflaken_mqc.yml --- a/0.4.0/conf/multiqc/centriflaken_mqc.yml Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -custom_logo: "FDa-Logo-Blue---medium-01.png" -custom_logo_url: "https://www.fda.gov/about-fda/fda-organization/center-food-safety-and-applied-nutrition-cfsan" -custom_logo_title: "CFSAN, FDA" -title: CPIPES Report -intro_text: > - CPIPES (CFSAN PIPELINES) is a modular bioinformatics data analysis project at CFSAN, FDA based on NEXTFLOW DSL2. -report_comment: > - This report has been generated by the CPIPES - Workflow_Name_Placeholder - analysis pipeline. Only certain tables and plots are reported here. For complete results, please refer to the analysis pipeline output directory. 
-report_header_info: - - CPIPES Version: CPIPES_Version_Placeholder - - Workflow: Workflow_Name_Placeholder - - Workflow Version: Workflow_Version_Placeholder - - Input Directory: Workflow_Input_Placeholder - - Output Directory: Workflow_Output_Placeholder - - Developer E-mail: 'Kranti.Konganti@fda.hhs.gov' - - Stakeholder E-mail: 'Narjol.Gonzalez-Escalona@fda.hhs.gov' -show_analysis_paths: False -show_analysis_time: False -report_section_order: - MLST_collated_table: - order: -989 - ECTYPER_collated_table: - order: -990 - SEROTYPEFINDER_collated_table: - order: -991 - SEQSERO2_collated_table: - order: -992 - ABRICATE_ECOLI_VF_collated_table: - order: -993 - ABRICATE_NCBI_collated_table: - order: -994 - ABRICATE_NCBIAMRPLUS_collated_table: - order: -995 - ABRICATE_MEGARES_collated_table: - order: -996 - ABRICATE_RESFINDER_collated_table: - order: -997 - ABRICATE_ARGANNOT_collated_table: - order: -998 - software_versions: - order: -999 - -export_plots: true - -# Run only these modules -run_modules: - - fastqc - - kraken - - custom_content - -module_order: - - fastqc: - name: 'FastQC' - info: 'section of the report shows FastQC results before adapter trimming.' - path_filters: - - '*_fastqc.zip' - - kraken: - name: 'Centrifuge' - href: 'https://ccb.jhu.edu/software/centrifuge' - doi: '10.1101/gr.210641.116' - info: > - section of the report shows how reads are classified. - Please note that the plot title below is shown as - Kraken2: Top taxa since centrifuge-kreport was used - to create Kraken-style reports from centrifuge output files. - path_filters: - - '*.kreport.txt' - - kraken: - name: 'Kraken2' - info: 'section of the report shows how assembled contigs are classified.' 
- path_filters: - - '*.report.txt' - -extra_fn_clean_exts: - - '.centrifuge.kreport' - - '.report' - -table_columns_visible: - Kraken: False - Kraken2: False - Centrifuge: False \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/cpipes --- a/0.4.0/cpipes Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -#!/usr/bin/env nextflow - -/* ----------------------------------------------------------------------------------------- - cfsan-dev/cpipes ----------------------------------------------------------------------------------------- - NAME : CPIPES - DESCRIPTION : Modular Nextflow pipelines at CFSAN, FDA. - GITLAB : https://cfsan-git.fda.gov/cfsan-dev/cpipes - JIRA : https://sde.fda.gov/jira/projects/CPIPES/ - CONTRIBUTORS : Kranti.Konganti@fda.hhs.gov ----------------------------------------------------------------------------------------- -*/ - -// Enable DSL 2 -nextflow.enable.dsl = 2 - -// Default routines for MAIN -include { pipelineBanner; stopNow; } from "${params.routines}" - -// Our banner for CPIPES -log.info pipelineBanner() - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW CAN BE USED TO RUN A SPECIFIC PIPELINE. THIS IS THE RECOMMENDED WAY. - NEED TO FIND A BETTER SOLUTION IF WE SEE A LOT OF PIPELINES. - See: https://github.com/nf-core/rnaseq/issues/619 -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -switch ("${params.pipeline}") { - case "nanofactory": - include { NANOFACTORY } from "${params.workflows}${params.fs}${params.pipeline}" - break - case "centriflaken": - include { CENTRIFLAKEN } from "${params.workflows}${params.fs}${params.pipeline}" - break - case "centriflaken_hy": - include { CENTRIFLAKEN_HY } from "${params.workflows}${params.fs}${params.pipeline}" - break - default: - stopNow("PLEASE MENTION A PIPELINE NAME. 
Ex: --pipeline centriflaken") -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow { - // THIS IS REPETETIVE BUT WE ARE NOT ALLOWED TO INCLUDE "INCLUDE" - // INSIDE WORKFLOW - switch ("${params.pipeline}") { - case "nanofactory": - NANOFACTORY() - break - case "centriflaken": - CENTRIFLAKEN() - break - case "centriflaken_hy": - CENTRIFLAKEN_HY() - break - } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/abricate.nf --- a/0.4.0/lib/help/abricate.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -// Help text for abricate within CPIPES. - -def abricateHelp(params) { - -Map tool = [:] -Map toolspecs = [:] -tool.text = [:] -tool.helpparams = [:] - - toolspecs = [ - 'abricate_run': [ - clihelp: 'Run ABRicate tool. Default: ' + - (params.abricate_run ?: false), - cliflag: null, - clivalue: null - ], - 'abricate_minid': [ - clihelp: 'Minimum DNA %identity. ' + - "Defaut: " + (params.abricate_minid ?: 80), - cliflag: '--minid', - clivalue: (params.abricate_minid ?: 80) - ], - 'abricate_mincov': [ - clihelp: 'Minimum DNA %coverage. ' + - "Defaut: " + (params.abricate_mincov ?: 80), - cliflag: '--mincov', - clivalue: (params.abricate_mincov ?: 80) - ], - 'abricate_datadir': [ - clihelp: 'ABRicate databases folder. 
' + - "Defaut: " + (params.abricate_datadir ?: 'undefined'), - cliflag: '--datadir', - clivalue: (params.abricate_datadir ?: '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/amrfinderplus.nf --- a/0.4.0/lib/help/amrfinderplus.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -def amrfinderplusHelp(params) { - - Map tool = [:] - Map toolspecs = [:] - tool.text = [:] - tool.helpparams = [:] - - toolspecs = [ - 'amrfinderplus_run': [ - clihelp: "Run AMRFinderPlus tool. Default: ${params.amrfinderplus_run}", - cliflag: null, - clivalue: null - ], - 'amrfinderplus_db': [ - clihelp: 'Path to AMRFinderPlus database. Please note that ' + - ' the databases should be ready and formatted with blast for use. ' + - 'Please read more at: ' + - 'https://github.com/ncbi/amr/wiki/AMRFinderPlus-database ' + - "Default: ${params.amrfinderplus_db}", - cliflag: '--database', - clivalue: (params.amrfinderplus_db ?: '') - ], - 'amrfinderplus_genes': [ - clihelp: 'Add the plus genes to the report', - cliflag: '--plus', - clivalue: (params.amrfinderplus_genes ? ' ' : '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/centrifuge.nf --- a/0.4.0/lib/help/centrifuge.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -// Help text for centrifuge within CPIPES. - -def centrifugeHelp(params) { - - Map tool = [:] - Map toolspecs = [:] - tool.text = [:] - tool.helpparams = [:] - - toolspecs = [ - 'centrifuge_x': [ - clihelp: "Absolute path to centrifuge database. 
Default: ${params.centrifuge_x}", - cliflag: '-x', - clivalue: null - ], - 'centrifuge_save_unaligned': [ - clihelp: 'Save SINGLE-END reads that did not align. For PAIRED-END' + - " reads, save read pairs that did not align concordantly. Default: ${params.centrifuge_save_unaligned}", - cliflag: null, // Handled in modules logic. - clivalue: null - ], - 'centrifuge_save_aligned': [ - clihelp: 'Save SINGLE-END reads that aligned. For PAIRED-END' + - " reads, save read pairs that aligned concordantly. Default: ${params.centrifuge_save_aligned}", - cliflag: null, // Handled in modules logic. - clivalue: null - ], - 'centrifuge_out_fmt_sam': [ - clihelp: "Centrifuge output should be in SAM. Default: ${params.centrifuge_save_aligned}", - cliflag: null, // Handled in modules logic. - clivalue: null - ], - 'centrifuge_extract_bug': [ - clihelp: "Extract this bug from centrifuge results." + - " Default: ${params.centrifuge_extract_bug}", - cliflag: null, // Handled in modules logic. - clivalue: null, - ], - 'centrifuge_ignore_quals': [ - clihelp: 'Treat all quality values as 30 on Phred scale. ' + - "Default: ${params.centrifuge_ignore_quals}", - cliflag: '--ignore-quals', - clivalue: (params.centrifuge_ignore_quals ? ' ' : '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} - diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/ectyper.nf --- a/0.4.0/lib/help/ectyper.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -def ectyperHelp(params) { - - Map tool = [:] - Map toolspecs = [:] - tool.text = [:] - tool.helpparams = [:] - - toolspecs = [ - 'ectyper_run': [ - clihelp: "Run ectyper tool. Default: ${params.ectyper_run}", - cliflag: null, - clivalue: null - ], - 'ectyper_perc_opid': [ - clihelp: 'Percent identity required for an O antigen allele match. 
' + - "Default: ${params.ectyper_perc_opid}", - cliflag: '-opid', - clivalue: (params.ectyper_perc_opid ?: 90) - ], - 'ectyper_perc_hpid': [ - clihelp: 'Percent identity required for a H antigen allele match. ' + - "Default: ${params.ectyper_perc_hpid}", - cliflag: '-hpid', - clivalue: (params.ectyper_perc_hpid ?: 95) - ], - 'ectyper_perc_opcov': [ - clihelp: 'Minumum percent coverage required for an O antigen allele match. ' + - "Default: ${params.ectyper_perc_opcov}", - cliflag: '-opcov', - clivalue: (params.ectyper_perc_opcov ?: 95) - ], - 'ectyper_perc_hpcov': [ - clihelp: 'Minumum percent coverage required for a H antigen allele match. ' + - "Default: ${params.ectyper_perc_hpcov}", - cliflag: '-hpcov', - clivalue: (params.ectyper_perc_hpcov ?: 50) - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/flye.nf --- a/0.4.0/lib/help/flye.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -// Help text for flye within CPIPES. - -def flyeHelp(params) { - -Map tool = [:] -Map toolspecs = [:] -tool.text = [:] -tool.helpparams = [:] - - toolspecs = [ - 'flye_pacbio_raw': [ - clihelp: 'Input FASTQ reads are PacBio regular CLR reads (<20% error) ' + - "Defaut: ${params.flye_pacbio_raw}", - cliflag: '--pacbio-raw', - clivalue: (params.flye_pacbio_raw ? ' ' : '') - ], - 'flye_pacbio_corr': [ - clihelp: 'Input FASTQ reads are PacBio reads that were corrected ' + - "with other methods (<3% error). Default: ${params.flye_pacbio_corr}", - cliflag: '--pacbio-corr', - clivalue: (params.flye_pacbio_corr ? ' ' : '') - ], - 'flye_pacbio_hifi': [ - clihelp: 'Input FASTQ reads are PacBio HiFi reads (<1% error). ' + - "Default: ${params.flye_pacbio_hifi}", - cliflag: '--pacbio-hifi', - clivalue: (params.flye_pacbio_hifi ? 
' ' : '') - ], - 'flye_nano_raw': [ - clihelp: 'Input FASTQ reads are ONT regular reads, pre-Guppy5 (<20% error). ' + - "Default: ${params.flye_nano_raw}", - cliflag: '--nano-raw', - clivalue: (params.flye_nano_raw ? ' ' : '') - ], - 'flye_nano_corr': [ - clihelp: 'Input FASTQ reads are ONT reads that were corrected with other ' + - "methods (<3% error). Default: ${params.flye_nano_corr}", - cliflag: '--nano-corr', - clivalue: (params.flye_nano_corr ? ' ' : '') - ], - 'flye_nano_hq': [ - clihelp: 'Input FASTQ reads are ONT high-quality reads: ' + - "Guppy5+ SUP or Q20 (<5% error). Default: ${params.flye_nano_hq}", - cliflag: '--nano-hq', - clivalue: (params.flye_nano_hq ? ' ' : '') - ], - 'flye_genome_size': [ - clihelp: 'Estimated genome size (for example, 5m or 2.6g). ' + - "Default: ${params.flye_genome_size}", - cliflag: '--genome-size', - clivalue: (params.flye_genome_size ?: '') - ], - 'flye_polish_iter': [ - clihelp: 'Number of genome polishing iterations. ' + - "Default: ${params.flye_polish_iter}", - cliflag: '--iterations', - clivalue: (params.flye_polish_iter ?: '') - ], - 'flye_meta': [ - clihelp: "Do a metagenome assembly (unenven coverage mode). Default: ${params.flye_meta}", - cliflag: '--meta', - clivalue: (params.flye_meta ? ' ' : '') - ], - 'flye_min_overlap': [ - clihelp: "Minimum overlap between reads. Default: ${params.flye_min_overlap}", - cliflag: '--min-overlap', - clivalue: (params.flye_min_overlap ?: '') - ], - 'flye_scaffold': [ - clihelp: "Enable scaffolding using assembly graph. Default: ${params.flye_scaffold}", - cliflag: '--scaffold', - clivalue: (params.flye_scaffold ? 
' ' : '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/kraken2.nf --- a/0.4.0/lib/help/kraken2.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -// Help text for kraken2 within CPIPES. - -def kraken2Help(params) { - - Map tool = [:] - Map toolspecs = [:] - tool.text = [:] - tool.helpparams = [:] - - toolspecs = [ - 'kraken2_db': [ - clihelp: "Absolute path to kraken database. Default: ${params.kraken2_db}", - cliflag: '--db', - clivalue: null - ], - 'kraken2_confidence': [ - clihelp: 'Confidence score threshold which must be ' + - "between 0 and 1. Default: ${params.kraken2_confidence}", - cliflag: '--confidence', - clivalue: (params.kraken2_confidence ?: '') - ], - 'kraken2_quick': [ - clihelp: "Quick operation (use first hit or hits). Default: ${params.kraken2_quick}", - cliflag: '--quick', - clivalue: (params.kraken2_quick ? ' ' : '') - ], - 'kraken2_use_mpa_style': [ - clihelp: "Report output like Kraken 1's " + - "kraken-mpa-report. Default: ${params.kraken2_use_mpa_style}", - cliflag: '--use-mpa-style', - clivalue: (params.kraken2_use_mpa_style ? ' ' : '') - ], - 'kraken2_minimum_base_quality': [ - clihelp: 'Minimum base quality used in classification ' + - " which is only effective with FASTQ input. Default: ${params.kraken2_minimum_base_quality}", - cliflag: '--minimum-base-quality', - clivalue: (params.kraken2_minimum_base_quality ?: '') - ], - 'kraken2_report_zero_counts': [ - clihelp: 'Report counts for ALL taxa, even if counts are zero. ' + - "Default: ${params.kraken2_report_zero_counts}", - cliflag: '--report-zero-counts', - clivalue: (params.kraken2_report_zero_counts ? 
' ' : '') - ], - 'kraken2_report_minmizer_data': [ - clihelp: 'Report minimizer and distinct minimizer count' + - ' information in addition to normal Kraken report. ' + - "Default: ${params.kraken2_report_minimizer_data}", - cliflag: '--report-minimizer-data', - clivalue: (params.kraken2_report_minimizer_data ? ' ' : '') - ], - 'kraken2_use_names': [ - clihelp: 'Print scientific names instead of just taxids. ' + - "Default: ${params.kraken2_use_names}", - cliflag: '--use-names', - clivalue: (params.kraken2_use_names ? ' ' : '') - ], - 'kraken2_extract_bug': [ - clihelp: 'Extract the reads or contigs beloging to this bug. ' + - "Default: ${params.kraken2_extract_bug}", - cliflag: null, - clivalue: null - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/megahit.nf --- a/0.4.0/lib/help/megahit.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -// Help text for megahit within CPIPES. - -def megahitHelp(params) { - -Map tool = [:] -Map toolspecs = [:] -tool.text = [:] -tool.helpparams = [:] - - toolspecs = [ - 'megahit_run': [ - clihelp: 'Run MEGAHIT assembler. Default: ' + - (params.megahit_run ?: false), - cliflag: null, - clivalue: null - ], - 'megahit_min_count': [ - clihelp: '. Minimum multiplicity for filtering (k_min+1)-mers. ' + - "Defaut: ${params.megahit_min_count}", - cliflag: '--min-count', - clivalue: (params.megahit_min_count ?: '') - ], - 'megahit_k_list': [ - clihelp: 'Comma-separated list of kmer size. All values must be odd, in ' + - "the range 15-255, increment should be <= 28. Ex: '21,29,39,59,79,99,119,141'. " + - "Default: ${params.megahit_k_list}", - cliflag: '--k-list', - clivalue: (params.megahit_k_list ?: '') - ], - 'megahit_no_mercy': [ - clihelp: 'Do not add mercy k-mers. 
' + - "Default: ${params.megahit_no_mercy}", - cliflag: '--no-mercy', - clivalue: (params.megahit_no_mercy ? ' ' : '') - ], - 'megahit_bubble_level': [ - clihelp: '. Intensity of bubble merging (0-2), 0 to disable. ' + - "Default: ${params.megahit_bubble_level}", - cliflag: '--bubble-level', - clivalue: (params.megahit_bubble_level ?: '') - ], - 'megahit_merge_level': [ - clihelp: '. Merge complex bubbles of length <= l*kmer_size and ' + - "similarity >= s. Default: ${params.megahit_merge_level}", - cliflag: '--merge-level', - clivalue: (params.megahit_merge_level ?: '') - ], - 'megahit_prune_level': [ - clihelp: '. Strength of low depth pruning (0-3). ' + - "Default: ${params.megahit_prune_level}", - cliflag: '--prune-level', - clivalue: (params.megahit_prune_level ?: '') - ], - 'megahit_prune_depth': [ - clihelp: '. Remove unitigs with avg k-mer depth less than this value. ' + - "Default: ${params.megahit_prune_depth}", - cliflag: '--prune-depth', - clivalue: (params.megahit_prune_depth ?: '') - ], - 'megahit_low_local_ratio': [ - clihelp: '. Ratio threshold to define low local coverage contigs. ' + - "Default: ${params.megahit_low_local_ratio}", - cliflag: '--low-local-ratio', - clivalue: (params.megahit_low_local_ratio ?: '') - ], - 'megahit_max_tip_len': [ - clihelp: '. remove tips less than this value [ * k]. ' + - "Default: ${params.megahit_max_tip_len}", - cliflag: '--max-tip-len', - clivalue: (params.megahit_max_tip_len ?: '') - ], - 'megahit_no_local': [ - clihelp: 'Disable local assembly. ' + - "Default: ${params.megahit_no_local}", - cliflag: '--no-local', - clivalue: (params.megahit_no_local ? ' ' : '') - ], - 'megahit_kmin_1pass': [ - clihelp: 'Use 1pass mode to build SdBG of k_min. ' + - "Default: ${params.megahit_kmin_1pass}", - cliflag: '--kmin-1pass', - clivalue: (params.megahit_kmin_1pass ? ' ' : '') - ], - 'megahit_preset': [ - clihelp: '. Override a group of parameters. 
Valid values are '+ - "meta-sensitive which enforces '--min-count 1 --k-list 21,29,39,49,...,129,141', " + - 'meta-large (large & complex metagenomes, like soil) which enforces ' + - "'--k-min 27 --k-max 127 --k-step 10'. " + - "Default: ${params.megahit_preset}", - cliflag: '--preset', - clivalue: (params.megahit_preset ?: '') - ], - 'megahit_mem_flag': [ - clihelp: '. SdBG builder memory mode. 0: minimum; 1: moderate; 2: use all memory specified. ' + - "Default: ${params.megahit_mem_flag}", - cliflag: '--mem-flag', - clivalue: (params.megahit_mem_flag ?: '') - ], - 'megahit_min_contig_len': [ - clihelp: '. Minimum length of contigs to output. ' + - "Default: ${params.megahit_min_contig_len}", - cliflag: '--use-gpu', - clivalue: (params.megahit_min_contig_len ?: '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/mlst.nf --- a/0.4.0/lib/help/mlst.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -def mlstHelp(params) { - - Map tool = [:] - Map toolspecs = [:] - tool.text = [:] - tool.helpparams = [:] - - toolspecs = [ - 'mlst_run': [ - clihelp: "Run MLST tool. Default: ${params.mlst_run}", - cliflag: null, - clivalue: null - ], - 'mlst_minid': [ - clihelp: "DNA %identity of full allelle to consider 'similar' [~]. " + - "Default: ${params.mlst_minid}", - cliflag: '--minid', - clivalue: (params.mlst_minid ?: 95) - ], - 'mlst_mincov': [ - clihelp: 'DNA %cov to report partial allele at all [?].' + - "Default: ${params.mlst_mincov}", - cliflag: '--mincov', - clivalue: (params.mlst_mincov ?: 10) - ], - 'mlst_minscore': [ - clihelp: 'Minumum score out of 100 to match a scheme.' 
+ - "Default: ${params.mlst_minscore}", - cliflag: '--minscore', - clivalue: (params.mlst_minscore ?: 50) - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/seqkitgrep.nf --- a/0.4.0/lib/help/seqkitgrep.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -// Help text for seqkit grep within CPIPES. - -def seqkitgrepHelp(params) { - -Map tool = [:] -Map toolspecs = [:] -tool.text = [:] -tool.helpparams = [:] - - toolspecs = [ - 'seqkit_grep_n': [ - clihelp: 'Match by full name instead of just ID. ' + - "Defaut: " + (params.seqkit_grep_n ?: 'undefined'), - cliflag: '--seqkit_grep_n', - clivalue: (params.seqkit_grep_n ? ' ' : '') - ], - 'seqkit_grep_s': [ - clihelp: 'Search subseq on seq, both positive and negative ' + - 'strand are searched, and mismatch allowed using flag --seqkit_grep_m. ' + - "Defaut: " + (params.seqkit_grep_s ?: 'undefined'), - cliflag: '--seqkit_grep_s', - clivalue: (params.seqkit_grep_s ? ' ' : '') - ], - 'seqkit_grep_c': [ - clihelp: 'Input is circular genome ' + - "Defaut: " + (params.seqkit_grep_c ?: 'undefined'), - cliflag: '--seqkit_grep_c', - clivalue: (params.seqkit_grep_c ? ' ' : '') - ], - 'seqkit_grep_C': [ - clihelp: 'Just print a count of matching records. With the ' + - '--seqkit_grep_v flag, count non-matching records. ' + - "Defaut: " + (params.seqkit_grep_v ?: 'undefined'), - cliflag: '--seqkit_grep_v', - clivalue: (params.seqkit_grep_v ? ' ' : '') - ], - 'seqkit_grep_i': [ - clihelp: 'Ignore case while using seqkit grep. ' + - "Defaut: " + (params.seqkit_grep_i ?: 'undefined'), - cliflag: '--seqkit_grep_i', - clivalue: (params.seqkit_grep_i ? ' ' : '') - ], - 'seqkit_grep_v': [ - clihelp: 'Invert the match i.e. select non-matching records. 
' + - "Defaut: " + (params.seqkit_grep_v ?: 'undefined'), - cliflag: '--seqkit_grep_v', - clivalue: (params.seqkit_grep_v ? ' ' : '') - ], - 'seqkit_grep_m': [ - clihelp: 'Maximum mismatches when matching by sequence. ' + - "Defaut: " + (params.seqkit_grep_m ?: 'undefined'), - cliflag: '--seqkit_grep_m', - clivalue: (params.seqkit_grep_v ?: '') - ], - 'seqkit_grep_r': [ - clihelp: 'Input patters are regular expressions. ' + - "Defaut: " + (params.seqkit_grep_m ?: 'undefined'), - cliflag: '--seqkit_grep_m', - clivalue: (params.seqkit_grep_v ?: '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/seqkitrmdup.nf --- a/0.4.0/lib/help/seqkitrmdup.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -// Help text for seqkit rmdup within CPIPES. - -def seqkitrmdupHelp(params) { - -Map tool = [:] -Map toolspecs = [:] -tool.text = [:] -tool.helpparams = [:] - - toolspecs = [ - 'seqkit_rmdup_run': [ - clihelp: 'Remove duplicate sequences using seqkit rmdup. Default: ' + - (params.seqkit_rmdup_run ?: false), - cliflag: null, - clivalue: null - ], - 'seqkit_rmdup_n': [ - clihelp: 'Match and remove duplicate sequences by full name instead of just ID. ' + - "Defaut: ${params.seqkit_rmdup_n}", - cliflag: '-n', - clivalue: (params.seqkit_rmdup_n ? ' ' : '') - ], - 'seqkit_rmdup_s': [ - clihelp: 'Match and remove duplicate sequences by sequence content. ' + - "Defaut: ${params.seqkit_rmdup_s}", - cliflag: '-s', - clivalue: (params.seqkit_rmdup_s ? ' ' : '') - ], - 'seqkit_rmdup_d': [ - clihelp: 'Save the duplicated sequences to a file. ' + - "Defaut: ${params.seqkit_rmdup_d}", - cliflag: null, - clivalue: null - ], - 'seqkit_rmdup_D': [ - clihelp: 'Save the number and list of duplicated sequences to a file. 
' + - "Defaut: ${params.seqkit_rmdup_D}", - cliflag: null, - clivalue: null - ], - 'seqkit_rmdup_i': [ - clihelp: 'Ignore case while using seqkit rmdup. ' + - "Defaut: ${params.seqkit_rmdup_i}", - cliflag: '-i', - clivalue: (params.seqkit_rmdup_i ? ' ' : '') - ], - 'seqkit_rmdup_P': [ - clihelp: "Only consider positive strand (i.e. 5') when comparing by sequence content. " + - "Defaut: ${params.seqkit_rmdup_P}", - cliflag: '-P', - clivalue: (params.seqkit_rmdup_P ? ' ' : '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/seqsero2.nf --- a/0.4.0/lib/help/seqsero2.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -def seqsero2Help(params) { - - Map tool = [:] - Map toolspecs = [:] - tool.text = [:] - tool.helpparams = [:] - - toolspecs = [ - 'seqsero2_run': [ - clihelp: "Run SeqSero2 tool. Default: ${params.seqsero2_run}", - cliflag: null, - clivalue: null - ], - 'seqsero2_t': [ - clihelp: "'1' for interleaved paired-end reads, '2' for " + - "separated paired-end reads, '3' for single reads, '4' for " + - "genome assembly, '5' for nanopore reads (fasta/fastq). " + - "Default: ${params.seqsero2_t}", - cliflag: '-t', - clivalue: (params.seqsero2_t ?: '') - ], - 'seqsero2_m': [ - clihelp: "Which workflow to apply, 'a'(raw reads allele " + - "micro-assembly), 'k'(raw reads and genome assembly k-mer). " + - "Default: ${params.seqsero2_m}", - cliflag: '-m', - clivalue: (params.seqsero2_m ?: '') - ], - 'seqsero2_c': [ - clihelp: 'SeqSero2 will only output serotype prediction without the directory ' + - 'containing log files. ' + - "Default: ${params.seqsero2_c}", - cliflag: '-c', - clivalue: (params.seqsero2_c ? ' ' : '') - ], - 'seqsero2_s': [ - clihelp: 'SeqSero2 will not output header in SeqSero_result.tsv. 
' + - "Default: ${params.seqsero2_s}", - cliflag: '-l', - clivalue: (params.seqsero2_s ? ' ' : '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/serotypefinder.nf --- a/0.4.0/lib/help/serotypefinder.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -def serotypefinderHelp(params) { - - Map tool = [:] - Map toolspecs = [:] - tool.text = [:] - tool.helpparams = [:] - - toolspecs = [ - 'serotypefinder_run': [ - clihelp: "Run SerotypeFinder tool. Default: ${params.serotypefinder_run}", - cliflag: null, - clivalue: null - ], - 'serotypefinder_x': [ - clihelp: 'Generate extended output files. ' + - "Default: ${params.serotypefinder_x}", - cliflag: '-x', - clivalue: (params.serotypefinder_x ? ' ' : '') - ], - 'serotypefinder_db': [ - clihelp: 'Path to SerotypeFinder databases. ' + - "Default: ${params.serotypefinder_db}", - cliflag: '-p', - clivalue: null - ], - 'serotypefinder_min_threshold': [ - clihelp: 'Minimum percent identity (in float) required for calling a hit. ' + - "Default: ${params.serotypefinder_min_threshold}", - cliflag: '-t', - clivalue: (params.serotypefinder_min_threshold ?: '') - ], - 'serotypefinder_min_cov': [ - clihelp: 'Minumum percent coverage (in float) required for calling a hit. 
' + - "Default: ${params.serotypefinder_min_cov}", - cliflag: '-l', - clivalue: (params.serotypefinder_min_cov ?: '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/help/spades.nf --- a/0.4.0/lib/help/spades.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,121 +0,0 @@ -// Help text for spades within CPIPES. - -def spadesHelp(params) { - -Map tool = [:] -Map toolspecs = [:] -tool.text = [:] -tool.helpparams = [:] - - toolspecs = [ - 'spades_run': [ - clihelp: 'Run SPAdes assembler. Default: ' + - (params.spades_run ?: false), - cliflag: null, - clivalue: null - ], - 'spades_isolate': [ - clihelp: 'This flag is highly recommended for high-coverage isolate and ' + - "multi-cell data. Defaut: ${params.spades_isolate}", - cliflag: '--isolate', - clivalue: (params.spades_isolate ? ' ' : '') - ], - 'spades_sc': [ - clihelp: 'This flag is required for MDA (single-cell) data. ' + - "Default: ${params.spades_sc}", - cliflag: '--sc', - clivalue: (params.spades_sc ? ' ' : '') - ], - 'spades_meta': [ - clihelp: 'This flag is required for metagenomic data. ' + - "Default: ${params.spades_meta}", - cliflag: '--meta', - clivalue: (params.spades_meta ? ' ' : '') - ], - 'spades_bio': [ - clihelp: 'This flag is required for biosytheticSPAdes mode. ' + - "Default: ${params.spades_bio}", - cliflag: '--bio', - clivalue: (params.spades_bio ? ' ' : '') - ], - 'spades_corona': [ - clihelp: 'This flag is required for coronaSPAdes mode. ' + - "Default: ${params.spades_corona}", - cliflag: '--corona', - clivalue: (params.spades_corona ? ' ' : '') - ], - 'spades_rna': [ - clihelp: 'This flag is required for RNA-Seq data. ' + - "Default: ${params.spades_rna}", - cliflag: '--rna', - clivalue: (params.spades_rna ? 
' ' : '') - ], - 'spades_plasmid': [ - clihelp: 'Runs plasmidSPAdes pipeline for plasmid detection. ' + - "Default: ${params.spades_plasmid}", - cliflag: '--plasmid', - clivalue: (params.spades_plasmid ? ' ' : '') - ], - 'spades_metaviral': [ - clihelp: 'Runs metaviralSPAdes pipeline for virus detection. ' + - "Default: ${params.spades_metaviral}", - cliflag: '--metaviral', - clivalue: (params.spades_metaviral ? ' ' : '') - ], - 'spades_metaplasmid': [ - clihelp: 'Runs metaplasmidSPAdes pipeline for plasmid detection in ' + - "metagenomics datasets. Default: ${params.spades_metaplasmid}", - cliflag: '--metaplasmid', - clivalue: (params.spades_metaplasmid ? ' ' : '') - ], - 'spades_rnaviral': [ - clihelp: 'This flag enables virus assembly module from RNA-Seq data. ' + - "Default: ${params.spades_rnaviral}", - cliflag: '--rnaviral', - clivalue: (params.spades_rnaviral ? ' ' : '') - ], - 'spades_iontorrent': [ - clihelp: 'This flag is required for IonTorrent data. ' + - "Default: ${params.spades_iontorrent}", - cliflag: '--iontorrent', - clivalue: (params.spades_iontorrent ? ' ' : '') - ], - 'spades_only_assembler': [ - clihelp: 'Runs only the SPAdes assembler module (without read error correction). ' + - "Default: ${params.spades_only_assembler}", - cliflag: '--only-assembler', - clivalue: (params.spades_only_assembler ? ' ' : '') - ], - 'spades_careful': [ - clihelp: 'Tries to reduce the number of mismatches and short indels in the assembly. ' + - "Default: ${params.spades_careful}", - cliflag: '--careful', - clivalue: (params.spades_careful ? ' ' : '') - ], - 'spades_cov_cutoff': [ - clihelp: 'Coverage cutoff value (a positive float number). ' + - "Default: ${params.spades_cov_cutoff}", - cliflag: '--cov-cutoff', - clivalue: (params.spades_cov_cutoff ?: '') - ], - 'spades_k': [ - clihelp: 'List of k-mer sizes (must be odd and less than 128). 
' + - "Default: ${params.spades_k}", - cliflag: '-k', - clivalue: (params.spades_k ?: '') - ], - 'spades_hmm': [ - clihelp: 'Directory with custom hmms that replace the default ones (very rare). ' + - "Default: ${params.spades_hmm}", - cliflag: '--custom-hmms', - clivalue: (params.spades_hmm ?: '') - ] - ] - - toolspecs.each { - k, v -> tool.text['--' + k] = "${v.clihelp}" - tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] - } - - return tool -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/lib/routines.nf --- a/0.4.0/lib/routines.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,368 +0,0 @@ -// Hold methods to print: -// 1. Colored logo. -// 2. Summary of parameters. -// 3. Single dashed line. -// 4. Double dashed line. -// - -import groovy.json.JsonSlurper -import nextflow.config.ConfigParser -// import groovy.json.JsonOutput - -// ASCII logo -def pipelineBanner() { - - def padding = (params.pad) ?: 30 - Map fgcolors = getANSIColors() - - def banner = [ - name: "${fgcolors.magenta}${workflow.manifest.name}${fgcolors.reset}", - author: "${fgcolors.cyan}${workflow.manifest.author}${fgcolors.reset}", - // workflow: "${fgcolors.magenta}${params.pipeline}${fgcolors.reset}", - version: "${fgcolors.green}${workflow.manifest.version}${fgcolors.reset}", - center: "${fgcolors.green}${params.center}${fgcolors.reset}", - pad: padding - ] - - manifest = addPadding(banner) - - return """${fgcolors.white}${dashedLine(type: '=')}${fgcolors.magenta} - (o) - ___ _ __ _ _ __ ___ ___ - / __|| '_ \\ | || '_ \\ / _ \\/ __| -| (__ | |_) || || |_) || __/\\__ \\ - \\___|| .__/ |_|| .__/ \\___||___/ - | | | | - |_| |_|${fgcolors.reset} -${dashedLine()} -${fgcolors.blue}A collection of modular pipelines at CFSAN, FDA.${fgcolors.reset} -${dashedLine()} -${manifest} -${dashedLine(type: '=')} -""".stripIndent() -} - -// Add padding to keys so that -// they indent nicely on the -// terminal -def 
addPadding(values) { - - def pad = (params.pad) ?: 30 - values.pad = pad - - def padding = values.pad.toInteger() - def nocapitalize = values.nocapitalize - def stopnow = values.stopNow - def help = values.help - - values.removeAll { - k, v -> [ - 'nocapitalize', - 'pad', - 'stopNow', - 'help' - ].contains(k) - } - - values.keySet().each { k -> - v = values[k] - s = params.linewidth - (pad + 5) - if (v.toString().size() > s && !stopnow) { - def sen = '' - v.toString().findAll(/.{1,${s}}\b(?:\W*|\s*)/).each { - sen += ' '.multiply(padding + 2) + it + '\n' - } - values[k] = ( - help ? sen.replaceAll(/^(\n|\s)*/, '') : sen.trim() - ) - } else { - values[k] = (help ? v + "\n" : v) - } - k = k.replaceAll(/\./, '_') - } - - return values.findResults { - k, v -> nocapitalize ? - k.padRight(padding) + ': ' + v : - k.capitalize().padRight(padding) + ': ' + v - }.join("\n") -} - -// Method for error messages -def stopNow(msg) { - - Map fgcolors = getANSIColors() - Map errors = [:] - - if (msg == null) { - msg = "Unknown error" - } - - errors['stopNow'] = true - errors["${params.cfsanpipename} - ${params.pipeline} - ERROR"] = """ -${fgcolors.reset}${dashedLine()} -${fgcolors.red}${msg}${fgcolors.reset} -${dashedLine()} -""".stripIndent() - // println dashedLine() // defaults to stdout - // log.info addPadding(errors) // prints to stdout - exit 1, "\n" + dashedLine() + - "${fgcolors.red}\n" + addPadding(errors) -} - -// Method to validate 4 required parameters -// if input for entry point is FASTQ files -def validateParamsForFASTQ() { - switch (params) { - case { params.metadata == null && params.input == null }: - stopNow("Either metadata CSV file with 5 required columns\n" + - "in order: sample, fq1, fq2, strandedness, single_end or \n" + - "input directory of only FASTQ files (gzipped or unzipped) should be provided\n" + - "using --metadata or --input options.\n" + - "None of these two options were provided!") - break - case { params.metadata != null && params.input != null 
}: - stopNow("Either metadata or input directory of FASTQ files\n" + - "should be provided using --metadata or --input options.\n" + - "Using both these options is not allowed!") - break - case { params.output == null }: - stopNow("Please mention output directory to store all results " + - "using --output option!") - break - } - return 1 -} - -// Method to print summary of parameters -// before running -def summaryOfParams() { - - def pipeline_specific_config = new ConfigParser().setIgnoreIncludes(true).parse( - file("${params.workflowsconf}${params.fs}${params.pipeline}.config").text - ) - Map fgcolors = getANSIColors() - Map globalparams = [:] - Map localparams = params.subMap( - pipeline_specific_config.params.keySet().toList() + params.logtheseparams - ) - - if (localparams !instanceof Map) { - stopNow("Need a Map of paramters. We got: " + localparams.getClass()) - } - - if (localparams.size() != 0) { - localparams['nocapitalize'] = true - globalparams['nocapitalize'] = true - globalparams['nextflow_version'] = "${nextflow.version}" - globalparams['nextflow_build'] = "${nextflow.build}" - globalparams['nextflow_timestamp'] = "${nextflow.timestamp}" - globalparams['workflow_projectDir'] = "${workflow.projectDir}" - globalparams['workflow_launchDir'] = "${workflow.launchDir}" - globalparams['workflow_workDir'] = "${workflow.workDir}" - globalparams['workflow_container'] = "${workflow.container}" - globalparams['workflow_containerEngine'] = "${workflow.containerEngine}" - globalparams['workflow_runName'] = "${workflow.runName}" - globalparams['workflow_sessionId'] = "${workflow.sessionId}" - globalparams['workflow_profile'] = "${workflow.profile}" - globalparams['workflow_start'] = "${workflow.start}" - globalparams['workflow_commandLine'] = "${workflow.commandLine}" - return """${dashedLine()} -Summary of the current workflow (${fgcolors.magenta}${params.pipeline}${fgcolors.reset}) parameters -${dashedLine()} -${addPadding(localparams)} -${dashedLine()} 
-${fgcolors.cyan}N E X T F L O W${fgcolors.reset} - ${fgcolors.magenta}${params.cfsanpipename}${fgcolors.reset} - Runtime metadata -${dashedLine()} -${addPadding(globalparams)} -${dashedLine()}""".stripIndent() - } - return 1 -} - -// Method to display -// Return dashed line either '-' -// type or '=' type -def dashedLine(Map defaults = [:]) { - - Map fgcolors = getANSIColors() - def line = [color: 'white', type: '-'] - - if (!defaults.isEmpty()) { - line.putAll(defaults) - } - - return fgcolors."${line.color}" + - "${line.type}".multiply(params.linewidth) + - fgcolors.reset -} - -// Return slurped keys parsed from JSON -def slurpJson(file) { - def slurped = null - def jsonInst = new JsonSlurper() - - try { - slurped = jsonInst.parse(new File ("${file}")) - } - catch (Exception e) { - log.error 'Please check your JSON schema. Invalid JSON file: ' + file - } - - // Declare globals for the nanofactory - // workflow. - return [keys: slurped.keySet().toList(), cparams: slurped] -} - -// Default help text in a map if the entry point -// to a pipeline is FASTQ files. -def fastqEntryPointHelp() { - - Map helptext = [:] - Map fgcolors = getANSIColors() - - helptext['Workflow'] = "${fgcolors.magenta}${params.pipeline}${fgcolors.reset}" - helptext['Author'] = "${fgcolors.cyan}${params.workflow_built_by}${fgcolors.reset}" - helptext['Version'] = "${fgcolors.green}${params.workflow_version}${fgcolors.reset}\n" - helptext['Usage'] = "cpipes --pipeline ${params.pipeline} [options]\n" - helptext['Required'] = "" - helptext['--input'] = "Absolute path to directory containing FASTQ files. " + - "The directory should contain only FASTQ files as all the " + - "files within the mentioned directory will be read. " + - "Ex: --input /path/to/fastq_pass" - helptext['--output'] = "Absolute path to directory where all the pipeline " + - "outputs should be stored. 
Ex: --output /path/to/output" - helptext['Other options'] = "" - helptext['--metadata'] = "Absolute path to metadata CSV file containing five " + - "mandatory columns: sample,fq1,fq2,strandedness,single_end. The fq1 and fq2 " + - "columns contain absolute paths to the FASTQ files. This option can be used in place " + - "of --input option. This is rare. Ex: --metadata samplesheet.csv" - helptext['--fq_suffix'] = "The suffix of FASTQ files (Unpaired reads or R1 reads or Long reads) if " + - "an input directory is mentioned via --input option. Default: ${params.fq_suffix}" - helptext['--fq2_suffix'] = "The suffix of FASTQ files (Paired-end reads or R2 reads) if an input directory is mentioned via " + - "--input option. Default: ${params.fq2_suffix}" - helptext['--fq_filter_by_len'] = "Remove FASTQ reads that are less than this many bases. " + - "Default: ${params.fq_filter_by_len}" - helptext['--fq_strandedness'] = "The strandedness of the sequencing run. This is mostly needed " + - "if your sequencing run is RNA-SEQ. For most of the other runs, it is probably safe to use " + - "unstranded for the option. Default: ${params.fq_strandedness}" - helptext['--fq_single_end'] = "SINGLE-END information will be auto-detected but this option forces " + - "PAIRED-END FASTQ files to be treated as SINGLE-END so only read 1 information is included in " + - "auto-generated samplesheet. Default: ${params.fq_single_end}" - helptext['--fq_filename_delim'] = "Delimiter by which the file name is split to obtain sample name. " + - "Default: ${params.fq_filename_delim}" - helptext['--fq_filename_delim_idx'] = "After splitting FASTQ file name by using the --fq_filename_delim option," + - " all elements before this index (1-based) will be joined to create final sample name." 
+ - " Default: ${params.fq_filename_delim_idx}" - - return helptext -} - -// Wrap help text with the following options -def wrapUpHelp() { - - return [ - 'Help options' : "", - '--help': "Display this message.\n", - 'help': true, - 'nocapitalize': true - ] -} - -// Method to send email on workflow complete. -def sendMail() { - - if (params.user_email == null) { - return 1 - } - - def pad = (params.pad) ?: 30 - def contact_emails = [ - stakeholder: (params.workflow_blueprint_by ?: 'Not defined'), - author: (params.workflow_built_by ?: 'Not defined') - ] - def msg = """ -${pipelineBanner()} -${summaryOfParams()} -${params.cfsanpipename} - ${params.pipeline} -${dashedLine()} -Please check the following directory for N E X T F L O W -reports. You can view the HTML files directly by double clicking -them on your workstation. -${dashedLine()} -${params.tracereportsdir} -${dashedLine()} -Please send any bug reports to CFSAN Dev Team or the author or -the stakeholder of the current pipeline. -${dashedLine()} -Error messages (if any) -${dashedLine()} -${workflow.errorMessage} -${workflow.errorReport} -${dashedLine()} -Contact emails -${dashedLine()} -${addPadding(contact_emails)} -${dashedLine()} -Thank you for using ${params.cfsanpipename} - ${params.pipeline}! -${dashedLine()} -""".stripIndent() - - def mail_cmd = [ - 'sendmail', - '-f', 'cfsan-hpc-noreply@fda.hhs.gov', - '-F', 'cfsan-hpc-noreply', - '-t', "${params.user_email}" - ] - - def email_subject = "${params.cfsanpipename} - ${params.pipeline}" - Map fgcolors = getANSIColors() - - if (workflow.success) { - email_subject += ' completed successfully!' - } - else if (!workflow.success) { - email_subject += ' has failed!' - } - - try { - ['env', 'bash'].execute() << """${mail_cmd.join(' ')} -Subject: ${email_subject} -Mime-Version: 1.0 -Content-Type: text/html -
-${msg.replaceAll(/\x1b\[[0-9;]*m/, '')}
-
-""".stripIndent() - } catch (all) { - def warning_msg = "${fgcolors.yellow}${params.cfsanpipename} - ${params.pipeline} - WARNING" - .padRight(pad) + ':' - log.info """ -${dashedLine()} -${warning_msg} -${dashedLine()} -Could not send mail with the sendmail command! -${dashedLine()} -""".stripIndent() - } - return 1 -} - -// Set ANSI colors for any and all -// STDOUT or STDERR -def getANSIColors() { - - Map fgcolors = [:] - - fgcolors['reset'] = "\033[0m" - fgcolors['black'] = "\033[0;30m" - fgcolors['red'] = "\033[0;31m" - fgcolors['green'] = "\033[0;32m" - fgcolors['yellow'] = "\033[0;33m" - fgcolors['blue'] = "\033[0;34m" - fgcolors['magenta'] = "\033[0;35m" - fgcolors['cyan'] = "\033[0;36m" - fgcolors['white'] = "\033[0;37m" - - return fgcolors -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/abricate/run/README.md --- a/0.4.0/modules/abricate/run/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ -# NextFlow DSL2 Module - -```bash -ABRICATE_RUN -``` - -## Description - -Run `abricate` tool on a list of assembled contigs in FASTA format given a list of database names. Produces a single output table in ASCII text format per database. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of assemled contig FASTA files of input type `path` (`assembly`). - -Ex: - -```groovy -[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] -``` - -\ -  - -#### `assembly` - -Type: `path` - -NextFlow input type of `path` pointing to assembled contig file in FASTA format. 
- -\ -  - -#### `abdbs` - -Type: `val` - -Nextflow input type of `val` containing a list of at least one of the following database names on which `abricate` should be run. - -Ex: - -```groovy -[ 'resfinder', 'megares', 'ncbi', 'ncbiamrplus', 'argannot' , 'ecoli_vf' ] -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of `abricate` result files (`abricated`). - -\ -  - -#### `abricated` - -Type: `path` - -NextFlow output type of `path` pointing to the `abricate` results table file per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/abricate/run/main.nf --- a/0.4.0/modules/abricate/run/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -process ABRICATE_RUN { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}abricate${params.fs}1.0.1" : null) - conda (params.enable_conda ? "bioconda::abricate=1.0.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1': - 'quay.io/biocontainers/abricate:1.0.1--ha8f3691_1' }" - - input: - tuple val(meta), path(assembly) - val abdbs - - output: - path "${meta.id}${params.fs}*" - tuple val(meta), path("${meta.id}${params.fs}*.ab.txt"), emit: abricated - path "versions.yml" , emit: versions - - when: - (task.ext.when == null || task.ext.when) && assembly.size() > 0 - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def dbs = abdbs.collect().join('\\n') - """ - newprefix="${prefix}${params.fs}${prefix}" - - if [ ! 
-d "$prefix" ]; then - mkdir "$prefix" || exit 1 - fi - - echo -e "$dbs" | while read -r db; do - abricate \\ - $assembly \\ - $args \\ - --db \$db \\ - --threads $task.cpus 1> "\${newprefix}.\${db}.ab.txt" - done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - abricate: \$(echo \$(abricate --version 2>&1) | sed 's/^.*abricate //' ) - bash: \$( bash --version 2>&1 | sed '1!d; s/^.*version //; s/ (.*\$//' ) - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/abricate/summary/README.md --- a/0.4.0/modules/abricate/summary/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,140 +0,0 @@ -# NextFlow DSL2 Module - -```bash -ABRICATE_SUMMARY -``` - -## Description - -Run `abricate` tool's `summary` sub-command on a list of `abricate`'s result table files per database. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of `abricate` database names of type `val` (`abdbs`) and a list of `abricate` result table files for all databases of type `path` (`abfiles`). - -Ex: - -```groovy -[ - [ 'megares', 'argannot', 'resfinder', 'ncbi' ], - [ '/data/sample1/f.ncbi.ab.txt', - '/data/sample1/f.megares.ab.txt', - '/data/sample1/f.resfinder.ab.txt', - '/data/sample1/f.argannot.ab.txt', - '/data/sample1/f2.ncbi.ab.txt', - '/data/sample1/f2.megares.ab.txt', - '/data/sample1/f2.resfinder.ab.txt', - '/data/sample1/f2.argannot.ab.txt' - ] -] -``` - -\ -  - -#### `abdbs` - -Type: `val` - -A Groovy List containing the **mandatory** list of at least the following 4 `abricate` database names on which `abricate` was run. - -Ex: - -```groovy -[ 'resfinder', 'megares', 'ncbi', 'argannot' ] -``` - -\ -  - -#### `abfiles` - -Type: `path` - -NextFlow input type of `path` pointing to `abricate` result files for each of the database. 
- -\ -  - -### `output:` - -___ - -#### `ncbi` - -Type: `tuple` -\ -Optional: `true` - -Outputs a tuple of `abricate` database key (`abricate_ncbi`) and summary result file from `abricate summary` command of type `path` (`ncbi`). This database includes only core AMR genes. This tuple is emitted optionally only where there are output files with suffix `.ncbi.absum.txt` - -\ -  - -#### `ncbiamrplus` - -Type: `tuple` -\ -Optional: `true` - -Outputs a tuple of `abricate` database key (`abricate_ncbiamrplus`) and summary result file from `abricate summary` command of type `path` (`ncbiamrplus`). This database includes both core AMR genes and plus AMR genes. This tuple is emitted optionally only where there are output files with suffix `.ncbiamrplus.absum.txt` - -\ -  - -#### `resfinder` - -Type: `tuple` -\ -Optional: `true` - -Outputs a tuple of `abricate` database key (`abricate_resfinder`) and summary result file from `abricate summary` command of type `path` (`resfinder`). This tuple is emitted optionally only where there are output files with suffix `.resfinder.absum.txt` - -\ -  - -#### `megares` - -Type: `tuple` -\ -Optional: `true` - -Outputs a tuple of `abricate` database key (`abricate_megares`) and summary result file from `abricate summary` command of type `path` (`megares`). This tuple is emitted optionally only where there are output files with suffix `.megares.absum.txt` - -\ -  - -#### `argannot` - -Type: `tuple` -\ -Optional: `true` - -Outputs a tuple of `abricate` database key (`abricate_argannot`) and summary result file from `abricate summary` command of type `path` (`argannot`). This tuple is emitted optionally only where there are output files with suffix `.argannot.absum.txt` - -\ -  - -#### `ecoli_vf` - -Type: `tuple` -\ -Optional: `true` - -Outputs an **optional** tuple of `abricate` database key (`abricate_ecoli_vf`) and summary result file from `abricate summary` command of type `path` (`ecoli_vf`). 
This tuple is emitted only when there are output files with suffix `.ecoli_vf.absum.txt` within the `work` folder. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/abricate/summary/main.nf --- a/0.4.0/modules/abricate/summary/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -process ABRICATE_SUMMARY { - tag "${abdbs.join(',')}" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}abricate${params.fs}1.0.1" : null) - conda (params.enable_conda ? "bioconda::abricate=1.0.1 conda-forge::coreutils" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1': - 'quay.io/biocontainers/abricate:1.0.1--ha8f3691_1' }" - - input: - tuple val(abdbs), path(abfiles) - - output: - tuple val('abricate_ncbi'), path("*.ncbi.absum.txt") , emit: ncbi, optional: true - tuple val('abricate_ncbiamrplus'), path("*.ncbiamrplus.absum.txt"), emit: ncbiamrplus, optional: true - tuple val('abricate_resfinder'), path("*resfinder.absum.txt") , emit: resfinder, optional: true - tuple val('abricate_megares'), path("*.megares.absum.txt") , emit: megares, optional: true - tuple val('abricate_argannot'), path("*.argannot.absum.txt") , emit: argannot, optional: true - tuple val('abricate_ecoli_vf'), path("*.ecoli_vf.absum.txt") , emit: ecoli_vf, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def onthese = abdbs.collect{ db -> - abfiles.findAll { files -> - files =~ /\.${db}/ - }.join(' ') - }.join('\\n') - """ - filenum="1" - - echo -e "$onthese" | while read -r files; do - db=\$( echo -e "\${files}" | grep -E -o '\\w+\\.ab\\.txt' 
| sort -u | sed -e 's/.ab.txt//' ) - - if [ -z "\$db" ]; then - db="\$filenum" - fi - - abricate \\ - $args \\ - --summary \${files} \\ - 1> "abricate.\${db}.absum.txt" - - sed -i -e "s/.\${db}.ab.txt//" "abricate.\${db}.absum.txt" - sed -i -e 's/.assembly_filtered_contigs.fasta//' "abricate.\${db}.absum.txt" - - filenum=\$((filenum+1)) - done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - abricate: \$(echo \$(abricate --version 2>&1) | sed 's/^.*abricate //' ) - bash: \$( bash --version 2>&1 | sed '1!d; s/^.*version //; s/ (.*\$//' ) - END_VERSIONS - - sedver="" - sortver="" - grepver="" - - if [ "${workflow.containerEngine}" != "null" ]; then - sortver=\$( sort --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) - sedver="\$sortver" - grepver="\$sortver" - else - sortver=\$( sort --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) - sedver=\$( echo \$(sed --version 2>&1) | sed 's/^.*(GNU sed) //; s/ Copyright.*\$//' ) - grepver=\$( echo \$(grep --version 2>&1) | sed 's/^.*(GNU grep) //; s/ Copyright.*\$//' ) - fi - - cat <<-END_VERSIONS >> versions.yml - sort: \$sortver - grep: \$grepver - sed: \$sedver - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/amrfinderplus/run/README.md --- a/0.4.0/modules/amrfinderplus/run/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ -# NextFlow DSL2 Module - -```bash -AMRFINDERPLUS_RUN -``` - -## Description - -Run `amrfinder` tool on a list of assembled contigs in FASTA format. Produces a single output table in ASCII text format per database. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of assemled contig FASTA file of input type `path` (`fasta`). - -Ex: - -```groovy -[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. 
- -Ex: - -```groovy -[ id: 'FAL00870', strandedness: 'unstranded', single_end: true, organism: 'Escherichia' ] -``` - -\ -  - -#### `fasta` - -Type: `path` - -NextFlow input type of `path` pointing to assembled contig file in FASTA format. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'AMRFINDERPLUS_RUN' { - ext.args = '--gpipe_org' -} -``` - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of `amrfinder` result files (`report`). - -\ -  - -#### `report` - -Type: `path` - -NextFlow output type of `path` pointing to the `amrfinder` results table file (`.tsv`) per sample (`id:`). - -\ -  - -#### `mutional_report` - -Type: `path` -\ -Optional: `true` - -NextFlow output type of `path` pointing to the `amrfinder` mutation results table file (`.tsv`) per sample (`id:`). Obtaining this output will depend on the presence of the `organism` key in the metadata (`meta`). See example above. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/amrfinderplus/run/main.nf --- a/0.4.0/modules/amrfinderplus/run/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -process AMRFINDERPLUS_RUN { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}amrfinderplus${params.fs}3.10.24" : null) - conda (params.enable_conda ? "bioconda::ncbi-amrfinderplus=3.10.24 conda-forge::libgcc-ng" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus%3A3.10.23--h17dc2d4_0': - 'quay.io/biocontainers/ncbi-amrfinderplus:3.10.23--h17dc2d4_0' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("${prefix}.tsv") , emit: report - tuple val(meta), path("${prefix}-mutations.tsv"), emit: mutation_report, optional: true - path "versions.yml" , emit: versions - - when: - (task.ext.when == null || task.ext.when) && fasta.size() > 0 - - script: - def args = task.ext.args ?: '' - def is_compressed = fasta.getName().endsWith(".gz") ? true : false - prefix = task.ext.prefix ?: "${meta.id}" - organism_param = meta.containsKey("organism") ? "--organism ${meta.organism} --mutation_all ${prefix}-mutations.tsv" : "" - fasta_name = fasta.getName().replace(".gz", "") - fasta_param = "-n" - if (meta.containsKey("is_proteins")) { - if (meta.is_proteins) { - fasta_param = "-p" - } - } - """ - if [ "$is_compressed" == "true" ]; then - gzip -c -d $fasta > $fasta_name - fi - - amrfinder \\ - $fasta_param $fasta_name \\ - $organism_param \\ - $args \\ - --threads $task.cpus > ${prefix}.tsv - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - amrfinderplus: \$(amrfinder --version) - gzip: \$( echo \$(gzip --version 2>&1) | sed 's/^.*(gzip) //; s/gzip //; s/ Copyright.*\$//' ) - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/cat/fastq/README.md --- a/0.4.0/modules/cat/fastq/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -# NextFlow DSL2 Module - -```bash -CAT_FASTQ -``` - -## Description - -Concatenates a list of FASTQ files. Produces 2 files per sample (`id:`) if `single_end` is `false` as mentioned in the metadata Groovy Map. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of FASTQ files of input type `path` (`reads`) to be concatenated. 
- -Ex: - -```groovy -[ [id: 'sample1', single_end: true], ['/data/sample1/f_L001.fq', '/data/sample1/f_L002.fq'] ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to list of FASTQ files. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'CAT_FASTQ' { - ext.args = '--genome_size 5.5m' -} -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of concatenated FASTQ files (`catted_reads`). - -\ -  - -#### `catted_reads` - -Type: `path` - -NextFlow output type of `path` pointing to the concatenated FASTQ files per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/cat/fastq/main.nf --- a/0.4.0/modules/cat/fastq/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -process CAT_FASTQ { - tag "$meta.id" - label 'process_micro' - - conda (params.enable_conda ? "conda-forge::sed=4.7 conda-forge::gzip" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" - - input: - tuple val(meta), path(reads, stageAs: "input*/*") - - output: - tuple val(meta), path("*.merged.fastq.gz"), emit: catted_reads - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads.collect{ it.toString() } - def is_in_gz = readList[0].endsWith('.gz') - def gz_or_ungz = (is_in_gz ? '' : ' | gzip') - def pigz_or_ungz = (is_in_gz ? '' : " | pigz -p ${task.cpus}") - if (meta.single_end) { - if (readList.size > 1) { - """ - zcmd="gzip" - zver="" - - if type pigz > /dev/null 2>&1; then - cat ${readList.join(' ')} ${pigz_or_ungz} > ${prefix}.merged.fastq.gz - zcmd="pigz" - zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed -e '1!d' | sed "s/\$zcmd //" ) - else - cat ${readList.join(' ')} ${gz_or_ungz} > ${prefix}.merged.fastq.gz - zcmd="gzip" - - if [ "${workflow.containerEngine}" != "null" ]; then - zver=\$( echo \$( \$zcmd --help 2>&1 ) | sed -e '1!d; s/ (.*\$//' ) - else - zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed "s/^.*(\$zcmd) //; s/\$zcmd //; s/ Copyright.*\$//" ) - fi - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cat: \$( echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//' ) - \$zcmd: \$zver - END_VERSIONS - """ - } - } else { - if (readList.size > 2) { - def read1 = [] - def read2 = [] - readList.eachWithIndex{ v, ix -> ( ix & 1 ? 
read2 : read1 ) << v } - """ - zcmd="gzip" - zver="" - - if type pigz > /dev/null 2>&1; then - cat ${read1.join(' ')} ${pigz_or_ungz} > ${prefix}_1.merged.fastq.gz - cat ${read2.join(' ')} ${pigz_or_ungz} > ${prefix}_2.merged.fastq.gz - zcmd="pigz" - zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed -e '1!d' | sed "s/\$zcmd //" ) - else - cat ${read1.join(' ')} ${gz_or_ungz} > ${prefix}_1.merged.fastq.gz - cat ${read2.join(' ')} ${gz_or_ungz} > ${prefix}_2.merged.fastq.gz - zcmd="gzip" - - if [ "${workflow.containerEngine}" != "null" ]; then - zver=\$( echo \$( \$zcmd --help 2>&1 ) | sed -e '1!d; s/ (.*\$//' ) - else - zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed "s/^.*(\$zcmd) //; s/\$zcmd //; s/ Copyright.*\$//" ) - fi - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cat: \$( echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//' ) - \$zcmd: \$zver - END_VERSIONS - """ - } - } -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/cat/tables/README.md --- a/0.4.0/modules/cat/tables/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,88 +0,0 @@ -# NextFlow DSL2 Module - -```bash -TABLE_SUMMARY -``` - -## Description - -Concatenates a list of tables (CSV or TAB delimited) in `.txt` or `.csv` format. The table files to be concatenated **must** have a header as the header from one of the table files will be used as the header for the concatenated result table file. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of `val` table key (`table_sum_on`) and a list of table files of input type `path` (`tables`) to be concatenated. For this module to work, a `bin` directory with the script `create_mqc_data_table.py` should be present where the NextFlow script using this DSL2 module will be run. This `python` script will convert the aggregated table to `.yml` format to be used with `multiqc`. 
- -Ex: - -```groovy -[ ['ectyper'], ['/data/sample1/f1_ectyper.txt', '/data/sample2/f2_ectyper.txt'] ] -``` - -\ -  - -#### `table_sum_on` - -Type: `val` - -A single key defining what tables are being concatenated. For example, if all the `ectyper` results are being concatenated for all samples, then this can be `ectyper`. - -Ex: - -```groovy -[ ['ectyper'], ['/data/sample1/f1_ectyper.txt', '/data/sample2/f2_ectyper.txt'] ] -``` - -\ -  - -#### `tables` - -Type: `path` - -NextFlow input type of `path` pointing to a list of tables (files) to be concatenated. - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of table key (`table_sum_on` from `input:`) and list of concatenated table files (`tblsummed`). - -\ -  - -#### `tblsummed` - -Type: `path` - -NextFlow output type of `path` pointing to the concatenated table files per table key (Ex: `ectyper`). - -\ -  - -#### `mqc_yml` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing table contents in `YAML` format which can be used to inject this table as part of the `multiqc` report. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/cat/tables/main.nf --- a/0.4.0/modules/cat/tables/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -process TABLE_SUMMARY { - tag "$table_sum_on" - label 'process_low' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) - conda (params.enable_conda ? "conda-forge::python=3.9 conda-forge::pyyaml conda-forge::coreutils" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" - - input: - tuple val(table_sum_on), path(tables) - - output: - tuple val(table_sum_on), path("*.tblsum.txt"), emit: tblsummed - path "*_mqc.yml" , emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when || tables - - script: - def args = task.ext.args ?: '' - def onthese = tables.collect().join('\\n') - """ - filenum="1" - header="" - - echo -e "$onthese" | while read -r file; do - - if [ "\${filenum}" == "1" ]; then - header=\$( head -n1 "\${file}" ) - echo -e "\${header}" > ${table_sum_on}.tblsum.txt - fi - - tail -n+2 "\${file}" >> ${table_sum_on}.tblsum.txt - - filenum=\$((filenum+1)) - done - - create_mqc_data_table.py $table_sum_on ${workflow.manifest.name} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bash: \$( bash --version 2>&1 | sed '1!d; s/^.*version //; s/ (.*\$//' ) - python: \$( python --version | sed 's/Python //g' ) - END_VERSIONS - - headver=\$( head --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) - tailver=\$( tail --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) - - cat <<-END_VERSIONS >> versions.yml - head: \$headver - tail: \$tailver - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/centrifuge/classify/README.md --- a/0.4.0/modules/centrifuge/classify/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,159 +0,0 @@ -# NextFlow DSL2 Module - -```bash -CENTRIFUGE_CLASSIFY -``` - -## Description - -Run `centrifuge` tool on reads in FASTQ format. Produces 3 output files in ASCII text format and optional output files. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). 
- -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - ], - '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' -] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to FASTQ files on which `centrifuge` classification should be run. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'CENTRIFUGE_CLASSIFY' { - ext.args = '--met 3' -} -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of `centrifuge` result files. - -\ -  - -#### `report` - -Type: `path` - -NextFlow output type of `path` pointing to the `centrifuge` report table file (`.report.txt`) per sample (`id:`). - -\ -  - -#### `output` - -Type: `path` - -NextFlow output type of `path` pointing to the `centrifuge` output table file (`.output.txt`) per sample (`id:`). - -\ -  - -#### `kreport` - -Type: `path` - -NextFlow output type of `path` pointing to the `centrifuge` **Kraken** style report table file (`.kreport.txt`) per sample (`id:`). - -\ -  - -#### `sam` - -Type: `path` -\ -Optional: `true` - -NextFlow output type of `path` pointing to the `centrifuge` alignment results in SAM (`.sam`) format per sample (`id:`). Obtaining this output will depend on the mention of `--centrifuge_out_fmt_sam` command-line option when the NextFlow pipeline is called. 
- -\ -  - -#### `fastq_mapped` - -Type: `path` -\ -Optional: `true` - -NextFlow output type of `path` pointing to the `centrifuge` alignment results in FASTQ (`.fastq.gz`) format per sample (`id:`). Obtaining this output will depend on the mention of `--centrifuge_save_aligned` command-line option when the NextFlow pipeline is called. - -\ -  - -#### `fastq_unmapped` - -Type: `path` -\ -Optional: `true` - -NextFlow output type of `path` pointing to the `centrifuge` FASTQ (`.fastq.gz`) files of unaligned reads per sample (`id:`). Obtaining this output will depend on the mention of `--centrifuge_save_unaligned` command-line option when the NextFlow pipeline is called. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/centrifuge/classify/main.nf --- a/0.4.0/modules/centrifuge/classify/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -process CENTRIFUGE_CLASSIFY { - tag "$meta.id" - label 'process_medium' - - module (params.enable_module ? 'centrifuge' : null) - conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' : - 'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path('*.report.txt') , emit: report - tuple val(meta), path('*.output.txt') , emit: output - tuple val(meta), path('*.kreport.txt') , emit: kreport - tuple val(meta), path('*.sam') , optional: true, emit: sam - tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped - tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" - def db = meta.centrifuge_x ?: '' - def db_name = db.toString().replace(".tar.gz","") - def unaligned = '' - def aligned = '' - if (meta.single_end) { - unaligned = params.centrifuge_save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' - aligned = params.centrifuge_save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : '' - } else { - unaligned = params.centrifuge_save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' - aligned = params.centrifuge_save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : '' - } - def sam_output = params.centrifuge_out_fmt_sam ? 
"--out-fmt 'sam'" : '' - """ - centrifuge \\ - -x $db \\ - -p $task.cpus \\ - $paired \\ - --report-file ${prefix}.centrifuge.report.txt \\ - -S ${prefix}.centrifuge.output.txt \\ - $unaligned \\ - $aligned \\ - $sam_output \\ - $args - - centrifuge-kreport -x $db_name ${prefix}.centrifuge.output.txt > ${prefix}.centrifuge.kreport.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/centrifuge/extract/README.md --- a/0.4.0/modules/centrifuge/extract/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -# NextFlow DSL2 Module - -```bash -CENTRIFUGE_EXTRACT -``` - -## Description - -Extract FASTQ reads given a FASTQ file originally used with `centrifuge` tool and a taxa of interest. This specific module uses only GNU Coreutils to create a list of FASTQ read ids that need to be extract. See also `CENTRIFUGE_PROCESS` module which uses a `python` script to generate the FASTQ read ids. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following 2 tuples: - -- A tuple of metadata (`meta`) and of type `path` (`centrifuge_output`) per sample (`id:`). - -- A tuple of metadata (`meta`) and of type `path` (`centrifuge_report`) per sample (`id:`). - -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - ], - '/hpc/scratch/test/FAL000870/f1.merged.cent_out.output.txt' -] - -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - ], - '/hpc/scratch/test/FAL000870/f1.merged.cent_out.report.txt' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. 
- -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' -] -``` - -\ -  - -#### `centrifuge_report` - -Type: `path` - -NextFlow input type of `path` pointing to `centrifuge` report file generated using `--report-file` option of `centrifuge` tool. - -\ -  - -#### `centrifuge_output` - -Type: `path` - -NextFlow input type of `path` pointing to `centrifuge` output file generated using `-S` option of `centrifuge` tool. - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of extracted FASTQ read ids. - -\ -  - -#### `extracted` - -Type: `path` - -NextFlow output type of `path` pointing to the extracted FASTQ read ids belonging to a particular taxa (`*.extract-centrifuge-bug-ids.txt`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/centrifuge/extract/main.nf --- a/0.4.0/modules/centrifuge/extract/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -process CENTRIFUGE_EXTRACT { - tag "$meta.id" - label 'process_low' - - //seqkit container contains required bash and other utilities - module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) - conda (params.enable_conda ? "conda-forge::sed=4.7 conda-forge::coreutils" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-039542721b6b463b663872ba8b7e9fbc05f01925:1de88053ebf8fb9884758395c4871f642c57750c-0': - 'quay.io/biocontainers/mulled-v2-039542721b6b463b663872ba8b7e9fbc05f01925:1de88053ebf8fb9884758395c4871f642c57750c-0' }" - - input: - tuple val(meta), path(centrifuge_report) - tuple val(meta), path(centrifuge_output) - - output: - tuple val(meta), path('*.extract-centrifuge-bug-ids.txt'), emit: extracted - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - grep -F '${params.centrifuge_extract_bug}' $centrifuge_report \ - | cut -f2 \ - | sort -u \ - | while read -r taxId; do - echo -e "\t\$taxId"'\$' - done > gotcha.txt - - cut -f1-3 $centrifuge_output | grep -E -f gotcha.txt | cut -f1 | sort -u > ${prefix}.extract-centrifuge-bug-ids.txt || true - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bash: \$( bash --version 2>&1 | sed '1!d; s/^.*version //; s/ (.*\$//' ) - END_VERSIONS - - ver="" - sedver="" - - if [ "${workflow.containerEngine}" != "null" ]; then - ver=\$( cut --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) - sedver="\$ver" - else - ver=\$( cut --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) - sedver=\$( echo \$(sed --version 2>&1) | sed 's/^.*(GNU sed) //; s/ Copyright.*\$//' ) - fi - - cat <<-END_VERSIONS >> versions.yml - cut: \$ver - tail: \$ver - sort: \$ver - sed: \$sedver - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/centrifuge/process/README.md --- a/0.4.0/modules/centrifuge/process/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -# NextFlow DSL2 Module - -```bash -CENTRIFUGE_PROCESS -``` - -## Description - -Extract FASTQ reads given a FASTQ file originally used with `centrifuge` tool and a taxa of interest. 
This specific module uses a `python` script to generate the FASTQ read ids and as such requires a `bin` folder with `process_centrifuge_output.py` to be present where the NextFlow script will be executed from. See also `CENTRIFUGE_EXTRACT` module which uses only GNU Coreutils to create a list of FASTQ read ids that need to be extracted. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in a tuple in order of metadata (`meta`), a `path` (`centrifuge_report`) type and another `path` (`centrifuge_report`) per sample (`id:`). - -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - ], - '/hpc/scratch/test/FAL000870/f1.merged.cent_out.report.txt', - '/hpc/scratch/test/FAL000870/f1.merged.cent_out.output.txt' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' -] -``` - -\ -  - -#### `centrifuge_report` - -Type: `path` - -NextFlow input type of `path` pointing to `centrifuge` report file generated using `--report-file` option of `centrifuge` tool. - -\ -  - -#### `centrifuge_output` - -Type: `path` - -NextFlow input type of `path` pointing to `centrifuge` output file generated using `-S` option of `centrifuge` tool. - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of extracted FASTQ read ids. - -\ -  - -#### `extracted` - -Type: `path` - -NextFlow output type of `path` pointing to the extracted FASTQ read ids belonging to a particular taxa (`*.extract-centrifuge-bug-ids.txt`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. 
diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/centrifuge/process/main.nf --- a/0.4.0/modules/centrifuge/process/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -process CENTRIFUGE_PROCESS { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) - conda (params.enable_conda ? "conda-forge::python=3.9 conda-forge::pandas conda-forge::biopython" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-d91be2208450c41a5198d8660b6d9a5b60613b3a:d9847b41af5ef58746c86d7114cd010650f3d9a2-0' : - 'quay.io/biocontainers/mulled-v2-d91be2208450c41a5198d8660b6d9a5b60613b3a:d9847b41af5ef58746c86d7114cd010650f3d9a2-0' }" - - input: - tuple val(meta), path(centrifuge_report), path(centrifuge_output) - - output: - tuple val(meta), path('*.process-centrifuge-bug-ids.txt'), emit: extracted - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - process_centrifuge_output.py \\ - -r $centrifuge_report \\ - -o $centrifuge_output \\ - -b '${params.centrifuge_extract_bug}' \\ - -t ${prefix}.process-centrifuge-bug-ids.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$( python --version | sed 's/Python //g' ) - biopython: \$( python -c 'import Bio as bio; print(bio.__version__)' ) - numpy: \$( python -c 'import numpy as np; print(np.__version__)' ) - pandas: \$( python -c 'import pandas as pd; print(pd.__version__)' ) - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/custom/dump_software_versions/README.md --- a/0.4.0/modules/custom/dump_software_versions/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 
1970 +0000 @@ -1,57 +0,0 @@ -# NextFlow DSL2 Module - -```bash -DUMP_SOFTWARE_VERSIONS -``` - -## Description - -Given an `YAML` format file, produce a final `.yml` file which has unique entries and a corresponding `.mqc.yml` file for use with `multiqc`. - -\ -  - -### `input:` - -___ - -Type: `path` - -Takes in a `path` (`versions`) type pointing to the file to be used to produce a final `.yml` file without any duplicate entries and a `.mqc.yml` file. Generally, this is passed by mixing `versions` from various run time channels and finally passed to this module to produce a final software versions list. - -Ex: - -```groovy -[ '/hpc/scratch/test/work/9b/e7bf7e28806419c1c9a571dacd1f67/versions.yml' ] -``` - -\ -  - -### `output:` - -___ - -#### `yml` - -Type: `path` - -NextFlow output type of `path` type pointing to an `YAML` file with software versions. - -\ -  - -#### `mqc_yml` - -Type: `path` - -NextFlow output type of `path` pointing to `.mqc.yml` file which can be used to produce a software versions' table with `multiqc`. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/custom/dump_software_versions/main.nf --- a/0.4.0/modules/custom/dump_software_versions/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -process DUMP_SOFTWARE_VERSIONS { - tag "${params.pipeline} software versions" - label 'process_pico' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) - conda (params.enable_conda ? "conda-forge::python=3.9 conda-forge::pyyaml" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-ca258a039fcd88610bc4e297b13703e8be53f5ca:d638c4f85566099ea0c74bc8fddc6f531fe56753-0' : - 'quay.io/biocontainers/mulled-v2-ca258a039fcd88610bc4e297b13703e8be53f5ca:d638c4f85566099ea0c74bc8fddc6f531fe56753-0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/custom/dump_software_versions/templates/dumpsoftwareversions.py --- a/0.4.0/modules/custom/dump_software_versions/templates/dumpsoftwareversions.py Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -#!/usr/bin/env python - -import yaml -import platform -import subprocess -from textwrap import dedent - - -def _make_versions_html(versions): - html = [ - dedent( - """\\ - - - - - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
Process Name Software Version
{process if (i == 0) else ''}{tool}{version}
") - return "\\n".join(html) - - -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) - versions_by_process.update(versions_this_module) - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["CPIPES"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - "${params.pipeline}": "${params.workflow_version}" -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://cfsan-git.fda.gov/Kranti.Konganti/${workflow.manifest.name.toLowerCase()}", - "plot_type": "html", - "description": "Collected at run time from the software output (STDOUT/STDERR).", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - -# print('sed -i -e "' + "s%'%%g" + '" *.yml') -subprocess.run('sed -i -e "' + "s%'%%g" + '" software_versions.yml', shell=True) - -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/ectyper/README.md --- a/0.4.0/modules/ectyper/README.md Sun Aug 28 
00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -# NextFlow DSL2 Module - -```bash -ECTYPER -``` - -## Description - -Run `ectyper` tool on a list of assembled contigs in FASTA format. Produces a single output table in ASCII text format. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of assemled contig FASTA files of input type `path` (`fasta`). - -Ex: - -```groovy -[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] -``` - -\ -  - -#### `fasta` - -Type: `path` - -NextFlow input type of `path` pointing to assembled contig file in FASTA format. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'ECTYPER' { - ext.args = '--detailed' -} -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of `ectyper` result files (`ectyped`). - -\ -  - -#### `ectyped` - -Type: `path` - -NextFlow output type of `path` pointing to the `ectyper` results table file per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/ectyper/main.nf --- a/0.4.0/modules/ectyper/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -process ECTYPER { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? 
"${params.swmodulepath}${params.fs}ectyper${params.fs}1.0.0" : null) - conda (params.enable_conda ? "bioconda::ectyper=1.0.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ectyper:1.0.0--pyhdfd78af_1' : - 'quay.io/biocontainers/ectyper:1.0.0--pyhdfd78af_1' }" - - input: - tuple val(meta), path(fasta) - - output: - path("${meta.id}${params.fs}*") - tuple val(meta), path("${meta.id}${params.fs}${meta.id}.tsv"), emit: ectyped - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when || fasta.size() > 0 - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def is_compressed = fasta.getName().endsWith(".gz") ? true : false - def fasta_name = fasta.getName().replace(".gz", "") - """ - if [ "$is_compressed" == "true" ]; then - gzip -c -d $fasta > $fasta_name - fi - - ectyper \\ - $args \\ - --cores $task.cpus \\ - --output $prefix \\ - --input $fasta_name - - mv ${prefix}${params.fs}output.tsv ${prefix}${params.fs}${prefix}.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ectyper: \$(echo \$(ectyper --version 2>&1) | sed 's/.*ectyper //; s/ .*\$//') - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/fastqc/README.md --- a/0.4.0/modules/fastqc/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -# NextFlow DSL2 Module - -```bash -FASTQC -``` - -## Description - -Run `fastqc` tool on reads in FASTQ format. Produces a HTML report file and a `.zip` file containing plots and data used to produce the plots. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). 
- -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - ], - '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true -] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to FASTQ files on which `fastqc` classification should be run. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'FASTQC' { - ext.args = '--nano' -} -``` - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of `fastqc` result files. - -\ -  - -#### `html` - -Type: `path` - -NextFlow output type of `path` pointing to the `fastqc` report file in HTML format per sample (`id:`). - -\ -  - -#### `zip` - -Type: `path` - -NextFlow output type of `path` pointing to the zipped `fastqc` results per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/fastqc/main.nf --- a/0.4.0/modules/fastqc/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}fastqc${params.fs}0.11.9" : null) - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/flye/assemble/README.md --- a/0.4.0/modules/flye/assemble/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -# NextFlow DSL2 Module - -```bash -FLYE_ASSEMBLE -``` - -## Description - -Run `flye` assembler tool on a list of read files in FASTQ format. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of FASTQ files of input type `path` (`reads`). - -Ex: - -```groovy -[ [id: 'sample1', single_end: true], '/data/sample1/f_merged.fq.gz' ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. 
- -Ex: - -```groovy -[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to read files in FASTQ format that need to be *de novo* assembled. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'FLYE_ASSEMBLE' { - ext.args = '--casava' -} -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and `flye` assembled contig file in FASTA format. - -\ -  - -#### `assembly` - -Type: `path` - -NextFlow output type of `path` pointing to the `flye` assembler results file per sample (`id:`) i.e., the final assembled contig file in FASTA format. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/flye/assemble/main.nf --- a/0.4.0/modules/flye/assemble/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ -process FLYE_ASSEMBLE { - tag "$meta.id" - label 'process_medium' - // errorStrategy 'ignore' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}flye${params.fs}2.8" : null) - conda (params.enable_conda ? "bioconda::flye=2.8.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/flye:2.8.1--py37h8270d21_1' : - 'quay.io/biocontainers/flye:2.8.1--py37h8270d21_1' }" - - input: - tuple val(meta), path(reads) - - output: - path "${meta.id}${params.fs}*" - tuple val(meta), path("${meta.id}${params.fs}assembly.fasta"), emit: assembly, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - reads_platform=\$( echo "$args" | grep -E -o '(--nano|--pacbio)-(raw|corr|hq|hifi)' ) - flye \\ - \$(echo "$args" | sed -e "s/\$reads_platform//") \\ - -t $task.cpus \\ - --out-dir "${meta.id}" \\ - \$reads_platform \\ - $reads - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - flye: \$( flye --version ) - END_VERSIONS - - grepver="" - - if [ "${workflow.containerEngine}" != "null" ]; then - grepver=\$( grep --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) - else - grepver=\$( echo \$( grep --version 2>&1 ) | sed 's/^.*(GNU grep) //; s/ Copyright.*\$//' ) - fi - - cat <<-END_VERSIONS >> versions.yml - grep: \$grepver - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/gen_samplesheet/README.md --- a/0.4.0/modules/gen_samplesheet/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -# NextFlow DSL2 Module - -```bash -GEN_SAMPLESHEET -``` - -## Description - -Generates a sample sheet in CSV format that contains required fields to be used to construct a Groovy Map of metadata. It requires as input, an absolute UNIX path to a folder containing only FASTQ files. This module requires the `fastq_dir_to_samplesheet.py` script to be present in the `bin` folder from where the NextFlow script including this module will be executed. - -\ -  - -### `input:` - -___ - -Type: `val` - -Takes in the absolute UNIX path to a folder containing only FASTQ files (`inputdir`). 
- -Ex: - -```groovy -'/hpc/scratch/test/reads' -``` - -\ -  - -### `output:` - -___ - -Type: `path` - -NextFlow output of type `path` pointing to auto-generated CSV sample sheet (`csv`). - -\ -  - -#### `csv` - -Type: `path` - -NextFlow output type of `path` pointing to auto-generated CSV sample sheet for all FASTQ files present in the folder given by NextFlow input type of `val` (`inputdir`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/gen_samplesheet/main.nf --- a/0.4.0/modules/gen_samplesheet/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -process GEN_SAMPLESHEET { - tag "${inputdir.simpleName}" - label "process_pico" - - module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) - conda (params.enable_conda ? "conda-forge::python=3.9.5" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'quay.io/biocontainers/python:3.9--1' }" - - input: - val inputdir - - output: - path '*.csv' , emit: csv - path 'versions.yml', emit: versions - - when: - task.ext.when == null || task.ext.when - - // This script (fastq_dir_to_samplesheet.py) is distributed - // as part of the pipeline nf-core/rnaseq/bin/. MIT License. - script: - def this_script_args = (params.fq_single_end ? ' -se' : '') - this_script_args += (params.fq_suffix ? " -r1 '${params.fq_suffix}'" : '') - this_script_args += (params.fq2_suffix ? 
" -r2 '${params.fq2_suffix}'" : '') - - """ - fastq_dir_to_samplesheet.py -sn \\ - -st '${params.fq_strandedness}' \\ - -sd '${params.fq_filename_delim}' \\ - -si ${params.fq_filename_delim_idx} \\ - ${this_script_args} \\ - ${inputdir} autogen_samplesheet.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$( python --version | sed 's/Python //g' ) - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/kraken2/classify/README.md --- a/0.4.0/modules/kraken2/classify/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,121 +0,0 @@ -# NextFlow DSL2 Module - -```bash -KRAKEN2_CLASSIFY -``` - -## Description - -Run `kraken2` tool on reads in FASTQ format. Produces 4 output files per sample (`id:`) in ASCII text format. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of reads or FASTA assembly of type `path` (`reads`) per sample (`id:`). - -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - is_assembly: false, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - kraken2_db: '/hpc/db/kraken2/standard-210914', - ], - '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ / FASTA file. - -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - is_assembly: false, - kraken2_db: '/hpc/db/kraken2/standard-210914' -] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to FASTQ files on which `kraken2` classification should be run. - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of `kraken2` result files. 
- -\ -  - -#### `kraken_report` - -Type: `path` - -NextFlow output type of `path` pointing to the `kraken2` report table file (`.report.txt`) per sample (`id:`). - -\ -  - -#### `kraken_output` - -Type: `path` - -NextFlow output type of `path` pointing to the `kraken2` output table file (`.output.txt`) per sample (`id:`). - -\ -  - -#### `classified` - -Type: `path` - -NextFlow output type of `path` pointing to the `kraken2` processed gzipped FASTQ files containing only reads that have been classified (`*classified.fastq`) per sample (`id:`). - -\ -  - -#### `unclassified` - -Type: `path` - -NextFlow output type of `path` pointing to the `kraken2` processed gzipped FASTQ files containing only reads that are unclassified (`*unclassified.fastq`) per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/kraken2/classify/main.nf --- a/0.4.0/modules/kraken2/classify/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ -process KRAKEN2_CLASSIFY { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}kraken2${params.fs}2.1.2" : null) - conda (params.enable_conda ? 'bioconda::kraken2=2.1.2 conda-forge::pigz=2.6' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' : - 'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path('*classified*') , emit: classified - tuple val(meta), path('*unclassified*'), emit: unclassified - tuple val(meta), path('*.report.txt') , emit: kraken_report - tuple val(meta), path('*.output.txt') , emit: kraken_output - path "versions.yml" , emit: versions - - when: - (task.ext.when == null || task.ext.when) && (meta.is_assembly ? reads.size() : 1) - - script: - def args = task.ext.args ?: '' - def db = meta.kraken2_db ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads.collect{ it.toString() } - def is_single_end = (meta.single_end || meta.is_assembly) ? true : false - def paired = is_single_end ? "" : "--paired" - def classified = is_single_end ? "--classified-out ${prefix}.classified.fastq" : "--classified-out ${prefix}.classified#.fastq" - def unclassified = is_single_end ? "--unclassified-out ${prefix}.unclassified.fastq" : "--unclassified-out ${prefix}.unclassified#.fastq" - args += (reads.getName().endsWith(".gz") ? 
' --gzip-compressed ' : '') - """ - kraken2 \\ - --db $db \\ - --threads $task.cpus \\ - $unclassified \\ - $classified \\ - --report ${prefix}.kraken2.report.txt \\ - --output ${prefix}.kraken2.output.txt \\ - $paired \\ - $args \\ - $reads - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') - END_VERSIONS - - zcmd="" - zver="" - - if type pigz > /dev/null 2>&1; then - pigz -p $task.cpus *.fastq - zcmd="pigz" - zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed -e '1!d' | sed "s/\$zcmd //" ) - elif type gzip > /dev/null 2>&1; then - gzip *.fastq - zcmd="gzip" - - if [ "${workflow.containerEngine}" != "null" ]; then - zver=\$( echo \$( \$zcmd --help 2>&1 ) | sed -e '1!d; s/ (.*\$//' ) - else - zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed "s/^.*(\$zcmd) //; s/\$zcmd //; s/ Copyright.*\$//" ) - fi - fi - - cat <<-END_VERSIONS >> versions.yml - \$zcmd: \$zver - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/kraken2/extract_contigs/README.md --- a/0.4.0/modules/kraken2/extract_contigs/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -# NextFlow DSL2 Module - -```bash -KRAKEN2_EXTRACT -``` - -## Description - -Extract FASTA reads or contigs given a FASTA file originally used with `kraken2` tool and a taxa of interest. This specific module uses a `python` script to generate the FASTA reads or contigs and as such requires a `bin` folder with `extract_assembled_filtered_contigs.py` script to be present where the NextFlow script will be executed from. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in a tuple in order of metadata (`meta`), a `path` (`kraken2_output`) type and another `path` (`assembly`) per sample (`id:`). 
- -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - kraken2_db: '/hpc/db/kraken2/standard-210914' - ], - '/hpc/scratch/test/FAL000870/f1.merged.kraken2.output.txt', - '/hpc/scratch/test/FAL000870/f1.assembly.fasta' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTA file. - -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - kraken2_db: '/hpc/db/kraken2/standard-210914' -] -``` - -\ -  - -#### `kraken2_output` - -Type: `path` - -NextFlow input type of `path` pointing to `kraken2` output file generated using `--output` option of `kraken2` tool. - -\ -  - -#### `assembly` - -Type: `path` - -NextFlow input type of `path` pointing to a FASTA format file, in this case an assembled contig file in FASTA format. - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of extracted FASTQ read ids. - -\ -  - -#### `asm_filtered_contigs` - -Type: `path` - -NextFlow output type of `path` pointing to the extracted FASTA reads or contigs belonging to a particular taxa. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/kraken2/extract_contigs/main.nf --- a/0.4.0/modules/kraken2/extract_contigs/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -process KRAKEN2_EXTRACT_CONTIGS { - tag "$meta.id" - label 'process_nano' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) - conda (params.enable_conda ? "conda-forge::python=3.9 conda-forge::pandas conda-forge::biopython" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-d91be2208450c41a5198d8660b6d9a5b60613b3a:d9847b41af5ef58746c86d7114cd010650f3d9a2-0' : - 'quay.io/biocontainers/mulled-v2-d91be2208450c41a5198d8660b6d9a5b60613b3a:d9847b41af5ef58746c86d7114cd010650f3d9a2-0' }" - - input: - tuple val(meta), path(assembly), path(kraken2_output) - val kraken2_extract_bug - - output: - tuple val(meta), path('*assembly_filtered_contigs.fasta'), emit: asm_filtered_contigs - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - extract_assembled_filtered_contigs.py \\ - -i $assembly \\ - -o ${prefix}.assembly_filtered_contigs.fasta \\ - -k $kraken2_output \\ - -b '$kraken2_extract_bug' - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$( python --version | sed 's/Python //g' ) - biopython: \$( python -c 'import Bio as bio; print(bio.__version__)' ) - numpy: \$( python -c 'import numpy as np; print(np.__version__)' ) - pandas: \$( python -c 'import pandas as pd; print(pd.__version__)' ) - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/megahit/assemble/README.md --- a/0.4.0/modules/megahit/assemble/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ -# NextFlow DSL2 Module - -```bash -MEGAHIT_ASSEMBLE -``` - -## Description - -Run `megahit` assembler tool on a list of read files in FASTQ format. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of FASTQ files (short reads) of input type `path` (`reads`). 
- -Ex: - -```groovy -[ [id: 'sample1', single_end: true], '/data/sample1/f_merged.fq.gz' ] -[ [id: 'sample1', single_end: false], ['/data/sample1/f1_merged.fq.gz', '/data/sample2/f2_merged.fq.gz'] ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ id: 'KB01', strandedness: 'unstranded', single_end: true ] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to short read files in FASTQ format that need to be *de novo* assembled. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'MEGAHIT_ASSEMBLE' { - ext.args = '--keep-tmp-files' -} -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and `megahit` assembled contigs file in FASTA format. - -\ -  - -#### `assembly` - -Type: `path` - -NextFlow output type of `path` pointing to the `megahit` assembler results file (`final.contigs.fa`) per sample (`id:`) i.e., the final assembled contigs file in FASTA format. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/megahit/assemble/main.nf --- a/0.4.0/modules/megahit/assemble/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ -process MEGAHIT_ASSEMBLE { - tag "$meta.id" - label 'process_higher' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}megahit${params.fs}1.2.9" : null) - conda (params.enable_conda ? "bioconda::megahit=1.2.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/megahit:1.2.9--h2e03b76_1' : - 'quay.io/biocontainers/megahit:1.2.9--h2e03b76_1' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("${meta.id}${params.fs}${meta.id}.contigs.fa"), emit: assembly, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def maxmem = task.memory ? "--memory ${task.memory.toBytes()}" : "" - if (meta.single_end) { - """ - megahit \\ - -r ${reads} \\ - -t $task.cpus \\ - $maxmem \\ - $args \\ - --out-dir $prefix \\ - --out-prefix $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') - END_VERSIONS - """ - } else { - """ - megahit \\ - -1 ${reads[0]} \\ - -2 ${reads[1]} \\ - -t $task.cpus \\ - $maxmem \\ - $args \\ - --out-dir $prefix \\ - --out-prefix $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') - END_VERSIONS - """ - } -} diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/mlst/README.md --- a/0.4.0/modules/mlst/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -# NextFlow DSL2 Module - -```bash -MLST -``` - -## Description - -Run `mlst` tool on a list of assembled contigs in FASTA format. Produces a single output table in ASCII text format. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of assemled contig FASTA files of input type `path` (`fasta`). - -Ex: - -```groovy -[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. 
- -Ex: - -```groovy -[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] -``` - -\ -  - -#### `fasta` - -Type: `path` - -NextFlow input type of `path` pointing to assembled contig file in FASTA format. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'MLST' { - ext.args = '--nopath' -} -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of `mlst` result files (`tsv`). - -\ -  - -#### `tsv` - -Type: `path` - -NextFlow output type of `path` pointing to the `mlst` results table file per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/mlst/main.nf --- a/0.4.0/modules/mlst/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -process MLST { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}mlst${params.fs}2.19.0" : null) - conda (params.enable_conda ? "bioconda::mlst=2.19.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mlst:2.19.0--hdfd78af_1' : - 'quay.io/biocontainers/mlst:2.19.0--hdfd78af_1' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("*.tsv"), emit: tsv - path "versions.yml" , emit: versions - - when: - (task.ext.when == null || task.ext.when) && fasta.size() > 0 - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - mlst \\ - --threads $task.cpus \\ - $args \\ - $fasta > ${prefix}.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mlst: \$( echo \$(mlst --version 2>&1) | sed 's/mlst //' ) - END_VERSIONS - """ - -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/multiqc/README.md --- a/0.4.0/modules/multiqc/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -# NextFlow DSL2 Module - -```bash -MULTIQC -``` - -## Description - -Generate an aggregated [**MultiQC**](https://multiqc.info/) report. This particular module **will only work** within the framework of `cpipes` as in, it uses many `cpipes` related UNIX absolute paths to store and retrieve **MultiQC** related configration files and `cpipes` context aware metadata. It also uses a custom logo with filename `FDa-Logo-Blue---medium-01.png` which should be located inside an `assets` folder from where the NextFlow script including this module will be executed. - -\ -  - -### `input:` - -___ - -Type: `path` - -Takes in NextFlow input type of `path` which points to many log files that **MultiQC** should parse. - -Ex: - -```groovy -[ '/data/sample1/centrifuge/cent_output.txt', '/data/sample1/kraken/kraken_output.txt'] ] -``` - -\ -  - -### `output:` - -___ - -#### `report` - -Type: `path` - -Outputs a NextFlow output type of `path` pointing to the location of **MultiQC** final HTML report. 
- -\ -  - -#### `data` - -Type: `path` - -NextFlow output type of `path` pointing to the data files folder generated by **MultiQC** which were used to generate plots and HTML report. - -\ -  - -#### `plots` - -Type: `path` -Optional: `true` - -NextFlow output type of `path` pointing to the plots folder generated by **MultiQC**. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/multiqc/main.nf --- a/0.4.0/modules/multiqc/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -process MULTIQC { - label 'process_low' - tag 'MultiQC' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}multiqc${params.fs}1.12" : null) - conda (params.enable_conda ? 'bioconda::multiqc=1.12 conda-forge::spectra conda-forge::lzstring' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" - - input: - path multiqc_files - - output: - path "*multiqc*" - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional: true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - cp ${params.projectconf}${params.fs}multiqc${params.fs}${params.pipeline}_mqc.yml cpipes_mqc_config.yml - cp ${params.assetsdir}${params.fs}FDa-Logo-Blue---medium-01.png FDa-Logo-Blue---medium-01.png - sed -i -e 's/Workflow_Name_Placeholder/${params.pipeline}/g; s/Workflow_Version_Placeholder/${params.workflow_version}/g' cpipes_mqc_config.yml - sed -i -e 's/CPIPES_Version_Placeholder/${workflow.manifest.version}/g; s%Workflow_Output_Placeholder%${params.output}%g' cpipes_mqc_config.yml - sed -i -e 's%Workflow_Input_Placeholder%${params.input}%g' cpipes_mqc_config.yml - - multiqc -c cpipes_mqc_config.yml -f $args . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - - sedver="" - - if [ "${workflow.containerEngine}" != "null" ]; then - sedver=\$( sed --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) - else - sedver=\$( echo \$(sed --version 2>&1) | sed 's/^.*(GNU sed) //; s/ Copyright.*\$//' ) - fi - - cat <<-END_VERSIONS >> versions.yml - sed: \$sedver - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/samplesheet_check/README.md --- a/0.4.0/modules/samplesheet_check/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -# NextFlow DSL2 Module - -```bash -SAMPLESHEET_CHECK -``` - -## Description - -Checks the validity of the sample sheet in CSV format to make sure there are required mandatory fields. 
This module generally succeeds `GEN_SAMPLESHEET` module as part of the `cpipes` pipelines to make sure that all fields of the columns are properly formatted to be used as Groovy Map for `meta` which is of input type `val`. This module requires the `check_samplesheet.py` script to be present in the `bin` folder from where the NextFlow script including this module will be executed - -\ -  - -### `input:` - -___ - -Type: `path` - -Takes in the absolute UNIX path to the sample sheet in CSV format (`samplesheet`). - -Ex: - -```groovy -'/hpc/scratch/test/reads/output/gen_samplesheet/autogen_samplesheet.csv' -``` - -\ -  - -### `output:` - -___ - -Type: `path` - -NextFlow output of type `path` pointing to properly formatted CSV sample sheet (`csv`). - -\ -  - -#### `csv` - -Type: `path` - -NextFlow output type of `path` pointing to auto-generated CSV sample sheet for all FASTQ files present in the folder given by NextFlow input type of `val` (`inputdir`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/samplesheet_check/main.nf --- a/0.4.0/modules/samplesheet_check/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label "process_femto" - - module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) - conda (params.enable_conda ? "conda-forge::python=3.9.5" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'quay.io/biocontainers/python:3.9--1' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/rnaseq/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$( python --version | sed 's/Python //g' ) - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqkit/grep/README.md --- a/0.4.0/modules/seqkit/grep/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -# NextFlow DSL2 Module - -```bash -SEQKIT_GREP -``` - -## Description - -Run `seqkit grep` command on reads in FASTQ format. Produces a filtered FASTQ file as per the filter strategy in the supplied input file. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). - -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - ], - '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true -] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to FASTQ files on which `seqkit grep` should be run. - -\ -  - -#### `pattern_file` - -Type: path - -NextFlow input type of `path` pointing to the pattern file which has the patterns, one per line, by which FASTQ sequence ids should be searched and whose reads will be extracted. 
- -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'SEQKIT_GREP' { - ext.args = '--only-positive-strand' -} -``` - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and and filtered gzipped FASTQ file. - -\ -  - -#### `fastx` - -Type: `path` - -NextFlow output type of `path` pointing to the FASTQ format filtered gzipped file per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqkit/grep/main.nf --- a/0.4.0/modules/seqkit/grep/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -process SEQKIT_GREP { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}seqkit${params.fs}2.2.0" : null) - conda (params.enable_conda ? "bioconda::seqkit=2.2.0 conda-forge::sed=4.7 conda-forge::coreutils" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0': - 'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }" - - input: - tuple val(meta), path(reads), path(pattern_file) - - output: - tuple val(meta), path("*.gz"), emit: fastx - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - def extension = "fastq" - if ("$reads" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/) { - extension = "fasta" - } - - if (meta.single_end) { - """ - pattern_file_contents=\$(sed '1!d' $pattern_file) - if [ "\$pattern_file_contents" != "DuMmY" ]; then - additional_args="-f $pattern_file $args" - else - additional_args="$args" - fi - - seqkit \\ - grep \\ - -j $task.cpus \\ - -o ${prefix}.seqkit-grep.${extension}.gz \\ - \$additional_args \\ - $reads - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$( seqkit | sed '3!d; s/Version: //' ) - END_VERSIONS - """ - } else { - """ - pattern_file_contents=\$(sed '1!d' $pattern_file) - if [ "\$pattern_file_contents" != "DuMmY" ]; then - additional_args="-f $pattern_file $args" - else - additional_args="$args" - fi - - seqkit \\ - grep \\ - -j $task.cpus \\ - -o ${prefix}.R1.seqkit-grep.${extension}.gz \\ - \$additional_args \\ - ${reads[0]} - - seqkit \\ - grep \\ - -j $task.cpus \\ - -o ${prefix}.R2.seqkit-grep.${extension}.gz \\ - \$additional_args \\ - ${reads[1]} - - seqkit \\ - pair \\ - -j $task.cpus \\ - -1 ${prefix}.R1.seqkit-grep.${extension}.gz \\ - -2 ${prefix}.R2.seqkit-grep.${extension}.gz - - rm ${prefix}.R1.seqkit-grep.${extension}.gz - rm ${prefix}.R2.seqkit-grep.${extension}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$( seqkit | sed '3!d; s/Version: //' ) - END_VERSIONS - """ - } -} diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqkit/rmdup/README.md --- 
a/0.4.0/modules/seqkit/rmdup/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ -# NextFlow DSL2 Module - -```bash -SEQKIT_RMDUP -``` - -## Description - -Run `seqkit rmdup` command on reads in FASTQ format. Produces a filtered FASTQ file without duplicate sequences as per the strategy set using `ext.args` within the process scope. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). - -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - ], - '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true -] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to FASTQ files on which `seqkit rmdup` should be run. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'SEQKIT_DUP' { - ext.args = '-t dna' -} -``` - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and filtered gzipped FASTQ file. - -\ -  - -#### `fastx` - -Type: `path` - -NextFlow output type of `path` pointing to the FASTQ format filtered gzipped file per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. 
diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqkit/rmdup/main.nf --- a/0.4.0/modules/seqkit/rmdup/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -process SEQKIT_RMDUP { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}seqkit${params.fs}2.2.0" : null) - conda (params.enable_conda ? "bioconda::seqkit=2.2.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0': - 'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*duplicated.details.txt"), optional: true - tuple val(meta), path("*.gz") , emit: fastx - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def rmdup_d = params.seqkit_rmdup_d ? "-d ${prefix}.seqs.duplicated.fastq.gz" : "" - def rmdup_D = params.seqkit_rmdup_D ? 
"-D ${prefix}.duplicated.details.txt" : "" - - def extension = "fastq" - if ("$reads" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/) { - extension = "fasta" - } - - if (meta.single_end) { - """ - seqkit \\ - rmdup \\ - $rmdup_d \\ - $rmdup_D \\ - -j $task.cpus \\ - -o ${prefix}.seqkit-rmdup.${extension}.gz \\ - $args \\ - $reads - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$( seqkit | sed '3!d; s/Version: //' ) - END_VERSIONS - """ - } else { - """ - seqkit \\ - rmdup \\ - $rmdup_d \\ - $rmdup_D \\ - -j $task.cpus \\ - -o ${prefix}.R1.seqkit-rmdup.${extension}.gz \\ - $args \\ - ${reads[0]} - - seqkit \\ - rmdup \\ - $rmdup_d \\ - $rmdup_D \\ - -j $task.cpus \\ - -o ${prefix}.R2.seqkit-rmdup.${extension}.gz \\ - $args \\ - ${reads[1]} - - seqkit \\ - pair \\ - -j $task.cpus \\ - -1 ${prefix}.R1.seqkit-rmdup.${extension}.gz \\ - -2 ${prefix}.R2.seqkit-rmdup.${extension}.gz - - rm ${prefix}.R1.seqkit-rmdup.${extension}.gz - rm ${prefix}.R2.seqkit-rmdup.${extension}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$( seqkit | sed '3!d; s/Version: //' ) - END_VERSIONS - """ - } -} diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqkit/seq/README.md --- a/0.4.0/modules/seqkit/seq/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ -# NextFlow DSL2 Module - -```bash -SEQKIT_SEQ -``` - -## Description - -Run `seqkit seq` command on reads in FASTQ format. Produces a filtered FASTQ file as per the filter strategy mentioned using the `ext.args` within the process scope. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). 
- -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - ], - '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true -] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to FASTQ files on which `seqkit seq` should be run. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'SEQKIT_SEQ' { - ext.args = '--max-len 4000' -} -``` - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and filtered gzipped FASTQ file. - -\ -  - -#### `fastx` - -Type: `path` - -NextFlow output type of `path` pointing to the FASTQ format filtered gzipped file per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqkit/seq/main.nf --- a/0.4.0/modules/seqkit/seq/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -process SEQKIT_SEQ { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}seqkit${params.fs}2.2.0" : null) - conda (params.enable_conda ? "bioconda::seqkit=2.2.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0': - 'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.gz"), emit: fastx - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - def extension = "fastq" - if ("$reads" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/) { - extension = "fasta" - } - - if (meta.single_end) { - """ - seqkit \\ - seq \\ - -j $task.cpus \\ - -o ${prefix}.seqkit-seq.${extension}.gz \\ - $args \\ - $reads - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$( seqkit | sed '3!d; s/Version: //' ) - END_VERSIONS - """ - } else { - """ - seqkit \\ - seq \\ - -j $task.cpus \\ - -o ${prefix}.R1.seqkit-seq.${extension}.gz \\ - $args \\ - ${reads[0]} - - seqkit \\ - seq \\ - -j $task.cpus \\ - -o ${prefix}.R2.seqkit-seq.${extension}.gz \\ - $args \\ - ${reads[1]} - - seqkit \\ - pair \\ - -j $task.cpus \\ - -1 ${prefix}.R1.seqkit-seq.${extension}.gz \\ - -2 ${prefix}.R2.seqkit-seq.${extension}.gz - - rm ${prefix}.R1.seqkit-seq.${extension}.gz - rm ${prefix}.R2.seqkit-seq.${extension}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$( seqkit | sed '3!d; s/Version: //' ) - END_VERSIONS - """ - } -} diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqsero2/README.md --- a/0.4.0/modules/seqsero2/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -# NextFlow DSL2 Module - -```bash -SEQSERO2 -``` - -## Description - -Run `seqsero2` tool on a list of assembled *Salmonella* contigs in FASTA format or sequencing reads in FASTQ format. 
- -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of assemled contig FASTA files or list of sequencing reads in FASTQ format of input type `path` (`reads_or_asm`). - -Ex: - -```groovy -[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ files or assembly FASTA files. - -Ex: - -```groovy -[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] -``` - -\ -  - -#### `reads_or_asm` - -Type: `path` - -NextFlow input type of `path` pointing to assembled contig file in FASTA format or sequencing reads in FASTQ format. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'SEQSERO2' { - ext.args = '-b mem' -} -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of `seqsero2` result files (`serotyped`). - -\ -  - -#### `serotyped` - -Type: `path` - -NextFlow output type of `path` pointing to the `seqsero2` results table file per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqsero2/main.nf --- a/0.4.0/modules/seqsero2/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -process SEQSERO2 { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}seqsero2${params.fs}1.2.1" : null) - conda (params.enable_conda ? 
"bioconda::seqsero2=1.2.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/seqsero2:1.2.1--py_0' : - 'quay.io/biocontainers/seqsero2:1.2.1--py_0' }" - - input: - tuple val(meta), path(reads_or_asm) - - output: - path("${meta.id}${params.fs}*") - tuple val(meta), path("${meta.id}${params.fs}*_result.tsv"), emit: serotyped - path "versions.yml" , emit: versions - - when: - (task.ext.when == null || task.ext.when) && reads_or_asm.size() > 0 - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - SeqSero2_package.py \\ - $args \\ - -d $prefix \\ - -n $prefix \\ - -p $task.cpus \\ - -i $reads_or_asm - - mv ${prefix}${params.fs}SeqSero_log.txt ${prefix}${params.fs}${prefix}.SeqSero_log.txt - mv ${prefix}${params.fs}SeqSero_result.txt ${prefix}${params.fs}${prefix}.SeqSero_result.txt - mv ${prefix}${params.fs}SeqSero_result.tsv ${prefix}${params.fs}${prefix}.SeqSero_result.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqsero2: \$( echo \$( SeqSero2_package.py --version 2>&1) | sed 's/^.*SeqSero2_package.py //' ) - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqtk/seq/README.md --- a/0.4.0/modules/seqtk/seq/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ -# NextFlow DSL2 Module - -```bash -SEQTK_SEQ -``` - -## Description - -Run `seqtk seq` command on reads in FASTQ format. Produces a filtered FASTQ file as per the filter strategy mentioned using the `ext.args` within the process scope. Please note that `seqtk seq` works only on one FASTQ file per command call. For paired-end reads, please use the `SEQKIT_SEQ` module. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). 
- -Ex: - -```groovy -[ - [ id: 'FAL00870', - strandedness: 'unstranded', - single_end: true, - centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' - ], - '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' -] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ - id: 'FAL00870', - strandedness: 'unstranded', - single_end: true -] -``` - -\ -  - -#### `reads` - -Type: `path` - -NextFlow input type of `path` pointing to FASTQ files on which `seqtk seq` should be run. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'SEQTK_SEQ' { - ext.args = '-L 4000' -} -``` - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and filtered gzipped FASTQ file. - -\ -  - -#### `fastx` - -Type: `path` - -NextFlow output type of `path` pointing to the FASTQ format filtered gzipped file per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/seqtk/seq/main.nf --- a/0.4.0/modules/seqtk/seq/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -process SEQTK_SEQ { - tag "$meta.id" - label 'process_mem_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}seqtk${params.fs}1.3-r106" : null) - conda (params.enable_conda ? "bioconda::seqtk=1.3 conda-forge::gzip" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' : - 'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }" - - input: - tuple val(meta), path(fastx) - - output: - tuple val(meta), path("*.gz"), emit: fastx - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - def extension = "fastq" - if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/ || "$args" ==~ /\-[aA]/ ) { - extension = "fasta" - } - """ - seqtk \\ - seq \\ - $args \\ - $fastx | \\ - gzip -c > ${prefix}.seqtk-seq.${task.index}.${extension}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - gzip: \$( echo \$(gzip --version 2>&1) | sed 's/^.*(gzip) //; s/gzip //; s/ Copyright.*\$//' ) - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/serotypefinder/README.md --- a/0.4.0/modules/serotypefinder/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -# NextFlow DSL2 Module - -```bash -SEROTYPEFINDER -``` - -## Description - -Run `serotypefinder` tool on a list of assembled *E. coli* contigs or partial sequences in FASTA format. Produces a single output table in ASCII text format. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of assemled contig FASTA files of input type `path` (`fasta`). - -Ex: - -```groovy -[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the assembly FASTA file. 
- -Ex: - -```groovy -[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] -``` - -\ -  - -#### `fasta` - -Type: `path` - -NextFlow input type of `path` pointing to assembled contig file or partial sequences in FASTA format. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. - -Ex: - -```groovy -withName: 'SEROTYPEFINDER' { - ext.args = '-mp kma' -} -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and list of `serotypefinder` result files (`serotyped`). - -\ -  - -#### `serotyped` - -Type: `path` - -NextFlow output type of `path` pointing to the `serotypefinder` results table file per sample (`id:`). - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/serotypefinder/main.nf --- a/0.4.0/modules/serotypefinder/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ -process SEROTYPEFINDER { - tag "$meta.id" - label 'process_low' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}serotypefinder${params.fs}2.0.2" : null) - conda (params.enable_conda ? "bioconda::serotypefinder=2.0.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/serotypefinder:2.0.1--py39hdfd78af_0' : - 'quay.io/biocontainers/serotypefinder:2.0.1--py39hdfd78af_0' }" - - input: - tuple val(meta), path(fasta) - - output: - path("${meta.id}${params.fs}*") - tuple val(meta), path("${meta.id}${params.fs}${meta.id}.tsv"), emit: serotyped - path "versions.yml" , emit: versions - - when: - (task.ext.when == null || task.ext.when) && fasta.size() > 0 - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def is_compressed = fasta.getName().endsWith(".gz") ? true : false - def fasta_name = fasta.getName().replace(".gz", "") - def serotypefinder_db = "${meta.serotypefinder_db}" - def serotypefinder_cmd = (params.enable_module ? "serotypefinder.py" : "serotypefinder") - """ - if [ "$is_compressed" == "true" ]; then - gzip -c -d $fasta > $fasta_name - fi - - mkdir -p $prefix > /dev/null 2>&1 - - $serotypefinder_cmd \\ - $args \\ - -p $serotypefinder_db \\ - -o $prefix \\ - -i $fasta_name - - head -n1 ${prefix}${params.fs}results_tab.tsv | sed -E "s/(.*)/Name\\t\\1/g" > ${prefix}${params.fs}${prefix}.tsv - tail -n+2 ${prefix}${params.fs}results_tab.tsv | sed -E "s/(.*)/${prefix}\\t\\1/g" >> ${prefix}${params.fs}${prefix}.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - serotypefinder: 2.0.1/2.0.2 - END_VERSIONS - - sedver="" - headver="" - tailver="" - - if [ "${workflow.containerEngine}" != "null" ]; then - sedver=\$( sed --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) - headver=\$( head --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) - tailver="\$headver" - else - sedver=\$( echo \$(sed --version 2>&1) | sed 's/^.*(GNU sed) //; s/ Copyright.*\$//' ) - headver=\$( head --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) - tailver=\$( tail --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) - fi - - cat <<-END_VERSIONS >> versions.yml - sed: \$sedver - head: \$headver - tail: \$tailver - END_VERSIONS - """ -} \ No newline at end of 
file diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/spades/assemble/README.md --- a/0.4.0/modules/spades/assemble/README.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -# NextFlow DSL2 Module - -```bash -SPADES_ASSEMBLE -``` - -## Description - -Run `spades` assembler tool on a list of read files in FASTQ format. - -\ -  - -### `input:` - -___ - -Type: `tuple` - -Takes in the following tuple of metadata (`meta`) and a list of FASTQ files from various platforms of input type `path` (`illumina`, `pacbio`, `nanopore`). - -Ex: - -```groovy -[ [id: 'sample1', single_end: true], '/data/sample1/f_merged.fq.gz' ] -[ [id: 'sample1', single_end: false], ['/data/sample1/f1_merged.fq.gz', '/data/sample2/f2_merged.fq.gz'], ['/data/sample1/nanopore.fastq'], ['/data/sample1/pacbio.fastq'] ] -``` - -\ -  - -#### `meta` - -Type: Groovy Map - -A Groovy Map containing the metadata about the FASTQ file. - -Ex: - -```groovy -[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] -``` - -\ -  - -#### `illumina` - -Type: `path` - -NextFlow input type of `path` pointing to Illumina read files in FASTQ format that need to be *de novo* assembled along with reads from any other sequencing platforms, if any. - -\ -  - -#### `nanopore` - -Type: `path` - -NextFlow input type of `path` pointing to Oxford Nanopore read files in FASTQ format that need to be *de novo* assembled along with reads from any other sequencing platforms, if any. - -\ -  - -#### `pacbio` - -Type: `path` - -NextFlow input type of `path` pointing to PacBio read files in FASTQ format that need to be *de novo* assembled along with reads from any other sequencing platforms, if any. - -\ -  - -#### `args` - -Type: Groovy String - -String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. 
- -Ex: - -```groovy -withName: 'SPADES_ASSEMBLE' { - ext.args = '--rna' -} -``` - -\ -  - -### `output:` - -___ - -Type: `tuple` - -Outputs a tuple of metadata (`meta` from `input:`) and `spades` assembled scaffolds file in FASTA format. - -\ -  - -#### `assembly` - -Type: `path` - -NextFlow output type of `path` pointing to the `spades` assembler results file (`scaffolds.fasta`) per sample (`id:`) i.e., the final assembled scaffolds file in FASTA format. - -\ -  - -#### `versions` - -Type: `path` - -NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.0/modules/spades/assemble/main.nf --- a/0.4.0/modules/spades/assemble/main.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -process SPADES_ASSEMBLE { - tag "$meta.id" - label 'process_higher' - - module (params.enable_module ? "${params.swmodulepath}${params.fs}spades${params.fs}3.15.3" : null) - conda (params.enable_conda ? 'bioconda::spades=3.15.3' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/spades:3.15.3--h95f258a_0' : - 'quay.io/biocontainers/spades:3.15.3--h95f258a_0' }" - - input: - tuple val(meta), path(illumina), path(pacbio), path(nanopore) - - output: - path "${meta.id}${params.fs}*" - tuple val(meta), path("${meta.id}${params.fs}scaffolds.fasta"), emit: assembly, optional: true - tuple val(meta), path("${meta.id}${params.fs}spades.log") , emit: log - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def maxmem = task.memory ? "--memory ${task.memory.toGiga()}" : "" - def illumina_reads = illumina ? ( meta.single_end ? 
"-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" - def pacbio_reads = !(pacbio.simpleName ==~ 'dummy_file.*') ? "--pacbio $pacbio" : "" - def nanopore_reads = !(nanopore.simpleName ==~ 'dummy_file.*') ? "--nanopore $nanopore" : "" - def custom_hmms = params.spades_hmm ? "--custom-hmms ${params.spades_hmm}" : "" - """ - spades.py \\ - $args \\ - --threads $task.cpus \\ - $maxmem \\ - $custom_hmms \\ - $illumina_reads \\ - $pacbio_reads \\ - $nanopore_reads \\ - -o ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - spades: \$(spades.py --version 2>&1 | sed 's/^.*SPAdes genome assembler v//; s/ .*\$//') - END_VERSIONS - """ -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/nextflow.config --- a/0.4.0/nextflow.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,148 +0,0 @@ -def fs = File.separator -def pd = "${projectDir}" - -// Global parameters -includeConfig "${pd}${fs}conf${fs}manifest.config" -includeConfig "${pd}${fs}conf${fs}base.config" - -// Include FASTQ config to prepare for a case when the entry point is -// FASTQ metadata CSV or FASTQ input directory -includeConfig "${pd}${fs}conf${fs}fastq.config" - -if (params.pipeline != null) { - try { - includeConfig "${params.workflowsconf}${fs}${params.pipeline}.config" - } catch (Exception e) { - System.err.println('-'.multiply(params.linewidth) + "\n" + - "\033[0;31m${params.cfsanpipename} - ERROR\033[0m\n" + - '-'.multiply(params.linewidth) + "\n" + "\033[0;31mCould not load " + - "default pipeline configuration. Please provide a pipeline \n" + - "name using the --pipeline option.\n\033[0m" + '-'.multiply(params.linewidth) + "\n") - System.exit(1) - } -} - -// Include modules' config last. 
-includeConfig "${pd}${fs}conf${fs}logtheseparams.config" -includeConfig "${pd}${fs}conf${fs}modules.config" - -// Nextflow runtime profiles -conda.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0/kondagac_cache' -singularity.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0/cingularitygac_cache' - -profiles { - standard { - process.executor = 'local' - process.cpus = 1 - params.enable_conda = false - params.enable_module = true - singularity.enabled = false - docker.enabled = false - } - - stdkondagac { - process.executor = 'local' - process.cpus = 4 - params.enable_conda = true - params.enable_module = false - singularity.enabled = false - docker.enabled = false - } - - stdcingularitygac { - process.executor = 'local' - process.cpus = 4 - params.enable_conda = false - params.enable_module = false - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - } - - raven { - process.executor = 'slurm' - process.queue = 'prod' - process.memory = '10GB' - process.cpus = 4 - params.enable_conda = false - params.enable_module = true - singularity.enabled = false - docker.enabled = false - clusterOptions = '--signal B:USR2' - } - - eprod { - process.executor = 'slurm' - process.queue = 'lowmem,midmem,bigmem' - process.memory = '10GB' - process.cpus = 4 - params.enable_conda = false - params.enable_module = true - singularity.enabled = false - docker.enabled = false - clusterOptions = '--signal B:USR2' - } - - eprodcingularity { - process.executor = 'slurm' - process.queue = 'lowmem,midmem,bigmem' - process.memory = '10GB' - process.cpus = 4 - params.enable_conda = false - params.enable_module = false - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - clusterOptions = '--signal B:USR2' - } - - cingularity { - process.executor = 'slurm' - process.queue = 'prod' - process.memory = '10GB' - process.cpus = 4 - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - 
params.enable_conda = false - params.enable_module = false - clusterOptions = '--signal B:USR2' - } - - cingularitygac { - process.executor = 'slurm' - executor.$slurm.exitReadTimeout = 120000 - process.queue = 'centriflaken' - process.cpus = 4 - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - params.enable_conda = false - params.enable_module = false - clusterOptions = '-n 1 --signal B:USR2' - } - - konda { - process.executor = 'slurm' - process.queue = 'prod' - process.memory = '10GB' - process.cpus = 4 - singularity.enabled = false - docker.enabled = false - params.enable_conda = true - params.enable_module = false - clusterOptions = '--signal B:USR2' - } - - kondagac { - process.executor = 'slurm' - executor.$slurm.exitReadTimeout = 120000 - process.queue = 'centriflaken' - process.cpus = 4 - singularity.enabled = false - docker.enabled = false - params.enable_conda = true - params.enable_module = false - clusterOptions = '-n 1 --signal B:USR2' - } -} diff -r 17890124001d -r 52045ea4679d 0.4.0/readme/centriflaken.md --- a/0.4.0/readme/centriflaken.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,276 +0,0 @@ -# CPIPES (CFSAN PIPELINES) - -## The modular pipeline repository at CFSAN, FDA - -**CPIPES** (CFSAN PIPELINES) is a collection of modular pipelines based on **NEXTFLOW**, -mostly for bioinformatics data analysis at **CFSAN, FDA.** - ---- - -### **centriflaken** - ---- -Precision long-read metagenomics sequencing for food safety by detection and assembly of Shiga toxin-producing Escherichia coli. - -#### Workflow Usage - -```bash -module load cpipes/0.4.0 - -cpipes --pipeline centriflaken [options] -``` - -Example: Run the default `centriflaken` pipeline with taxa of interest as *E. coli*. 
- -```bash -cd /hpc/scratch/$USER -mkdir nf-cpipes -cd nf-cpipes -cpipes --pipeline centriflaken --input /path/to/fastq/dir --output /path/to/output --user_email 'Kranti.Konganti@fda.hhs.gov' -``` - -Example: Run the `centriflaken` pipeline with taxa of interest as *Salmonella*. In this mode, `SerotypeFinder` tool will be replaced with `SeqSero2` tool. - -```bash -cd /hpc/scratch/$USER -mkdir nf-cpipes -cd nf-cpipes -cpipes --pipeline centriflaken --centrifuge_extract_bug 'Salmonella' --input /path/to/fastq/dir --output /path/to/output --user_email 'Kranti.Konganti@fda.hhs.gov' -``` - -#### `centriflaken` Help - -```text -[Kranti.Konganti@login2-slurm ]$ cpipes --pipeline centriflaken --help -N E X T F L O W ~ version 21.12.1-edge -Launching `/nfs/software/apps/cpipes/0.4.0/cpipes` [crazy_euler] - revision: 72db279311 -================================================================================ - (o) - ___ _ __ _ _ __ ___ ___ - / __|| '_ \ | || '_ \ / _ \/ __| -| (__ | |_) || || |_) || __/\__ \ - \___|| .__/ |_|| .__/ \___||___/ - | | | | - |_| |_| --------------------------------------------------------------------------------- -A collection of modular pipelines at CFSAN, FDA. --------------------------------------------------------------------------------- -Name : CPIPES -Author : Kranti.Konganti@fda.hhs.gov -Version : 0.4.0 -Center : CFSAN, FDA. -================================================================================ - -Workflow : centriflaken - -Author : Kranti.Konganti@fda.hhs.gov - -Version : 0.2.1 - - -Usage : cpipes --pipeline centriflaken [options] - - -Required : - ---input : Absolute path to directory containing FASTQ - files. The directory should contain only - FASTQ files as all the files within the - mentioned directory will be read. Ex: -- - input /path/to/fastq_pass - ---output : Absolute path to directory where all the - pipeline outputs should be stored. 
Ex: -- - output /path/to/output - -Other options : - ---metadata : Absolute path to metadata CSV file - containing five mandatory columns: sample, - fq1,fq2,strandedness,single_end. The fq1 - and fq2 columns contain absolute paths to - the FASTQ files. This option can be used in - place of --input option. This is rare. Ex: -- - metadata samplesheet.csv - ---fq_suffix : The suffix of FASTQ files (Unpaired reads - or R1 reads or Long reads) if an input - directory is mentioned via --input option. - Default: .fastq.gz - ---fq2_suffix : The suffix of FASTQ files (Paired-end reads - or R2 reads) if an input directory is - mentioned via --input option. Default: - false - ---fq_filter_by_len : Remove FASTQ reads that are less than this - many bases. Default: 4000 - ---fq_strandedness : The strandedness of the sequencing run. - This is mostly needed if your sequencing - run is RNA-SEQ. For most of the other runs, - it is probably safe to use unstranded for - the option. Default: unstranded - ---fq_single_end : SINGLE-END information will be auto- - detected but this option forces PAIRED-END - FASTQ files to be treated as SINGLE-END so - only read 1 information is included in auto- - generated samplesheet. Default: false - ---fq_filename_delim : Delimiter by which the file name is split - to obtain sample name. Default: _ - ---fq_filename_delim_idx : After splitting FASTQ file name by using - the --fq_filename_delim option, all - elements before this index (1-based) will - be joined to create final sample name. - Default: 1 - ---kraken2_db : Absolute path to kraken database. Default: / - hpc/db/kraken2/standard-210914 - ---kraken2_confidence : Confidence score threshold which must be - between 0 and 1. Default: 0.0 - ---kraken2_quick : Quick operation (use first hit or hits). - Default: false - ---kraken2_use_mpa_style : Report output like Kraken 1's kraken-mpa- - report. 
Default: false - ---kraken2_minimum_base_quality : Minimum base quality used in classification - which is only effective with FASTQ input. - Default: 0 - ---kraken2_report_zero_counts : Report counts for ALL taxa, even if counts - are zero. Default: false - ---kraken2_report_minmizer_data : Report minimizer and distinct minimizer - count information in addition to normal - Kraken report. Default: false - ---kraken2_use_names : Print scientific names instead of just - taxids. Default: true - ---kraken2_extract_bug : Extract the reads or contigs beloging to - this bug. Default: Escherichia coli - ---centrifuge_x : Absolute path to centrifuge database. - Default: /hpc/db/centrifuge/2022-04-12/ab - ---centrifuge_save_unaligned : Save SINGLE-END reads that did not align. - For PAIRED-END reads, save read pairs that - did not align concordantly. Default: false - ---centrifuge_save_aligned : Save SINGLE-END reads that aligned. For - PAIRED-END reads, save read pairs that - aligned concordantly. Default: false - ---centrifuge_out_fmt_sam : Centrifuge output should be in SAM. Default: - false - ---centrifuge_extract_bug : Extract this bug from centrifuge results. - Default: Escherichia coli - ---centrifuge_ignore_quals : Treat all quality values as 30 on Phred - scale. Default: false - ---flye_pacbio_raw : Input FASTQ reads are PacBio regular CLR - reads (<20% error) Defaut: false - ---flye_pacbio_corr : Input FASTQ reads are PacBio reads that - were corrected with other methods (<3% - error). Default: false - ---flye_pacbio_hifi : Input FASTQ reads are PacBio HiFi reads (<1% - error). Default: false - ---flye_nano_raw : Input FASTQ reads are ONT regular reads, - pre-Guppy5 (<20% error). Default: true - ---flye_nano_corr : Input FASTQ reads are ONT reads that were - corrected with other methods (<3% error). - Default: false - ---flye_nano_hq : Input FASTQ reads are ONT high-quality - reads: Guppy5+ SUP or Q20 (<5% error). 
- Default: false - ---flye_genome_size : Estimated genome size (for example, 5m or 2. - 6g). Default: 5.5m - ---flye_polish_iter : Number of genome polishing iterations. - Default: false - ---flye_meta : Do a metagenome assembly (unenven coverage - mode). Default: true - ---flye_min_overlap : Minimum overlap between reads. Default: - false - ---flye_scaffold : Enable scaffolding using assembly graph. - Default: false - ---serotypefinder_run : Run SerotypeFinder tool. Default: true - ---serotypefinder_x : Generate extended output files. Default: - true - ---serotypefinder_db : Path to SerotypeFinder databases. Default: / - hpc/db/serotypefinder/2.0.2 - ---serotypefinder_min_threshold : Minimum percent identity (in float) - required for calling a hit. Default: 0.85 - ---serotypefinder_min_cov : Minumum percent coverage (in float) - required for calling a hit. Default: 0.80 - ---seqsero2_run : Run SeqSero2 tool. Default: false - ---seqsero2_t : '1' for interleaved paired-end reads, '2' - for separated paired-end reads, '3' for - single reads, '4' for genome assembly, '5' - for nanopore reads (fasta/fastq). Default: - 4 - ---seqsero2_m : Which workflow to apply, 'a'(raw reads - allele micro-assembly), 'k'(raw reads and - genome assembly k-mer). Default: k - ---seqsero2_c : SeqSero2 will only output serotype - prediction without the directory containing - log files. Default: false - ---seqsero2_s : SeqSero2 will not output header in - SeqSero_result.tsv. Default: false - ---mlst_run : Run MLST tool. Default: true - ---mlst_minid : DNA %identity of full allelle to consider ' - similar' [~]. Default: 95 - ---mlst_mincov : DNA %cov to report partial allele at all [?]. - Default: 10 - ---mlst_minscore : Minumum score out of 100 to match a scheme. - Default: 50 - ---abricate_run : Run ABRicate tool. Default: true - ---abricate_minid : Minimum DNA %identity. Defaut: 90 - ---abricate_mincov : Minimum DNA %coverage. Defaut: 80 - ---abricate_datadir : ABRicate databases folder. 
Defaut: /hpc/db/ - abricate/1.0.1/db - -Help options : - ---help : Display this message. -``` - -### **BETA** - ---- -The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations. diff -r 17890124001d -r 52045ea4679d 0.4.0/readme/centriflaken_hy.md --- a/0.4.0/readme/centriflaken_hy.md Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,367 +0,0 @@ -# CPIPES (CFSAN PIPELINES) - -## The modular pipeline repository at CFSAN, FDA - -**CPIPES** (CFSAN PIPELINES) is a collection of modular pipelines based on **NEXTFLOW**, -mostly for bioinformatics data analysis at **CFSAN, FDA.** - ---- - -### **centriflaken_hy** - ---- -`centriflaken_hy` is a variant of the original `centriflaken` pipeline but for Illumina short reads either single-end or paired-end. - -#### Workflow Usage - -```bash -module load cpipes/0.4.0 - -cpipes --pipeline centriflaken_hy [options] -``` - -Example: Run the default `centriflaken_hy` pipeline with taxa of interest as *E. coli*. - -```bash -cd /hpc/scratch/$USER -mkdir nf-cpipes -cd nf-cpipes -cpipes --pipeline centriflaken_hy --input /path/to/illumina/fastq/dir --output /path/to/output --user_email 'Kranti.Konganti@fda.hhs.gov' -``` - -Example: Run the `centriflaken_hy` pipeline with taxa of interest as *Salmonella*. In this mode, `SerotypeFinder` tool will be replaced with `SeqSero2` tool. 
- -```bash -cd /hpc/scratch/$USER -mkdir nf-cpipes -cd nf-cpipes -cpipes --pipeline centriflaken_hy --centrifuge_extract_bug 'Salmonella' --input /path/to/illumina/fastq/dir --output /path/to/output --user_email 'Kranti.Konganti@fda.hhs.gov' -``` - -#### `centriflaken_hy` Help - -```text -[Kranti.Konganti@login2-slurm ]$ cpipes --pipeline centriflaken_hy --help -N E X T F L O W ~ version 21.12.1-edge -Launching `/home/Kranti.Konganti/apps/cpipes/cpipes` [soggy_curie] - revision: 72db279311 -================================================================================ - (o) - ___ _ __ _ _ __ ___ ___ - / __|| '_ \ | || '_ \ / _ \/ __| -| (__ | |_) || || |_) || __/\__ \ - \___|| .__/ |_|| .__/ \___||___/ - | | | | - |_| |_| --------------------------------------------------------------------------------- -A collection of modular pipelines at CFSAN, FDA. --------------------------------------------------------------------------------- -Name : CPIPES -Author : Kranti.Konganti@fda.hhs.gov -Version : 0.4.0 -Center : CFSAN, FDA. -================================================================================ - -Workflow : centriflaken_hy - -Author : Kranti.Konganti@fda.hhs.gov - -Version : 0.4.0 - - -Usage : cpipes --pipeline centriflaken_hy [options] - - -Required : - ---input : Absolute path to directory containing FASTQ - files. The directory should contain only - FASTQ files as all the files within the - mentioned directory will be read. Ex: -- - input /path/to/fastq_pass - ---output : Absolute path to directory where all the - pipeline outputs should be stored. Ex: -- - output /path/to/output - -Other options : - ---metadata : Absolute path to metadata CSV file - containing five mandatory columns: sample, - fq1,fq2,strandedness,single_end. The fq1 - and fq2 columns contain absolute paths to - the FASTQ files. This option can be used in - place of --input option. This is rare. 
Ex: -- - metadata samplesheet.csv - ---fq_suffix : The suffix of FASTQ files (Unpaired reads - or R1 reads or Long reads) if an input - directory is mentioned via --input option. - Default: _R1_001.fastq.gz - ---fq2_suffix : The suffix of FASTQ files (Paired-end reads - or R2 reads) if an input directory is - mentioned via --input option. Default: - _R2_001.fastq.gz - ---fq_filter_by_len : Remove FASTQ reads that are less than this - many bases. Default: 75 - ---fq_strandedness : The strandedness of the sequencing run. - This is mostly needed if your sequencing - run is RNA-SEQ. For most of the other runs, - it is probably safe to use unstranded for - the option. Default: unstranded - ---fq_single_end : SINGLE-END information will be auto- - detected but this option forces PAIRED-END - FASTQ files to be treated as SINGLE-END so - only read 1 information is included in auto- - generated samplesheet. Default: false - ---fq_filename_delim : Delimiter by which the file name is split - to obtain sample name. Default: _ - ---fq_filename_delim_idx : After splitting FASTQ file name by using - the --fq_filename_delim option, all - elements before this index (1-based) will - be joined to create final sample name. - Default: 1 - ---seqkit_rmdup_run : Remove duplicate sequences using seqkit - rmdup. Default: false - ---seqkit_rmdup_n : Match and remove duplicate sequences by - full name instead of just ID. Defaut: false - ---seqkit_rmdup_s : Match and remove duplicate sequences by - sequence content. Defaut: true - ---seqkit_rmdup_d : Save the duplicated sequences to a file. - Defaut: false - ---seqkit_rmdup_D : Save the number and list of duplicated - sequences to a file. Defaut: false - ---seqkit_rmdup_i : Ignore case while using seqkit rmdup. - Defaut: false - ---seqkit_rmdup_P : Only consider positive strand (i.e. 5') - when comparing by sequence content. Defaut: - false - ---kraken2_db : Absolute path to kraken database. 
Default: / - hpc/db/kraken2/standard-210914 - ---kraken2_confidence : Confidence score threshold which must be - between 0 and 1. Default: 0.0 - ---kraken2_quick : Quick operation (use first hit or hits). - Default: false - ---kraken2_use_mpa_style : Report output like Kraken 1's kraken-mpa- - report. Default: false - ---kraken2_minimum_base_quality : Minimum base quality used in classification - which is only effective with FASTQ input. - Default: 0 - ---kraken2_report_zero_counts : Report counts for ALL taxa, even if counts - are zero. Default: false - ---kraken2_report_minmizer_data : Report minimizer and distinct minimizer - count information in addition to normal - Kraken report. Default: false - ---kraken2_use_names : Print scientific names instead of just - taxids. Default: true - ---kraken2_extract_bug : Extract the reads or contigs beloging to - this bug. Default: Escherichia coli - ---centrifuge_x : Absolute path to centrifuge database. - Default: /hpc/db/centrifuge/2022-04-12/ab - ---centrifuge_save_unaligned : Save SINGLE-END reads that did not align. - For PAIRED-END reads, save read pairs that - did not align concordantly. Default: false - ---centrifuge_save_aligned : Save SINGLE-END reads that aligned. For - PAIRED-END reads, save read pairs that - aligned concordantly. Default: false - ---centrifuge_out_fmt_sam : Centrifuge output should be in SAM. Default: - false - ---centrifuge_extract_bug : Extract this bug from centrifuge results. - Default: Escherichia coli - ---centrifuge_ignore_quals : Treat all quality values as 30 on Phred - scale. Default: false - ---megahit_run : Run MEGAHIT assembler. Default: true - ---megahit_min_count : . Minimum multiplicity for filtering ( - k_min+1)-mers. Defaut: false - ---megahit_k_list : Comma-separated list of kmer size. All - values must be odd, in the range 15-255, - increment should be <= 28. Ex: '21,29,39,59, - 79,99,119,141'. Default: false - ---megahit_no_mercy : Do not add mercy k-mers. 
Default: false - ---megahit_bubble_level : . Intensity of bubble merging (0-2), 0 - to disable. Default: false - ---megahit_merge_level : . Merge complex bubbles of length <= l* - kmer_size and similarity >= s. Default: - false - ---megahit_prune_level : . Strength of low depth pruning (0-3). - Default: false - ---megahit_prune_depth : . Remove unitigs with avg k-mer depth - less than this value. Default: false - ---megahit_low_local_ratio : . Ratio threshold to define low - local coverage contigs. Default: false - ---megahit_max_tip_len : . remove tips less than this value [< - int> * k]. Default: false - ---megahit_no_local : Disable local assembly. Default: false - ---megahit_kmin_1pass : Use 1pass mode to build SdBG of k_min. - Default: false - ---megahit_preset : . Override a group of parameters. - Valid values are meta-sensitive which - enforces '--min-count 1 --k-list 21,29,39, - 49,...,129,141', meta-large (large & - complex metagenomes, like soil) which - enforces '--k-min 27 --k-max 127 --k-step - 10'. Default: meta-sensitive - ---megahit_mem_flag : . SdBG builder memory mode. 0: minimum; - 1: moderate; 2: use all memory specified. - Default: 2 - ---megahit_min_contig_len : . Minimum length of contigs to output. - Default: false - ---spades_run : Run SPAdes assembler. Default: false - ---spades_isolate : This flag is highly recommended for high- - coverage isolate and multi-cell data. - Defaut: false - ---spades_sc : This flag is required for MDA (single-cell) - data. Default: false - ---spades_meta : This flag is required for metagenomic data. - Default: true - ---spades_bio : This flag is required for biosytheticSPAdes - mode. Default: false - ---spades_corona : This flag is required for coronaSPAdes mode. - Default: false - ---spades_rna : This flag is required for RNA-Seq data. - Default: false - ---spades_plasmid : Runs plasmidSPAdes pipeline for plasmid - detection. 
Default: false - ---spades_metaviral : Runs metaviralSPAdes pipeline for virus - detection. Default: false - ---spades_metaplasmid : Runs metaplasmidSPAdes pipeline for plasmid - detection in metagenomics datasets. Default: - false - ---spades_rnaviral : This flag enables virus assembly module - from RNA-Seq data. Default: false - ---spades_iontorrent : This flag is required for IonTorrent data. - Default: false - ---spades_only_assembler : Runs only the SPAdes assembler module ( - without read error correction). Default: - false - ---spades_careful : Tries to reduce the number of mismatches - and short indels in the assembly. Default: - false - ---spades_cov_cutoff : Coverage cutoff value (a positive float - number). Default: false - ---spades_k : List of k-mer sizes (must be odd and less - than 128). Default: false - ---spades_hmm : Directory with custom hmms that replace the - default ones (very rare). Default: false - ---serotypefinder_run : Run SerotypeFinder tool. Default: true - ---serotypefinder_x : Generate extended output files. Default: - true - ---serotypefinder_db : Path to SerotypeFinder databases. Default: / - hpc/db/serotypefinder/2.0.2 - ---serotypefinder_min_threshold : Minimum percent identity (in float) - required for calling a hit. Default: 0.85 - ---serotypefinder_min_cov : Minumum percent coverage (in float) - required for calling a hit. Default: 0.80 - ---seqsero2_run : Run SeqSero2 tool. Default: false - ---seqsero2_t : '1' for interleaved paired-end reads, '2' - for separated paired-end reads, '3' for - single reads, '4' for genome assembly, '5' - for nanopore reads (fasta/fastq). Default: - 4 - ---seqsero2_m : Which workflow to apply, 'a'(raw reads - allele micro-assembly), 'k'(raw reads and - genome assembly k-mer). Default: k - ---seqsero2_c : SeqSero2 will only output serotype - prediction without the directory containing - log files. Default: false - ---seqsero2_s : SeqSero2 will not output header in - SeqSero_result.tsv. 
Default: false - ---mlst_run : Run MLST tool. Default: true - ---mlst_minid : DNA %identity of full allelle to consider ' - similar' [~]. Default: 95 - ---mlst_mincov : DNA %cov to report partial allele at all [?]. - Default: 10 - ---mlst_minscore : Minumum score out of 100 to match a scheme. - Default: 50 - ---abricate_run : Run ABRicate tool. Default: true - ---abricate_minid : Minimum DNA %identity. Defaut: 90 - ---abricate_mincov : Minimum DNA %coverage. Defaut: 80 - ---abricate_datadir : ABRicate databases folder. Defaut: /hpc/db/ - abricate/1.0.1/db - -Help options : - ---help : Display this message. -``` - -### **BETA** - ---- -The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations. diff -r 17890124001d -r 52045ea4679d 0.4.0/subworkflows/process_fastq.nf --- a/0.4.0/subworkflows/process_fastq.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,144 +0,0 @@ -// Include any necessary methods and modules -include { stopNow; validateParamsForFASTQ } from "${params.routines}" -include { GEN_SAMPLESHEET } from "${params.modules}${params.fs}gen_samplesheet${params.fs}main" -include { SAMPLESHEET_CHECK } from "${params.modules}${params.fs}samplesheet_check${params.fs}main" -include { CAT_FASTQ } from "${params.modules}${params.fs}cat${params.fs}fastq${params.fs}main" -include { SEQKIT_SEQ } from "${params.modules}${params.fs}seqkit${params.fs}seq${params.fs}main" - -// Validate 4 required workflow parameters if -// FASTQ files are the input for the -// entry point. 
-validateParamsForFASTQ() - -// Start the subworkflow -workflow PROCESS_FASTQ { - main: - versions = Channel.empty() - input_ch = Channel.empty() - reads = Channel.empty() - - def input = file( (params.input ?: params.metadata) ) - - if (params.input) { - def fastq_files = [] - - if (params.fq_suffix == null) { - stopNow("We need to know what suffix the FASTQ files ends with inside the\n" + - "directory. Please use the --fq_suffix option to indicate the file\n" + - "suffix by which the files are to be collected to run the pipeline on.") - } - - if (params.fq_strandedness == null) { - stopNow("We need to know if the FASTQ files inside the directory\n" + - "are sequenced using stranded or non-stranded sequencing. This is generally\n" + - "required if the sequencing experiment is RNA-SEQ. For almost all of the other\n" + - "cases, you can probably use the --fq_strandedness unstranded option to indicate\n" + - "that the reads are unstranded.") - } - - if (params.fq_filename_delim == null || params.fq_filename_delim_idx == null) { - stopNow("We need to know the delimiter of the filename of the FASTQ files.\n" + - "By default the filename delimiter is _ (underscore). This delimiter character\n" + - "is used to split and assign a group name. The group name can be controlled by\n" + - "using the --fq_filename_delim_idx option (1-based). For example, if the FASTQ\n" + - "filename is WT_REP1_001.fastq, then to create a group WT, use the following\n" + - "options: --fq_filename_delim _ --fq_filename_delim_idx 1") - } - - if (!input.exists()) { - stopNow("The input directory,\n${params.input}\ndoes not exist!") - } - - input.eachFileRecurse { - it.name.endsWith("${params.fq_suffix}") ? fastq_files << it : fastq_files << null - } - - if (fastq_files.findAll{ it != null }.size() == 0) { - stopNow("The input directory,\n${params.input}\nis empty! 
or does not " + - "have FASTQ files ending with the suffix: ${params.fq_suffix}") - } - - GEN_SAMPLESHEET( Channel.fromPath(params.input, type: 'dir') ) - GEN_SAMPLESHEET.out.csv.set{ input_ch } - versions.mix( GEN_SAMPLESHEET.out.versions ) - .set { versions } - } else if (params.metadata) { - if (!input.exists()) { - stopNow("The metadata CSV file,\n${params.metadata}\ndoes not exist!") - } - - if (input.size() <= 0) { - stopNow("The metadata CSV file,\n${params.metadata}\nis empty!") - } - - Channel.fromPath(params.metadata, type: 'file') - .set { input_ch } - } - - SAMPLESHEET_CHECK( input_ch ) - .csv - .splitCsv( header: true, sep: ',') - .map { create_fastq_channel(it) } - .groupTuple(by: [0]) - .branch { - meta, fastq -> - single : fastq.size() == 1 - return [ meta, fastq.flatten() ] - multiple : fastq.size() > 1 - return [ meta, fastq.flatten() ] - } - .set { reads } - - CAT_FASTQ( reads.multiple ) - .catted_reads - .mix( reads.single ) - .set { processed_reads } - - if (params.fq_filter_by_len.toInteger() > 0) { - SEQKIT_SEQ( processed_reads ) - .fastx - .set { processed_reads } - - versions.mix( SEQKIT_SEQ.out.versions.first().ifEmpty(null) ) - .set { versions } - } - - versions.mix( - SAMPLESHEET_CHECK.out.versions, - CAT_FASTQ.out.versions.first().ifEmpty(null) - ) - .set { versions } - - emit: - processed_reads - versions -} - -// Function to get list of [ meta, [ fq1, fq2 ] ] -def create_fastq_channel(LinkedHashMap row) { - - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - meta.strandedness = row.strandedness - meta.id = meta.id.split(params.fq_filename_delim)[0..params.fq_filename_delim_idx.toInteger() - 1] - .join(params.fq_filename_delim) - meta.id = (meta.id =~ /\./ ? meta.id.take(meta.id.indexOf('.')) : meta.id) - - def array = [] - - if (!file(row.fq1).exists()) { - stopNow("Please check input metadata CSV. The following Read 1 FASTQ file does not exist!" 
+ - "\n${row.fq1}") - } - if (meta.single_end) { - array = [ meta, [ file(row.fq1) ] ] - } else { - if (!file(row.fq2).exists()) { - stopNow("Please check input metadata CSV. The following Read 2 FASTQ file does not exist!" + - "\n${row.fq2}") - } - array = [ meta, [ file(row.fq1), file(row.fq2) ] ] - } - return array -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/workflows/centriflaken.nf --- a/0.4.0/workflows/centriflaken.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,332 +0,0 @@ -// Define any required imports for this specific workflow -import java.nio.file.Paths -import nextflow.file.FileHelper - -// Include any necessary methods -include { \ - summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \ - addPadding; wrapUpHelp } from "${params.routines}" -include { kraken2Help } from "${params.toolshelp}${params.fs}kraken2" -include { centrifugeHelp } from "${params.toolshelp}${params.fs}centrifuge" -include { flyeHelp } from "${params.toolshelp}${params.fs}flye" -include { serotypefinderHelp } from "${params.toolshelp}${params.fs}serotypefinder" -include { seqsero2Help } from "${params.toolshelp}${params.fs}seqsero2" -include { mlstHelp } from "${params.toolshelp}${params.fs}mlst" -include { abricateHelp } from "${params.toolshelp}${params.fs}abricate" - -// Exit if help requested before any subworkflows -if (params.help) { - log.info help() - exit 0 -} - -// Include any necessary modules and subworkflows -include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq" -include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main" -include { CENTRIFUGE_CLASSIFY } from "${params.modules}${params.fs}centrifuge${params.fs}classify${params.fs}main" -include { CENTRIFUGE_PROCESS } from "${params.modules}${params.fs}centrifuge${params.fs}process${params.fs}main" -include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main" -include 
{ FLYE_ASSEMBLE } from "${params.modules}${params.fs}flye${params.fs}assemble${params.fs}main" -include { KRAKEN2_CLASSIFY } from "${params.modules}${params.fs}kraken2${params.fs}classify${params.fs}main" -include { KRAKEN2_EXTRACT_CONTIGS } from "${params.modules}${params.fs}kraken2${params.fs}extract_contigs${params.fs}main" -include { SEROTYPEFINDER } from "${params.modules}${params.fs}serotypefinder${params.fs}main" -include { SEQSERO2 } from "${params.modules}${params.fs}seqsero2${params.fs}main" -include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main" -include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main" -include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main" -include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main" -include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main" -include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main" - - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - INPUTS AND ANY CHECKS FOR THE CENTRIFLAKEN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def kraken2_db_dir = file ( "${params.kraken2_db}" ) -def centrifuge_x = file ( "${params.centrifuge_x}" ) -def reads_platform = 0 -def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ] - -reads_platform += (params.flye_nano_raw ? 1 : 0) -reads_platform += (params.flye_nano_corr ? 1 : 0) -reads_platform += (params.flye_nano_hq ? 1 : 0) -reads_platform += (params.flye_pacbio_raw ? 1 : 0) -reads_platform += (params.flye_pacbio_corr ? 1 : 0) -reads_platform += (params.flye_pacbio_hifi ? 
1 : 0) - -if (!kraken2_db_dir.exists() || !centrifuge_x.getParent().exists()) { - stopNow("Please check if the following absolute paths are valid:\n" + - "${params.kraken2_db}\n${params.centrifuge_x}\n" + - "Cannot proceed further!") -} - -if (reads_platform > 1 || reads_platform == 0) { - msg_0 = (reads_platform > 1 ? "only" : "at least") - stopNow("Please mention ${msg_0} one read platform for use with the flye assembler\n" + - "using any one of the following options:\n" + - "--flye_nano_raw\n--flye_nano_corr\n--flye_nano_hq\n" + - "--flye_pacbio_raw\n--flye_pacbio_corr\n--flye_pacbio_hifi") -} - -if (params.centrifuge_extract_bug != params.kraken2_extract_bug) { - stopNow("Please make sure that the bug to be extracted is same\n" + - "for both --centrifuge_extract_bug and --kraken2_extract_bug options.") -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN THE CENTRIFLAKEN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow CENTRIFLAKEN { - main: - ch_asm_filtered_contigs = Channel.empty() - ch_mqc_custom_tbl = Channel.empty() - - log.info summaryOfParams() - - PROCESS_FASTQ() - .processed_reads - .map { - meta, fastq -> - meta.centrifuge_x = params.centrifuge_x - meta.kraken2_db = params.kraken2_db - [meta, fastq] - } - .set { ch_processed_reads } - - PROCESS_FASTQ - .out - .versions - .set { software_versions } - - FASTQC ( ch_processed_reads ) - - CENTRIFUGE_CLASSIFY ( ch_processed_reads ) - - CENTRIFUGE_PROCESS ( - CENTRIFUGE_CLASSIFY.out.report - .join( CENTRIFUGE_CLASSIFY.out.output ) - ) - - ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted ) - .set { ch_centrifuge_extracted } - - SEQKIT_GREP ( ch_centrifuge_extracted ) - - FLYE_ASSEMBLE ( SEQKIT_GREP.out.fastx ) - - FLYE_ASSEMBLE - .out - .assembly - .set { ch_flye_assembly } - - ch_flye_assembly - .map { - meta, fastq -> - meta.is_assembly = true - [meta, fastq] - } - .set { 
ch_flye_assembly } - - ch_flye_assembly.ifEmpty { [ false, false ] } - - KRAKEN2_CLASSIFY ( ch_flye_assembly ) - - KRAKEN2_EXTRACT_CONTIGS ( - ch_flye_assembly - .join( KRAKEN2_CLASSIFY.out.kraken_output ), - params.kraken2_extract_bug - ) - - KRAKEN2_EXTRACT_CONTIGS - .out - .asm_filtered_contigs - .map { - meta, fastq -> - meta.organism = params.kraken2_extract_bug.split(/\s+/)[0].capitalize() - meta.serotypefinder_db = params.serotypefinder_db - [meta, fastq] - } - .set { ch_asm_filtered_contigs } - - SEROTYPEFINDER ( ch_asm_filtered_contigs ) - - SEQSERO2 ( ch_asm_filtered_contigs ) - - MLST ( ch_asm_filtered_contigs ) - - ABRICATE_RUN ( - ch_asm_filtered_contigs, - abricate_dbs - ) - - ABRICATE_RUN - .out - .abricated - .map { meta, abres -> [ abricate_dbs, abres ] } - .groupTuple(by: [0]) - .map { it -> tuple ( it[0], it[1].flatten() ) } - .set { ch_abricated } - - ABRICATE_SUMMARY ( ch_abricated ) - - // ABRICATE_SUMMARY.out.ecoli_vf.set { ch_abricate_summary_ecoli_vf } - // ch_abricate_summary_ecoli_vf.ifEmpty { [ false, false ] } - - CENTRIFUGE_CLASSIFY.out.kreport - .map { meta, kreport -> [ kreport ] } - .flatten() - .concat ( - KRAKEN2_CLASSIFY.out.kraken_report - .map { meta, kreport -> [ kreport ] } - .flatten(), - FASTQC.out.zip - .map { meta, zip -> [ zip ] } - .flatten() - ) - .set { ch_mqc_classify } - - if (params.serotypefinder_run) { - SEROTYPEFINDER - .out - .serotyped - .map { meta, tsv -> [ 'serotypefinder', tsv ] } - .groupTuple(by: [0]) - .map { it -> tuple ( it[0], it[1].flatten() ) } - .set { ch_mqc_custom_tbl } - } else if (params.seqsero2_run) { - SEQSERO2 - .out - .serotyped - .map { meta, tsv -> [ 'seqsero2', tsv ] } - .groupTuple(by: [0]) - .map { it -> tuple ( it[0], it[1].flatten() ) } - .set { ch_mqc_custom_tbl } - } - - ch_mqc_custom_tbl - .concat ( - ABRICATE_SUMMARY.out.ncbiamrplus.map{ it -> tuple ( it[0], it[1] )}, - ABRICATE_SUMMARY.out.resfinder.map{ it -> tuple ( it[0], it[1] )}, - ABRICATE_SUMMARY.out.megares.map{ it -> 
tuple ( it[0], it[1] )}, - ABRICATE_SUMMARY.out.argannot.map{ it -> tuple ( it[0], it[1] )}, - ) - .groupTuple(by: [0]) - .map { it -> [ it[0], it[1].flatten() ]} - .set { ch_mqc_custom_tbl } - - TABLE_SUMMARY ( ch_mqc_custom_tbl ) - - DUMP_SOFTWARE_VERSIONS ( - software_versions - .mix ( - FASTQC.out.versions, - CENTRIFUGE_CLASSIFY.out.versions, - CENTRIFUGE_PROCESS.out.versions, - SEQKIT_GREP.out.versions, - FLYE_ASSEMBLE.out.versions.ifEmpty(null), - KRAKEN2_CLASSIFY.out.versions.ifEmpty(null), - KRAKEN2_EXTRACT_CONTIGS.out.versions.ifEmpty(null), - SEROTYPEFINDER.out.versions.ifEmpty(null), - SEQSERO2.out.versions.ifEmpty(null), - MLST.out.versions.ifEmpty(null), - ABRICATE_RUN.out.versions.ifEmpty(null), - ABRICATE_SUMMARY.out.versions.ifEmpty(null), - TABLE_SUMMARY.out.versions.ifEmpty(null) - ) - .unique() - .collectFile(name: 'collected_versions.yml') - ) - - DUMP_SOFTWARE_VERSIONS - .out - .mqc_yml - .concat ( - ch_mqc_classify, - TABLE_SUMMARY.out.mqc_yml - ) - .collect() - .set { ch_multiqc } - - MULTIQC ( ch_multiqc ) -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (workflow.success) { - // CREATE APPROPRIATE DIRECTORIES AND MOVE AS REQUESTED BY STAKEHOLDER(S) - // - // Nextflow's .moveTo will error out if directories contain files and it - // would be complex to include logic to skip directories - // - def final_intermediate_dir = "${params.output}${params.fs}${params.pipeline}-steps" - def final_results_dir = "${params.output}${params.fs}${params.pipeline}-results" - def kraken2_ext_contigs = file( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs", type: 'dir' ) - def final_intermediate = file( final_intermediate_dir, type: 'dir' ) - def final_results = file( final_results_dir, type: 'dir' 
) - def pipeline_output = file( params.output, type: 'dir' ) - - if ( !final_intermediate.exists() ) { - final_intermediate.mkdirs() - - FileHelper.visitFiles(Paths.get("${params.output}"), '*') { - if ( !(it.name ==~ /^(${params.cfsanpipename}|multiqc|\.nextflow|${workflow.workDir.name}|${params.pipeline}).*/) ) { - FileHelper.movePath( - it, Paths.get( "${final_intermediate_dir}${params.fs}${it.name}" ) - ) - } - } - } - - if ( kraken2_ext_contigs.exists() && !final_results.exists() ) { - final_results.mkdirs() - - FileHelper.movePath( - Paths.get( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs" ), - Paths.get( "${final_results_dir}${params.fs}kraken2_extract_contigs" ) - ) - } - - sendMail() - } -} - -workflow.onError { - sendMail() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - HELPER METHODS FOR CENTRIFLAKEN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def help() { - - Map helptext = [:] - - helptext.putAll ( - fastqEntryPointHelp() + - kraken2Help(params).text + - centrifugeHelp(params).text + - flyeHelp(params).text + - serotypefinderHelp(params).text + - seqsero2Help(params).text + - mlstHelp(params).text + - abricateHelp(params).text + - wrapUpHelp() - ) - - return addPadding(helptext) -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/workflows/centriflaken_hy.nf --- a/0.4.0/workflows/centriflaken_hy.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,375 +0,0 @@ -// Define any required imports for this specific workflow -import java.nio.file.Paths -import nextflow.file.FileHelper - -// Include any necessary methods -include { \ - summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \ - addPadding; wrapUpHelp } from "${params.routines}" -include { seqkitrmdupHelp } from "${params.toolshelp}${params.fs}seqkitrmdup" -include { kraken2Help } from 
"${params.toolshelp}${params.fs}kraken2" -include { centrifugeHelp } from "${params.toolshelp}${params.fs}centrifuge" -include { megahitHelp } from "${params.toolshelp}${params.fs}megahit" -include { spadesHelp } from "${params.toolshelp}${params.fs}spades" -include { serotypefinderHelp } from "${params.toolshelp}${params.fs}serotypefinder" -include { seqsero2Help } from "${params.toolshelp}${params.fs}seqsero2" -include { mlstHelp } from "${params.toolshelp}${params.fs}mlst" -include { abricateHelp } from "${params.toolshelp}${params.fs}abricate" - -// Exit if help requested before any subworkflows -if (params.help) { - log.info help() - exit 0 -} - -// Include any necessary modules and subworkflows -include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq" -include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main" -include { SEQKIT_RMDUP } from "${params.modules}${params.fs}seqkit${params.fs}rmdup${params.fs}main" -include { CENTRIFUGE_CLASSIFY } from "${params.modules}${params.fs}centrifuge${params.fs}classify${params.fs}main" -include { CENTRIFUGE_PROCESS } from "${params.modules}${params.fs}centrifuge${params.fs}process${params.fs}main" -include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main" -include { MEGAHIT_ASSEMBLE } from "${params.modules}${params.fs}megahit${params.fs}assemble${params.fs}main" -include { SPADES_ASSEMBLE } from "${params.modules}${params.fs}spades${params.fs}assemble${params.fs}main" -include { KRAKEN2_CLASSIFY } from "${params.modules}${params.fs}kraken2${params.fs}classify${params.fs}main" -include { KRAKEN2_EXTRACT_CONTIGS } from "${params.modules}${params.fs}kraken2${params.fs}extract_contigs${params.fs}main" -include { SEROTYPEFINDER } from "${params.modules}${params.fs}serotypefinder${params.fs}main" -include { SEQSERO2 } from "${params.modules}${params.fs}seqsero2${params.fs}main" -include { MLST } from 
"${params.modules}${params.fs}mlst${params.fs}main" -include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main" -include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main" -include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main" -include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main" -include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main" - - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - INPUTS AND ANY CHECKS FOR THE CENTRIFLAKEN-HY WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def kraken2_db_dir = file ( "${params.kraken2_db}" ) -def centrifuge_x = file ( "${params.centrifuge_x}" ) -def spades_custom_hmm = (params.spades_hmm ? file ( "${params.spades_hmm}" ) : false) -def reads_platform = 0 -def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ] - -reads_platform += (params.input ? 
1 : 0) - -if (!kraken2_db_dir.exists() || !centrifuge_x.getParent().exists()) { - stopNow("Please check if the following absolute paths are valid:\n" + - "${params.kraken2_db}\n${params.centrifuge_x}\n" + - "Cannot proceed further!") -} - -if (spades_custom_hmm && !spades_custom_hmm.exists()) { - stopNow("Please check if the following SPAdes' custom HMM directory\n" + - "path is valid:\n${params.spades_hmm}\nCannot proceed further!") -} - -if (reads_platform < 1 || reads_platform == 0) { - stopNow("Please mention at least one absolute path to input folder which contains\n" + - "FASTQ files sequenced using the --input option.\n" + - "Ex: --input (Illumina or Generic short reads in FASTQ format)") -} - -if (params.centrifuge_extract_bug != params.kraken2_extract_bug) { - stopNow("Please make sure that the bug to be extracted is same\n" + - "for both --centrifuge_extract_bug and --kraken2_extract_bug options.") -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN THE CENTRIFLAKEN-HY WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow CENTRIFLAKEN_HY { - main: - ch_asm_filtered_contigs = Channel.empty() - ch_mqc_custom_tbl = Channel.empty() - ch_dummy = Channel.fromPath("${params.dummyfile}") - ch_dummy2 = Channel.fromPath("${params.dummyfile2}") - - log.info summaryOfParams() - - PROCESS_FASTQ() - .processed_reads - .map { - meta, fastq -> - meta.centrifuge_x = params.centrifuge_x - meta.kraken2_db = params.kraken2_db - [meta, fastq] - } - .set { ch_processed_reads } - - PROCESS_FASTQ - .out - .versions - .set { software_versions } - - FASTQC ( ch_processed_reads ) - - if (params.seqkit_rmdup_run) { - SEQKIT_RMDUP ( ch_processed_reads ) - - SEQKIT_RMDUP - .out - .fastx - .set { ch_processed_reads } - - software_versions - .mix ( SEQKIT_RMDUP.out.versions.ifEmpty(null) ) - .set { software_versions } - } - - CENTRIFUGE_CLASSIFY ( ch_processed_reads ) - - 
CENTRIFUGE_PROCESS ( - CENTRIFUGE_CLASSIFY.out.report - .join( CENTRIFUGE_CLASSIFY.out.output ) - ) - - ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted ) - .set { ch_centrifuge_extracted } - - SEQKIT_GREP ( ch_centrifuge_extracted ) - - // As of 06/02/2022, with the upcoming newer versions of NextFlow, we will be able to do - // allowNull: true for both input and output, but until then, we have to use dummy files. - // and work arounds. - // https://github.com/nextflow-io/nextflow/pull/2893 - if (params.spades_run) { - SPADES_ASSEMBLE ( - SEQKIT_GREP.out.fastx - .combine(ch_dummy) - .combine(ch_dummy2) - ) - - SPADES_ASSEMBLE - .out - .assembly - .set { ch_assembly } - - software_versions - .mix ( SPADES_ASSEMBLE.out.versions.ifEmpty(null) ) - .set { software_versions } - } else if (params.megahit_run) { - MEGAHIT_ASSEMBLE ( - SEQKIT_GREP.out.fastx - ) - - MEGAHIT_ASSEMBLE - .out - .assembly - .set { ch_assembly } - - software_versions - .mix ( MEGAHIT_ASSEMBLE.out.versions.ifEmpty(null) ) - .set { software_versions } - } - - ch_assembly - .map { - meta, fastq -> - meta.is_assembly = true - [meta, fastq] - } - .set { ch_assembly } - - ch_assembly.ifEmpty { [ false, false ] } - - KRAKEN2_CLASSIFY ( ch_assembly ) - - KRAKEN2_EXTRACT_CONTIGS ( - ch_assembly - .join( KRAKEN2_CLASSIFY.out.kraken_output ), - params.kraken2_extract_bug - ) - - KRAKEN2_EXTRACT_CONTIGS - .out - .asm_filtered_contigs - .map { - meta, fastq -> - meta.organism = params.kraken2_extract_bug.split(/\s+/)[0].capitalize() - meta.serotypefinder_db = params.serotypefinder_db - [meta, fastq] - } - .set { ch_asm_filtered_contigs } - - SEROTYPEFINDER ( ch_asm_filtered_contigs ) - - SEQSERO2 ( ch_asm_filtered_contigs ) - - MLST ( ch_asm_filtered_contigs ) - - ABRICATE_RUN ( - ch_asm_filtered_contigs, - abricate_dbs - ) - - ABRICATE_RUN - .out - .abricated - .map { meta, abres -> [ abricate_dbs, abres ] } - .groupTuple(by: [0]) - .map { it -> tuple ( it[0], it[1].flatten() ) } - .set { 
ch_abricated } - - ABRICATE_SUMMARY ( ch_abricated ) - - CENTRIFUGE_CLASSIFY.out.kreport - .map { meta, kreport -> [ kreport ] } - .flatten() - .concat ( - KRAKEN2_CLASSIFY.out.kraken_report - .map { meta, kreport -> [ kreport ] } - .flatten(), - FASTQC.out.zip - .map { meta, zip -> [ zip ] } - .flatten() - ) - .set { ch_mqc_classify } - - if (params.serotypefinder_run) { - SEROTYPEFINDER - .out - .serotyped - .map { meta, tsv -> [ 'serotypefinder', tsv ] } - .groupTuple(by: [0]) - .map { it -> tuple ( it[0], it[1].flatten() ) } - .set { ch_mqc_custom_tbl } - } else if (params.seqsero2_run) { - SEQSERO2 - .out - .serotyped - .map { meta, tsv -> [ 'seqsero2', tsv ] } - .groupTuple(by: [0]) - .map { it -> tuple ( it[0], it[1].flatten() ) } - .set { ch_mqc_custom_tbl } - } - - ch_mqc_custom_tbl - .concat ( - ABRICATE_SUMMARY.out.ncbiamrplus.map{ it -> tuple ( it[0], it[1] )}, - ABRICATE_SUMMARY.out.resfinder.map{ it -> tuple ( it[0], it[1] )}, - ABRICATE_SUMMARY.out.megares.map{ it -> tuple ( it[0], it[1] )}, - ABRICATE_SUMMARY.out.argannot.map{ it -> tuple ( it[0], it[1] )}, - ) - .groupTuple(by: [0]) - .map { it -> [ it[0], it[1].flatten() ]} - .set { ch_mqc_custom_tbl } - - TABLE_SUMMARY ( ch_mqc_custom_tbl ) - - DUMP_SOFTWARE_VERSIONS ( - software_versions - .mix ( - FASTQC.out.versions, - CENTRIFUGE_CLASSIFY.out.versions, - CENTRIFUGE_PROCESS.out.versions, - SEQKIT_GREP.out.versions, - KRAKEN2_CLASSIFY.out.versions.ifEmpty(null), - KRAKEN2_EXTRACT_CONTIGS.out.versions.ifEmpty(null), - SEROTYPEFINDER.out.versions.ifEmpty(null), - SEQSERO2.out.versions.ifEmpty(null), - MLST.out.versions.ifEmpty(null), - ABRICATE_RUN.out.versions.ifEmpty(null), - ABRICATE_SUMMARY.out.versions.ifEmpty(null), - TABLE_SUMMARY.out.versions.ifEmpty(null) - ) - .unique() - .collectFile(name: 'collected_versions.yml') - ) - - DUMP_SOFTWARE_VERSIONS - .out - .mqc_yml - .concat ( - ch_mqc_classify, - TABLE_SUMMARY.out.mqc_yml - ) - .collect() - .set { ch_multiqc } - - MULTIQC ( ch_multiqc ) 
-} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (workflow.success) { - // CREATE APPROPRIATE DIRECTORIES AND MOVE AS REQUESTED BY STAKEHOLDER(S) - // - // Nextflow's .moveTo will error out if directories contain files and it - // would be complex to include logic to skip directories - // - def final_intermediate_dir = "${params.output}${params.fs}${params.pipeline}-steps" - def final_results_dir = "${params.output}${params.fs}${params.pipeline}-results" - def kraken2_ext_contigs = file( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs", type: 'dir' ) - def final_intermediate = file( final_intermediate_dir, type: 'dir' ) - def final_results = file( final_results_dir, type: 'dir' ) - def pipeline_output = file( params.output, type: 'dir' ) - - if ( !final_intermediate.exists() ) { - final_intermediate.mkdirs() - - FileHelper.visitFiles(Paths.get("${params.output}"), '*') { - if ( !(it.name ==~ /^(${params.cfsanpipename}|multiqc|\.nextflow|${workflow.workDir.name}|${params.pipeline}).*/) ) { - FileHelper.movePath( - it, Paths.get( "${final_intermediate_dir}${params.fs}${it.name}" ) - ) - } - } - } - - if ( kraken2_ext_contigs.exists() && !final_results.exists() ) { - final_results.mkdirs() - - FileHelper.movePath( - Paths.get( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs" ), - Paths.get( "${final_results_dir}${params.fs}kraken2_extract_contigs" ) - ) - } - - sendMail() - } -} - -workflow.onError { - sendMail() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - HELPER METHODS FOR CENTRIFLAKEN-HY WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def help() { - - Map helptext = [:] - - 
helptext.putAll ( - fastqEntryPointHelp() + - seqkitrmdupHelp(params).text + - kraken2Help(params).text + - centrifugeHelp(params).text + - megahitHelp(params).text + - spadesHelp(params).text + - serotypefinderHelp(params).text + - seqsero2Help(params).text + - mlstHelp(params).text + - abricateHelp(params).text + - wrapUpHelp() - ) - - return addPadding(helptext) -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/workflows/conf/centriflaken.config --- a/0.4.0/workflows/conf/centriflaken.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -params { - workflow_blueprint_by = 'Narjol.Gonzalez-Escalona@fda.hhs.gov' - workflow_built_by = 'Kranti.Konganti@fda.hhs.gov' - workflow_version = '0.2.1' - centrifuge_x = '/tool-local-data/cfsan-centriflaken-db/0/centrifuge/2022-04-12/ab' - centrifuge_extract_bug = 'Escherichia coli' - centrifuge_save_aligned = false - centrifuge_save_unaligned = false - centrifuge_out_fmt_sam = false - centrifuge_ignore_quals = false - kraken2_db = '/tool-local-data/cfsan-centriflaken-db/0/kraken2/standard-210914' - kraken2_confidence = '0.0' - kraken2_quick = false - kraken2_use_mpa_style = false - kraken2_minimum_base_quality = '0' - kraken2_report_zero_counts = false - kraken2_report_minimizer_data = false - kraken2_use_names = true - kraken2_extract_bug = params.centrifuge_extract_bug - flye_pacbio_raw = false - flye_pacbio_corr = false - flye_pacbio_hifi = false - flye_nano_raw = true - flye_nano_corr = false - flye_nano_hq = false - flye_genome_size = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? '5m' : '5.5m') - flye_polish_iter = false - flye_min_overlap = false - flye_scaffold = false - flye_meta = true - ectyper_run = false - ectyper_perc_opid = 90 - ectyper_perc_hpid = 95 - ectyper_perc_opcov = 95 - ectyper_perc_hpcov = 50 - serotypefinder_run = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? 
false : true) - serotypefinder_db = '/tool-local-data/cfsan-centriflaken-db/0/serotypefinder/2.0.2' - serotypefinder_min_cov = 0.80 - serotypefinder_min_threshold = 0.85 - serotypefinder_x = true - seqsero2_run = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? true : false) - seqsero2_t = 4 - seqsero2_m = 'k' - seqsero2_c = false - seqsero2_s = false - mlst_run = true - mlst_minid = 95 - mlst_mincov = 10 - mlst_minscore = 50 - amrfinderplus_run = false - amrfinderplus_db = '/tool-local-data/cfsan-centriflaken-db/0/amrfinderplus/3.10.24/latest' - amrfinderplus_genes = true - abricate_run = true - abricate_datadir = '/tool-local-data/cfsan-centriflaken-db/0/abricate/1.0.1/db' - abricate_minid = 90 - abricate_mincov = 80 - abricate_summary_run = true - seqkit_grep_on = false -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/workflows/conf/centriflaken_hy.config --- a/0.4.0/workflows/conf/centriflaken_hy.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -params { - workflow_blueprint_by = 'Narjol.Gonzalez-Escalona@fda.hhs.gov' - workflow_built_by = 'Kranti.Konganti@fda.hhs.gov' - workflow_version = '0.4.0' - seqkit_rmdup_run = false - seqkit_rmdup_n = false - seqkit_rmdup_s = true - seqkit_rmdup_d = false - seqkit_rmdup_D = false - seqkit_rmdup_P = false - seqkit_rmdup_i = false - centrifuge_x = '/tool-local-data/cfsan-centriflaken-db/0/centrifuge/2022-04-12/ab' - centrifuge_extract_bug = 'Escherichia coli' - centrifuge_save_aligned = false - centrifuge_save_unaligned = false - centrifuge_out_fmt_sam = false - centrifuge_ignore_quals = false - kraken2_db = '/tool-local-data/cfsan-centriflaken-db/0/kraken2/standard-210914' - kraken2_confidence = '0.0' - kraken2_quick = false - kraken2_use_mpa_style = false - kraken2_minimum_base_quality = '0' - kraken2_report_zero_counts = false - kraken2_report_minimizer_data = false - kraken2_use_names = true - kraken2_extract_bug = 
params.centrifuge_extract_bug - megahit_run = true - megahit_min_count = false - megahit_k_list = false - megahit_no_mercy = false - megahit_bubble_level = false - megahit_merge_level = false - megahit_prune_level = false - megahit_prune_depth = false - megahit_low_local_ratio = false - megahit_max_tip_len = false - megahit_no_local = false - megahit_kmin_1pass = false - megahit_preset = 'meta-sensitive' - megahit_mem_flag = 2 - megahit_min_contig_len = false - spades_run = false - spades_isolate = false - spades_sc = false - spades_meta = true - spades_bio = false - spades_corona = false - spades_rna = false - spades_plasmid = false - spades_metaviral = false - spades_metaplasmid = false - spades_rnaviral = false - spades_iontorrent = false - spades_only_assembler = false - spades_careful = false - spades_cov_cutoff = false - spades_k = false - spades_hmm = false - ectyper_run = false - ectyper_perc_opid = 90 - ectyper_perc_hpid = 95 - ectyper_perc_opcov = 95 - ectyper_perc_hpcov = 50 - serotypefinder_run = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? false : true) - serotypefinder_db = '/tool-local-data/cfsan-centriflaken-db/0/serotypefinder/2.0.2' - serotypefinder_min_cov = 0.80 - serotypefinder_min_threshold = 0.85 - serotypefinder_x = true - seqsero2_run = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? 
true : false) - seqsero2_t = 4 - seqsero2_m = 'k' - seqsero2_c = false - seqsero2_s = false - mlst_run = true - mlst_minid = 95 - mlst_mincov = 10 - mlst_minscore = 50 - amrfinderplus_run = false - amrfinderplus_db = '/tool-local-data/cfsan-centriflaken-db/0/amrfinderplus/3.10.24/latest' - amrfinderplus_genes = true - abricate_run = true - abricate_datadir = '/tool-local-data/cfsan-centriflaken-db/0/abricate/1.0.1/db' - abricate_minid = 90 - abricate_mincov = 80 - abricate_summary_run = true - seqkit_grep_on = false - fq_filter_by_len = 75 - fq_suffix = '_R1_001.fastq.gz' - fq2_suffix = '_R2_001.fastq.gz' -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/workflows/conf/nanofactory.config --- a/0.4.0/workflows/conf/nanofactory.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -params { - workflow_author = "Rodney.Engelbach@fda.hhs.gov" - workflow_version = "0.4.1" - sample_sheet = "" - global_settings = "" - log_file = "" - log_level = "info" - mode = "" - verbose = false - disable_project_setup = false - setup_purge_existing = false - setup_fix_existing = true - setup_nocopy = false - setup_runtype = "" - guppy_threads = 4 - guppy_config = "" - merge_overwrite = false - mail_group = "stakeholders" - help = false - enable_module = "'nanofactory/current'" -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/workflows/conf/process/centriflaken.process.config --- a/0.4.0/workflows/conf/process/centriflaken.process.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,111 +0,0 @@ -process { - withName: 'SEQKIT_SEQ' { - ext.args = [ - params.fq_filter_by_len ? 
"-m ${params.fq_filter_by_len}" : '' - ].join(' ').trim() - } - - if (params.seqkit_grep_on) { - withName: 'SEQKIT_GREP' { - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}seqkitgrep.nf").seqkitgrepHelp(params).helpparams - ) - } - } - - withName: 'CENTRIFUGE_CLASSIFY' { - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}centrifuge.nf").centrifugeHelp(params).helpparams - ) - } - - withName: 'KRAKEN2_CLASSIFY' { - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}kraken2.nf").kraken2Help(params).helpparams - ) - } - - withName: 'FLYE_ASSEMBLE' { - errorStrategy = 'ignore' - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}flye.nf").flyeHelp(params).helpparams - ) - } - - if (params.ectyper_run) { - withName: 'ECTYPER' { - ext.when = params.ectyper_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}ectyper.nf").ectyperHelp(params).helpparams - ) - } - } - - withName: 'SEROTYPEFINDER' { - ext.when = params.serotypefinder_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}serotypefinder.nf").serotypefinderHelp(params).helpparams - ) - } - - withName: 'SEQSERO2' { - ext.when = params.seqsero2_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}seqsero2.nf").seqsero2Help(params).helpparams - ) - } - - withName: 'MLST' { - ext.when = params.mlst_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}mlst.nf").mlstHelp(params).helpparams - ) - } - - if (params.amrfinderplus_run) { - withName: 'AMRFINDERPLUS_RUN' { - ext.when = params.amrfinderplus_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}amrfinderplus.nf").amrfinderplusHelp(params).helpparams - ) - } - } - - withName: 'ABRICATE_RUN' { - ext.when = params.abricate_run - ext.args = addParamsToSummary( - 
loadThisFunction("${params.toolshelp}${params.fs}abricate.nf").abricateHelp(params).helpparams - ) - } - - withName: 'ABRICATE_SUMMARY' { - ext.when = params.abricate_summary_run - } -} - -// Method to instantiate a new function parser -// Need to refactor using ScriptParser... another day -def loadThisFunction (func_file) { - GroovyShell grvy_sh = new GroovyShell() - def func = grvy_sh.parse(new File ( func_file ) ) - return func -} - -// Method to add relevant final parameters to summary log -def addParamsToSummary(Map params_to_add = [:]) { - - if (!params_to_add.isEmpty()) { - def not_null_params_to_add = params_to_add.findAll { - it.value.clivalue != null && - it.value.clivalue != '[:]' && - it.value.clivalue != '' - } - - params.logtheseparams += not_null_params_to_add.keySet().toList() - - return not_null_params_to_add.collect { - "${it.value.cliflag} ${it.value.clivalue.toString().replaceAll(/(?:^\s+|\s+$)/, '')}" - }.join(' ').trim() - } - return 1 -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/workflows/conf/process/centriflaken_hy.process.config --- a/0.4.0/workflows/conf/process/centriflaken_hy.process.config Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ -process { - withName: 'SEQKIT_SEQ' { - ext.args = [ - params.fq_filter_by_len ? 
"-m ${params.fq_filter_by_len}" : '' - ].join(' ').trim() - } - - if (params.seqkit_rmdup_run) { - withName: 'SEQKIT_RMDUP' { - ext.when = params.seqkit_rmdup_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}seqkitrmdup.nf").seqkitrmdupHelp(params).helpparams - ) - } - } - - if (params.seqkit_grep_on) { - withName: 'SEQKIT_GREP' { - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}seqkitgrep.nf").seqkitgrepHelp(params).helpparams - ) - } - } - - withName: 'CENTRIFUGE_CLASSIFY' { - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}centrifuge.nf").centrifugeHelp(params).helpparams - ) - } - - withName: 'KRAKEN2_CLASSIFY' { - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}kraken2.nf").kraken2Help(params).helpparams - ) - } - - withName: 'MEGAHIT_ASSEMBLE' { - ext.when = params.megahit_run - errorStrategy = 'ignore' - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}megahit.nf").megahitHelp(params).helpparams - ) - } - - withName: 'SPADES_ASSEMBLE' { - ext.when = params.spades_run - errorStrategy = 'ignore' - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}spades.nf").spadesHelp(params).helpparams - ) - } - - if (params.ectyper_run) { - withName: 'ECTYPER' { - ext.when = params.ectyper_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}ectyper.nf").ectyperHelp(params).helpparams - ) - } - } - - withName: 'SEROTYPEFINDER' { - ext.when = params.serotypefinder_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}serotypefinder.nf").serotypefinderHelp(params).helpparams - ) - } - - withName: 'SEQSERO2' { - ext.when = params.seqsero2_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}seqsero2.nf").seqsero2Help(params).helpparams - ) - } - - withName: 'MLST' { - 
ext.when = params.mlst_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}mlst.nf").mlstHelp(params).helpparams - ) - } - - if (params.amrfinderplus_run) { - withName: 'AMRFINDERPLUS_RUN' { - ext.when = params.amrfinderplus_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}amrfinderplus.nf").amrfinderplusHelp(params).helpparams - ) - } - } - - withName: 'ABRICATE_RUN' { - ext.when = params.abricate_run - ext.args = addParamsToSummary( - loadThisFunction("${params.toolshelp}${params.fs}abricate.nf").abricateHelp(params).helpparams - ) - } - - withName: 'ABRICATE_SUMMARY' { - ext.when = params.abricate_summary_run - } -} - -// Method to instantiate a new function parser -// Need to refactor using ScriptParser... another day -def loadThisFunction (func_file) { - GroovyShell grvy_sh = new GroovyShell() - def func = grvy_sh.parse(new File ( func_file ) ) - return func -} - -// Method to add relevant final parameters to summary log -def addParamsToSummary(Map params_to_add = [:]) { - - if (!params_to_add.isEmpty()) { - def not_null_params_to_add = params_to_add.findAll { - it.value.clivalue != null && - it.value.clivalue != '[:]' && - it.value.clivalue != '' - } - - params.logtheseparams += not_null_params_to_add.keySet().toList() - - return not_null_params_to_add.collect { - "${it.value.cliflag} ${it.value.clivalue.toString().replaceAll(/(?:^\s+|\s+$)/, '')}" - }.join(' ').trim() - } - return 1 -} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.0/workflows/nanofactory.nf --- a/0.4.0/workflows/nanofactory.nf Sun Aug 28 00:37:10 2022 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,185 +0,0 @@ -// -// Start nanofactory workflow. Since this is a special -// case workflow wherein most of the bioinformatics -// tools are not used, there won't be any modules or -// subworkflows and therefore all the processes -// reside here. -// - -// Include any necessary methods. 
-include { addPadding; summaryOfParams; stopNow} \ - from "${params.routines}" - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PROCESS DEFINITIONS FOR NANOFACTORY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -process SETPUBLISHDIR { - label 'process_femto' - module (params.enable_module ? params.enable_module : null) - conda (params.enable_conda ? params.enable_conda : null) - - input: - val options - - output: - stdout - - shell: - ''' - project_setup.py -s !{options.sample_sheet} \ - !{options.alt_settings} !{options.verbose} -b - ''' -} - -process PROJECTSETUP { - label 'process_femto' - publishDir "${publish_dir.trim()}", mode: 'copy', overwrite: false - module (params.enable_module ? params.enable_module : null) - conda (params.enable_conda ? params.enable_conda : null) - - input: - val options - val publish_dir - - output: - stdout - - script: - params.publish_dir = "${publish_dir.trim()}" - - shell: - ''' - project_setup.py -y -s !{options.sample_sheet} !{options.alt_settings} \ - !{options.purge} !{options.runtype} !{options.logfile} \ - !{options.loglevel} !{options.verbose} !{options.nocopy} \ - !{options.fix_existing} - - cat < original_source.txt - ''' -} - -process TRIMDEMUX { - label 'process_pico' - module (params.enable_module ? params.enable_module : null) - conda (params.enable_conda ? 
params.enable_conda : null) - cpus "${params.guppy_threads}" - - input: - val options - val original_source - - output: - path 'source.txt' - - shell: - ''' - trim_demux.py -s !{options.sample_sheet} !{options.verbose} \ - !{options.alt_settings} !{options.guppy_config} -t !{options.guppy_threads} - ''' -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - WORKFLOW ENTRY POINT -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow NANOFACTORY { - - if ( params.help ) { - log.info help() - } else if ( params.sample_sheet == null || - params.sample_sheet.length() == 0 ) { - - log.info help() - stopNow("Please provide absolute path to a JSON formatted sample sheet using the\n" + - "--sample_sheet option.") - } else { - log.info summaryOfParams() - - options = Channel.empty() - Channel - .from(setOptions()) - .set { options } - - take: - options - - main: - SETPUBLISHDIR(options) - PROJECTSETUP(options, SETPUBLISHDIR.out) - TRIMDEMUX(options, PROJECTSETUP.out) - } -} - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - HELPER METHODS FOR NANOFACTORY WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def setOptions() { - - Map options = [:] - - options['sample_sheet'] ?= "${params.sample_sheet}" - options['verbose'] = params.verbose ? "-v" : "" - options['alt_settings'] = params.global_settings ? "-c ${params.global_settings}" : "" - options['purge'] = params.setup_purge_existing ? "-p" : "" - options['logfile'] = params.log_file ? "-l ${params.log_file}" : "" - options['loglevel'] = params.log_level ? 
"--loglevel ${params.log_level}" : "" - options['nocopy'] = params.setup_nocopy ? "--nocopy" : "" - options['runtype'] = params.setup_runtype ? "-r ${params.setup_runtype}" : "" - options['fix_existing'] = params.setup_fix_existing ? "-f" : "" - options['guppy_config'] = params.guppy_config ? " -g ${params.guppy_config}" : "" - options['mode'] = params.mode ? "-m ${params.mode}" : "-m prod" - options['mail_group'] = params.mail_group ? "-g ${params.mail_group}" : "-g stakeholders" - options['guppy_threads'] = params.guppy_threads ? "${params.guppy_threads}" : 1 - options['pad'] = pad.toInteger() - options['nocapitalize'] = true - - return options -} - -def help() { - - Map helptext = [:] - - helptext['help'] = true - helptext['nocapitalize'] = true - helptext['Workflow'] = "${params.pipeline}" - helptext['Author'] = "${params.workflow_author}" - helptext['Version'] = "${params.workflow_version}\n" - helptext['Usage'] = "cpipes --pipeline nanofactory [options]\n" - helptext['Required'] = "" - helptext['--sample_sheet'] = "The JSON-formatted sample sheet for this run. Normally provided by Pore Refiner.\n" - helptext['Other options'] = "" - helptext['--global_settings'] = "An alternate global settings file. If not present the installed default will be used." - helptext['--log_file'] = "Path and file name to a log file relative to the project directory (Default: 'logs/workflow.log')" - helptext['--log_level'] = "One of 'debug', 'info', 'warning', 'error', 'fatal' (Default: 'info')" - helptext['--mode'] = "Set the run mode. 
One of 'dev', 'test', 'stage', or 'prod' (Default: 'prod')" - helptext['--verbose'] = "Use to enable more verbose console output from each tool\n" - helptext['Project setup options'] = "" - helptext['--disable_project_setup'] = "Do not do project setup (Default: setup is enabled)" - helptext['--setup_purge_existing'] = "Before setting up the project area delete any existing files (Default: don't purge)" - helptext['--setup_nocopy'] = "During setup, do NOT copy the original data files to the scrach location (Default: copy)" - helptext['--setup_runtype'] = "Set things up for the indicated run type (Currently not used)" - helptext['--setup_runtype'] = "Set things up for the indicated run type (Currently not used)" - helptext['--enable_module'] = "Software environment module. Ex: --enable_module 'nanofactory/current'" - helptext['--enable_conda'] = "CONDA environment module. Ex: --enable_conda nanofactory\n" - helptext['Help options'] = "" - helptext['--help'] = "Display this message.\n" - - return addPadding(helptext) -} diff -r 17890124001d -r 52045ea4679d 0.4.2/LICENSE.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/LICENSE.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,98 @@ +# CPIPES (CFSAN PIPELINES) + +## The modular pipeline repository at CFSAN, FDA + +**CPIPES** (CFSAN PIPELINES) is a collection of modular pipelines based on **NEXTFLOW**, +mostly for bioinformatics data analysis at **CFSAN, FDA.** + +--- + +### **LICENSES** + +\ +  + +**CPIPES** is licensed under: + +```text +MIT License + +In the U.S.A. Public Domain; elsewhere Copyright (c) 2022 U.S. 
Food and Drug Administration + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+``` + +\ +  + +Portions of **CPIPES** are built on modified versions of many tools, scripts and libraries from [nf-core/modules](https://github.com/nf-core/modules) and [nf-core/rnaseq](https://github.com/nf-core/rna-seq) which are originally licensed under: + +```text +MIT License + +Copyright (c) Philip Ewels +Copyright (c) Phil Ewels, Rickard Hammarén + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +``` + +\ +  + +The **MultiQC** report, in addition uses [DataTables](https://datatables.net), which is licensed under: + +```text +MIT License + +Copyright (C) 2008-2022, SpryMedia Ltd. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +``` diff -r 17890124001d -r 52045ea4679d 0.4.2/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,48 @@ +# CPIPES (CFSAN PIPELINES) + +## The modular pipeline repository at CFSAN, FDA + +**CPIPES** (CFSAN PIPELINES) is a collection of modular pipelines based on **NEXTFLOW**, +mostly for bioinformatics data analysis at **CFSAN, FDA.** + +--- + +### **Pipelines** + +--- +**CPIPES**: + + 1. `centriflaken` : [README](./readme/centriflaken.md). + 2. `centriflaken_hy` : [README](./readme/centriflaken_hy.md). + +#### Workflow Usage + +Following is the example of how to run the `centriflaken` pipeline on the **CFSAN** raven cluster. 
+ +```bash +module load cpipes/0.4.0 + +cpipes --pipeline centriflaken [options] +``` + +Example: + +```bash +cd /hpc/scratch/$USER +mkdir nf-cpipes +cd nf-cpipes +cpipes \ + --pipeline centriflaken \ + --input /path/to/fastq_pass_dir \ + --output /path/to/where/output/should/go \ + --user_email First.Last@fda.hhs.gov \ + -profile raven +``` + +The above command would run the pipeline and store the output wherever the author of the workflow decided it to be and the **NEXTFLOW** reports are always stored in the current working directory from where `cpipes` is run. For example, for the above command, a directory called `CPIPES-centriflaken` would hold all the **NEXTFLOW** +related logs, reports and trace files. + +### **BETA** + +--- +The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations. diff -r 17890124001d -r 52045ea4679d 0.4.2/assets/FDa-Logo-Blue---medium-01.png Binary file 0.4.2/assets/FDa-Logo-Blue---medium-01.png has changed diff -r 17890124001d -r 52045ea4679d 0.4.2/assets/FDa-Logo-replace-Blue-small-01.png Binary file 0.4.2/assets/FDa-Logo-replace-Blue-small-01.png has changed diff -r 17890124001d -r 52045ea4679d 0.4.2/assets/dummy_file.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/assets/dummy_file.txt Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,1 @@ +DuMmY diff -r 17890124001d -r 52045ea4679d 0.4.2/assets/dummy_file2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/assets/dummy_file2.txt Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,1 @@ +DuMmY diff -r 17890124001d -r 52045ea4679d 0.4.2/bin/check_samplesheet.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/bin/check_samplesheet.py Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 + +import os +import sys +import errno +import argparse + + +def parse_args(args=None): + Description = "Reformat samplesheet file and check its contents." 
+ Epilog = "Example usage: python check_samplesheet.py " + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("FILE_IN", help="Input samplesheet file.") + parser.add_argument("FILE_OUT", help="Output file.") + return parser.parse_args(args) + + +def make_dir(path): + if len(path) > 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise exception + + +def print_error(error, context="Line", context_str=""): + error_str = f"ERROR: Please check samplesheet -> {error}" + if context != "" and context_str != "": + error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'" + print(error_str) + sys.exit(1) + + +def check_samplesheet(file_in, file_out): + """ + This function checks that the samplesheet follows the following structure: + + sample,fq1,fq2,strandedness + SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz,forward + SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz,forward + SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq,,forward + SAMPLE_SE,SAMPLE_SE_RUN1_2.fastq.gz,,forward + + For an example see: + https://github.com/nf-core/test-datasets/blob/rnaseq/samplesheet/v3.1/samplesheet_test.csv + """ + + sample_mapping_dict = {} + with open(file_in, "r", encoding='utf-8-sig') as fin: + + ## Check header + MIN_COLS = 3 + HEADER = ["sample", "fq1", "fq2", "strandedness"] + header = [x.strip('"') for x in fin.readline().strip().split(",")] + if header[: len(HEADER)] != HEADER: + print( + f"ERROR: Please check samplesheet header -> {','.join(header)} != {','.join(HEADER)}" + ) + sys.exit(1) + + ## Check sample entries + for line in fin: + if line.strip(): + lspl = [x.strip().strip('"') for x in line.strip().split(",")] + + ## Check valid number of columns per row + if len(lspl) < len(HEADER): + print_error( + f"Invalid number of columns (minimum = {len(HEADER)})!", + "Line", + line, + ) + + num_cols = len([x for x in lspl 
if x]) + if num_cols < MIN_COLS: + print_error( + f"Invalid number of populated columns (minimum = {MIN_COLS})!", + "Line", + line, + ) + + ## Check sample name entries + sample, fq1, fq2, strandedness = lspl[: len(HEADER)] + if sample.find(" ") != -1: + print( + f"WARNING: Spaces have been replaced by underscores for sample: {sample}" + ) + sample = sample.replace(" ", "_") + if not sample: + print_error("Sample entry has not been specified!", "Line", line) + + ## Check FastQ file extension + for fastq in [fq1, fq2]: + if fastq: + if fastq.find(" ") != -1: + print_error("FastQ file contains spaces!", "Line", line) + # if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): + # print_error( + # "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", + # "Line", + # line, + # ) + + ## Check strandedness + strandednesses = ["unstranded", "forward", "reverse"] + if strandedness: + if strandedness not in strandednesses: + print_error( + f"Strandedness must be one of '{', '.join(strandednesses)}'!", + "Line", + line, + ) + else: + print_error( + f"Strandedness has not been specified! 
Must be one of {', '.join(strandednesses)}.", + "Line", + line, + ) + + ## Auto-detect paired-end/single-end + sample_info = [] ## [single_end, fq1, fq2, strandedness] + if sample and fq1 and fq2: ## Paired-end short reads + sample_info = ["0", fq1, fq2, strandedness] + elif sample and fq1 and not fq2: ## Single-end short reads + sample_info = ["1", fq1, fq2, strandedness] + else: + print_error("Invalid combination of columns provided!", "Line", line) + + ## Create sample mapping dictionary = {sample: [[ single_end, fq1, fq2, strandedness ]]} + if sample not in sample_mapping_dict: + sample_mapping_dict[sample] = [sample_info] + else: + if sample_info in sample_mapping_dict[sample]: + print_error("Samplesheet contains duplicate rows!", "Line", line) + else: + sample_mapping_dict[sample].append(sample_info) + + ## Write validated samplesheet with appropriate columns + if len(sample_mapping_dict) > 0: + out_dir = os.path.dirname(file_out) + make_dir(out_dir) + with open(file_out, "w") as fout: + fout.write( + ",".join(["sample", "single_end", "fq1", "fq2", "strandedness"]) + + "\n" + ) + for sample in sorted(sample_mapping_dict.keys()): + + ## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + if not all( + x[0] == sample_mapping_dict[sample][0][0] + for x in sample_mapping_dict[sample] + ): + print_error( + f"Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end!", + "Sample", + sample, + ) + + ## Check that multiple runs of the same sample are of the same strandedness + if not all( + x[-1] == sample_mapping_dict[sample][0][-1] + for x in sample_mapping_dict[sample] + ): + print_error( + f"Multiple runs of a sample must have the same strandedness!", + "Sample", + sample, + ) + + for idx, val in enumerate(sample_mapping_dict[sample]): + fout.write(",".join([f"{sample}_T{idx+1}"] + val) + "\n") + else: + print_error(f"No entries to process!", "Samplesheet: {file_in}") + + +def main(args=None): + args = parse_args(args) + check_samplesheet(args.FILE_IN, args.FILE_OUT) + + +if __name__ == "__main__": + sys.exit(main()) diff -r 17890124001d -r 52045ea4679d 0.4.2/bin/create_mqc_data_table.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/bin/create_mqc_data_table.py Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,124 @@ +#!/usr/bin/env python + +import sys +import yaml +from textwrap import dedent + +def main() : + """ + Takes a tab-delimited text file with a mandatory header + column and generates an HTML table. + """ + + args = sys.argv + if (len(args) < 2 or len(args) > 3): + print(f"\nTwo CL arguments are required!\n") + exit(1) + + table_sum_on = args[1].lower() + workflow_name = args[2].lower() + + with open(f"{table_sum_on}.tblsum.txt", "r") as tbl: + header = tbl.readline() + header_cols = header.strip().split('\t') + + html = [ + dedent( + f""" +
+ + + + """ + ) + ] + + for header_col in header_cols: + html.append( + dedent( + f""" + """ + ) + ) + + html.append( + dedent( + """ + + + """ + ) + ) + + for row in tbl: + html.append("\n") + data_cols = row.strip().split('\t') + if ( len(header_cols) != len(data_cols) ): + print(f"\nWARN: Number of header columns ({len(header_cols)}) and data " + + f"columns ({len(data_cols)}) are not equal!\nWill append empty columns!\n") + if ( len(header_cols) > len(data_cols) ): + data_cols += (( len(header_cols) - len(data_cols) ) * ' ' ) + print(len(data_cols)) + else: + header_cols += (( len(data_cols) - len(header_cols) ) * ' ') + + html.append( + dedent( + f""" + + """ + ) + ) + + for data_col in data_cols[1:]: + html.append( + dedent( + f""" + """ + ) + ) + html.append("\n") + html.append("\n") + html.append("
{header_col}
{data_cols[0]}{data_col}
\n") + html.append("
\n") + + mqc_yaml = { + "id": f"{table_sum_on.upper()}_collated_table", + "section_name": f"{table_sum_on.upper()}", + "section_href": f"https://cfsan-git.fda.gov/Kranti.Konganti/{workflow_name}", + "plot_type": "html", + "description": "The results table shown here is a collection from all samples.", + "data": ('').join(html), + } + + with open(f"{table_sum_on.lower()}_mqc.yml", "w") as html_mqc: + yaml.dump(mqc_yaml, html_mqc, default_flow_style=False) + +if __name__ == "__main__": + main() \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/bin/extract_assembled_filtered_contigs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/bin/extract_assembled_filtered_contigs.py Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,56 @@ +#!/usr/bin/env python + +import os +import argparse +import logging as log +import pandas as pd +import numpy as np +from Bio import SeqIO + + +def main(): + # READ IN ARGUMENTS + desc = """This script is part of the centriflaken pipeline: + - accepts assembled contigs (assembly.fasta from flye) and kraken classification (kraken_output.txt from kraken2) output + - filters the assembled contigs based on taxa specified + - outputs an assembled and filtered fasta (assembled_filtered_contigs.fasta) """ + parser = argparse.ArgumentParser(prog='extract_assembled_filtered_contigs.py', description=desc) + parser.add_argument("-v", dest='verbose', action="store_true", help="for more verbose output") + parser.add_argument("-i", dest='input_fasta', required=True, help="Path to input fasta file (assembled output from flye)") + parser.add_argument("-o", dest='assembled_filtered_contigs', required=True, help="Path to output fasta file filtered by taxa specified") + parser.add_argument("-k", dest='kraken_output', required=True, help="Path to kraken output file") + parser.add_argument("-b", dest='bug', required=True, help="name or fragment of name of bug") + args = parser.parse_args() + + # MORE INFO IF VERBOSE + if args.verbose: + 
log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG) + else: + log.basicConfig(format="%(levelname)s: %(message)s") + + # ASSIGN VARIABLES + input_fasta = args.input_fasta + assembled_filtered_contigs = args.assembled_filtered_contigs + kraken_output = args.kraken_output + bug = args.bug + + # Match and filter taxa names and ids from kraken output file + report_df = pd.read_csv(kraken_output, delimiter="\t", usecols=[1,2], header=None) + report_df.columns = ["contig", "name"] + report_df['name'] = report_df['name'].str.lower() + filt_report_df = report_df[report_df['name'].str.contains(bug.lower())] + print("\nMatching taxa names and ids:\n",filt_report_df) + filtered_contig_list = filt_report_df['contig'] + + # Extract filtered reads from assembled input fasta and write to output fasta + print ("Indexing reads..") + rec = SeqIO.index(input_fasta,"fasta") + TF=open(assembled_filtered_contigs, "w") + for i in filtered_contig_list: + if i in rec: + SeqIO.write(rec[i], TF, "fasta") + TF.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/bin/fastq_dir_to_samplesheet.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/bin/fastq_dir_to_samplesheet.py Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 + +import os +import sys +import glob +import argparse +import re + + +def parse_args(args=None): + Description = "Generate samplesheet from a directory of FastQ files." + Epilog = "Example usage: python fastq_dir_to_samplesheet.py " + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("FASTQ_DIR", help="Folder containing raw FastQ files.") + parser.add_argument("SAMPLESHEET_FILE", help="Output samplesheet file.") + parser.add_argument( + "-st", + "--strandedness", + type=str, + dest="STRANDEDNESS", + default="unstranded", + help="Value for 'strandedness' in samplesheet. 
Must be one of 'unstranded', 'forward', 'reverse'.", + ) + parser.add_argument( + "-r1", + "--read1_extension", + type=str, + dest="READ1_EXTENSION", + default="_R1_001.fastq.gz", + help="File extension for read 1.", + ) + parser.add_argument( + "-r2", + "--read2_extension", + type=str, + dest="READ2_EXTENSION", + default="_R2_001.fastq.gz", + help="File extension for read 2.", + ) + parser.add_argument( + "-se", + "--single_end", + dest="SINGLE_END", + action="store_true", + help="Single-end information will be auto-detected but this option forces paired-end FastQ files to be treated as single-end so only read 1 information is included in the samplesheet.", + ) + parser.add_argument( + "-sn", + "--sanitise_name", + dest="SANITISE_NAME", + action="store_true", + help="Whether to further sanitise FastQ file name to get sample id. Used in conjunction with --sanitise_name_delimiter and --sanitise_name_index.", + ) + parser.add_argument( + "-sd", + "--sanitise_name_delimiter", + type=str, + dest="SANITISE_NAME_DELIMITER", + default="_", + help="Delimiter to use to sanitise sample name.", + ) + parser.add_argument( + "-si", + "--sanitise_name_index", + type=int, + dest="SANITISE_NAME_INDEX", + default=1, + help="After splitting FastQ file name by --sanitise_name_delimiter all elements before this index (1-based) will be joined to create final sample name.", + ) + return parser.parse_args(args) + + +def fastq_dir_to_samplesheet( + fastq_dir, + samplesheet_file, + strandedness="unstranded", + read1_extension="_R1_001.fastq.gz", + read2_extension="_R2_001.fastq.gz", + single_end=False, + sanitise_name=False, + sanitise_name_delimiter="_", + sanitise_name_index=1, +): + def sanitize_sample(path, extension): + """Retrieve sample id from filename""" + sample = os.path.basename(path).replace(extension, "") + if sanitise_name: + if sanitise_name_index > 0: + sample = sanitise_name_delimiter.join( + os.path.basename(path).split(sanitise_name_delimiter)[ + :sanitise_name_index + 
] + ) + # elif sanitise_name_index == -1: + # sample = os.path.basename(path)[ :os.path.basename(path).index('.') ] + return sample + + def get_fastqs(extension): + """ + Needs to be sorted to ensure R1 and R2 are in the same order + when merging technical replicates. Glob is not guaranteed to produce + sorted results. + See also https://stackoverflow.com/questions/6773584/how-is-pythons-glob-glob-ordered + """ + abs_fq_files = glob.glob(os.path.join(fastq_dir, f"**", f"*{extension}"), recursive=True) + return sorted( + [ + fq for _, fq in enumerate(abs_fq_files) if re.match('^((?!undetermined|unclassified|downloads).)*$', fq, flags=re.IGNORECASE) + ] + ) + + read_dict = {} + + ## Get read 1 files + for read1_file in get_fastqs(read1_extension): + sample = sanitize_sample(read1_file, read1_extension) + if sample not in read_dict: + read_dict[sample] = {"R1": [], "R2": []} + read_dict[sample]["R1"].append(read1_file) + + ## Get read 2 files + if not single_end: + for read2_file in get_fastqs(read2_extension): + sample = sanitize_sample(read2_file, read2_extension) + read_dict[sample]["R2"].append(read2_file) + + ## Write to file + if len(read_dict) > 0: + out_dir = os.path.dirname(samplesheet_file) + if out_dir and not os.path.exists(out_dir): + os.makedirs(out_dir) + + with open(samplesheet_file, "w") as fout: + header = ["sample", "fq1", "fq2", "strandedness"] + fout.write(",".join(header) + "\n") + for sample, reads in sorted(read_dict.items()): + for idx, read_1 in enumerate(reads["R1"]): + read_2 = "" + if idx < len(reads["R2"]): + read_2 = reads["R2"][idx] + sample_info = ",".join([sample, read_1, read_2, strandedness]) + fout.write(f"{sample_info}\n") + else: + error_str = ( + "\nWARNING: No FastQ files found so samplesheet has not been created!\n\n" + ) + error_str += "Please check the values provided for the:\n" + error_str += " - Path to the directory containing the FastQ files\n" + error_str += " - '--read1_extension' parameter\n" + error_str += " - 
'--read2_extension' parameter\n" + print(error_str) + sys.exit(1) + + +def main(args=None): + args = parse_args(args) + + strandedness = "unstranded" + if args.STRANDEDNESS in ["unstranded", "forward", "reverse"]: + strandedness = args.STRANDEDNESS + + fastq_dir_to_samplesheet( + fastq_dir=args.FASTQ_DIR, + samplesheet_file=args.SAMPLESHEET_FILE, + strandedness=strandedness, + read1_extension=args.READ1_EXTENSION, + read2_extension=args.READ2_EXTENSION, + single_end=args.SINGLE_END, + sanitise_name=args.SANITISE_NAME, + sanitise_name_delimiter=args.SANITISE_NAME_DELIMITER, + sanitise_name_index=args.SANITISE_NAME_INDEX, + ) + + +if __name__ == "__main__": + sys.exit(main()) diff -r 17890124001d -r 52045ea4679d 0.4.2/bin/prepare_nanopore_fastq_dir.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/bin/prepare_nanopore_fastq_dir.py Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 + +import os +import re +import glob +import argparse +import logging + +def main(): + # READ IN ARGUMENTS + desc = """ + Takes in a file with flowcell ID, one per line and creates soft links + to 'fastq_pass' directory at target location. + + Ex: + + prepare_nanopore_fastq_dir.py \ + -o /hpc/scratch/Kranti.Konganti/np_test \ + -f flowcells.txt + + where flowcells.txt contains the following lines: + + FAL11127 + FAL11151 + + """ + parser = argparse.ArgumentParser(prog='prepare_nanopore_fastq_dir.py', + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description=desc) + required = parser.add_argument_group('required arguments') + + required.add_argument("-f", dest='flowcells', required=True, + help="Path to a text file containing Nanopore flowcell IDs, one per line") + required.add_argument("-i", dest='inputdir', + required=False, action='append', nargs='*', + help="Path to search directory. 
This directory location is where" + + " the presence of 'fastq_pass' will be searched for each flowcell.") + required.add_argument("-o", dest='outputdir', + required=True, + help="Path to output directory. This directory is created by the script" + + " and new soft links (symlinks) are created in this directory.") + + args = parser.parse_args() + flowcells = args.flowcells + output = args.outputdir + inputs = args.inputdir + + logging.basicConfig(format='%(asctime)s - %(levelname)s => %(message)s', level=logging.DEBUG) + + if not inputs: + inputs = ['/projects/nanopore/raw'] + nanopore_machines = ['RazorCrest', 'Revolution', 'ObiWan', 'MinIT', + 'Mayhem', 'CaptainMarvel', 'MinION', 'MinION_Padmini', 'RogueOne'] + logging.info(f"Searching default path(s). Use -i option if custom path should be searched.") + else: + nanopore_machines = ['custom'] + + fastq_pass_found = {} + was_fastq_pass_found = [] + + for each_input in inputs: + for machine in nanopore_machines: + if ''.join(nanopore_machines) != 'custom': + input = os.path.join(each_input, machine) + else: + input = ''.join(each_input) + + logging.info(f"Searching path: {input}") + + if (os.path.exists(flowcells) and os.path.getsize(flowcells) > 0): + with open(flowcells, 'r') as fcells: + for flowcell in fcells: + if re.match('^\s*$', flowcell): + continue + flowcell = flowcell.strip() + fastq_pass_path = glob.glob(os.path.join(input, flowcell, f"**", f"*[!fast5]*", 'fastq_pass')) + # Try one more time since the flowcell user is trying to query may be the parent directory + # of fastq_pass + fastq_pass = fastq_pass_path if fastq_pass_path else glob.glob(os.path.join(input, f"**", f"*[!fast5]*", flowcell, 'fastq_pass')) + if not fastq_pass: + # logging.warning(f"Flowcell " + + # os.path.join(input, flowcell).strip() + + # f" does not seem to have a fastq_pass directory! 
Skipped!!") + if not flowcell in fastq_pass_found.keys(): + fastq_pass_found[flowcell] = 0 + else: + fastq_pass_found[flowcell] = 1 + sym_link_dir = os.path.join(output, flowcell) + sym_link_dir_dest = os.path.join(sym_link_dir, 'fastq_pass') + if not os.path.exists(sym_link_dir): + os.makedirs(sym_link_dir) + os.symlink( + ''.join(fastq_pass), + sym_link_dir_dest, target_is_directory=True + ) + logging.info(f"New soft link created: {sym_link_dir_dest}") + else: + logging.info(f"Soft link {sym_link_dir_dest} already exists! Skipped!!") + fcells.close() + else: + logging.error(f"File {flowcells} is empty or does not exist!\n") + + for k,v in fastq_pass_found.items(): + if not v: + was_fastq_pass_found.append(k) + + if was_fastq_pass_found: + logging.warning("Did not find fastq_pass folder for the supplied flowcells: " + + ', '.join(was_fastq_pass_found)) + + if was_fastq_pass_found and len(was_fastq_pass_found) == len(fastq_pass_found): + logging.error(f"None of the supplied flowcells were found! 
The output directory, {output} may not have been created!") + else: + logging.info(f"NOTE: Now you can use {output} directory as --input to cpipes.\n") + +if __name__ == "__main__": + main() \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/bin/process_centrifuge_output.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/bin/process_centrifuge_output.py Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +import os +import argparse +import logging as log +import pandas as pd +import numpy as np +from Bio import SeqIO + + +def main(): + # READ IN ARGUMENTS + desc = """ + This script is part of the centriflaken pipeline: It processes centrifuge + output and produces either a filtered FASTQ or a text file of FASTQ IDs based + on the supplied taxa/bug + """ + parser = argparse.ArgumentParser(prog='process_centrifuge_output.py', description=desc) + parser.add_argument("-v", dest='verbose', action="store_true", help="For more verbose output") + parser.add_argument("-i", dest='input_fastq', required=False, + help="Path to input FASTQ file (same as input to centrifuge). 
If not mentioned, \ + a text file of sequence IDs are produced instead of a FASTQ file") + parser.add_argument("-t", dest='taxa_filtered_fastq_file', required=True, + help="Path to output FASTQ or output text file filtered by the taxa specified") + parser.add_argument("-r", dest='cent_report', required=True, help="Path to centrifuge report") + parser.add_argument("-o", dest='cent_output', required=True, help="Path to centrifuge output") + parser.add_argument("-b", dest='bug', required=True, + help="Name or fragment of name of the bug by which reads are extracted") + args = parser.parse_args() + + # MORE INFO IF VERBOSE + if args.verbose: + log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG) + else: + log.basicConfig(format="%(levelname)s: %(message)s") + + # ASSIGN VARIABLES + input_fastq = args.input_fastq + taxa_filtered_fastq_file = args.taxa_filtered_fastq_file + cent_report = args.cent_report + cent_output = args.cent_output + bug = args.bug + report_col_list = ["name", "taxID"] + output_col_list = ["taxID", "readID"] + + # Match and filter taxa names and ids from centrifuge report file + report_df = pd.read_csv(cent_report, delimiter="\t", usecols=report_col_list) + report_df['name'] = report_df['name'].str.lower() + filt_report_df = report_df[report_df['name'].str.contains(bug.lower())] + #print("\nMatching taxa names and ids:\n",filt_report_df) + taxID_list = filt_report_df['taxID'] + + # Match the above tax ids to read ids from centrifuge output file and deduplicate + output_df = pd.read_csv(cent_output, delimiter="\t", usecols=output_col_list) + filt_output_df = output_df.loc[output_df['taxID'].isin(taxID_list)] + readID_list = filt_output_df['readID'] + readID_dedup_list = np.unique(readID_list) + TF=open(taxa_filtered_fastq_file, "w") + + if (not input_fastq): + # print("\nFILTERED READ ID LIST:\n", readID_dedup_list) + for ID in readID_dedup_list: + TF.write(f"{ID}\n") + else: + # Extract filtered reads from input fastq and write to 
output fastq + print ("Indexing reads..") + rec = SeqIO.index(input_fastq,"fastq") + for i in readID_dedup_list: + if i in rec: + SeqIO.write(rec[i], TF, "fastq") + + TF.close() + +if __name__ == "__main__": + main() \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/conf/base.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/conf/base.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,49 @@ +params { + fs = File.separator + cfsanpipename = 'CPIPES' + center = 'CFSAN, FDA.' + libs = "${projectDir}${params.fs}lib" + modules = "${projectDir}${params.fs}modules" + projectconf = "${projectDir}${params.fs}conf" + assetsdir = "${projectDir}${params.fs}assets" + subworkflows = "${projectDir}${params.fs}subworkflows" + workflows = "${projectDir}${params.fs}workflows" + workflowsconf = "${workflows}${params.fs}conf" + routines = "${libs}${params.fs}routines" + toolshelp = "${libs}${params.fs}help" + swmodulepath = "${params.fs}nfs${params.fs}software${params.fs}modules" + tracereportsdir = "${launchDir}${params.fs}${cfsanpipename}-${params.pipeline}${params.fs}nextflow-reports" + dummyfile = "${projectDir}${params.fs}assets${params.fs}dummy_file.txt" + dummyfile2 = "${projectDir}${params.fs}assets${params.fs}dummy_file2.txt" + linewidth = 80 + pad = 32 + pipeline = null + help = null + input = null + output = null + metadata = null + publish_dir_mode = "copy" + publish_dir_overwrite = true + user_email = null +} + +dag { + enabled = true + file = "${params.tracereportsdir}${params.fs}${params.pipeline}_dag.html" +} + +report { + enabled = true + file = "${params.tracereportsdir}${params.fs}${params.pipeline}_exec_report.html" +} + +trace { + enabled = true + file = "${params.tracereportsdir}${params.fs}${params.pipeline}_exec_trace.txt" +} + +timeline { + enabled = true + file = "${params.tracereportsdir}${params.fs}${params.pipeline}_exec_timeline.html" +} + diff -r 17890124001d -r 52045ea4679d 0.4.2/conf/fastq.config --- /dev/null Thu Jan 
01 00:00:00 1970 +0000 +++ b/0.4.2/conf/fastq.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,9 @@ +params { + fq_filter_by_len = "4000" + fq_suffix = ".fastq.gz" + fq2_suffix = false + fq_strandedness = "unstranded" + fq_single_end = false + fq_filename_delim = "_" + fq_filename_delim_idx = "1" +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/conf/logtheseparams.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/conf/logtheseparams.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,16 @@ +params { + logtheseparams = [ + "${params.metadata}" ? 'metadata' : null, + "${params.input}" ? 'input' : null, + "${params.output}" ? 'output' : null, + "${params.fq_suffix}" ? 'fq_suffix' : null, + "${params.fq2_suffix}" ? 'fq2_suffix' : null, + "${params.fq_strandedness}" ? 'fq_strandedness' : null, + "${params.fq_single_end}" ? 'fq_single_end' : null, + "${params.fq_filter_by_len}" ? 'fq_filter_by_len' : null, + "${params.fq_filename_delim}" ? 'fq_filename_delim' : null, + "${params.fq_filename_delim_idx}" ? 'fq_filename_delim_idx' : null, + 'enable_conda', + 'enable_module', + ] +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/conf/manifest.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/conf/manifest.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,8 @@ +manifest { + author = 'Kranti.Konganti@fda.hhs.gov' + homePage = 'https://cfsan-git.fda.gov/cfsan-dev/cpipes' + name = 'CPIPES' + version = '0.4.0' + nextflowVersion = '>=21.12' + description = 'Modular Nextflow pipelines at CFSAN, FDA.' +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/conf/modules.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/conf/modules.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,87 @@ +process { + publishDir = [ + path: { + "${task.process.tokenize(':')[-1].toLowerCase()}" == "multiqc" ? 
+ "${params.output}${params.fs}${params.pipeline.toLowerCase()}-${task.process.tokenize(':')[-1].toLowerCase()}" : + "${params.output}${params.fs}${task.process.tokenize(':')[-1].toLowerCase()}" + }, + mode: params.publish_dir_mode, + overwrite: params.publish_dir_overwrite, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + errorStrategy = { + ![0].contains(task.exitStatus) ? dynamic_retry(task.attempt, 10) : 'finish' + } + + maxRetries = 80 + + withLabel: 'process_femto' { + cpus = 2 + } + + withLabel: 'process_pico' { + cpus = 2 + } + + withLabel: 'process_nano' { + cpus = 4 + } + + withLabel: 'process_micro' { + cpus = 8 + } + + withLabel: 'process_only_mem_low' { + cpus = 2 + } + + withLabel: 'process_only_mem_medium' { + cpus = 2 + } + + withLabel: 'process_only_mem_high' { + cpus = 2 + } + + withLabel: 'process_low' { + cpus = 8 + } + + withLabel: 'process_medium' { + cpus = 8 + } + + withLabel: 'process_high' { + cpus = 8 + } + + withLabel: 'process_higher' { + cpus = 8 + } + + withLabel: 'process_gigantic' { + cpus = 8 + } +} + +if ( (params.input || params.metadata ) && params.pipeline ) { + try { + includeConfig "${params.workflowsconf}${params.fs}process${params.fs}${params.pipeline}.process.config" + } catch (Exception e) { + System.err.println('-'.multiply(params.linewidth) + "\n" + + "\033[0;31m${params.cfsanpipename} - ERROR\033[0m\n" + + '-'.multiply(params.linewidth) + "\n" + "\033[0;31mCould not load " + + "default pipeline's process configuration. Please provide a pipeline \n" + + "name using the --pipeline option.\n\033[0m" + '-'.multiply(params.linewidth) + "\n") + System.exit(1) + } +} + +// Function will return after sleeping for some time. +// Sleep time increases exponentially by task attempt. 
+def dynamic_retry(task_retry_num, factor_by) { + // sleep(Math.pow(2, task_retry_num.toInteger()) * factor_by.toInteger() as long) + sleep(Math.pow(1.27, task_retry_num.toInteger()) as long) + return 'retry' +} diff -r 17890124001d -r 52045ea4679d 0.4.2/conf/multiqc/centriflaken_hy_mqc.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/conf/multiqc/centriflaken_hy_mqc.yml Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,82 @@ +custom_logo: "FDa-Logo-Blue---medium-01.png" +custom_logo_url: "https://www.fda.gov/about-fda/fda-organization/center-food-safety-and-applied-nutrition-cfsan" +custom_logo_title: "CFSAN, FDA" +title: CPIPES Report +intro_text: > + CPIPES (CFSAN PIPELINES) is a modular bioinformatics data analysis project at CFSAN, FDA based on NEXTFLOW DSL2. +report_comment: > + This report has been generated by the CPIPES - Workflow_Name_Placeholder + analysis pipeline. Only certain tables and plots are reported here. For complete results, please refer to the analysis pipeline output directory. 
+report_header_info: + - CPIPES Version: CPIPES_Version_Placeholder + - Workflow: Workflow_Name_Placeholder + - Workflow Version: Workflow_Version_Placeholder + - Input Directory: Workflow_Input_Placeholder + - Output Directory: Workflow_Output_Placeholder + - Developer E-mail: 'Kranti.Konganti@fda.hhs.gov' + - Stakeholder E-mail: 'Narjol.Gonzalez-Escalona@fda.hhs.gov' +show_analysis_paths: False +show_analysis_time: False +report_section_order: + MLST_collated_table: + order: -989 + ECTYPER_collated_table: + order: -990 + SEROTYPEFINDER_collated_table: + order: -991 + SEQSERO2_collated_table: + order: -992 + ABRICATE_ECOLI_VF_collated_table: + order: -993 + ABRICATE_NCBI_collated_table: + order: -994 + ABRICATE_NCBIAMRPLUS_collated_table: + order: -995 + ABRICATE_MEGARES_collated_table: + order: -996 + ABRICATE_RESFINDER_collated_table: + order: -997 + ABRICATE_ARGANNOT_collated_table: + order: -998 + software_versions: + order: -999 + +export_plots: true + +# Run only these modules +run_modules: + - fastqc + - kraken + - custom_content + +module_order: + - fastqc: + name: 'FastQC' + info: 'section of the report shows FastQC results before adapter trimming.' + path_filters: + - '*_fastqc.zip' + - kraken: + name: 'Centrifuge' + href: 'https://ccb.jhu.edu/software/centrifuge' + doi: '10.1101/gr.210641.116' + info: > + section of the report shows how reads are classified. + Please note that the plot title below is shown as + Kraken2: Top taxa since centrifuge-kreport was used + to create Kraken-style reports from centrifuge output files. + path_filters: + - '*.kreport.txt' + - kraken: + name: 'Kraken2' + info: 'section of the report shows how assembled contigs are classified.' 
+ path_filters: + - '*.report.txt' + +extra_fn_clean_exts: + - '.centrifuge.kreport' + - '.report' + +table_columns_visible: + Kraken: False + Kraken2: False + Centrifuge: False \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/conf/multiqc/centriflaken_mqc.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/conf/multiqc/centriflaken_mqc.yml Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,82 @@ +custom_logo: "FDa-Logo-Blue---medium-01.png" +custom_logo_url: "https://www.fda.gov/about-fda/fda-organization/center-food-safety-and-applied-nutrition-cfsan" +custom_logo_title: "CFSAN, FDA" +title: CPIPES Report +intro_text: > + CPIPES (CFSAN PIPELINES) is a modular bioinformatics data analysis project at CFSAN, FDA based on NEXTFLOW DSL2. +report_comment: > + This report has been generated by the CPIPES - Workflow_Name_Placeholder + analysis pipeline. Only certain tables and plots are reported here. For complete results, please refer to the analysis pipeline output directory. 
+report_header_info: + - CPIPES Version: CPIPES_Version_Placeholder + - Workflow: Workflow_Name_Placeholder + - Workflow Version: Workflow_Version_Placeholder + - Input Directory: Workflow_Input_Placeholder + - Output Directory: Workflow_Output_Placeholder + - Developer E-mail: 'Kranti.Konganti@fda.hhs.gov' + - Stakeholder E-mail: 'Narjol.Gonzalez-Escalona@fda.hhs.gov' +show_analysis_paths: False +show_analysis_time: False +report_section_order: + MLST_collated_table: + order: -989 + ECTYPER_collated_table: + order: -990 + SEROTYPEFINDER_collated_table: + order: -991 + SEQSERO2_collated_table: + order: -992 + ABRICATE_ECOLI_VF_collated_table: + order: -993 + ABRICATE_NCBI_collated_table: + order: -994 + ABRICATE_NCBIAMRPLUS_collated_table: + order: -995 + ABRICATE_MEGARES_collated_table: + order: -996 + ABRICATE_RESFINDER_collated_table: + order: -997 + ABRICATE_ARGANNOT_collated_table: + order: -998 + software_versions: + order: -999 + +export_plots: true + +# Run only these modules +run_modules: + - fastqc + - kraken + - custom_content + +module_order: + - fastqc: + name: 'FastQC' + info: 'section of the report shows FastQC results before adapter trimming.' + path_filters: + - '*_fastqc.zip' + - kraken: + name: 'Centrifuge' + href: 'https://ccb.jhu.edu/software/centrifuge' + doi: '10.1101/gr.210641.116' + info: > + section of the report shows how reads are classified. + Please note that the plot title below is shown as + Kraken2: Top taxa since centrifuge-kreport was used + to create Kraken-style reports from centrifuge output files. + path_filters: + - '*.kreport.txt' + - kraken: + name: 'Kraken2' + info: 'section of the report shows how assembled contigs are classified.' 
+ path_filters: + - '*.report.txt' + +extra_fn_clean_exts: + - '.centrifuge.kreport' + - '.report' + +table_columns_visible: + Kraken: False + Kraken2: False + Centrifuge: False \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/cpipes --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/cpipes Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,72 @@ +#!/usr/bin/env nextflow + +/* +---------------------------------------------------------------------------------------- + cfsan-dev/cpipes +---------------------------------------------------------------------------------------- + NAME : CPIPES + DESCRIPTION : Modular Nextflow pipelines at CFSAN, FDA. + GITLAB : https://cfsan-git.fda.gov/cfsan-dev/cpipes + JIRA : https://sde.fda.gov/jira/projects/CPIPES/ + CONTRIBUTORS : Kranti.Konganti@fda.hhs.gov +---------------------------------------------------------------------------------------- +*/ + +// Enable DSL 2 +nextflow.enable.dsl = 2 + +// Default routines for MAIN +include { pipelineBanner; stopNow; } from "${params.routines}" + +// Our banner for CPIPES +log.info pipelineBanner() + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOW CAN BE USED TO RUN A SPECIFIC PIPELINE. THIS IS THE RECOMMENDED WAY. + NEED TO FIND A BETTER SOLUTION IF WE SEE A LOT OF PIPELINES. + See: https://github.com/nf-core/rnaseq/issues/619 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +switch ("${params.pipeline}") { + case "nanofactory": + include { NANOFACTORY } from "${params.workflows}${params.fs}${params.pipeline}" + break + case "centriflaken": + include { CENTRIFLAKEN } from "${params.workflows}${params.fs}${params.pipeline}" + break + case "centriflaken_hy": + include { CENTRIFLAKEN_HY } from "${params.workflows}${params.fs}${params.pipeline}" + break + default: + stopNow("PLEASE MENTION A PIPELINE NAME. 
Ex: --pipeline centriflaken") +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN ALL WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow { + // THIS IS REPETETIVE BUT WE ARE NOT ALLOWED TO INCLUDE "INCLUDE" + // INSIDE WORKFLOW + switch ("${params.pipeline}") { + case "nanofactory": + NANOFACTORY() + break + case "centriflaken": + CENTRIFLAKEN() + break + case "centriflaken_hy": + CENTRIFLAKEN_HY() + break + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/abricate.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/abricate.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,43 @@ +// Help text for abricate within CPIPES. + +def abricateHelp(params) { + +Map tool = [:] +Map toolspecs = [:] +tool.text = [:] +tool.helpparams = [:] + + toolspecs = [ + 'abricate_run': [ + clihelp: 'Run ABRicate tool. Default: ' + + (params.abricate_run ?: false), + cliflag: null, + clivalue: null + ], + 'abricate_minid': [ + clihelp: 'Minimum DNA %identity. ' + + "Defaut: " + (params.abricate_minid ?: 80), + cliflag: '--minid', + clivalue: (params.abricate_minid ?: 80) + ], + 'abricate_mincov': [ + clihelp: 'Minimum DNA %coverage. ' + + "Defaut: " + (params.abricate_mincov ?: 80), + cliflag: '--mincov', + clivalue: (params.abricate_mincov ?: 80) + ], + 'abricate_datadir': [ + clihelp: 'ABRicate databases folder. 
' + + "Defaut: " + (params.abricate_datadir ?: 'undefined'), + cliflag: '--datadir', + clivalue: (params.abricate_datadir ?: '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/amrfinderplus.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/amrfinderplus.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,36 @@ +def amrfinderplusHelp(params) { + + Map tool = [:] + Map toolspecs = [:] + tool.text = [:] + tool.helpparams = [:] + + toolspecs = [ + 'amrfinderplus_run': [ + clihelp: "Run AMRFinderPlus tool. Default: ${params.amrfinderplus_run}", + cliflag: null, + clivalue: null + ], + 'amrfinderplus_db': [ + clihelp: 'Path to AMRFinderPlus database. Please note that ' + + ' the databases should be ready and formatted with blast for use. ' + + 'Please read more at: ' + + 'https://github.com/ncbi/amr/wiki/AMRFinderPlus-database ' + + "Default: ${params.amrfinderplus_db}", + cliflag: '--database', + clivalue: (params.amrfinderplus_db ?: '') + ], + 'amrfinderplus_genes': [ + clihelp: 'Add the plus genes to the report', + cliflag: '--plus', + clivalue: (params.amrfinderplus_genes ? ' ' : '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/centrifuge.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/centrifuge.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,54 @@ +// Help text for centrifuge within CPIPES. + +def centrifugeHelp(params) { + + Map tool = [:] + Map toolspecs = [:] + tool.text = [:] + tool.helpparams = [:] + + toolspecs = [ + 'centrifuge_x': [ + clihelp: "Absolute path to centrifuge database. 
Default: ${params.centrifuge_x}", + cliflag: '-x', + clivalue: null + ], + 'centrifuge_save_unaligned': [ + clihelp: 'Save SINGLE-END reads that did not align. For PAIRED-END' + + " reads, save read pairs that did not align concordantly. Default: ${params.centrifuge_save_unaligned}", + cliflag: null, // Handled in modules logic. + clivalue: null + ], + 'centrifuge_save_aligned': [ + clihelp: 'Save SINGLE-END reads that aligned. For PAIRED-END' + + " reads, save read pairs that aligned concordantly. Default: ${params.centrifuge_save_aligned}", + cliflag: null, // Handled in modules logic. + clivalue: null + ], + 'centrifuge_out_fmt_sam': [ + clihelp: "Centrifuge output should be in SAM. Default: ${params.centrifuge_out_fmt_sam}", + cliflag: null, // Handled in modules logic. + clivalue: null + ], + 'centrifuge_extract_bug': [ + clihelp: "Extract this bug from centrifuge results." + + " Default: ${params.centrifuge_extract_bug}", + cliflag: null, // Handled in modules logic. + clivalue: null, + ], + 'centrifuge_ignore_quals': [ + clihelp: 'Treat all quality values as 30 on Phred scale. ' + + "Default: ${params.centrifuge_ignore_quals}", + cliflag: '--ignore-quals', + clivalue: (params.centrifuge_ignore_quals ? ' ' : '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} + diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/ectyper.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/ectyper.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,46 @@ +def ectyperHelp(params) { + + Map tool = [:] + Map toolspecs = [:] + tool.text = [:] + tool.helpparams = [:] + + toolspecs = [ + 'ectyper_run': [ + clihelp: "Run ectyper tool. Default: ${params.ectyper_run}", + cliflag: null, + clivalue: null + ], + 'ectyper_perc_opid': [ + clihelp: 'Percent identity required for an O antigen allele match. 
' + + "Default: ${params.ectyper_perc_opid}", + cliflag: '-opid', + clivalue: (params.ectyper_perc_opid ?: 90) + ], + 'ectyper_perc_hpid': [ + clihelp: 'Percent identity required for a H antigen allele match. ' + + "Default: ${params.ectyper_perc_hpid}", + cliflag: '-hpid', + clivalue: (params.ectyper_perc_hpid ?: 95) + ], + 'ectyper_perc_opcov': [ + clihelp: 'Minumum percent coverage required for an O antigen allele match. ' + + "Default: ${params.ectyper_perc_opcov}", + cliflag: '-opcov', + clivalue: (params.ectyper_perc_opcov ?: 95) + ], + 'ectyper_perc_hpcov': [ + clihelp: 'Minumum percent coverage required for a H antigen allele match. ' + + "Default: ${params.ectyper_perc_hpcov}", + cliflag: '-hpcov', + clivalue: (params.ectyper_perc_hpcov ?: 50) + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/flye.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/flye.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,82 @@ +// Help text for flye within CPIPES. + +def flyeHelp(params) { + +Map tool = [:] +Map toolspecs = [:] +tool.text = [:] +tool.helpparams = [:] + + toolspecs = [ + 'flye_pacbio_raw': [ + clihelp: 'Input FASTQ reads are PacBio regular CLR reads (<20% error) ' + + "Defaut: ${params.flye_pacbio_raw}", + cliflag: '--pacbio-raw', + clivalue: (params.flye_pacbio_raw ? ' ' : '') + ], + 'flye_pacbio_corr': [ + clihelp: 'Input FASTQ reads are PacBio reads that were corrected ' + + "with other methods (<3% error). Default: ${params.flye_pacbio_corr}", + cliflag: '--pacbio-corr', + clivalue: (params.flye_pacbio_corr ? ' ' : '') + ], + 'flye_pacbio_hifi': [ + clihelp: 'Input FASTQ reads are PacBio HiFi reads (<1% error). ' + + "Default: ${params.flye_pacbio_hifi}", + cliflag: '--pacbio-hifi', + clivalue: (params.flye_pacbio_hifi ? 
' ' : '') + ], + 'flye_nano_raw': [ + clihelp: 'Input FASTQ reads are ONT regular reads, pre-Guppy5 (<20% error). ' + + "Default: ${params.flye_nano_raw}", + cliflag: '--nano-raw', + clivalue: (params.flye_nano_raw ? ' ' : '') + ], + 'flye_nano_corr': [ + clihelp: 'Input FASTQ reads are ONT reads that were corrected with other ' + + "methods (<3% error). Default: ${params.flye_nano_corr}", + cliflag: '--nano-corr', + clivalue: (params.flye_nano_corr ? ' ' : '') + ], + 'flye_nano_hq': [ + clihelp: 'Input FASTQ reads are ONT high-quality reads: ' + + "Guppy5+ SUP or Q20 (<5% error). Default: ${params.flye_nano_hq}", + cliflag: '--nano-hq', + clivalue: (params.flye_nano_hq ? ' ' : '') + ], + 'flye_genome_size': [ + clihelp: 'Estimated genome size (for example, 5m or 2.6g). ' + + "Default: ${params.flye_genome_size}", + cliflag: '--genome-size', + clivalue: (params.flye_genome_size ?: '') + ], + 'flye_polish_iter': [ + clihelp: 'Number of genome polishing iterations. ' + + "Default: ${params.flye_polish_iter}", + cliflag: '--iterations', + clivalue: (params.flye_polish_iter ?: '') + ], + 'flye_meta': [ + clihelp: "Do a metagenome assembly (unenven coverage mode). Default: ${params.flye_meta}", + cliflag: '--meta', + clivalue: (params.flye_meta ? ' ' : '') + ], + 'flye_min_overlap': [ + clihelp: "Minimum overlap between reads. Default: ${params.flye_min_overlap}", + cliflag: '--min-overlap', + clivalue: (params.flye_min_overlap ?: '') + ], + 'flye_scaffold': [ + clihelp: "Enable scaffolding using assembly graph. Default: ${params.flye_scaffold}", + cliflag: '--scaffold', + clivalue: (params.flye_scaffold ? 
' ' : '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/kraken2.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/kraken2.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,72 @@ +// Help text for kraken2 within CPIPES. + +def kraken2Help(params) { + + Map tool = [:] + Map toolspecs = [:] + tool.text = [:] + tool.helpparams = [:] + + toolspecs = [ + 'kraken2_db': [ + clihelp: "Absolute path to kraken database. Default: ${params.kraken2_db}", + cliflag: '--db', + clivalue: null + ], + 'kraken2_confidence': [ + clihelp: 'Confidence score threshold which must be ' + + "between 0 and 1. Default: ${params.kraken2_confidence}", + cliflag: '--confidence', + clivalue: (params.kraken2_confidence ?: '') + ], + 'kraken2_quick': [ + clihelp: "Quick operation (use first hit or hits). Default: ${params.kraken2_quick}", + cliflag: '--quick', + clivalue: (params.kraken2_quick ? ' ' : '') + ], + 'kraken2_use_mpa_style': [ + clihelp: "Report output like Kraken 1's " + + "kraken-mpa-report. Default: ${params.kraken2_use_mpa_style}", + cliflag: '--use-mpa-style', + clivalue: (params.kraken2_use_mpa_style ? ' ' : '') + ], + 'kraken2_minimum_base_quality': [ + clihelp: 'Minimum base quality used in classification ' + + " which is only effective with FASTQ input. Default: ${params.kraken2_minimum_base_quality}", + cliflag: '--minimum-base-quality', + clivalue: (params.kraken2_minimum_base_quality ?: '') + ], + 'kraken2_report_zero_counts': [ + clihelp: 'Report counts for ALL taxa, even if counts are zero. ' + + "Default: ${params.kraken2_report_zero_counts}", + cliflag: '--report-zero-counts', + clivalue: (params.kraken2_report_zero_counts ? 
' ' : '') + ], + 'kraken2_report_minimizer_data': [ + clihelp: 'Report minimizer and distinct minimizer count' + + ' information in addition to normal Kraken report. ' + + "Default: ${params.kraken2_report_minimizer_data}", + cliflag: '--report-minimizer-data', + clivalue: (params.kraken2_report_minimizer_data ? ' ' : '') + ], + 'kraken2_use_names': [ + clihelp: 'Print scientific names instead of just taxids. ' + + "Default: ${params.kraken2_use_names}", + cliflag: '--use-names', + clivalue: (params.kraken2_use_names ? ' ' : '') + ], + 'kraken2_extract_bug': [ + clihelp: 'Extract the reads or contigs beloging to this bug. ' + + "Default: ${params.kraken2_extract_bug}", + cliflag: null, + clivalue: null + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/megahit.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/megahit.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,113 @@ +// Help text for megahit within CPIPES. + +def megahitHelp(params) { + +Map tool = [:] +Map toolspecs = [:] +tool.text = [:] +tool.helpparams = [:] + + toolspecs = [ + 'megahit_run': [ + clihelp: 'Run MEGAHIT assembler. Default: ' + + (params.megahit_run ?: false), + cliflag: null, + clivalue: null + ], + 'megahit_min_count': [ + clihelp: '. Minimum multiplicity for filtering (k_min+1)-mers. ' + + "Defaut: ${params.megahit_min_count}", + cliflag: '--min-count', + clivalue: (params.megahit_min_count ?: '') + ], + 'megahit_k_list': [ + clihelp: 'Comma-separated list of kmer size. All values must be odd, in ' + + "the range 15-255, increment should be <= 28. Ex: '21,29,39,59,79,99,119,141'. " + + "Default: ${params.megahit_k_list}", + cliflag: '--k-list', + clivalue: (params.megahit_k_list ?: '') + ], + 'megahit_no_mercy': [ + clihelp: 'Do not add mercy k-mers. 
' + + "Default: ${params.megahit_no_mercy}", + cliflag: '--no-mercy', + clivalue: (params.megahit_no_mercy ? ' ' : '') + ], + 'megahit_bubble_level': [ + clihelp: '. Intensity of bubble merging (0-2), 0 to disable. ' + + "Default: ${params.megahit_bubble_level}", + cliflag: '--bubble-level', + clivalue: (params.megahit_bubble_level ?: '') + ], + 'megahit_merge_level': [ + clihelp: '. Merge complex bubbles of length <= l*kmer_size and ' + + "similarity >= s. Default: ${params.megahit_merge_level}", + cliflag: '--merge-level', + clivalue: (params.megahit_merge_level ?: '') + ], + 'megahit_prune_level': [ + clihelp: '. Strength of low depth pruning (0-3). ' + + "Default: ${params.megahit_prune_level}", + cliflag: '--prune-level', + clivalue: (params.megahit_prune_level ?: '') + ], + 'megahit_prune_depth': [ + clihelp: '. Remove unitigs with avg k-mer depth less than this value. ' + + "Default: ${params.megahit_prune_depth}", + cliflag: '--prune-depth', + clivalue: (params.megahit_prune_depth ?: '') + ], + 'megahit_low_local_ratio': [ + clihelp: '. Ratio threshold to define low local coverage contigs. ' + + "Default: ${params.megahit_low_local_ratio}", + cliflag: '--low-local-ratio', + clivalue: (params.megahit_low_local_ratio ?: '') + ], + 'megahit_max_tip_len': [ + clihelp: '. remove tips less than this value [ * k]. ' + + "Default: ${params.megahit_max_tip_len}", + cliflag: '--max-tip-len', + clivalue: (params.megahit_max_tip_len ?: '') + ], + 'megahit_no_local': [ + clihelp: 'Disable local assembly. ' + + "Default: ${params.megahit_no_local}", + cliflag: '--no-local', + clivalue: (params.megahit_no_local ? ' ' : '') + ], + 'megahit_kmin_1pass': [ + clihelp: 'Use 1pass mode to build SdBG of k_min. ' + + "Default: ${params.megahit_kmin_1pass}", + cliflag: '--kmin-1pass', + clivalue: (params.megahit_kmin_1pass ? ' ' : '') + ], + 'megahit_preset': [ + clihelp: '. Override a group of parameters. 
Valid values are '+ + "meta-sensitive which enforces '--min-count 1 --k-list 21,29,39,49,...,129,141', " + + 'meta-large (large & complex metagenomes, like soil) which enforces ' + + "'--k-min 27 --k-max 127 --k-step 10'. " + + "Default: ${params.megahit_preset}", + cliflag: '--preset', + clivalue: (params.megahit_preset ?: '') + ], + 'megahit_mem_flag': [ + clihelp: '. SdBG builder memory mode. 0: minimum; 1: moderate; 2: use all memory specified. ' + + "Default: ${params.megahit_mem_flag}", + cliflag: '--mem-flag', + clivalue: (params.megahit_mem_flag ?: '') + ], + 'megahit_min_contig_len': [ + clihelp: '. Minimum length of contigs to output. ' + + "Default: ${params.megahit_min_contig_len}", + cliflag: '--min-contig-len', + clivalue: (params.megahit_min_contig_len ?: '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/mlst.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/mlst.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,40 @@ +def mlstHelp(params) { + + Map tool = [:] + Map toolspecs = [:] + tool.text = [:] + tool.helpparams = [:] + + toolspecs = [ + 'mlst_run': [ + clihelp: "Run MLST tool. Default: ${params.mlst_run}", + cliflag: null, + clivalue: null + ], + 'mlst_minid': [ + clihelp: "DNA %identity of full allelle to consider 'similar' [~]. " + + "Default: ${params.mlst_minid}", + cliflag: '--minid', + clivalue: (params.mlst_minid ?: 95) + ], + 'mlst_mincov': [ + clihelp: 'DNA %cov to report partial allele at all [?].' + + "Default: ${params.mlst_mincov}", + cliflag: '--mincov', + clivalue: (params.mlst_mincov ?: 10) + ], + 'mlst_minscore': [ + clihelp: 'Minumum score out of 100 to match a scheme.' 
+ + "Default: ${params.mlst_minscore}", + cliflag: '--minscore', + clivalue: (params.mlst_minscore ?: 50) + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/seqkitgrep.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/seqkitgrep.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,69 @@ +// Help text for seqkit grep within CPIPES. + +def seqkitgrepHelp(params) { + +Map tool = [:] +Map toolspecs = [:] +tool.text = [:] +tool.helpparams = [:] + + toolspecs = [ + 'seqkit_grep_n': [ + clihelp: 'Match by full name instead of just ID. ' + + "Defaut: " + (params.seqkit_grep_n ?: 'undefined'), + cliflag: '--seqkit_grep_n', + clivalue: (params.seqkit_grep_n ? ' ' : '') + ], + 'seqkit_grep_s': [ + clihelp: 'Search subseq on seq, both positive and negative ' + + 'strand are searched, and mismatch allowed using flag --seqkit_grep_m. ' + + "Defaut: " + (params.seqkit_grep_s ?: 'undefined'), + cliflag: '--seqkit_grep_s', + clivalue: (params.seqkit_grep_s ? ' ' : '') + ], + 'seqkit_grep_c': [ + clihelp: 'Input is circular genome ' + + "Defaut: " + (params.seqkit_grep_c ?: 'undefined'), + cliflag: '--seqkit_grep_c', + clivalue: (params.seqkit_grep_c ? ' ' : '') + ], + 'seqkit_grep_C': [ + clihelp: 'Just print a count of matching records. With the ' + + '--seqkit_grep_v flag, count non-matching records. ' + + "Defaut: " + (params.seqkit_grep_C ?: 'undefined'), + cliflag: '--seqkit_grep_C', + clivalue: (params.seqkit_grep_C ? ' ' : '') + ], + 'seqkit_grep_i': [ + clihelp: 'Ignore case while using seqkit grep. ' + + "Defaut: " + (params.seqkit_grep_i ?: 'undefined'), + cliflag: '--seqkit_grep_i', + clivalue: (params.seqkit_grep_i ? ' ' : '') + ], + 'seqkit_grep_v': [ + clihelp: 'Invert the match i.e. select non-matching records. 
' + + "Defaut: " + (params.seqkit_grep_v ?: 'undefined'), + cliflag: '--seqkit_grep_v', + clivalue: (params.seqkit_grep_v ? ' ' : '') + ], + 'seqkit_grep_m': [ + clihelp: 'Maximum mismatches when matching by sequence. ' + + "Defaut: " + (params.seqkit_grep_m ?: 'undefined'), + cliflag: '--seqkit_grep_m', + clivalue: (params.seqkit_grep_m ?: '') + ], + 'seqkit_grep_r': [ + clihelp: 'Input patters are regular expressions. ' + + "Defaut: " + (params.seqkit_grep_r ?: 'undefined'), + cliflag: '--seqkit_grep_r', + clivalue: (params.seqkit_grep_r ? ' ' : '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/seqkitrmdup.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/seqkitrmdup.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,61 @@ +// Help text for seqkit rmdup within CPIPES. + +def seqkitrmdupHelp(params) { + +Map tool = [:] +Map toolspecs = [:] +tool.text = [:] +tool.helpparams = [:] + + toolspecs = [ + 'seqkit_rmdup_run': [ + clihelp: 'Remove duplicate sequences using seqkit rmdup. Default: ' + + (params.seqkit_rmdup_run ?: false), + cliflag: null, + clivalue: null + ], + 'seqkit_rmdup_n': [ + clihelp: 'Match and remove duplicate sequences by full name instead of just ID. ' + + "Defaut: ${params.seqkit_rmdup_n}", + cliflag: '-n', + clivalue: (params.seqkit_rmdup_n ? ' ' : '') + ], + 'seqkit_rmdup_s': [ + clihelp: 'Match and remove duplicate sequences by sequence content. ' + + "Defaut: ${params.seqkit_rmdup_s}", + cliflag: '-s', + clivalue: (params.seqkit_rmdup_s ? ' ' : '') + ], + 'seqkit_rmdup_d': [ + clihelp: 'Save the duplicated sequences to a file. ' + + "Defaut: ${params.seqkit_rmdup_d}", + cliflag: null, + clivalue: null + ], + 'seqkit_rmdup_D': [ + clihelp: 'Save the number and list of duplicated sequences to a file. 
' + + "Defaut: ${params.seqkit_rmdup_D}", + cliflag: null, + clivalue: null + ], + 'seqkit_rmdup_i': [ + clihelp: 'Ignore case while using seqkit rmdup. ' + + "Defaut: ${params.seqkit_rmdup_i}", + cliflag: '-i', + clivalue: (params.seqkit_rmdup_i ? ' ' : '') + ], + 'seqkit_rmdup_P': [ + clihelp: "Only consider positive strand (i.e. 5') when comparing by sequence content. " + + "Defaut: ${params.seqkit_rmdup_P}", + cliflag: '-P', + clivalue: (params.seqkit_rmdup_P ? ' ' : '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/seqsero2.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/seqsero2.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,50 @@ +def seqsero2Help(params) { + + Map tool = [:] + Map toolspecs = [:] + tool.text = [:] + tool.helpparams = [:] + + toolspecs = [ + 'seqsero2_run': [ + clihelp: "Run SeqSero2 tool. Default: ${params.seqsero2_run}", + cliflag: null, + clivalue: null + ], + 'seqsero2_t': [ + clihelp: "'1' for interleaved paired-end reads, '2' for " + + "separated paired-end reads, '3' for single reads, '4' for " + + "genome assembly, '5' for nanopore reads (fasta/fastq). " + + "Default: ${params.seqsero2_t}", + cliflag: '-t', + clivalue: (params.seqsero2_t ?: '') + ], + 'seqsero2_m': [ + clihelp: "Which workflow to apply, 'a'(raw reads allele " + + "micro-assembly), 'k'(raw reads and genome assembly k-mer). " + + "Default: ${params.seqsero2_m}", + cliflag: '-m', + clivalue: (params.seqsero2_m ?: '') + ], + 'seqsero2_c': [ + clihelp: 'SeqSero2 will only output serotype prediction without the directory ' + + 'containing log files. ' + + "Default: ${params.seqsero2_c}", + cliflag: '-c', + clivalue: (params.seqsero2_c ? ' ' : '') + ], + 'seqsero2_s': [ + clihelp: 'SeqSero2 will not output header in SeqSero_result.tsv. 
' + + "Default: ${params.seqsero2_s}", + cliflag: '-l', + clivalue: (params.seqsero2_s ? ' ' : '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/serotypefinder.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/serotypefinder.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,46 @@ +def serotypefinderHelp(params) { + + Map tool = [:] + Map toolspecs = [:] + tool.text = [:] + tool.helpparams = [:] + + toolspecs = [ + 'serotypefinder_run': [ + clihelp: "Run SerotypeFinder tool. Default: ${params.serotypefinder_run}", + cliflag: null, + clivalue: null + ], + 'serotypefinder_x': [ + clihelp: 'Generate extended output files. ' + + "Default: ${params.serotypefinder_x}", + cliflag: '-x', + clivalue: (params.serotypefinder_x ? ' ' : '') + ], + 'serotypefinder_db': [ + clihelp: 'Path to SerotypeFinder databases. ' + + "Default: ${params.serotypefinder_db}", + cliflag: '-p', + clivalue: null + ], + 'serotypefinder_min_threshold': [ + clihelp: 'Minimum percent identity (in float) required for calling a hit. ' + + "Default: ${params.serotypefinder_min_threshold}", + cliflag: '-t', + clivalue: (params.serotypefinder_min_threshold ?: '') + ], + 'serotypefinder_min_cov': [ + clihelp: 'Minumum percent coverage (in float) required for calling a hit. 
' + + "Default: ${params.serotypefinder_min_cov}", + cliflag: '-l', + clivalue: (params.serotypefinder_min_cov ?: '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/help/spades.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/help/spades.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,121 @@ +// Help text for spades within CPIPES. + +def spadesHelp(params) { + +Map tool = [:] +Map toolspecs = [:] +tool.text = [:] +tool.helpparams = [:] + + toolspecs = [ + 'spades_run': [ + clihelp: 'Run SPAdes assembler. Default: ' + + (params.spades_run ?: false), + cliflag: null, + clivalue: null + ], + 'spades_isolate': [ + clihelp: 'This flag is highly recommended for high-coverage isolate and ' + + "multi-cell data. Defaut: ${params.spades_isolate}", + cliflag: '--isolate', + clivalue: (params.spades_isolate ? ' ' : '') + ], + 'spades_sc': [ + clihelp: 'This flag is required for MDA (single-cell) data. ' + + "Default: ${params.spades_sc}", + cliflag: '--sc', + clivalue: (params.spades_sc ? ' ' : '') + ], + 'spades_meta': [ + clihelp: 'This flag is required for metagenomic data. ' + + "Default: ${params.spades_meta}", + cliflag: '--meta', + clivalue: (params.spades_meta ? ' ' : '') + ], + 'spades_bio': [ + clihelp: 'This flag is required for biosytheticSPAdes mode. ' + + "Default: ${params.spades_bio}", + cliflag: '--bio', + clivalue: (params.spades_bio ? ' ' : '') + ], + 'spades_corona': [ + clihelp: 'This flag is required for coronaSPAdes mode. ' + + "Default: ${params.spades_corona}", + cliflag: '--corona', + clivalue: (params.spades_corona ? ' ' : '') + ], + 'spades_rna': [ + clihelp: 'This flag is required for RNA-Seq data. ' + + "Default: ${params.spades_rna}", + cliflag: '--rna', + clivalue: (params.spades_rna ? 
' ' : '') + ], + 'spades_plasmid': [ + clihelp: 'Runs plasmidSPAdes pipeline for plasmid detection. ' + + "Default: ${params.spades_plasmid}", + cliflag: '--plasmid', + clivalue: (params.spades_plasmid ? ' ' : '') + ], + 'spades_metaviral': [ + clihelp: 'Runs metaviralSPAdes pipeline for virus detection. ' + + "Default: ${params.spades_metaviral}", + cliflag: '--metaviral', + clivalue: (params.spades_metaviral ? ' ' : '') + ], + 'spades_metaplasmid': [ + clihelp: 'Runs metaplasmidSPAdes pipeline for plasmid detection in ' + + "metagenomics datasets. Default: ${params.spades_metaplasmid}", + cliflag: '--metaplasmid', + clivalue: (params.spades_metaplasmid ? ' ' : '') + ], + 'spades_rnaviral': [ + clihelp: 'This flag enables virus assembly module from RNA-Seq data. ' + + "Default: ${params.spades_rnaviral}", + cliflag: '--rnaviral', + clivalue: (params.spades_rnaviral ? ' ' : '') + ], + 'spades_iontorrent': [ + clihelp: 'This flag is required for IonTorrent data. ' + + "Default: ${params.spades_iontorrent}", + cliflag: '--iontorrent', + clivalue: (params.spades_iontorrent ? ' ' : '') + ], + 'spades_only_assembler': [ + clihelp: 'Runs only the SPAdes assembler module (without read error correction). ' + + "Default: ${params.spades_only_assembler}", + cliflag: '--only-assembler', + clivalue: (params.spades_only_assembler ? ' ' : '') + ], + 'spades_careful': [ + clihelp: 'Tries to reduce the number of mismatches and short indels in the assembly. ' + + "Default: ${params.spades_careful}", + cliflag: '--careful', + clivalue: (params.spades_careful ? ' ' : '') + ], + 'spades_cov_cutoff': [ + clihelp: 'Coverage cutoff value (a positive float number). ' + + "Default: ${params.spades_cov_cutoff}", + cliflag: '--cov-cutoff', + clivalue: (params.spades_cov_cutoff ?: '') + ], + 'spades_k': [ + clihelp: 'List of k-mer sizes (must be odd and less than 128). 
' + + "Default: ${params.spades_k}", + cliflag: '-k', + clivalue: (params.spades_k ?: '') + ], + 'spades_hmm': [ + clihelp: 'Directory with custom hmms that replace the default ones (very rare). ' + + "Default: ${params.spades_hmm}", + cliflag: '--custom-hmms', + clivalue: (params.spades_hmm ?: '') + ] + ] + + toolspecs.each { + k, v -> tool.text['--' + k] = "${v.clihelp}" + tool.helpparams[k] = [ cliflag: "${v.cliflag}", clivalue: v.clivalue ] + } + + return tool +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/lib/routines.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/lib/routines.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,368 @@ +// Hold methods to print: +// 1. Colored logo. +// 2. Summary of parameters. +// 3. Single dashed line. +// 4. Double dashed line. +// + +import groovy.json.JsonSlurper +import nextflow.config.ConfigParser +// import groovy.json.JsonOutput + +// ASCII logo +def pipelineBanner() { + + def padding = (params.pad) ?: 30 + Map fgcolors = getANSIColors() + + def banner = [ + name: "${fgcolors.magenta}${workflow.manifest.name}${fgcolors.reset}", + author: "${fgcolors.cyan}${workflow.manifest.author}${fgcolors.reset}", + // workflow: "${fgcolors.magenta}${params.pipeline}${fgcolors.reset}", + version: "${fgcolors.green}${workflow.manifest.version}${fgcolors.reset}", + center: "${fgcolors.green}${params.center}${fgcolors.reset}", + pad: padding + ] + + manifest = addPadding(banner) + + return """${fgcolors.white}${dashedLine(type: '=')}${fgcolors.magenta} + (o) + ___ _ __ _ _ __ ___ ___ + / __|| '_ \\ | || '_ \\ / _ \\/ __| +| (__ | |_) || || |_) || __/\\__ \\ + \\___|| .__/ |_|| .__/ \\___||___/ + | | | | + |_| |_|${fgcolors.reset} +${dashedLine()} +${fgcolors.blue}A collection of modular pipelines at CFSAN, FDA.${fgcolors.reset} +${dashedLine()} +${manifest} +${dashedLine(type: '=')} +""".stripIndent() +} + +// Add padding to keys so that +// they indent nicely on the +// terminal +def 
addPadding(values) { + + def pad = (params.pad) ?: 30 + values.pad = pad + + def padding = values.pad.toInteger() + def nocapitalize = values.nocapitalize + def stopnow = values.stopNow + def help = values.help + + values.removeAll { + k, v -> [ + 'nocapitalize', + 'pad', + 'stopNow', + 'help' + ].contains(k) + } + + values.keySet().each { k -> + v = values[k] + s = params.linewidth - (pad + 5) + if (v.toString().size() > s && !stopnow) { + def sen = '' + v.toString().findAll(/.{1,${s}}\b(?:\W*|\s*)/).each { + sen += ' '.multiply(padding + 2) + it + '\n' + } + values[k] = ( + help ? sen.replaceAll(/^(\n|\s)*/, '') : sen.trim() + ) + } else { + values[k] = (help ? v + "\n" : v) + } + k = k.replaceAll(/\./, '_') + } + + return values.findResults { + k, v -> nocapitalize ? + k.padRight(padding) + ': ' + v : + k.capitalize().padRight(padding) + ': ' + v + }.join("\n") +} + +// Method for error messages +def stopNow(msg) { + + Map fgcolors = getANSIColors() + Map errors = [:] + + if (msg == null) { + msg = "Unknown error" + } + + errors['stopNow'] = true + errors["${params.cfsanpipename} - ${params.pipeline} - ERROR"] = """ +${fgcolors.reset}${dashedLine()} +${fgcolors.red}${msg}${fgcolors.reset} +${dashedLine()} +""".stripIndent() + // println dashedLine() // defaults to stdout + // log.info addPadding(errors) // prints to stdout + exit 1, "\n" + dashedLine() + + "${fgcolors.red}\n" + addPadding(errors) +} + +// Method to validate 4 required parameters +// if input for entry point is FASTQ files +def validateParamsForFASTQ() { + switch (params) { + case { params.metadata == null && params.input == null }: + stopNow("Either metadata CSV file with 5 required columns\n" + + "in order: sample, fq1, fq2, strandedness, single_end or \n" + + "input directory of only FASTQ files (gzipped or unzipped) should be provided\n" + + "using --metadata or --input options.\n" + + "None of these two options were provided!") + break + case { params.metadata != null && params.input != null 
}: + stopNow("Either metadata or input directory of FASTQ files\n" + + "should be provided using --metadata or --input options.\n" + + "Using both these options is not allowed!") + break + case { params.output == null }: + stopNow("Please mention output directory to store all results " + + "using --output option!") + break + } + return 1 +} + +// Method to print summary of parameters +// before running +def summaryOfParams() { + + def pipeline_specific_config = new ConfigParser().setIgnoreIncludes(true).parse( + file("${params.workflowsconf}${params.fs}${params.pipeline}.config").text + ) + Map fgcolors = getANSIColors() + Map globalparams = [:] + Map localparams = params.subMap( + pipeline_specific_config.params.keySet().toList() + params.logtheseparams + ) + + if (localparams !instanceof Map) { + stopNow("Need a Map of paramters. We got: " + localparams.getClass()) + } + + if (localparams.size() != 0) { + localparams['nocapitalize'] = true + globalparams['nocapitalize'] = true + globalparams['nextflow_version'] = "${nextflow.version}" + globalparams['nextflow_build'] = "${nextflow.build}" + globalparams['nextflow_timestamp'] = "${nextflow.timestamp}" + globalparams['workflow_projectDir'] = "${workflow.projectDir}" + globalparams['workflow_launchDir'] = "${workflow.launchDir}" + globalparams['workflow_workDir'] = "${workflow.workDir}" + globalparams['workflow_container'] = "${workflow.container}" + globalparams['workflow_containerEngine'] = "${workflow.containerEngine}" + globalparams['workflow_runName'] = "${workflow.runName}" + globalparams['workflow_sessionId'] = "${workflow.sessionId}" + globalparams['workflow_profile'] = "${workflow.profile}" + globalparams['workflow_start'] = "${workflow.start}" + globalparams['workflow_commandLine'] = "${workflow.commandLine}" + return """${dashedLine()} +Summary of the current workflow (${fgcolors.magenta}${params.pipeline}${fgcolors.reset}) parameters +${dashedLine()} +${addPadding(localparams)} +${dashedLine()} 
+${fgcolors.cyan}N E X T F L O W${fgcolors.reset} - ${fgcolors.magenta}${params.cfsanpipename}${fgcolors.reset} - Runtime metadata +${dashedLine()} +${addPadding(globalparams)} +${dashedLine()}""".stripIndent() + } + return 1 +} + +// Method to display +// Return dashed line either '-' +// type or '=' type +def dashedLine(Map defaults = [:]) { + + Map fgcolors = getANSIColors() + def line = [color: 'white', type: '-'] + + if (!defaults.isEmpty()) { + line.putAll(defaults) + } + + return fgcolors."${line.color}" + + "${line.type}".multiply(params.linewidth) + + fgcolors.reset +} + +// Return slurped keys parsed from JSON +def slurpJson(file) { + def slurped = null + def jsonInst = new JsonSlurper() + + try { + slurped = jsonInst.parse(new File ("${file}")) + } + catch (Exception e) { + log.error 'Please check your JSON schema. Invalid JSON file: ' + file + } + + // Declare globals for the nanofactory + // workflow. + return [keys: slurped.keySet().toList(), cparams: slurped] +} + +// Default help text in a map if the entry point +// to a pipeline is FASTQ files. +def fastqEntryPointHelp() { + + Map helptext = [:] + Map fgcolors = getANSIColors() + + helptext['Workflow'] = "${fgcolors.magenta}${params.pipeline}${fgcolors.reset}" + helptext['Author'] = "${fgcolors.cyan}${params.workflow_built_by}${fgcolors.reset}" + helptext['Version'] = "${fgcolors.green}${params.workflow_version}${fgcolors.reset}\n" + helptext['Usage'] = "cpipes --pipeline ${params.pipeline} [options]\n" + helptext['Required'] = "" + helptext['--input'] = "Absolute path to directory containing FASTQ files. " + + "The directory should contain only FASTQ files as all the " + + "files within the mentioned directory will be read. " + + "Ex: --input /path/to/fastq_pass" + helptext['--output'] = "Absolute path to directory where all the pipeline " + + "outputs should be stored. 
Ex: --output /path/to/output" + helptext['Other options'] = "" + helptext['--metadata'] = "Absolute path to metadata CSV file containing five " + + "mandatory columns: sample,fq1,fq2,strandedness,single_end. The fq1 and fq2 " + + "columns contain absolute paths to the FASTQ files. This option can be used in place " + + "of --input option. This is rare. Ex: --metadata samplesheet.csv" + helptext['--fq_suffix'] = "The suffix of FASTQ files (Unpaired reads or R1 reads or Long reads) if " + + "an input directory is mentioned via --input option. Default: ${params.fq_suffix}" + helptext['--fq2_suffix'] = "The suffix of FASTQ files (Paired-end reads or R2 reads) if an input directory is mentioned via " + + "--input option. Default: ${params.fq2_suffix}" + helptext['--fq_filter_by_len'] = "Remove FASTQ reads that are less than this many bases. " + + "Default: ${params.fq_filter_by_len}" + helptext['--fq_strandedness'] = "The strandedness of the sequencing run. This is mostly needed " + + "if your sequencing run is RNA-SEQ. For most of the other runs, it is probably safe to use " + + "unstranded for the option. Default: ${params.fq_strandedness}" + helptext['--fq_single_end'] = "SINGLE-END information will be auto-detected but this option forces " + + "PAIRED-END FASTQ files to be treated as SINGLE-END so only read 1 information is included in " + + "auto-generated samplesheet. Default: ${params.fq_single_end}" + helptext['--fq_filename_delim'] = "Delimiter by which the file name is split to obtain sample name. " + + "Default: ${params.fq_filename_delim}" + helptext['--fq_filename_delim_idx'] = "After splitting FASTQ file name by using the --fq_filename_delim option," + + " all elements before this index (1-based) will be joined to create final sample name." 
+ + " Default: ${params.fq_filename_delim_idx}" + + return helptext +} + +// Wrap help text with the following options +def wrapUpHelp() { + + return [ + 'Help options' : "", + '--help': "Display this message.\n", + 'help': true, + 'nocapitalize': true + ] +} + +// Method to send email on workflow complete. +def sendMail() { + + if (params.user_email == null) { + return 1 + } + + def pad = (params.pad) ?: 30 + def contact_emails = [ + stakeholder: (params.workflow_blueprint_by ?: 'Not defined'), + author: (params.workflow_built_by ?: 'Not defined') + ] + def msg = """ +${pipelineBanner()} +${summaryOfParams()} +${params.cfsanpipename} - ${params.pipeline} +${dashedLine()} +Please check the following directory for N E X T F L O W +reports. You can view the HTML files directly by double clicking +them on your workstation. +${dashedLine()} +${params.tracereportsdir} +${dashedLine()} +Please send any bug reports to CFSAN Dev Team or the author or +the stakeholder of the current pipeline. +${dashedLine()} +Error messages (if any) +${dashedLine()} +${workflow.errorMessage} +${workflow.errorReport} +${dashedLine()} +Contact emails +${dashedLine()} +${addPadding(contact_emails)} +${dashedLine()} +Thank you for using ${params.cfsanpipename} - ${params.pipeline}! +${dashedLine()} +""".stripIndent() + + def mail_cmd = [ + 'sendmail', + '-f', 'cfsan-hpc-noreply@fda.hhs.gov', + '-F', 'cfsan-hpc-noreply', + '-t', "${params.user_email}" + ] + + def email_subject = "${params.cfsanpipename} - ${params.pipeline}" + Map fgcolors = getANSIColors() + + if (workflow.success) { + email_subject += ' completed successfully!' + } + else if (!workflow.success) { + email_subject += ' has failed!' + } + + try { + ['env', 'bash'].execute() << """${mail_cmd.join(' ')} +Subject: ${email_subject} +Mime-Version: 1.0 +Content-Type: text/html +
+${msg.replaceAll(/\x1b\[[0-9;]*m/, '')}
+
+""".stripIndent() + } catch (all) { + def warning_msg = "${fgcolors.yellow}${params.cfsanpipename} - ${params.pipeline} - WARNING" + .padRight(pad) + ':' + log.info """ +${dashedLine()} +${warning_msg} +${dashedLine()} +Could not send mail with the sendmail command! +${dashedLine()} +""".stripIndent() + } + return 1 +} + +// Set ANSI colors for any and all +// STDOUT or STDERR +def getANSIColors() { + + Map fgcolors = [:] + + fgcolors['reset'] = "\033[0m" + fgcolors['black'] = "\033[0;30m" + fgcolors['red'] = "\033[0;31m" + fgcolors['green'] = "\033[0;32m" + fgcolors['yellow'] = "\033[0;33m" + fgcolors['blue'] = "\033[0;34m" + fgcolors['magenta'] = "\033[0;35m" + fgcolors['cyan'] = "\033[0;36m" + fgcolors['white'] = "\033[0;37m" + + return fgcolors +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/abricate/run/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/abricate/run/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,94 @@ +# NextFlow DSL2 Module + +```bash +ABRICATE_RUN +``` + +## Description + +Run `abricate` tool on a list of assembled contigs in FASTA format given a list of database names. Produces a single output table in ASCII text format per database. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of assemled contig FASTA files of input type `path` (`assembly`). + +Ex: + +```groovy +[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. + +Ex: + +```groovy +[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] +``` + +\ +  + +#### `assembly` + +Type: `path` + +NextFlow input type of `path` pointing to assembled contig file in FASTA format. 
+ +\ +  + +#### `abdbs` + +Type: `val` + +Nextflow input type of `val` containing a list of at least one of the following database names on which `abricate` should be run. + +Ex: + +```groovy +[ 'resfinder', 'megares', 'ncbi', 'ncbiamrplus', 'argannot' , 'ecoli_vf' ] +``` + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and list of `abricate` result files (`abricated`). + +\ +  + +#### `abricated` + +Type: `path` + +NextFlow output type of `path` pointing to the `abricate` results table file per sample (`id:`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/abricate/run/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/abricate/run/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,48 @@ +process ABRICATE_RUN { + tag "$meta.id" + label 'process_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}abricate${params.fs}1.0.1" : null) + conda (params.enable_conda ? "bioconda::abricate=1.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1': + 'quay.io/biocontainers/abricate:1.0.1--ha8f3691_1' }" + + input: + tuple val(meta), path(assembly) + val abdbs + + output: + path "${meta.id}${params.fs}*" + tuple val(meta), path("${meta.id}${params.fs}*.ab.txt"), emit: abricated + path "versions.yml" , emit: versions + + when: + (task.ext.when == null || task.ext.when) && assembly.size() > 0 + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbs = abdbs.collect().join('\\n') + """ + newprefix="${prefix}${params.fs}${prefix}" + + if [ ! 
-d "$prefix" ]; then + mkdir "$prefix" || exit 1 + fi + + echo -e "$dbs" | while read -r db; do + abricate \\ + $assembly \\ + $args \\ + --db \$db \\ + --threads $task.cpus 1> "\${newprefix}.\${db}.ab.txt" + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + abricate: \$(echo \$(abricate --version 2>&1) | sed 's/^.*abricate //' ) + bash: \$( bash --version 2>&1 | sed '1!d; s/^.*version //; s/ (.*\$//' ) + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/abricate/summary/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/abricate/summary/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,140 @@ +# NextFlow DSL2 Module + +```bash +ABRICATE_SUMMARY +``` + +## Description + +Run `abricate` tool's `summary` sub-command on a list of `abricate`'s result table files per database. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of `abricate` database names of type `val` (`abdbs`) and a list of `abricate` result table files for all databases of type `path` (`abfiles`). + +Ex: + +```groovy +[ + [ 'megares', 'argannot', 'resfinder', 'ncbi' ], + [ '/data/sample1/f.ncbi.ab.txt', + '/data/sample1/f.megares.ab.txt', + '/data/sample1/f.resfinder.ab.txt', + '/data/sample1/f.argannot.ab.txt', + '/data/sample1/f2.ncbi.ab.txt', + '/data/sample1/f2.megares.ab.txt', + '/data/sample1/f2.resfinder.ab.txt', + '/data/sample1/f2.argannot.ab.txt' + ] +] +``` + +\ +  + +#### `abdbs` + +Type: `val` + +A Groovy List containing the **mandatory** list of at least the following 4 `abricate` database names on which `abricate` was run. + +Ex: + +```groovy +[ 'resfinder', 'megares', 'ncbi', 'argannot' ] +``` + +\ +  + +#### `abfiles` + +Type: `path` + +NextFlow input type of `path` pointing to `abricate` result files for each of the database. 
+ +\ +  + +### `output:` + +___ + +#### `ncbi` + +Type: `tuple` +\ +Optional: `true` + +Outputs a tuple of `abricate` database key (`abricate_ncbi`) and summary result file from `abricate summary` command of type `path` (`ncbi`). This database includes only core AMR genes. This tuple is emitted optionally only where there are output files with suffix `.ncbi.absum.txt` + +\ +  + +#### `ncbiamrplus` + +Type: `tuple` +\ +Optional: `true` + +Outputs a tuple of `abricate` database key (`abricate_ncbiamrplus`) and summary result file from `abricate summary` command of type `path` (`ncbiamrplus`). This database includes both core AMR genes and plus AMR genes. This tuple is emitted optionally only where there are output files with suffix `.ncbiamrplus.absum.txt` + +\ +  + +#### `resfinder` + +Type: `tuple` +\ +Optional: `true` + +Outputs a tuple of `abricate` database key (`abricate_resfinder`) and summary result file from `abricate summary` command of type `path` (`resfinder`). This tuple is emitted optionally only where there are output files with suffix `.resfinder.absum.txt` + +\ +  + +#### `megares` + +Type: `tuple` +\ +Optional: `true` + +Outputs a tuple of `abricate` database key (`abricate_megares`) and summary result file from `abricate summary` command of type `path` (`megares`). This tuple is emitted optionally only where there are output files with suffix `.megares.absum.txt` + +\ +  + +#### `argannot` + +Type: `tuple` +\ +Optional: `true` + +Outputs a tuple of `abricate` database key (`abricate_argannot`) and summary result file from `abricate summary` command of type `path` (`argannot`). This tuple is emitted optionally only where there are output files with suffix `.argannot.absum.txt` + +\ +  + +#### `ecoli_vf` + +Type: `tuple` +\ +Optional: `true` + +Outputs an **optional** tuple of `abricate` database key (`abricate_ecoli_vf`) and summary result file from `abricate summary` command of type `path` (`ecoli_vf`). 
This tuple is emitted only when there are output files with suffix `.ecoli_vf.absum.txt` within the `work` folder. + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/abricate/summary/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/abricate/summary/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,80 @@ +process ABRICATE_SUMMARY { + tag "${abdbs.join(',')}" + label 'process_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}abricate${params.fs}1.0.1" : null) + conda (params.enable_conda ? "bioconda::abricate=1.0.1 conda-forge::coreutils" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1': + 'quay.io/biocontainers/abricate:1.0.1--ha8f3691_1' }" + + input: + tuple val(abdbs), path(abfiles) + + output: + tuple val('abricate_ncbi'), path("*.ncbi.absum.txt") , emit: ncbi, optional: true + tuple val('abricate_ncbiamrplus'), path("*.ncbiamrplus.absum.txt"), emit: ncbiamrplus, optional: true + tuple val('abricate_resfinder'), path("*resfinder.absum.txt") , emit: resfinder, optional: true + tuple val('abricate_megares'), path("*.megares.absum.txt") , emit: megares, optional: true + tuple val('abricate_argannot'), path("*.argannot.absum.txt") , emit: argannot, optional: true + tuple val('abricate_ecoli_vf'), path("*.ecoli_vf.absum.txt") , emit: ecoli_vf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def onthese = abdbs.collect{ db -> + abfiles.findAll { files -> + files =~ /\.${db}/ + }.join(' ') + }.join('\\n') + """ + filenum="1" + + echo -e "$onthese" | while read -r files; do + db=\$( echo -e "\${files}" | grep -E -o '\\w+\\.ab\\.txt' 
| sort -u | sed -e 's/.ab.txt//' ) + + if [ -z "\$db" ]; then + db="\$filenum" + fi + + abricate \\ + $args \\ + --summary \${files} \\ + 1> "abricate.\${db}.absum.txt" + + sed -i -e "s/.\${db}.ab.txt//" "abricate.\${db}.absum.txt" + sed -i -e 's/.assembly_filtered_contigs.fasta//' "abricate.\${db}.absum.txt" + + filenum=\$((filenum+1)) + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + abricate: \$(echo \$(abricate --version 2>&1) | sed 's/^.*abricate //' ) + bash: \$( bash --version 2>&1 | sed '1!d; s/^.*version //; s/ (.*\$//' ) + END_VERSIONS + + sedver="" + sortver="" + grepver="" + + if [ "${workflow.containerEngine}" != "null" ]; then + sortver=\$( sort --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) + sedver="\$sortver" + grepver="\$sortver" + else + sortver=\$( sort --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) + sedver=\$( echo \$(sed --version 2>&1) | sed 's/^.*(GNU sed) //; s/ Copyright.*\$//' ) + grepver=\$( echo \$(grep --version 2>&1) | sed 's/^.*(GNU grep) //; s/ Copyright.*\$//' ) + fi + + cat <<-END_VERSIONS >> versions.yml + sort: \$sortver + grep: \$grepver + sed: \$sedver + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/amrfinderplus/run/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/amrfinderplus/run/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,104 @@ +# NextFlow DSL2 Module + +```bash +AMRFINDERPLUS_RUN +``` + +## Description + +Run `amrfinder` tool on a list of assembled contigs in FASTA format. Produces a single output table in ASCII text format per database. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of assemled contig FASTA file of input type `path` (`fasta`). + +Ex: + +```groovy +[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. 
+ +Ex: + +```groovy +[ id: 'FAL00870', strandedness: 'unstranded', single_end: true, organism: 'Escherichia' ] +``` + +\ +  + +#### `fasta` + +Type: `path` + +NextFlow input type of `path` pointing to assembled contig file in FASTA format. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'AMRFINDERPLUS_RUN' { + ext.args = '--gpipe_org' +} +``` + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and list of `amrfinder` result files (`report`). + +\ +  + +#### `report` + +Type: `path` + +NextFlow output type of `path` pointing to the `amrfinder` results table file (`.tsv`) per sample (`id:`). + +\ +  + +#### `mutional_report` + +Type: `path` +\ +Optional: `true` + +NextFlow output type of `path` pointing to the `amrfinder` mutation results table file (`.tsv`) per sample (`id:`). Obtaining this output will depend on the presence of the `organism` key in the metadata (`meta`). See example above. + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/amrfinderplus/run/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/amrfinderplus/run/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,52 @@ +process AMRFINDERPLUS_RUN { + tag "$meta.id" + label 'process_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}amrfinderplus${params.fs}3.10.24" : null) + conda (params.enable_conda ? "bioconda::ncbi-amrfinderplus=3.10.24 conda-forge::libgcc-ng" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus%3A3.10.23--h17dc2d4_0': + 'quay.io/biocontainers/ncbi-amrfinderplus:3.10.23--h17dc2d4_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${prefix}.tsv") , emit: report + tuple val(meta), path("${prefix}-mutations.tsv"), emit: mutation_report, optional: true + path "versions.yml" , emit: versions + + when: + (task.ext.when == null || task.ext.when) && fasta.size() > 0 + + script: + def args = task.ext.args ?: '' + def is_compressed = fasta.getName().endsWith(".gz") ? true : false + prefix = task.ext.prefix ?: "${meta.id}" + organism_param = meta.containsKey("organism") ? "--organism ${meta.organism} --mutation_all ${prefix}-mutations.tsv" : "" + fasta_name = fasta.getName().replace(".gz", "") + fasta_param = "-n" + if (meta.containsKey("is_proteins")) { + if (meta.is_proteins) { + fasta_param = "-p" + } + } + """ + if [ "$is_compressed" == "true" ]; then + gzip -c -d $fasta > $fasta_name + fi + + amrfinder \\ + $fasta_param $fasta_name \\ + $organism_param \\ + $args \\ + --threads $task.cpus > ${prefix}.tsv + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amrfinderplus: \$(amrfinder --version) + gzip: \$( echo \$(gzip --version 2>&1) | sed 's/^.*(gzip) //; s/gzip //; s/ Copyright.*\$//' ) + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/cat/fastq/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/cat/fastq/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,96 @@ +# NextFlow DSL2 Module + +```bash +CAT_FASTQ +``` + +## Description + +Concatenates a list of FASTQ files. Produces 2 files per sample (`id:`) if `single_end` is `false` as mentioned in the metadata Groovy Map. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of FASTQ files of input type `path` (`reads`) to be concatenated. 
+ +Ex: + +```groovy +[ [id: 'sample1', single_end: true], ['/data/sample1/f_L001.fq', '/data/sample1/f_L002.fq'] ] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. + +Ex: + +```groovy +[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] +``` + +\ +  + +#### `reads` + +Type: `path` + +NextFlow input type of `path` pointing to list of FASTQ files. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'CAT_FASTQ' { + ext.args = '--genome_size 5.5m' +} +``` + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and list of concatenated FASTQ files (`catted_reads`). + +\ +  + +#### `catted_reads` + +Type: `path` + +NextFlow output type of `path` pointing to the concatenated FASTQ files per sample (`id:`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/cat/fastq/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/cat/fastq/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,89 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_micro' + + conda (params.enable_conda ? "conda-forge::sed=4.7 conda-forge::gzip" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : + 'biocontainers/biocontainers:v1.2.0_cv1' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: catted_reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads.collect{ it.toString() } + def is_in_gz = readList[0].endsWith('.gz') + def gz_or_ungz = (is_in_gz ? '' : ' | gzip') + def pigz_or_ungz = (is_in_gz ? '' : " | pigz -p ${task.cpus}") + if (meta.single_end) { + if (readList.size > 1) { + """ + zcmd="gzip" + zver="" + + if type pigz > /dev/null 2>&1; then + cat ${readList.join(' ')} ${pigz_or_ungz} > ${prefix}.merged.fastq.gz + zcmd="pigz" + zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed -e '1!d' | sed "s/\$zcmd //" ) + else + cat ${readList.join(' ')} ${gz_or_ungz} > ${prefix}.merged.fastq.gz + zcmd="gzip" + + if [ "${workflow.containerEngine}" != "null" ]; then + zver=\$( echo \$( \$zcmd --help 2>&1 ) | sed -e '1!d; s/ (.*\$//' ) + else + zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed "s/^.*(\$zcmd) //; s/\$zcmd //; s/ Copyright.*\$//" ) + fi + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$( echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//' ) + \$zcmd: \$zver + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? 
read2 : read1 ) << v } + """ + zcmd="gzip" + zver="" + + if type pigz > /dev/null 2>&1; then + cat ${read1.join(' ')} ${pigz_or_ungz} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} ${pigz_or_ungz} > ${prefix}_2.merged.fastq.gz + zcmd="pigz" + zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed -e '1!d' | sed "s/\$zcmd //" ) + else + cat ${read1.join(' ')} ${gz_or_ungz} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} ${gz_or_ungz} > ${prefix}_2.merged.fastq.gz + zcmd="gzip" + + if [ "${workflow.containerEngine}" != "null" ]; then + zver=\$( echo \$( \$zcmd --help 2>&1 ) | sed -e '1!d; s/ (.*\$//' ) + else + zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed "s/^.*(\$zcmd) //; s/\$zcmd //; s/ Copyright.*\$//" ) + fi + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$( echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//' ) + \$zcmd: \$zver + END_VERSIONS + """ + } + } +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/cat/tables/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/cat/tables/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,88 @@ +# NextFlow DSL2 Module + +```bash +TABLE_SUMMARY +``` + +## Description + +Concatenates a list of tables (CSV or TAB delimited) in `.txt` or `.csv` format. The table files to be concatenated **must** have a header as the header from one of the table files will be used as the header for the concatenated result table file. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of `val` table key (`table_sum_on`) and a list of table files of input type `path` (`tables`) to be concatenated. For this module to work, a `bin` directory with the script `create_mqc_data_table.py` should be present where the NextFlow script using this DSL2 module will be run. This `python` script will convert the aggregated table to `.yml` format to be used with `multiqc`. 
+ +Ex: + +```groovy +[ ['ectyper'], ['/data/sample1/f1_ectyper.txt', '/data/sample2/f2_ectyper.txt'] ] +``` + +\ +  + +#### `table_sum_on` + +Type: `val` + +A single key defining what tables are being concatenated. For example, if all the `ectyper` results are being concatenated for all samples, then this can be `ectyper`. + +Ex: + +```groovy +[ ['ectyper'], ['/data/sample1/f1_ectyper.txt', '/data/sample2/f2_ectyper.txt'] ] +``` + +\ +  + +#### `tables` + +Type: `path` + +NextFlow input type of `path` pointing to a list of tables (files) to be concatenated. + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of table key (`table_sum_on` from `input:`) and list of concatenated table files (`tblsummed`). + +\ +  + +#### `tblsummed` + +Type: `path` + +NextFlow output type of `path` pointing to the concatenated table files per table key (Ex: `ectyper`). + +\ +  + +#### `mqc_yml` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing table contents in `YAML` format which can be used to inject this table as part of the `multiqc` report. + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/cat/tables/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/cat/tables/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,58 @@ +process TABLE_SUMMARY { + tag "$table_sum_on" + label 'process_low' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) + conda (params.enable_conda ? "conda-forge::python=3.9 conda-forge::pyyaml conda-forge::coreutils" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + + input: + tuple val(table_sum_on), path(tables) + + output: + tuple val(table_sum_on), path("*.tblsum.txt"), emit: tblsummed + path "*_mqc.yml" , emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when || tables + + script: + def args = task.ext.args ?: '' + def onthese = tables.collect().join('\\n') + """ + filenum="1" + header="" + + echo -e "$onthese" | while read -r file; do + + if [ "\${filenum}" == "1" ]; then + header=\$( head -n1 "\${file}" ) + echo -e "\${header}" > ${table_sum_on}.tblsum.txt + fi + + tail -n+2 "\${file}" >> ${table_sum_on}.tblsum.txt + + filenum=\$((filenum+1)) + done + + create_mqc_data_table.py $table_sum_on ${workflow.manifest.name} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bash: \$( bash --version 2>&1 | sed '1!d; s/^.*version //; s/ (.*\$//' ) + python: \$( python --version | sed 's/Python //g' ) + END_VERSIONS + + headver=\$( head --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) + tailver=\$( tail --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) + + cat <<-END_VERSIONS >> versions.yml + head: \$headver + tail: \$tailver + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/centrifuge/classify/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/centrifuge/classify/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,159 @@ +# NextFlow DSL2 Module + +```bash +CENTRIFUGE_CLASSIFY +``` + +## Description + +Run `centrifuge` tool on reads in FASTQ format. Produces 3 output files in ASCII text format and optional output files. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). 
+ +Ex: + +```groovy +[ + [ id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' + ], + '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' +] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. + +Ex: + +```groovy +[ + id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' +] +``` + +\ +  + +#### `reads` + +Type: `path` + +NextFlow input type of `path` pointing to FASTQ files on which `centrifuge` classification should be run. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'CENTRIFUGE_CLASSIFY' { + ext.args = '--met 3' +} +``` + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and list of `centrifuge` result files. + +\ +  + +#### `report` + +Type: `path` + +NextFlow output type of `path` pointing to the `centrifuge` report table file (`.report.txt`) per sample (`id:`). + +\ +  + +#### `output` + +Type: `path` + +NextFlow output type of `path` pointing to the `centrifuge` output table file (`.output.txt`) per sample (`id:`). + +\ +  + +#### `kreport` + +Type: `path` + +NextFlow output type of `path` pointing to the `centrifuge` **Kraken** style report table file (`.kreport.txt`) per sample (`id:`). + +\ +  + +#### `sam` + +Type: `path` +\ +Optional: `true` + +NextFlow output type of `path` pointing to the `centrifuge` alignment results in SAM (`.sam`) format per sample (`id:`). Obtaining this output will depend on the mention of `--centrifuge_out_fmt_sam` command-line option when the NextFlow pipeline is called. 
+ +\ +  + +#### `fastq_mapped` + +Type: `path` +\ +Optional: `true` + +NextFlow output type of `path` pointing to the `centrifuge` alignment results in FASTQ (`.fastq.gz`) format per sample (`id:`). Obtaining this output will depend on the mention of `--centrifuge_save_aligned` command-line option when the NextFlow pipeline is called. + +\ +  + +#### `fastq_unmapped` + +Type: `path` +\ +Optional: `true` + +NextFlow output type of `path` pointing to the `centrifuge` FASTQ (`.fastq.gz`) files of unaligned reads per sample (`id:`). Obtaining this output will depend on the mention of `--centrifuge_save_unaligned` command-line option when the NextFlow pipeline is called. + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/centrifuge/classify/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/centrifuge/classify/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,61 @@ +process CENTRIFUGE_CLASSIFY { + tag "$meta.id" + label 'process_medium' + + module (params.enable_module ? 'centrifuge' : null) + conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' : + 'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.report.txt') , emit: report + tuple val(meta), path('*.output.txt') , emit: output + tuple val(meta), path('*.kreport.txt') , emit: kreport + tuple val(meta), path('*.sam') , optional: true, emit: sam + tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped + tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + def db = meta.centrifuge_x ?: '' + def db_name = db.toString().replace(".tar.gz","") + def unaligned = '' + def aligned = '' + if (meta.single_end) { + unaligned = params.centrifuge_save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = params.centrifuge_save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : '' + } else { + unaligned = params.centrifuge_save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = params.centrifuge_save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : '' + } + def sam_output = params.centrifuge_out_fmt_sam ? 
"--out-fmt 'sam'" : '' + """ + centrifuge \\ + -x $db \\ + -p $task.cpus \\ + $paired \\ + --report-file ${prefix}.centrifuge.report.txt \\ + -S ${prefix}.centrifuge.output.txt \\ + $unaligned \\ + $aligned \\ + $sam_output \\ + $args + + centrifuge-kreport -x $db_name ${prefix}.centrifuge.output.txt > ${prefix}.centrifuge.kreport.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/centrifuge/extract/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/centrifuge/extract/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,113 @@ +# NextFlow DSL2 Module + +```bash +CENTRIFUGE_EXTRACT +``` + +## Description + +Extract FASTQ reads given a FASTQ file originally used with `centrifuge` tool and a taxa of interest. This specific module uses only GNU Coreutils to create a list of FASTQ read ids that need to be extract. See also `CENTRIFUGE_PROCESS` module which uses a `python` script to generate the FASTQ read ids. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following 2 tuples: + +- A tuple of metadata (`meta`) and of type `path` (`centrifuge_output`) per sample (`id:`). + +- A tuple of metadata (`meta`) and of type `path` (`centrifuge_report`) per sample (`id:`). + +Ex: + +```groovy +[ + [ id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' + ], + '/hpc/scratch/test/FAL000870/f1.merged.cent_out.output.txt' +] + +[ + [ id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' + ], + '/hpc/scratch/test/FAL000870/f1.merged.cent_out.report.txt' +] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. 
+ +Ex: + +```groovy +[ + id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' +] +``` + +\ +  + +#### `centrifuge_report` + +Type: `path` + +NextFlow input type of `path` pointing to `centrifuge` report file generated using `--report-file` option of `centrifuge` tool. + +\ +  + +#### `centrifuge_output` + +Type: `path` + +NextFlow input type of `path` pointing to `centrifuge` output file generated using `-S` option of `centrifuge` tool. + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and list of extracted FASTQ read ids. + +\ +  + +#### `extracted` + +Type: `path` + +NextFlow output type of `path` pointing to the extracted FASTQ read ids belonging to a particular taxa (`*.extract-centrifuge-bug-ids.txt`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/centrifuge/extract/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/centrifuge/extract/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,59 @@ +process CENTRIFUGE_EXTRACT { + tag "$meta.id" + label 'process_low' + + //seqkit container contains required bash and other utilities + module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) + conda (params.enable_conda ? "conda-forge::sed=4.7 conda-forge::coreutils" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-039542721b6b463b663872ba8b7e9fbc05f01925:1de88053ebf8fb9884758395c4871f642c57750c-0': + 'quay.io/biocontainers/mulled-v2-039542721b6b463b663872ba8b7e9fbc05f01925:1de88053ebf8fb9884758395c4871f642c57750c-0' }" + + input: + tuple val(meta), path(centrifuge_report) + tuple val(meta), path(centrifuge_output) + + output: + tuple val(meta), path('*.extract-centrifuge-bug-ids.txt'), emit: extracted + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + grep -F '${params.centrifuge_extract_bug}' $centrifuge_report \ + | cut -f2 \ + | sort -u \ + | while read -r taxId; do + echo -e "\t\$taxId"'\$' + done > gotcha.txt + + cut -f1-3 $centrifuge_output | grep -E -f gotcha.txt | cut -f1 | sort -u > ${prefix}.extract-centrifuge-bug-ids.txt || true + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bash: \$( bash --version 2>&1 | sed '1!d; s/^.*version //; s/ (.*\$//' ) + END_VERSIONS + + ver="" + sedver="" + + if [ "${workflow.containerEngine}" != "null" ]; then + ver=\$( cut --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) + sedver="\$ver" + else + ver=\$( cut --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) + sedver=\$( echo \$(sed --version 2>&1) | sed 's/^.*(GNU sed) //; s/ Copyright.*\$//' ) + fi + + cat <<-END_VERSIONS >> versions.yml + cut: \$ver + tail: \$ver + sort: \$ver + sed: \$sedver + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/centrifuge/process/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/centrifuge/process/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,101 @@ +# NextFlow DSL2 Module + +```bash +CENTRIFUGE_PROCESS +``` + +## Description + +Extract FASTQ reads given a FASTQ file originally used with `centrifuge` tool and a taxa of interest. 
This specific module uses a `python` script to generate the FASTQ read ids and as such requires a `bin` folder with `process_centrifuge_output.py` to be present where the NextFlow script will be executed from. See also `CENTRIFUGE_EXTRACT` module which uses only GNU Coreutils to create a list of FASTQ read ids that need to be extracted. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in a tuple in order of metadata (`meta`), a `path` (`centrifuge_report`) type and another `path` (`centrifuge_output`) per sample (`id:`). + +Ex: + +```groovy +[ + [ id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' + ], + '/hpc/scratch/test/FAL000870/f1.merged.cent_out.report.txt', + '/hpc/scratch/test/FAL000870/f1.merged.cent_out.output.txt' +] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. + +Ex: + +```groovy +[ + id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' +] +``` + +\ +  + +#### `centrifuge_report` + +Type: `path` + +NextFlow input type of `path` pointing to `centrifuge` report file generated using `--report-file` option of `centrifuge` tool. + +\ +  + +#### `centrifuge_output` + +Type: `path` + +NextFlow input type of `path` pointing to `centrifuge` output file generated using `-S` option of `centrifuge` tool. + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and list of extracted FASTQ read ids. + +\ +  + +#### `extracted` + +Type: `path` + +NextFlow output type of `path` pointing to the extracted FASTQ read ids belonging to a particular taxa (`*.process-centrifuge-bug-ids.txt`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. 
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/centrifuge/process/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/centrifuge/process/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,39 @@ +process CENTRIFUGE_PROCESS { + tag "$meta.id" + label 'process_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) + conda (params.enable_conda ? "conda-forge::python=3.9 conda-forge::pandas conda-forge::biopython" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-d91be2208450c41a5198d8660b6d9a5b60613b3a:d9847b41af5ef58746c86d7114cd010650f3d9a2-0' : + 'quay.io/biocontainers/mulled-v2-d91be2208450c41a5198d8660b6d9a5b60613b3a:d9847b41af5ef58746c86d7114cd010650f3d9a2-0' }" + + input: + tuple val(meta), path(centrifuge_report), path(centrifuge_output) + + output: + tuple val(meta), path('*.process-centrifuge-bug-ids.txt'), emit: extracted + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + process_centrifuge_output.py \\ + -r $centrifuge_report \\ + -o $centrifuge_output \\ + -b '${params.centrifuge_extract_bug}' \\ + -t ${prefix}.process-centrifuge-bug-ids.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$( python --version | sed 's/Python //g' ) + biopython: \$( python -c 'import Bio as bio; print(bio.__version__)' ) + numpy: \$( python -c 'import numpy as np; print(np.__version__)' ) + pandas: \$( python -c 'import pandas as pd; print(pd.__version__)' ) + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/custom/dump_software_versions/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/custom/dump_software_versions/README.md Thu Jun 27 14:17:26 
2024 -0400 @@ -0,0 +1,57 @@ +# NextFlow DSL2 Module + +```bash +DUMP_SOFTWARE_VERSIONS +``` + +## Description + +Given a `YAML` format file, produce a final `.yml` file which has unique entries and a corresponding `.mqc.yml` file for use with `multiqc`. + +\ +  + +### `input:` + +___ + +Type: `path` + +Takes in a `path` (`versions`) type pointing to the file to be used to produce a final `.yml` file without any duplicate entries and a `.mqc.yml` file. Generally, this is passed by mixing `versions` from various run time channels and finally passed to this module to produce a final software versions list. + +Ex: + +```groovy +[ '/hpc/scratch/test/work/9b/e7bf7e28806419c1c9a571dacd1f67/versions.yml' ] +``` + +\ +  + +### `output:` + +___ + +#### `yml` + +Type: `path` + +NextFlow output type of `path` type pointing to a `YAML` file with software versions. + +\ +  + +#### `mqc_yml` + +Type: `path` + +NextFlow output type of `path` pointing to `.mqc.yml` file which can be used to produce a software versions' table with `multiqc`. + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/custom/dump_software_versions/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/custom/dump_software_versions/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,26 @@ +process DUMP_SOFTWARE_VERSIONS { + tag "${params.pipeline} software versions" + label 'process_pico' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null) + conda (params.enable_conda ? "conda-forge::python=3.9 conda-forge::pyyaml" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ca258a039fcd88610bc4e297b13703e8be53f5ca:d638c4f85566099ea0c74bc8fddc6f531fe56753-0' : + 'quay.io/biocontainers/mulled-v2-ca258a039fcd88610bc4e297b13703e8be53f5ca:d638c4f85566099ea0c74bc8fddc6f531fe56753-0' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/custom/dump_software_versions/templates/dumpsoftwareversions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/custom/dump_software_versions/templates/dumpsoftwareversions.py Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,100 @@ +#!/usr/bin/env python + +import yaml +import platform +import subprocess +from textwrap import dedent + + +def _make_versions_html(versions): + html = [ + dedent( + """\\ + + + + + + + + + + + + + + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + + + + + + """ + ) + ) + html.append("") + html.append("
Process Name Software Version
{process if (i == 0) else ''}{tool}{version}
") + return "\\n".join(html) + + +versions_this_module = {} +versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, +} + +with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) + versions_by_process.update(versions_this_module) + +# aggregate versions by the module name (derived from fully-qualified process name) +versions_by_module = {} +for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + assert versions_by_module[module] == process_versions, ( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + +versions_by_module["CPIPES"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + "${params.pipeline}": "${params.workflow_version}" +} + +versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://cfsan-git.fda.gov/Kranti.Konganti/${workflow.manifest.name.toLowerCase()}", + "plot_type": "html", + "description": "Collected at run time from the software output (STDOUT/STDERR).", + "data": _make_versions_html(versions_by_module), +} + +with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + +# print('sed -i -e "' + "s%'%%g" + '" *.yml') +subprocess.run('sed -i -e "' + "s%'%%g" + '" software_versions.yml', shell=True) + +with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + +with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/ectyper/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/0.4.2/modules/ectyper/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,96 @@ +# NextFlow DSL2 Module + +```bash +ECTYPER +``` + +## Description + +Run `ectyper` tool on a list of assembled contigs in FASTA format. Produces a single output table in ASCII text format. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of assembled contig FASTA files of input type `path` (`fasta`). + +Ex: + +```groovy +[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. + +Ex: + +```groovy +[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] +``` + +\ +  + +#### `fasta` + +Type: `path` + +NextFlow input type of `path` pointing to assembled contig file in FASTA format. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'ECTYPER' { + ext.args = '--detailed' +} +``` + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and list of `ectyper` result files (`ectyped`). + +\ +  + +#### `ectyped` + +Type: `path` + +NextFlow output type of `path` pointing to the `ectyper` results table file per sample (`id:`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/ectyper/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/ectyper/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,45 @@ +process ECTYPER { + tag "$meta.id" + label 'process_low' + + module (params.enable_module ? 
"${params.swmodulepath}${params.fs}ectyper${params.fs}1.0.0" : null) + conda (params.enable_conda ? "bioconda::ectyper=1.0.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ectyper:1.0.0--pyhdfd78af_1' : + 'quay.io/biocontainers/ectyper:1.0.0--pyhdfd78af_1' }" + + input: + tuple val(meta), path(fasta) + + output: + path("${meta.id}${params.fs}*") + tuple val(meta), path("${meta.id}${params.fs}${meta.id}.tsv"), emit: ectyped + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when || fasta.size() > 0 + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getName().endsWith(".gz") ? true : false + def fasta_name = fasta.getName().replace(".gz", "") + """ + if [ "$is_compressed" == "true" ]; then + gzip -c -d $fasta > $fasta_name + fi + + ectyper \\ + $args \\ + --cores $task.cpus \\ + --output $prefix \\ + --input $fasta_name + + mv ${prefix}${params.fs}output.tsv ${prefix}${params.fs}${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ectyper: \$(echo \$(ectyper --version 2>&1) | sed 's/.*ectyper //; s/ .*\$//') + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/fastqc/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/fastqc/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,113 @@ +# NextFlow DSL2 Module + +```bash +FASTQC +``` + +## Description + +Run `fastqc` tool on reads in FASTQ format. Produces a HTML report file and a `.zip` file containing plots and data used to produce the plots. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). 
+
+Ex:
+
+```groovy
+[
+    [ id: 'FAL00870',
+        strandedness: 'unstranded',
+        single_end: true,
+        centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab'
+    ],
+    '/hpc/scratch/test/FAL000870/f1.merged.fq.gz'
+]
+```
+
+\
+&nbsp;
+
+#### `meta`
+
+Type: Groovy Map
+
+A Groovy Map containing the metadata about the FASTQ file.
+
+Ex:
+
+```groovy
+[
+    id: 'FAL00870',
+    strandedness: 'unstranded',
+    single_end: true
+]
+```
+
+\
+&nbsp;
+
+#### `reads`
+
+Type: `path`
+
+NextFlow input type of `path` pointing to FASTQ files on which `fastqc` should be run.
+
+\
+&nbsp;
+
+#### `args`
+
+Type: Groovy String
+
+String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file.
+
+Ex:
+
+```groovy
+withName: 'FASTQC' {
+    ext.args = '--nano'
+}
+```
+
+### `output:`
+
+___
+
+Type: `tuple`
+
+Outputs a tuple of metadata (`meta` from `input:`) and list of `fastqc` result files.
+
+\
+&nbsp;
+
+#### `html`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `fastqc` report file in HTML format per sample (`id:`).
+
+\
+&nbsp;
+
+#### `zip`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the zipped `fastqc` results per sample (`id:`).
+
+\
+&nbsp;
+
+#### `versions`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process.
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/fastqc/main.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/fastqc/main.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,49 @@
+// FASTQC: soft-link the reads to `<prefix>[_1|_2].fastq.gz`, run `fastqc` on the links and record its version.
+process FASTQC {
+    tag "$meta.id"
+    label 'process_low'
+
+    module (params.enable_module ? "${params.swmodulepath}${params.fs}fastqc${params.fs}0.11.9" : null)
+    conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
+        'quay.io/biocontainers/fastqc:0.11.9--0' }"
+
+    input:
+        tuple val(meta), path(reads)
+
+    output:
+        tuple val(meta), path("*.html"), emit: html
+        tuple val(meta), path("*.zip") , emit: zip
+        path "versions.yml"           , emit: versions
+
+    when:
+        task.ext.when == null || task.ext.when
+
+    script:
+        def fastqc_opts = task.ext.args ?: ''
+        // The soft-links below give every sample the same file naming inside the pipeline.
+        def sample_prefix = task.ext.prefix ?: "${meta.id}"
+        if (!meta.single_end) {
+            """
+            [ ! -f ${sample_prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${sample_prefix}_1.fastq.gz
+            [ ! -f ${sample_prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${sample_prefix}_2.fastq.gz
+            fastqc $fastqc_opts --threads $task.cpus ${sample_prefix}_1.fastq.gz ${sample_prefix}_2.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+            END_VERSIONS
+            """
+        } else {
+            """
+            [ ! -f ${sample_prefix}.fastq.gz ] && ln -s $reads ${sample_prefix}.fastq.gz
+            fastqc $fastqc_opts --threads $task.cpus ${sample_prefix}.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+            END_VERSIONS
+            """
+        }
+}
\ No newline at end of file
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/flye/assemble/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/flye/assemble/README.md Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,96 @@
+# NextFlow DSL2 Module
+
+```bash
+FLYE_ASSEMBLE
+```
+
+## Description
+
+Run `flye` assembler tool on a list of read files in FASTQ format.
+
+\
+&nbsp;
+
+### `input:`
+
+___
+
+Type: `tuple`
+
+Takes in the following tuple of metadata (`meta`) and a list of FASTQ files of input type `path` (`reads`).
+
+Ex:
+
+```groovy
+[ [id: 'sample1', single_end: true], '/data/sample1/f_merged.fq.gz' ]
+```
+
+\
+&nbsp;
+
+#### `meta`
+
+Type: Groovy Map
+
+A Groovy Map containing the metadata about the FASTQ file.
+
+Ex:
+
+```groovy
+[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ]
+```
+
+\
+&nbsp;
+
+#### `reads`
+
+Type: `path`
+
+NextFlow input type of `path` pointing to read files in FASTQ format that need to be *de novo* assembled.
+
+\
+&nbsp;
+
+#### `args`
+
+Type: Groovy String
+
+String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file.
+
+Ex:
+
+```groovy
+withName: 'FLYE_ASSEMBLE' {
+    ext.args = '--nano-raw'
+}
+```
+
+\
+&nbsp;
+
+### `output:`
+
+___
+
+Type: `tuple`
+
+Outputs a tuple of metadata (`meta` from `input:`) and `flye` assembled contig file in FASTA format.
+
+\
+&nbsp;
+
+#### `assembly`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `flye` assembler results file per sample (`id:`) i.e., the final assembled contig file in FASTA format.
+
+\
+&nbsp;
+
+#### `versions`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process.
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/flye/assemble/main.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/flye/assemble/main.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,59 @@
+// FLYE_ASSEMBLE: run `flye` on the reads of one sample. The read type option
+// (e.g. `--nano-raw`) is taken from `ext.args` and placed directly before the reads.
+process FLYE_ASSEMBLE {
+    tag "$meta.id"
+    label 'process_medium'
+    // errorStrategy 'ignore'
+
+    module (params.enable_module ? "${params.swmodulepath}${params.fs}flye${params.fs}2.8" : null)
+    conda (params.enable_conda ? "bioconda::flye=2.8.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/flye:2.8.1--py37h8270d21_1' :
+        'quay.io/biocontainers/flye:2.8.1--py37h8270d21_1' }"
+
+    input:
+        tuple val(meta), path(reads)
+
+    output:
+        path "${meta.id}${params.fs}*"
+        tuple val(meta), path("${meta.id}${params.fs}assembly.fasta"), emit: assembly, optional: true
+        path "versions.yml"                                          , emit: versions
+
+    when:
+        task.ext.when == null || task.ext.when
+
+    script:
+        def args = task.ext.args ?: ''
+        """
+        # `|| true`: a non-matching grep must not abort the task before the explicit check below.
+        reads_platform=\$( echo "$args" | grep -E -o '(--nano|--pacbio)-(raw|corr|hq|hifi)' || true )
+        if [ -z "\$reads_platform" ]; then
+            echo "ERROR: ext.args must contain a flye read type option, e.g. --nano-raw or --pacbio-hifi" >&2
+            exit 1
+        fi
+
+        flye \\
+            \$(echo "$args" | sed -e "s/\$reads_platform//") \\
+            -t $task.cpus \\
+            --out-dir "${meta.id}" \\
+            \$reads_platform \\
+            $reads
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            flye: \$( flye --version )
+        END_VERSIONS
+
+        grepver=""
+
+        if [ "${workflow.containerEngine}" != "null" ]; then
+            grepver=\$( grep --help 2>&1 | sed -e '1!d; s/ (.*\$//' )
+        else
+            grepver=\$( echo \$( grep --version 2>&1 ) | sed 's/^.*(GNU grep) //; s/ Copyright.*\$//' )
+        fi
+
+        cat <<-END_VERSIONS >> versions.yml
+            grep: \$grepver
+        END_VERSIONS
+        """
+}
\ No newline at end of file
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/gen_samplesheet/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/gen_samplesheet/README.md Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,55 @@
+# NextFlow DSL2 Module
+
+```bash
+GEN_SAMPLESHEET
+```
+
+## Description
+
+Generates a sample sheet in CSV format that contains required fields to be used to construct a Groovy Map of metadata. It requires as input, an absolute UNIX path to a folder containing only FASTQ files. This module requires the `fastq_dir_to_samplesheet.py` script to be present in the `bin` folder from where the NextFlow script including this module will be executed.
+
+\
+&nbsp;
+
+### `input:`
+
+___
+
+Type: `val`
+
+Takes in the absolute UNIX path to a folder containing only FASTQ files (`inputdir`).
+
+Ex:
+
+```groovy
+'/hpc/scratch/test/reads'
+```
+
+\
+&nbsp;
+
+### `output:`
+
+___
+
+Type: `path`
+
+NextFlow output of type `path` pointing to auto-generated CSV sample sheet (`csv`).
+
+\
+&nbsp;
+
+#### `csv`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to auto-generated CSV sample sheet for all FASTQ files present in the folder given by NextFlow input type of `val` (`inputdir`).
+
+\
+&nbsp;
+
+#### `versions`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process.
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/gen_samplesheet/main.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/gen_samplesheet/main.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,45 @@
+// GEN_SAMPLESHEET: call `fastq_dir_to_samplesheet.py` on `inputdir` to write
+// `autogen_samplesheet.csv`, with options taken from the `params.fq_*` settings.
+process GEN_SAMPLESHEET {
+    tag "${inputdir.simpleName}"
+    label "process_pico"
+
+    module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null)
+    conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.9--1' :
+        'quay.io/biocontainers/python:3.9--1' }"
+
+    input:
+        val inputdir
+
+    output:
+        path '*.csv'       , emit: csv
+        path 'versions.yml', emit: versions
+
+    when:
+        task.ext.when == null || task.ext.when
+
+    // The helper script (fastq_dir_to_samplesheet.py) is distributed
+    // as part of the pipeline nf-core/rnaseq/bin/. MIT License.
+    script:
+        def optional_flags = [
+            params.fq_single_end ? ' -se' : '',
+            params.fq_suffix ? " -r1 '${params.fq_suffix}'" : '',
+            params.fq2_suffix ? " -r2 '${params.fq2_suffix}'" : ''
+        ].join('')
+
+        """
+        fastq_dir_to_samplesheet.py -sn \\
+            -st '${params.fq_strandedness}' \\
+            -sd '${params.fq_filename_delim}' \\
+            -si ${params.fq_filename_delim_idx} \\
+            ${optional_flags} \\
+            ${inputdir} autogen_samplesheet.csv
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            python: \$( python --version | sed 's/Python //g' )
+        END_VERSIONS
+        """
+}
\ No newline at end of file
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/kraken2/classify/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/kraken2/classify/README.md Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,121 @@
+# NextFlow DSL2 Module
+
+```bash
+KRAKEN2_CLASSIFY
+```
+
+## Description
+
+Run `kraken2` tool on reads in FASTQ format. Produces 4 output files per sample (`id:`) in ASCII text format.
+
+\
+&nbsp;
+
+### `input:`
+
+___
+
+Type: `tuple`
+
+Takes in the following tuple of metadata (`meta`) and a list of reads or FASTA assembly of type `path` (`reads`) per sample (`id:`).
+
+Ex:
+
+```groovy
+[
+    [ id: 'FAL00870',
+        strandedness: 'unstranded',
+        single_end: true,
+        is_assembly: false,
+        centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab',
+        kraken2_db: '/hpc/db/kraken2/standard-210914'
+    ],
+    '/hpc/scratch/test/FAL000870/f1.merged.fq.gz'
+]
+```
+
+\
+&nbsp;
+
+#### `meta`
+
+Type: Groovy Map
+
+A Groovy Map containing the metadata about the FASTQ / FASTA file.
+
+Ex:
+
+```groovy
+[
+    id: 'FAL00870',
+    strandedness: 'unstranded',
+    single_end: true,
+    is_assembly: false,
+    kraken2_db: '/hpc/db/kraken2/standard-210914'
+]
+```
+
+\
+&nbsp;
+
+#### `reads`
+
+Type: `path`
+
+NextFlow input type of `path` pointing to FASTQ files on which `kraken2` classification should be run.
+
+\
+&nbsp;
+
+### `output:`
+
+___
+
+Type: `tuple`
+
+Outputs a tuple of metadata (`meta` from `input:`) and list of `kraken2` result files.
+
+\
+&nbsp;
+
+#### `kraken_report`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `kraken2` report table file (`.report.txt`) per sample (`id:`).
+
+\
+&nbsp;
+
+#### `kraken_output`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `kraken2` output table file (`.output.txt`) per sample (`id:`).
+
+\
+&nbsp;
+
+#### `classified`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `kraken2` processed gzipped FASTQ files containing only reads that have been classified (`*classified.fastq`) per sample (`id:`).
+
+\
+&nbsp;
+
+#### `unclassified`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `kraken2` processed gzipped FASTQ files containing only reads that are unclassified (`*unclassified.fastq`) per sample (`id:`).
+
+\
+&nbsp;
+
+#### `versions`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process.
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/kraken2/classify/main.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/kraken2/classify/main.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,77 @@
+// KRAKEN2_CLASSIFY: run `kraken2` against `meta.kraken2_db` on the reads (or assembly) of one
+// sample, then compress the classified / unclassified FASTQ files with pigz or gzip.
+process KRAKEN2_CLASSIFY {
+    tag "$meta.id"
+    label 'process_low'
+
+    module (params.enable_module ? "${params.swmodulepath}${params.fs}kraken2${params.fs}2.1.2" : null)
+    conda (params.enable_conda ? 'bioconda::kraken2=2.1.2 conda-forge::pigz=2.6' : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' :
+        'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }"
+
+    input:
+        tuple val(meta), path(reads)
+
+    output:
+        // `*.classified*`, not `*classified*`: the looser glob also matches the `.unclassified` files.
+        tuple val(meta), path('*.classified*') , emit: classified
+        tuple val(meta), path('*unclassified*'), emit: unclassified
+        tuple val(meta), path('*.report.txt')  , emit: kraken_report
+        tuple val(meta), path('*.output.txt')  , emit: kraken_output
+        path "versions.yml"                    , emit: versions
+
+    when:
+        (task.ext.when == null || task.ext.when) && (meta.is_assembly ? reads.size() : 1)
+
+    script:
+        def args = task.ext.args ?: ''
+        def db = meta.kraken2_db ?: ''
+        def prefix = task.ext.prefix ?: "${meta.id}"
+        // `reads` is a single path or, for paired-end data, a list of paths; normalise to a list of names.
+        def readList = (reads instanceof List ? reads : [reads]).collect{ it.toString() }
+        def is_single_end = (meta.single_end || meta.is_assembly) ? true : false
+        def paired = is_single_end ? "" : "--paired"
+        def classified = is_single_end ? "--classified-out ${prefix}.classified.fastq" : "--classified-out ${prefix}.classified#.fastq"
+        def unclassified = is_single_end ? "--unclassified-out ${prefix}.unclassified.fastq" : "--unclassified-out ${prefix}.unclassified#.fastq"
+        args += (readList.every{ it.endsWith(".gz") } ? ' --gzip-compressed ' : '')
+        """
+        kraken2 \\
+            --db $db \\
+            --threads $task.cpus \\
+            $unclassified \\
+            $classified \\
+            --report ${prefix}.kraken2.report.txt \\
+            --output ${prefix}.kraken2.output.txt \\
+            $paired \\
+            $args \\
+            $reads
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//')
+        END_VERSIONS
+
+        zcmd=""
+        zver=""
+
+        if type pigz > /dev/null 2>&1; then
+            pigz -p $task.cpus *.fastq
+            zcmd="pigz"
+            zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed -e '1!d' | sed "s/\$zcmd //" )
+        elif type gzip > /dev/null 2>&1; then
+            gzip *.fastq
+            zcmd="gzip"
+
+            if [ "${workflow.containerEngine}" != "null" ]; then
+                zver=\$( echo \$( \$zcmd --help 2>&1 ) | sed -e '1!d; s/ (.*\$//' )
+            else
+                zver=\$( echo \$( \$zcmd --version 2>&1 ) | sed "s/^.*(\$zcmd) //; s/\$zcmd //; s/ Copyright.*\$//" )
+            fi
+        fi
+
+        cat <<-END_VERSIONS >> versions.yml
+            \$zcmd: \$zver
+        END_VERSIONS
+        """
+}
\ No newline at end of file
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/kraken2/extract_contigs/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/kraken2/extract_contigs/README.md Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,101 @@
+# NextFlow DSL2 Module
+
+```bash
+KRAKEN2_EXTRACT_CONTIGS
+```
+
+## Description
+
+Extract FASTA reads or contigs given a FASTA file originally used with `kraken2` tool and a taxa of interest. This specific module uses a `python` script to generate the FASTA reads or contigs and as such requires a `bin` folder with `extract_assembled_filtered_contigs.py` script to be present where the NextFlow script will be executed from.
+
+\
+&nbsp;
+
+### `input:`
+
+___
+
+Type: `tuple`
+
+Takes in a tuple in order of metadata (`meta`), a `path` (`assembly`) type and another `path` (`kraken2_output`) per sample (`id:`). A second input of type `val` (`kraken2_extract_bug`) is passed to the script's `-b` option.
+
+Ex:
+
+```groovy
+[
+    [ id: 'FAL00870',
+        strandedness: 'unstranded',
+        single_end: true,
+        kraken2_db: '/hpc/db/kraken2/standard-210914'
+    ],
+    '/hpc/scratch/test/FAL000870/f1.assembly.fasta',
+    '/hpc/scratch/test/FAL000870/f1.merged.kraken2.output.txt'
+]
+```
+
+\
+&nbsp;
+
+#### `meta`
+
+Type: Groovy Map
+
+A Groovy Map containing the metadata about the FASTA file.
+
+Ex:
+
+```groovy
+[
+    id: 'FAL00870',
+    strandedness: 'unstranded',
+    single_end: true,
+    kraken2_db: '/hpc/db/kraken2/standard-210914'
+]
+```
+
+\
+&nbsp;
+
+#### `kraken2_output`
+
+Type: `path`
+
+NextFlow input type of `path` pointing to `kraken2` output file generated using `--output` option of `kraken2` tool.
+
+\
+&nbsp;
+
+#### `assembly`
+
+Type: `path`
+
+NextFlow input type of `path` pointing to a FASTA format file, in this case an assembled contig file in FASTA format.
+
+\
+&nbsp;
+
+### `output:`
+
+___
+
+Type: `tuple`
+
+Outputs a tuple of metadata (`meta` from `input:`) and the FASTA file of extracted reads or contigs.
+
+\
+&nbsp;
+
+#### `asm_filtered_contigs`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the extracted FASTA reads or contigs belonging to a particular taxa.
+
+\
+&nbsp;
+
+#### `versions`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process.
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/kraken2/extract_contigs/main.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/kraken2/extract_contigs/main.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,41 @@
+// KRAKEN2_EXTRACT_CONTIGS: call `extract_assembled_filtered_contigs.py` with the assembly (`-i`),
+// the kraken2 output (`-k`) and `kraken2_extract_bug` (`-b`) to write `<prefix>.assembly_filtered_contigs.fasta`.
+process KRAKEN2_EXTRACT_CONTIGS {
+    tag "$meta.id"
+    label 'process_nano'
+
+    module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null)
+    conda (params.enable_conda ? "conda-forge::python=3.9 conda-forge::pandas conda-forge::biopython" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-d91be2208450c41a5198d8660b6d9a5b60613b3a:d9847b41af5ef58746c86d7114cd010650f3d9a2-0' :
+        'quay.io/biocontainers/mulled-v2-d91be2208450c41a5198d8660b6d9a5b60613b3a:d9847b41af5ef58746c86d7114cd010650f3d9a2-0' }"
+
+    input:
+        tuple val(meta), path(assembly), path(kraken2_output)
+        val kraken2_extract_bug
+
+    output:
+        tuple val(meta), path('*assembly_filtered_contigs.fasta'), emit: asm_filtered_contigs
+        path "versions.yml"                                      , emit: versions
+
+    when:
+        task.ext.when == null || task.ext.when
+
+    script:
+        def prefix = task.ext.prefix ?: "${meta.id}"
+        """
+        extract_assembled_filtered_contigs.py \\
+            -i $assembly \\
+            -o ${prefix}.assembly_filtered_contigs.fasta \\
+            -k $kraken2_output \\
+            -b '$kraken2_extract_bug'
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            python: \$( python --version | sed 's/Python //g' )
+            biopython: \$( python -c 'import Bio as bio; print(bio.__version__)' )
+            numpy: \$( python -c 'import numpy as np; print(np.__version__)' )
+            pandas: \$( python -c 'import pandas as pd; print(pd.__version__)' )
+        END_VERSIONS
+        """
+}
\ No newline at end of file
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/megahit/assemble/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/megahit/assemble/README.md Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,97 @@
+# NextFlow DSL2 Module
+
+```bash
+MEGAHIT_ASSEMBLE
+```
+
+## Description
+
+Run `megahit` assembler tool on a list of read files in FASTQ format.
+
+\
+&nbsp;
+
+### `input:`
+
+___
+
+Type: `tuple`
+
+Takes in the following tuple of metadata (`meta`) and a list of FASTQ files (short reads) of input type `path` (`reads`).
+
+Ex:
+
+```groovy
+[ [id: 'sample1', single_end: true], '/data/sample1/f_merged.fq.gz' ]
+[ [id: 'sample1', single_end: false], ['/data/sample1/f1_merged.fq.gz', '/data/sample1/f2_merged.fq.gz'] ]
+```
+
+\
+&nbsp;
+
+#### `meta`
+
+Type: Groovy Map
+
+A Groovy Map containing the metadata about the FASTQ file.
+
+Ex:
+
+```groovy
+[ id: 'KB01', strandedness: 'unstranded', single_end: true ]
+```
+
+\
+&nbsp;
+
+#### `reads`
+
+Type: `path`
+
+NextFlow input type of `path` pointing to short read files in FASTQ format that need to be *de novo* assembled.
+
+\
+&nbsp;
+
+#### `args`
+
+Type: Groovy String
+
+String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file.
+
+Ex:
+
+```groovy
+withName: 'MEGAHIT_ASSEMBLE' {
+    ext.args = '--keep-tmp-files'
+}
+```
+
+\
+&nbsp;
+
+### `output:`
+
+___
+
+Type: `tuple`
+
+Outputs a tuple of metadata (`meta` from `input:`) and `megahit` assembled contigs file in FASTA format.
+
+\
+&nbsp;
+
+#### `assembly`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `megahit` assembler results file (`<id>.contigs.fa`) per sample (`id:`) i.e., the final assembled contigs file in FASTA format.
+
+\
+&nbsp;
+
+#### `versions`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process.
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/megahit/assemble/main.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/megahit/assemble/main.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,44 @@
+// MEGAHIT_ASSEMBLE: run `megahit` on the single- or paired-end reads of one sample, writing
+// into the `<prefix>` directory with `<prefix>` as the output file prefix.
+process MEGAHIT_ASSEMBLE {
+    tag "$meta.id"
+    label 'process_higher'
+
+    module (params.enable_module ? "${params.swmodulepath}${params.fs}megahit${params.fs}1.2.9" : null)
+    conda (params.enable_conda ? "bioconda::megahit=1.2.9" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/megahit:1.2.9--h2e03b76_1' :
+        'quay.io/biocontainers/megahit:1.2.9--h2e03b76_1' }"
+
+    input:
+        tuple val(meta), path(reads)
+
+    output:
+        // NOTE(review): this path is built from `meta.id` while the script writes under `prefix`; they differ when `ext.prefix` is set.
+        tuple val(meta), path("${meta.id}${params.fs}${meta.id}.contigs.fa"), emit: assembly, optional: true
+        path "versions.yml"                                                 , emit: versions
+
+    when:
+        task.ext.when == null || task.ext.when
+
+    script:
+        def args = task.ext.args ?: ''
+        def prefix = task.ext.prefix ?: "${meta.id}"
+        def maxmem = task.memory ? "--memory ${task.memory.toBytes()}" : ""
+        // Only the read arguments differ between single-end and paired-end runs.
+        def input_reads = meta.single_end ? "-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}"
+        """
+        megahit \\
+            $input_reads \\
+            -t $task.cpus \\
+            $maxmem \\
+            $args \\
+            --out-dir $prefix \\
+            --out-prefix $prefix
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//')
+        END_VERSIONS
+        """
+}
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/mlst/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/mlst/README.md Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,96 @@
+# NextFlow DSL2 Module
+
+```bash
+MLST
+```
+
+## Description
+
+Run `mlst` tool on a list of assembled contigs in FASTA format. Produces a single output table in ASCII text format.
+
+\
+&nbsp;
+
+### `input:`
+
+___
+
+Type: `tuple`
+
+Takes in the following tuple of metadata (`meta`) and a list of assembled contig FASTA files of input type `path` (`fasta`).
+
+Ex:
+
+```groovy
+[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ]
+```
+
+\
+&nbsp;
+
+#### `meta`
+
+Type: Groovy Map
+
+A Groovy Map containing the metadata about the FASTA file.
+
+Ex:
+
+```groovy
+[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ]
+```
+
+\
+&nbsp;
+
+#### `fasta`
+
+Type: `path`
+
+NextFlow input type of `path` pointing to assembled contig file in FASTA format.
+
+\
+&nbsp;
+
+#### `args`
+
+Type: Groovy String
+
+String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file.
+
+Ex:
+
+```groovy
+withName: 'MLST' {
+    ext.args = '--nopath'
+}
+```
+
+\
+&nbsp;
+
+### `output:`
+
+___
+
+Type: `tuple`
+
+Outputs a tuple of metadata (`meta` from `input:`) and list of `mlst` result files (`tsv`).
+
+\
+&nbsp;
+
+#### `tsv`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `mlst` results table file per sample (`id:`).
+
+\
+&nbsp;
+
+#### `versions`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process.
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/mlst/main.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/mlst/main.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,37 @@
+// MLST: run `mlst` on a non-empty assembled contig FASTA and redirect its table to `<prefix>.tsv`.
+process MLST {
+    tag "$meta.id"
+    label 'process_low'
+
+    module (params.enable_module ? "${params.swmodulepath}${params.fs}mlst${params.fs}2.19.0" : null)
+    conda (params.enable_conda ? "bioconda::mlst=2.19.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mlst:2.19.0--hdfd78af_1' :
+        'quay.io/biocontainers/mlst:2.19.0--hdfd78af_1' }"
+
+    input:
+        tuple val(meta), path(fasta)
+
+    output:
+        tuple val(meta), path("*.tsv"), emit: tsv
+        path "versions.yml"          , emit: versions
+
+    when:
+        (task.ext.when == null || task.ext.when) && fasta.size() > 0
+
+    script:
+        def mlst_opts = task.ext.args ?: ''
+        def out_prefix = task.ext.prefix ?: "${meta.id}"
+        """
+        mlst \\
+            --threads $task.cpus \\
+            $mlst_opts \\
+            $fasta > ${out_prefix}.tsv
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            mlst: \$( echo \$(mlst --version 2>&1) | sed 's/mlst //' )
+        END_VERSIONS
+        """
+
+}
\ No newline at end of file
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/multiqc/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/multiqc/README.md Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,67 @@
+# NextFlow DSL2 Module
+
+```bash
+MULTIQC
+```
+
+## Description
+
+Generate an aggregated [**MultiQC**](https://multiqc.info/) report. This particular module **will only work** within the framework of `cpipes` as in, it uses many `cpipes` related UNIX absolute paths to store and retrieve **MultiQC** related configuration files and `cpipes` context aware metadata. It also uses a custom logo with filename `FDa-Logo-Blue---medium-01.png` which should be located inside an `assets` folder from where the NextFlow script including this module will be executed.
+
+\
+&nbsp;
+
+### `input:`
+
+___
+
+Type: `path`
+
+Takes in NextFlow input type of `path` which points to many log files that **MultiQC** should parse.
+
+Ex:
+
+```groovy
+[ '/data/sample1/centrifuge/cent_output.txt', '/data/sample1/kraken/kraken_output.txt' ]
+```
+
+\
+&nbsp;
+
+### `output:`
+
+___
+
+#### `report`
+
+Type: `path`
+
+Outputs a NextFlow output type of `path` pointing to the location of **MultiQC** final HTML report.
+
+\
+&nbsp;
+
+#### `data`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the data files folder generated by **MultiQC** which were used to generate plots and HTML report.
+
+\
+&nbsp;
+
+#### `plots`
+
+Type: `path`
+Optional: `true`
+
+NextFlow output type of `path` pointing to the plots folder generated by **MultiQC**.
+
+\
+&nbsp;
+
+#### `versions`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process.
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/multiqc/main.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/multiqc/main.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,54 @@
+// MULTIQC: build the aggregated MultiQC report from the staged log files using the pipeline's config and logo.
+process MULTIQC {
+    label 'process_low'
+    tag 'MultiQC'
+
+    module (params.enable_module ? "${params.swmodulepath}${params.fs}multiqc${params.fs}1.12" : null)
+    conda (params.enable_conda ? 'bioconda::multiqc=1.12 conda-forge::spectra conda-forge::lzstring' : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
+        'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }"
+
+    input:
+        path multiqc_files
+
+    output:
+        path "*multiqc*"
+        path "*multiqc_report.html", emit: report
+        path "*_data"              , emit: data
+        path "*_plots"             , optional: true, emit: plots
+        path "versions.yml"        , emit: versions
+
+    when:
+        task.ext.when == null || task.ext.when
+
+    script:
+        def mqc_opts = task.ext.args ?: ''
+        // Copy the config template and the logo, fill the placeholders with sed, then run multiqc on the work dir.
+        """
+        cp ${params.projectconf}${params.fs}multiqc${params.fs}${params.pipeline}_mqc.yml cpipes_mqc_config.yml
+        cp ${params.assetsdir}${params.fs}FDa-Logo-Blue---medium-01.png FDa-Logo-Blue---medium-01.png
+        sed -i -e 's/Workflow_Name_Placeholder/${params.pipeline}/g; s/Workflow_Version_Placeholder/${params.workflow_version}/g' cpipes_mqc_config.yml
+        sed -i -e 's/CPIPES_Version_Placeholder/${workflow.manifest.version}/g; s%Workflow_Output_Placeholder%${params.output}%g' cpipes_mqc_config.yml
+        sed -i -e 's%Workflow_Input_Placeholder%${params.input}%g' cpipes_mqc_config.yml
+
+        multiqc -c cpipes_mqc_config.yml -f $mqc_opts .
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+        END_VERSIONS
+
+        sedver=""
+
+        if [ "${workflow.containerEngine}" != "null" ]; then
+            sedver=\$( sed --help 2>&1 | sed -e '1!d; s/ (.*\$//' )
+        else
+            sedver=\$( echo \$(sed --version 2>&1) | sed 's/^.*(GNU sed) //; s/ Copyright.*\$//' )
+        fi
+
+        cat <<-END_VERSIONS >> versions.yml
+            sed: \$sedver
+        END_VERSIONS
+        """
+}
\ No newline at end of file
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/samplesheet_check/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/samplesheet_check/README.md Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,55 @@
+# NextFlow DSL2 Module
+
+```bash
+SAMPLESHEET_CHECK
+```
+
+## Description
+
+Checks the validity of the sample sheet in CSV format to make sure there are required mandatory fields.
This module generally succeeds `GEN_SAMPLESHEET` module as part of the `cpipes` pipelines to make sure that all fields of the columns are properly formatted to be used as Groovy Map for `meta` which is of input type `val`. This module requires the `check_samplesheet.py` script to be present in the `bin` folder from where the NextFlow script including this module will be executed.
+
+\
+&nbsp;
+
+### `input:`
+
+___
+
+Type: `path`
+
+Takes in the absolute UNIX path to the sample sheet in CSV format (`samplesheet`).
+
+Ex:
+
+```groovy
+'/hpc/scratch/test/reads/output/gen_samplesheet/autogen_samplesheet.csv'
+```
+
+\
+&nbsp;
+
+### `output:`
+
+___
+
+Type: `path`
+
+NextFlow output of type `path` pointing to properly formatted CSV sample sheet (`csv`).
+
+\
+&nbsp;
+
+#### `csv`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the checked CSV sample sheet (`samplesheet.valid.csv`) written by `check_samplesheet.py` from the input sample sheet (`samplesheet`).
+
+\
+&nbsp;
+
+#### `versions`
+
+Type: `path`
+
+NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process.
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/samplesheet_check/main.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/samplesheet_check/main.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,34 @@
+// SAMPLESHEET_CHECK: run `check_samplesheet.py` on the input sample sheet and write `samplesheet.valid.csv`.
+process SAMPLESHEET_CHECK {
+    tag "${samplesheet}"
+    label 'process_femto'
+
+    module (params.enable_module ? "${params.swmodulepath}${params.fs}python${params.fs}3.8.1" : null)
+    conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.9--1' :
+        'quay.io/biocontainers/python:3.9--1' }"
+
+    input:
+        path samplesheet
+
+    output:
+        path "*.csv"       , emit: csv
+        path 'versions.yml', emit: versions
+
+    when:
+        task.ext.when == null || task.ext.when
+
+    // The helper script is bundled with the pipeline, in nf-core/rnaseq/bin/
+    script:
+        """
+        check_samplesheet.py \\
+            $samplesheet \\
+            samplesheet.valid.csv
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            python: \$( python --version | sed 's/Python //g' )
+        END_VERSIONS
+        """
+}
\ No newline at end of file
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqkit/grep/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/modules/seqkit/grep/README.md Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,113 @@
+# NextFlow DSL2 Module
+
+```bash
+SEQKIT_GREP
+```
+
+## Description
+
+Run `seqkit grep` command on reads in FASTQ format. Produces a filtered FASTQ file as per the filter strategy in the supplied input file.
+
+\
+&nbsp;
+
+### `input:`
+
+___
+
+Type: `tuple`
+
+Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`).
+
+Ex:
+
+```groovy
+[
+    [ id: 'FAL00870',
+        strandedness: 'unstranded',
+        single_end: true,
+        centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab'
+    ],
+    '/hpc/scratch/test/FAL000870/f1.merged.fq.gz'
+]
+```
+
+\
+&nbsp;
+
+#### `meta`
+
+Type: Groovy Map
+
+A Groovy Map containing the metadata about the FASTQ file.
+
+Ex:
+
+```groovy
+[
+    id: 'FAL00870',
+    strandedness: 'unstranded',
+    single_end: true
+]
+```
+
+\
+&nbsp;
+
+#### `reads`
+
+Type: `path`
+
+NextFlow input type of `path` pointing to FASTQ files on which `seqkit grep` should be run.
+
+\
+&nbsp;
+
+#### `pattern_file`
+
+Type: `path`
+
+NextFlow input type of `path` pointing to the pattern file which has the patterns, one per line, by which FASTQ sequence ids should be searched and whose reads will be extracted.
+ +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'SEQKIT_GREP' { + ext.args = '--only-positive-strand' +} +``` + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and a filtered gzipped FASTQ file. + +\ +  + +#### `fastx` + +Type: `path` + +NextFlow output type of `path` pointing to the FASTQ format filtered gzipped file per sample (`id:`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqkit/grep/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/seqkit/grep/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,89 @@ +process SEQKIT_GREP { + tag "$meta.id" + label 'process_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}seqkit${params.fs}2.2.0" : null) + conda (params.enable_conda ? "bioconda::seqkit=2.2.0 conda-forge::sed=4.7 conda-forge::coreutils" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0': + 'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }" + + input: + tuple val(meta), path(reads), path(pattern_file) + + output: + tuple val(meta), path("*.gz"), emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = "fastq" + if ("$reads" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/) { + extension = "fasta" + } + + if (meta.single_end) { + """ + pattern_file_contents=\$(sed '1!d' $pattern_file) + if [ "\$pattern_file_contents" != "DuMmY" ]; then + additional_args="-f $pattern_file $args" + else + additional_args="$args" + fi + + seqkit \\ + grep \\ + -j $task.cpus \\ + -o ${prefix}.seqkit-grep.${extension}.gz \\ + \$additional_args \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ + } else { + """ + pattern_file_contents=\$(sed '1!d' $pattern_file) + if [ "\$pattern_file_contents" != "DuMmY" ]; then + additional_args="-f $pattern_file $args" + else + additional_args="$args" + fi + + seqkit \\ + grep \\ + -j $task.cpus \\ + -o ${prefix}.R1.seqkit-grep.${extension}.gz \\ + \$additional_args \\ + ${reads[0]} + + seqkit \\ + grep \\ + -j $task.cpus \\ + -o ${prefix}.R2.seqkit-grep.${extension}.gz \\ + \$additional_args \\ + ${reads[1]} + + seqkit \\ + pair \\ + -j $task.cpus \\ + -1 ${prefix}.R1.seqkit-grep.${extension}.gz \\ + -2 ${prefix}.R2.seqkit-grep.${extension}.gz + + rm ${prefix}.R1.seqkit-grep.${extension}.gz + rm ${prefix}.R2.seqkit-grep.${extension}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ + } +} diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqkit/rmdup/README.md --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/0.4.2/modules/seqkit/rmdup/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,104 @@ +# NextFlow DSL2 Module + +```bash +SEQKIT_RMDUP +``` + +## Description + +Run `seqkit rmdup` command on reads in FASTQ format. Produces a filtered FASTQ file without duplicate sequences as per the strategy set using `ext.args` within the process scope. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). + +Ex: + +```groovy +[ + [ id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' + ], + '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' +] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. + +Ex: + +```groovy +[ + id: 'FAL00870', + strandedness: 'unstranded', + single_end: true +] +``` + +\ +  + +#### `reads` + +Type: `path` + +NextFlow input type of `path` pointing to FASTQ files on which `seqkit rmdup` should be run. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'SEQKIT_RMDUP' { + ext.args = '-t dna' +} +``` + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and filtered gzipped FASTQ file. + +\ +  + +#### `fastx` + +Type: `path` + +NextFlow output type of `path` pointing to the FASTQ format filtered gzipped file per sample (`id:`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. 
diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqkit/rmdup/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/seqkit/rmdup/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,84 @@ +process SEQKIT_RMDUP { + tag "$meta.id" + label 'process_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}seqkit${params.fs}2.2.0" : null) + conda (params.enable_conda ? "bioconda::seqkit=2.2.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0': + 'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*duplicated.details.txt"), optional: true + tuple val(meta), path("*.gz") , emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def rmdup_d = params.seqkit_rmdup_d ? "-d ${prefix}.seqs.duplicated.fastq.gz" : "" + def rmdup_D = params.seqkit_rmdup_D ? 
"-D ${prefix}.duplicated.details.txt" : "" + + def extension = "fastq" + if ("$reads" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/) { + extension = "fasta" + } + + if (meta.single_end) { + """ + seqkit \\ + rmdup \\ + $rmdup_d \\ + $rmdup_D \\ + -j $task.cpus \\ + -o ${prefix}.seqkit-rmdup.${extension}.gz \\ + $args \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ + } else { + """ + seqkit \\ + rmdup \\ + $rmdup_d \\ + $rmdup_D \\ + -j $task.cpus \\ + -o ${prefix}.R1.seqkit-rmdup.${extension}.gz \\ + $args \\ + ${reads[0]} + + seqkit \\ + rmdup \\ + $rmdup_d \\ + $rmdup_D \\ + -j $task.cpus \\ + -o ${prefix}.R2.seqkit-rmdup.${extension}.gz \\ + $args \\ + ${reads[1]} + + seqkit \\ + pair \\ + -j $task.cpus \\ + -1 ${prefix}.R1.seqkit-rmdup.${extension}.gz \\ + -2 ${prefix}.R2.seqkit-rmdup.${extension}.gz + + rm ${prefix}.R1.seqkit-rmdup.${extension}.gz + rm ${prefix}.R2.seqkit-rmdup.${extension}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ + } +} diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqkit/seq/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/seqkit/seq/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,104 @@ +# NextFlow DSL2 Module + +```bash +SEQKIT_SEQ +``` + +## Description + +Run `seqkit seq` command on reads in FASTQ format. Produces a filtered FASTQ file as per the filter strategy mentioned using the `ext.args` within the process scope. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). 
+ +Ex: + +```groovy +[ + [ id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' + ], + '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' +] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. + +Ex: + +```groovy +[ + id: 'FAL00870', + strandedness: 'unstranded', + single_end: true +] +``` + +\ +  + +#### `reads` + +Type: `path` + +NextFlow input type of `path` pointing to FASTQ files on which `seqkit seq` should be run. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'SEQKIT_SEQ' { + ext.args = '--max-len 4000' +} +``` + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and filtered gzipped FASTQ file. + +\ +  + +#### `fastx` + +Type: `path` + +NextFlow output type of `path` pointing to the FASTQ format filtered gzipped file per sample (`id:`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqkit/seq/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/seqkit/seq/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,75 @@ +process SEQKIT_SEQ { + tag "$meta.id" + label 'process_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}seqkit${params.fs}2.2.0" : null) + conda (params.enable_conda ? "bioconda::seqkit=2.2.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0': + 'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.gz"), emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = "fastq" + if ("$reads" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/) { + extension = "fasta" + } + + if (meta.single_end) { + """ + seqkit \\ + seq \\ + -j $task.cpus \\ + -o ${prefix}.seqkit-seq.${extension}.gz \\ + $args \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ + } else { + """ + seqkit \\ + seq \\ + -j $task.cpus \\ + -o ${prefix}.R1.seqkit-seq.${extension}.gz \\ + $args \\ + ${reads[0]} + + seqkit \\ + seq \\ + -j $task.cpus \\ + -o ${prefix}.R2.seqkit-seq.${extension}.gz \\ + $args \\ + ${reads[1]} + + seqkit \\ + pair \\ + -j $task.cpus \\ + -1 ${prefix}.R1.seqkit-seq.${extension}.gz \\ + -2 ${prefix}.R2.seqkit-seq.${extension}.gz + + rm ${prefix}.R1.seqkit-seq.${extension}.gz + rm ${prefix}.R2.seqkit-seq.${extension}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ + } +} diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqsero2/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/seqsero2/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,96 @@ +# NextFlow DSL2 Module + +```bash +SEQSERO2 +``` + +## Description + +Run `seqsero2` tool on a list of assembled *Salmonella* contigs in FASTA format or sequencing reads in FASTQ format. 
+ +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of assembled contig FASTA files or list of sequencing reads in FASTQ format of input type `path` (`reads_or_asm`). + +Ex: + +```groovy +[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ files or assembly FASTA files. + +Ex: + +```groovy +[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] +``` + +\ +  + +#### `reads_or_asm` + +Type: `path` + +NextFlow input type of `path` pointing to assembled contig file in FASTA format or sequencing reads in FASTQ format. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'SEQSERO2' { + ext.args = '-b mem' +} +``` + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and list of `seqsero2` result files (`serotyped`). + +\ +  + +#### `serotyped` + +Type: `path` + +NextFlow output type of `path` pointing to the `seqsero2` results table file per sample (`id:`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqsero2/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/seqsero2/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,42 @@ +process SEQSERO2 { + tag "$meta.id" + label 'process_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}seqsero2${params.fs}1.2.1" : null) + conda (params.enable_conda ? 
"bioconda::seqsero2=1.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqsero2:1.2.1--py_0' : + 'quay.io/biocontainers/seqsero2:1.2.1--py_0' }" + + input: + tuple val(meta), path(reads_or_asm) + + output: + path("${meta.id}${params.fs}*") + tuple val(meta), path("${meta.id}${params.fs}*_result.tsv"), emit: serotyped + path "versions.yml" , emit: versions + + when: + (task.ext.when == null || task.ext.when) && reads_or_asm.size() > 0 + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + SeqSero2_package.py \\ + $args \\ + -d $prefix \\ + -n $prefix \\ + -p $task.cpus \\ + -i $reads_or_asm + + mv ${prefix}${params.fs}SeqSero_log.txt ${prefix}${params.fs}${prefix}.SeqSero_log.txt + mv ${prefix}${params.fs}SeqSero_result.txt ${prefix}${params.fs}${prefix}.SeqSero_result.txt + mv ${prefix}${params.fs}SeqSero_result.tsv ${prefix}${params.fs}${prefix}.SeqSero_result.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqsero2: \$( echo \$( SeqSero2_package.py --version 2>&1) | sed 's/^.*SeqSero2_package.py //' ) + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqtk/seq/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/seqtk/seq/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,104 @@ +# NextFlow DSL2 Module + +```bash +SEQTK_SEQ +``` + +## Description + +Run `seqtk seq` command on reads in FASTQ format. Produces a filtered FASTQ file as per the filter strategy mentioned using the `ext.args` within the process scope. Please note that `seqtk seq` works only on one FASTQ file per command call. For paired-end reads, please use the `SEQKIT_SEQ` module. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of reads of type `path` (`reads`) per sample (`id:`). 
+ +Ex: + +```groovy +[ + [ id: 'FAL00870', + strandedness: 'unstranded', + single_end: true, + centrifuge_x: '/hpc/db/centrifuge/2022-04-12/ab' + ], + '/hpc/scratch/test/FAL000870/f1.merged.fq.gz' +] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. + +Ex: + +```groovy +[ + id: 'FAL00870', + strandedness: 'unstranded', + single_end: true +] +``` + +\ +  + +#### `reads` + +Type: `path` + +NextFlow input type of `path` pointing to FASTQ files on which `seqtk seq` should be run. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'SEQTK_SEQ' { + ext.args = '-L 4000' +} +``` + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and filtered gzipped FASTQ file. + +\ +  + +#### `fastx` + +Type: `path` + +NextFlow output type of `path` pointing to the FASTQ format filtered gzipped file per sample (`id:`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/seqtk/seq/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/seqtk/seq/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,42 @@ +process SEQTK_SEQ { + tag "$meta.id" + label 'process_mem_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}seqtk${params.fs}1.3-r106" : null) + conda (params.enable_conda ? "bioconda::seqtk=1.3 conda-forge::gzip" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' : + 'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }" + + input: + tuple val(meta), path(fastx) + + output: + tuple val(meta), path("*.gz"), emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/ || "$args" ==~ /\-[aA]/ ) { + extension = "fasta" + } + """ + seqtk \\ + seq \\ + $args \\ + $fastx | \\ + gzip -c > ${prefix}.seqtk-seq.${task.index}.${extension}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + gzip: \$( echo \$(gzip --version 2>&1) | sed 's/^.*(gzip) //; s/gzip //; s/ Copyright.*\$//' ) + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/serotypefinder/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/serotypefinder/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,96 @@ +# NextFlow DSL2 Module + +```bash +SEROTYPEFINDER +``` + +## Description + +Run `serotypefinder` tool on a list of assembled *E. coli* contigs or partial sequences in FASTA format. Produces a single output table in ASCII text format. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of assembled contig FASTA files of input type `path` (`fasta`). + +Ex: + +```groovy +[ [id: 'sample1', single_end: true], '/data/sample1/f_assembly.fa' ] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the assembly FASTA file. 
+ +Ex: + +```groovy +[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] +``` + +\ +  + +#### `fasta` + +Type: `path` + +NextFlow input type of `path` pointing to assembled contig file or partial sequences in FASTA format. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. + +Ex: + +```groovy +withName: 'SEROTYPEFINDER' { + ext.args = '-mp kma' +} +``` + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and list of `serotypefinder` result files (`serotyped`). + +\ +  + +#### `serotyped` + +Type: `path` + +NextFlow output type of `path` pointing to the `serotypefinder` results table file per sample (`id:`). + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/serotypefinder/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/serotypefinder/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,70 @@ +process SEROTYPEFINDER { + tag "$meta.id" + label 'process_low' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}serotypefinder${params.fs}2.0.2" : null) + conda (params.enable_conda ? "bioconda::serotypefinder=2.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/serotypefinder:2.0.1--py39hdfd78af_0' : + 'quay.io/biocontainers/serotypefinder:2.0.1--py39hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + path("${meta.id}${params.fs}*") + tuple val(meta), path("${meta.id}${params.fs}${meta.id}.tsv"), emit: serotyped + path "versions.yml" , emit: versions + + when: + (task.ext.when == null || task.ext.when) && fasta.size() > 0 + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getName().endsWith(".gz") ? true : false + def fasta_name = fasta.getName().replace(".gz", "") + def serotypefinder_db = "${meta.serotypefinder_db}" + def serotypefinder_cmd = (params.enable_module ? "serotypefinder.py" : "serotypefinder") + """ + if [ "$is_compressed" == "true" ]; then + gzip -c -d $fasta > $fasta_name + fi + + mkdir -p $prefix > /dev/null 2>&1 + + $serotypefinder_cmd \\ + $args \\ + -p $serotypefinder_db \\ + -o $prefix \\ + -i $fasta_name + + head -n1 ${prefix}${params.fs}results_tab.tsv | sed -E "s/(.*)/Name\\t\\1/g" > ${prefix}${params.fs}${prefix}.tsv + tail -n+2 ${prefix}${params.fs}results_tab.tsv | sed -E "s/(.*)/${prefix}\\t\\1/g" >> ${prefix}${params.fs}${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + serotypefinder: 2.0.1/2.0.2 + END_VERSIONS + + sedver="" + headver="" + tailver="" + + if [ "${workflow.containerEngine}" != "null" ]; then + sedver=\$( sed --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) + headver=\$( head --help 2>&1 | sed -e '1!d; s/ (.*\$//' ) + tailver="\$headver" + else + sedver=\$( echo \$(sed --version 2>&1) | sed 's/^.*(GNU sed) //; s/ Copyright.*\$//' ) + headver=\$( head --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) + tailver=\$( tail --version 2>&1 | sed '1!d; s/^.*(GNU coreutils//; s/) //;' ) + fi + + cat <<-END_VERSIONS >> versions.yml + sed: \$sedver + head: \$headver + tail: \$tailver + END_VERSIONS + """ +} \ No newline at end of 
file diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/spades/assemble/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/spades/assemble/README.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,115 @@ +# NextFlow DSL2 Module + +```bash +SPADES_ASSEMBLE +``` + +## Description + +Run `spades` assembler tool on a list of read files in FASTQ format. + +\ +  + +### `input:` + +___ + +Type: `tuple` + +Takes in the following tuple of metadata (`meta`) and a list of FASTQ files from various platforms of input type `path` (`illumina`, `pacbio`, `nanopore`). + +Ex: + +```groovy +[ [id: 'sample1', single_end: true], '/data/sample1/f_merged.fq.gz' ] +[ [id: 'sample1', single_end: false], ['/data/sample1/f1_merged.fq.gz', '/data/sample1/f2_merged.fq.gz'], ['/data/sample1/pacbio.fastq'], ['/data/sample1/nanopore.fastq'] ] +``` + +\ +  + +#### `meta` + +Type: Groovy Map + +A Groovy Map containing the metadata about the FASTQ file. + +Ex: + +```groovy +[ id: 'FAL00870', strandedness: 'unstranded', single_end: true ] +``` + +\ +  + +#### `illumina` + +Type: `path` + +NextFlow input type of `path` pointing to Illumina read files in FASTQ format that need to be *de novo* assembled along with reads from any other sequencing platforms, if any. + +\ +  + +#### `nanopore` + +Type: `path` + +NextFlow input type of `path` pointing to Oxford Nanopore read files in FASTQ format that need to be *de novo* assembled along with reads from any other sequencing platforms, if any. + +\ +  + +#### `pacbio` + +Type: `path` + +NextFlow input type of `path` pointing to PacBio read files in FASTQ format that need to be *de novo* assembled along with reads from any other sequencing platforms, if any. + +\ +  + +#### `args` + +Type: Groovy String + +String of optional command-line arguments to be passed to the tool. This can be mentioned in `process` scope within `withName:process_name` block using `ext.args` option within your `nextflow.config` file. 
+ +Ex: + +```groovy +withName: 'SPADES_ASSEMBLE' { + ext.args = '--rna' +} +``` + +\ +  + +### `output:` + +___ + +Type: `tuple` + +Outputs a tuple of metadata (`meta` from `input:`) and `spades` assembled scaffolds file in FASTA format. + +\ +  + +#### `assembly` + +Type: `path` + +NextFlow output type of `path` pointing to the `spades` assembler results file (`scaffolds.fasta`) per sample (`id:`) i.e., the final assembled scaffolds file in FASTA format. + +\ +  + +#### `versions` + +Type: `path` + +NextFlow output type of `path` pointing to the `.yml` file storing software versions for this process. diff -r 17890124001d -r 52045ea4679d 0.4.2/modules/spades/assemble/main.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/modules/spades/assemble/main.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,47 @@ +process SPADES_ASSEMBLE { + tag "$meta.id" + label 'process_higher' + + module (params.enable_module ? "${params.swmodulepath}${params.fs}spades${params.fs}3.15.3" : null) + conda (params.enable_conda ? 'bioconda::spades=3.15.3' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/spades:3.15.3--h95f258a_0' : + 'quay.io/biocontainers/spades:3.15.3--h95f258a_0' }" + + input: + tuple val(meta), path(illumina), path(pacbio), path(nanopore) + + output: + path "${meta.id}${params.fs}*" + tuple val(meta), path("${meta.id}${params.fs}scaffolds.fasta"), emit: assembly, optional: true + tuple val(meta), path("${meta.id}${params.fs}spades.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def maxmem = task.memory ? "--memory ${task.memory.toGiga()}" : "" + def illumina_reads = illumina ? ( meta.single_end ? 
"-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" + def pacbio_reads = !(pacbio.simpleName ==~ 'dummy_file.*') ? "--pacbio $pacbio" : "" + def nanopore_reads = !(nanopore.simpleName ==~ 'dummy_file.*') ? "--nanopore $nanopore" : "" + def custom_hmms = params.spades_hmm ? "--custom-hmms ${params.spades_hmm}" : "" + """ + spades.py \\ + $args \\ + --threads $task.cpus \\ + $maxmem \\ + $custom_hmms \\ + $illumina_reads \\ + $pacbio_reads \\ + $nanopore_reads \\ + -o ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spades: \$(spades.py --version 2>&1 | sed 's/^.*SPAdes genome assembler v//; s/ .*\$//') + END_VERSIONS + """ +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/nextflow.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/nextflow.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,162 @@ +def fs = File.separator +def pd = "${projectDir}" + +// Global parameters +includeConfig "${pd}${fs}conf${fs}manifest.config" +includeConfig "${pd}${fs}conf${fs}base.config" + +// Include FASTQ config to prepare for a case when the entry point is +// FASTQ metadata CSV or FASTQ input directory +includeConfig "${pd}${fs}conf${fs}fastq.config" + +if (params.pipeline != null) { + try { + includeConfig "${params.workflowsconf}${fs}${params.pipeline}.config" + } catch (Exception e) { + System.err.println('-'.multiply(params.linewidth) + "\n" + + "\033[0;31m${params.cfsanpipename} - ERROR\033[0m\n" + + '-'.multiply(params.linewidth) + "\n" + "\033[0;31mCould not load " + + "default pipeline configuration. Please provide a pipeline \n" + + "name using the --pipeline option.\n\033[0m" + '-'.multiply(params.linewidth) + "\n") + System.exit(1) + } +} + +// Include modules' config last. 
+includeConfig "${pd}${fs}conf${fs}logtheseparams.config" +includeConfig "${pd}${fs}conf${fs}modules.config" + +// Nextflow runtime profiles +conda.cacheDir = '/tmp' +singularity.cacheDir = '/tmp' + +// Cleanup after running +// cleanup = true + +profiles { + standard { + process.executor = 'local' + process.cpus = 1 + params.enable_conda = false + params.enable_module = true + singularity.enabled = false + docker.enabled = false + } + + stdkondagac { + process.executor = 'local' + process.cpus = 4 + params.enable_conda = true + params.enable_module = false + singularity.enabled = false + docker.enabled = false + } + + stdcingularitygac { + process.executor = 'local' + process.cpus = 4 + params.enable_conda = false + params.enable_module = false + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + } + + raven { + process.executor = 'slurm' + process.queue = 'prod' + process.memory = '10GB' + process.cpus = 4 + params.enable_conda = false + params.enable_module = true + singularity.enabled = false + docker.enabled = false + clusterOptions = '--signal B:USR2' + } + + eprod { + process.executor = 'slurm' + process.queue = 'lowmem,midmem,bigmem' + process.memory = '10GB' + process.cpus = 4 + params.enable_conda = false + params.enable_module = true + singularity.enabled = false + docker.enabled = false + clusterOptions = '--signal B:USR2' + } + + eprodcingularity { + process.executor = 'slurm' + process.queue = 'lowmem,midmem,bigmem' + process.memory = '10GB' + process.cpus = 4 + params.enable_conda = false + params.enable_module = false + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + clusterOptions = '--signal B:USR2' + } + + cingularity { + process.executor = 'slurm' + process.queue = 'prod' + process.memory = '10GB' + process.cpus = 4 + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + params.enable_conda = false + params.enable_module = false + 
clusterOptions = '--signal B:USR2' + } + + cingularitygac { + process.executor = 'slurm' + executor.$slurm.exitReadTimeout = 120000 + process.queue = 'centriflaken' + process.cpus = 4 + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + params.enable_conda = false + params.enable_module = false + clusterOptions = '-n 1 --signal B:USR2' + } + + konda { + process.executor = 'slurm' + process.queue = 'prod' + process.memory = '10GB' + process.cpus = 4 + singularity.enabled = false + docker.enabled = false + params.enable_conda = true + params.enable_module = false + clusterOptions = '--signal B:USR2' + } + + kondagac { + process.executor = 'slurm' + executor.$slurm.exitReadTimeout = 120000 + process.queue = 'centriflaken' + process.cpus = 4 + singularity.enabled = false + docker.enabled = false + params.enable_conda = true + params.enable_module = false + clusterOptions = '-n 1 --signal B:USR2' + } + + gxkubernetes { + process.executor = 'k8s' + k8s.namespace = 'galaxy' + k8s.serviceAccount = 'galaxy' + k8s.maxErrorRetry = 5 + singularity.enabled = false + docker.enabled = true + params.enable_conda = false + params.enable_module = false + } +} diff -r 17890124001d -r 52045ea4679d 0.4.2/readme/centriflaken.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/readme/centriflaken.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,276 @@ +# CPIPES (CFSAN PIPELINES) + +## The modular pipeline repository at CFSAN, FDA + +**CPIPES** (CFSAN PIPELINES) is a collection of modular pipelines based on **NEXTFLOW**, +mostly for bioinformatics data analysis at **CFSAN, FDA.** + +--- + +### **centriflaken** + +--- +Precision long-read metagenomics sequencing for food safety by detection and assembly of Shiga toxin-producing Escherichia coli. + +#### Workflow Usage + +```bash +module load cpipes/0.4.0 + +cpipes --pipeline centriflaken [options] +``` + +Example: Run the default `centriflaken` pipeline with taxa of interest as *E. coli*. 
+ +```bash +cd /hpc/scratch/$USER +mkdir nf-cpipes +cd nf-cpipes +cpipes --pipeline centriflaken --input /path/to/fastq/dir --output /path/to/output --user_email 'Kranti.Konganti@fda.hhs.gov' +``` + +Example: Run the `centriflaken` pipeline with taxa of interest as *Salmonella*. In this mode, `SerotypeFinder` tool will be replaced with `SeqSero2` tool. + +```bash +cd /hpc/scratch/$USER +mkdir nf-cpipes +cd nf-cpipes +cpipes --pipeline centriflaken --centrifuge_extract_bug 'Salmonella' --input /path/to/fastq/dir --output /path/to/output --user_email 'Kranti.Konganti@fda.hhs.gov' +``` + +#### `centriflaken` Help + +```text +[Kranti.Konganti@login2-slurm ]$ cpipes --pipeline centriflaken --help +N E X T F L O W ~ version 21.12.1-edge +Launching `/nfs/software/apps/cpipes/0.4.0/cpipes` [crazy_euler] - revision: 72db279311 +================================================================================ + (o) + ___ _ __ _ _ __ ___ ___ + / __|| '_ \ | || '_ \ / _ \/ __| +| (__ | |_) || || |_) || __/\__ \ + \___|| .__/ |_|| .__/ \___||___/ + | | | | + |_| |_| +-------------------------------------------------------------------------------- +A collection of modular pipelines at CFSAN, FDA. +-------------------------------------------------------------------------------- +Name : CPIPES +Author : Kranti.Konganti@fda.hhs.gov +Version : 0.4.0 +Center : CFSAN, FDA. +================================================================================ + +Workflow : centriflaken + +Author : Kranti.Konganti@fda.hhs.gov + +Version : 0.2.1 + + +Usage : cpipes --pipeline centriflaken [options] + + +Required : + +--input : Absolute path to directory containing FASTQ + files. The directory should contain only + FASTQ files as all the files within the + mentioned directory will be read. Ex: -- + input /path/to/fastq_pass + +--output : Absolute path to directory where all the + pipeline outputs should be stored. 
Ex: -- + output /path/to/output + +Other options : + +--metadata : Absolute path to metadata CSV file + containing five mandatory columns: sample, + fq1,fq2,strandedness,single_end. The fq1 + and fq2 columns contain absolute paths to + the FASTQ files. This option can be used in + place of --input option. This is rare. Ex: -- + metadata samplesheet.csv + +--fq_suffix : The suffix of FASTQ files (Unpaired reads + or R1 reads or Long reads) if an input + directory is mentioned via --input option. + Default: .fastq.gz + +--fq2_suffix : The suffix of FASTQ files (Paired-end reads + or R2 reads) if an input directory is + mentioned via --input option. Default: + false + +--fq_filter_by_len : Remove FASTQ reads that are less than this + many bases. Default: 4000 + +--fq_strandedness : The strandedness of the sequencing run. + This is mostly needed if your sequencing + run is RNA-SEQ. For most of the other runs, + it is probably safe to use unstranded for + the option. Default: unstranded + +--fq_single_end : SINGLE-END information will be auto- + detected but this option forces PAIRED-END + FASTQ files to be treated as SINGLE-END so + only read 1 information is included in auto- + generated samplesheet. Default: false + +--fq_filename_delim : Delimiter by which the file name is split + to obtain sample name. Default: _ + +--fq_filename_delim_idx : After splitting FASTQ file name by using + the --fq_filename_delim option, all + elements before this index (1-based) will + be joined to create final sample name. + Default: 1 + +--kraken2_db : Absolute path to kraken database. Default: / + hpc/db/kraken2/standard-210914 + +--kraken2_confidence : Confidence score threshold which must be + between 0 and 1. Default: 0.0 + +--kraken2_quick : Quick operation (use first hit or hits). + Default: false + +--kraken2_use_mpa_style : Report output like Kraken 1's kraken-mpa- + report. 
Default: false + +--kraken2_minimum_base_quality : Minimum base quality used in classification + which is only effective with FASTQ input. + Default: 0 + +--kraken2_report_zero_counts : Report counts for ALL taxa, even if counts + are zero. Default: false + +--kraken2_report_minmizer_data : Report minimizer and distinct minimizer + count information in addition to normal + Kraken report. Default: false + +--kraken2_use_names : Print scientific names instead of just + taxids. Default: true + +--kraken2_extract_bug : Extract the reads or contigs beloging to + this bug. Default: Escherichia coli + +--centrifuge_x : Absolute path to centrifuge database. + Default: /hpc/db/centrifuge/2022-04-12/ab + +--centrifuge_save_unaligned : Save SINGLE-END reads that did not align. + For PAIRED-END reads, save read pairs that + did not align concordantly. Default: false + +--centrifuge_save_aligned : Save SINGLE-END reads that aligned. For + PAIRED-END reads, save read pairs that + aligned concordantly. Default: false + +--centrifuge_out_fmt_sam : Centrifuge output should be in SAM. Default: + false + +--centrifuge_extract_bug : Extract this bug from centrifuge results. + Default: Escherichia coli + +--centrifuge_ignore_quals : Treat all quality values as 30 on Phred + scale. Default: false + +--flye_pacbio_raw : Input FASTQ reads are PacBio regular CLR + reads (<20% error) Defaut: false + +--flye_pacbio_corr : Input FASTQ reads are PacBio reads that + were corrected with other methods (<3% + error). Default: false + +--flye_pacbio_hifi : Input FASTQ reads are PacBio HiFi reads (<1% + error). Default: false + +--flye_nano_raw : Input FASTQ reads are ONT regular reads, + pre-Guppy5 (<20% error). Default: true + +--flye_nano_corr : Input FASTQ reads are ONT reads that were + corrected with other methods (<3% error). + Default: false + +--flye_nano_hq : Input FASTQ reads are ONT high-quality + reads: Guppy5+ SUP or Q20 (<5% error). 
+ Default: false + +--flye_genome_size : Estimated genome size (for example, 5m or 2. + 6g). Default: 5.5m + +--flye_polish_iter : Number of genome polishing iterations. + Default: false + +--flye_meta : Do a metagenome assembly (unenven coverage + mode). Default: true + +--flye_min_overlap : Minimum overlap between reads. Default: + false + +--flye_scaffold : Enable scaffolding using assembly graph. + Default: false + +--serotypefinder_run : Run SerotypeFinder tool. Default: true + +--serotypefinder_x : Generate extended output files. Default: + true + +--serotypefinder_db : Path to SerotypeFinder databases. Default: / + hpc/db/serotypefinder/2.0.2 + +--serotypefinder_min_threshold : Minimum percent identity (in float) + required for calling a hit. Default: 0.85 + +--serotypefinder_min_cov : Minumum percent coverage (in float) + required for calling a hit. Default: 0.80 + +--seqsero2_run : Run SeqSero2 tool. Default: false + +--seqsero2_t : '1' for interleaved paired-end reads, '2' + for separated paired-end reads, '3' for + single reads, '4' for genome assembly, '5' + for nanopore reads (fasta/fastq). Default: + 4 + +--seqsero2_m : Which workflow to apply, 'a'(raw reads + allele micro-assembly), 'k'(raw reads and + genome assembly k-mer). Default: k + +--seqsero2_c : SeqSero2 will only output serotype + prediction without the directory containing + log files. Default: false + +--seqsero2_s : SeqSero2 will not output header in + SeqSero_result.tsv. Default: false + +--mlst_run : Run MLST tool. Default: true + +--mlst_minid : DNA %identity of full allelle to consider ' + similar' [~]. Default: 95 + +--mlst_mincov : DNA %cov to report partial allele at all [?]. + Default: 10 + +--mlst_minscore : Minumum score out of 100 to match a scheme. + Default: 50 + +--abricate_run : Run ABRicate tool. Default: true + +--abricate_minid : Minimum DNA %identity. Defaut: 90 + +--abricate_mincov : Minimum DNA %coverage. Defaut: 80 + +--abricate_datadir : ABRicate databases folder. 
Defaut: /hpc/db/ + abricate/1.0.1/db + +Help options : + +--help : Display this message. +``` + +### **BETA** + +--- +The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations. diff -r 17890124001d -r 52045ea4679d 0.4.2/readme/centriflaken_hy.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/readme/centriflaken_hy.md Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,367 @@ +# CPIPES (CFSAN PIPELINES) + +## The modular pipeline repository at CFSAN, FDA + +**CPIPES** (CFSAN PIPELINES) is a collection of modular pipelines based on **NEXTFLOW**, +mostly for bioinformatics data analysis at **CFSAN, FDA.** + +--- + +### **centriflaken_hy** + +--- +`centriflaken_hy` is a variant of the original `centriflaken` pipeline for Illumina short reads, either single-end or paired-end. + +#### Workflow Usage + +```bash +module load cpipes/0.4.0 + +cpipes --pipeline centriflaken_hy [options] +``` + +Example: Run the default `centriflaken_hy` pipeline with taxa of interest as *E. coli*. + +```bash +cd /hpc/scratch/$USER +mkdir nf-cpipes +cd nf-cpipes +cpipes --pipeline centriflaken_hy --input /path/to/illumina/fastq/dir --output /path/to/output --user_email 'Kranti.Konganti@fda.hhs.gov' +``` + +Example: Run the `centriflaken_hy` pipeline with taxa of interest as *Salmonella*. In this mode, the `SerotypeFinder` tool will be replaced with the `SeqSero2` tool. 
+ +```bash +cd /hpc/scratch/$USER +mkdir nf-cpipes +cd nf-cpipes +cpipes --pipeline centriflaken_hy --centrifuge_extract_bug 'Salmonella' --input /path/to/illumina/fastq/dir --output /path/to/output --user_email 'Kranti.Konganti@fda.hhs.gov' +``` + +#### `centriflaken_hy` Help + +```text +[Kranti.Konganti@login2-slurm ]$ cpipes --pipeline centriflaken_hy --help +N E X T F L O W ~ version 21.12.1-edge +Launching `/home/Kranti.Konganti/apps/cpipes/cpipes` [soggy_curie] - revision: 72db279311 +================================================================================ + (o) + ___ _ __ _ _ __ ___ ___ + / __|| '_ \ | || '_ \ / _ \/ __| +| (__ | |_) || || |_) || __/\__ \ + \___|| .__/ |_|| .__/ \___||___/ + | | | | + |_| |_| +-------------------------------------------------------------------------------- +A collection of modular pipelines at CFSAN, FDA. +-------------------------------------------------------------------------------- +Name : CPIPES +Author : Kranti.Konganti@fda.hhs.gov +Version : 0.4.0 +Center : CFSAN, FDA. +================================================================================ + +Workflow : centriflaken_hy + +Author : Kranti.Konganti@fda.hhs.gov + +Version : 0.4.0 + + +Usage : cpipes --pipeline centriflaken_hy [options] + + +Required : + +--input : Absolute path to directory containing FASTQ + files. The directory should contain only + FASTQ files as all the files within the + mentioned directory will be read. Ex: -- + input /path/to/fastq_pass + +--output : Absolute path to directory where all the + pipeline outputs should be stored. Ex: -- + output /path/to/output + +Other options : + +--metadata : Absolute path to metadata CSV file + containing five mandatory columns: sample, + fq1,fq2,strandedness,single_end. The fq1 + and fq2 columns contain absolute paths to + the FASTQ files. This option can be used in + place of --input option. This is rare. 
Ex: -- + metadata samplesheet.csv + +--fq_suffix : The suffix of FASTQ files (Unpaired reads + or R1 reads or Long reads) if an input + directory is mentioned via --input option. + Default: _R1_001.fastq.gz + +--fq2_suffix : The suffix of FASTQ files (Paired-end reads + or R2 reads) if an input directory is + mentioned via --input option. Default: + _R2_001.fastq.gz + +--fq_filter_by_len : Remove FASTQ reads that are less than this + many bases. Default: 75 + +--fq_strandedness : The strandedness of the sequencing run. + This is mostly needed if your sequencing + run is RNA-SEQ. For most of the other runs, + it is probably safe to use unstranded for + the option. Default: unstranded + +--fq_single_end : SINGLE-END information will be auto- + detected but this option forces PAIRED-END + FASTQ files to be treated as SINGLE-END so + only read 1 information is included in auto- + generated samplesheet. Default: false + +--fq_filename_delim : Delimiter by which the file name is split + to obtain sample name. Default: _ + +--fq_filename_delim_idx : After splitting FASTQ file name by using + the --fq_filename_delim option, all + elements before this index (1-based) will + be joined to create final sample name. + Default: 1 + +--seqkit_rmdup_run : Remove duplicate sequences using seqkit + rmdup. Default: false + +--seqkit_rmdup_n : Match and remove duplicate sequences by + full name instead of just ID. Defaut: false + +--seqkit_rmdup_s : Match and remove duplicate sequences by + sequence content. Defaut: true + +--seqkit_rmdup_d : Save the duplicated sequences to a file. + Defaut: false + +--seqkit_rmdup_D : Save the number and list of duplicated + sequences to a file. Defaut: false + +--seqkit_rmdup_i : Ignore case while using seqkit rmdup. + Defaut: false + +--seqkit_rmdup_P : Only consider positive strand (i.e. 5') + when comparing by sequence content. Defaut: + false + +--kraken2_db : Absolute path to kraken database. 
Default: / + hpc/db/kraken2/standard-210914 + +--kraken2_confidence : Confidence score threshold which must be + between 0 and 1. Default: 0.0 + +--kraken2_quick : Quick operation (use first hit or hits). + Default: false + +--kraken2_use_mpa_style : Report output like Kraken 1's kraken-mpa- + report. Default: false + +--kraken2_minimum_base_quality : Minimum base quality used in classification + which is only effective with FASTQ input. + Default: 0 + +--kraken2_report_zero_counts : Report counts for ALL taxa, even if counts + are zero. Default: false + +--kraken2_report_minmizer_data : Report minimizer and distinct minimizer + count information in addition to normal + Kraken report. Default: false + +--kraken2_use_names : Print scientific names instead of just + taxids. Default: true + +--kraken2_extract_bug : Extract the reads or contigs beloging to + this bug. Default: Escherichia coli + +--centrifuge_x : Absolute path to centrifuge database. + Default: /hpc/db/centrifuge/2022-04-12/ab + +--centrifuge_save_unaligned : Save SINGLE-END reads that did not align. + For PAIRED-END reads, save read pairs that + did not align concordantly. Default: false + +--centrifuge_save_aligned : Save SINGLE-END reads that aligned. For + PAIRED-END reads, save read pairs that + aligned concordantly. Default: false + +--centrifuge_out_fmt_sam : Centrifuge output should be in SAM. Default: + false + +--centrifuge_extract_bug : Extract this bug from centrifuge results. + Default: Escherichia coli + +--centrifuge_ignore_quals : Treat all quality values as 30 on Phred + scale. Default: false + +--megahit_run : Run MEGAHIT assembler. Default: true + +--megahit_min_count : . Minimum multiplicity for filtering ( + k_min+1)-mers. Defaut: false + +--megahit_k_list : Comma-separated list of kmer size. All + values must be odd, in the range 15-255, + increment should be <= 28. Ex: '21,29,39,59, + 79,99,119,141'. Default: false + +--megahit_no_mercy : Do not add mercy k-mers. 
Default: false + +--megahit_bubble_level : . Intensity of bubble merging (0-2), 0 + to disable. Default: false + +--megahit_merge_level : . Merge complex bubbles of length <= l* + kmer_size and similarity >= s. Default: + false + +--megahit_prune_level : . Strength of low depth pruning (0-3). + Default: false + +--megahit_prune_depth : . Remove unitigs with avg k-mer depth + less than this value. Default: false + +--megahit_low_local_ratio : . Ratio threshold to define low + local coverage contigs. Default: false + +--megahit_max_tip_len : . remove tips less than this value [< + int> * k]. Default: false + +--megahit_no_local : Disable local assembly. Default: false + +--megahit_kmin_1pass : Use 1pass mode to build SdBG of k_min. + Default: false + +--megahit_preset : . Override a group of parameters. + Valid values are meta-sensitive which + enforces '--min-count 1 --k-list 21,29,39, + 49,...,129,141', meta-large (large & + complex metagenomes, like soil) which + enforces '--k-min 27 --k-max 127 --k-step + 10'. Default: meta-sensitive + +--megahit_mem_flag : . SdBG builder memory mode. 0: minimum; + 1: moderate; 2: use all memory specified. + Default: 2 + +--megahit_min_contig_len : . Minimum length of contigs to output. + Default: false + +--spades_run : Run SPAdes assembler. Default: false + +--spades_isolate : This flag is highly recommended for high- + coverage isolate and multi-cell data. + Defaut: false + +--spades_sc : This flag is required for MDA (single-cell) + data. Default: false + +--spades_meta : This flag is required for metagenomic data. + Default: true + +--spades_bio : This flag is required for biosytheticSPAdes + mode. Default: false + +--spades_corona : This flag is required for coronaSPAdes mode. + Default: false + +--spades_rna : This flag is required for RNA-Seq data. + Default: false + +--spades_plasmid : Runs plasmidSPAdes pipeline for plasmid + detection. 
Default: false + +--spades_metaviral : Runs metaviralSPAdes pipeline for virus + detection. Default: false + +--spades_metaplasmid : Runs metaplasmidSPAdes pipeline for plasmid + detection in metagenomics datasets. Default: + false + +--spades_rnaviral : This flag enables virus assembly module + from RNA-Seq data. Default: false + +--spades_iontorrent : This flag is required for IonTorrent data. + Default: false + +--spades_only_assembler : Runs only the SPAdes assembler module ( + without read error correction). Default: + false + +--spades_careful : Tries to reduce the number of mismatches + and short indels in the assembly. Default: + false + +--spades_cov_cutoff : Coverage cutoff value (a positive float + number). Default: false + +--spades_k : List of k-mer sizes (must be odd and less + than 128). Default: false + +--spades_hmm : Directory with custom hmms that replace the + default ones (very rare). Default: false + +--serotypefinder_run : Run SerotypeFinder tool. Default: true + +--serotypefinder_x : Generate extended output files. Default: + true + +--serotypefinder_db : Path to SerotypeFinder databases. Default: / + hpc/db/serotypefinder/2.0.2 + +--serotypefinder_min_threshold : Minimum percent identity (in float) + required for calling a hit. Default: 0.85 + +--serotypefinder_min_cov : Minumum percent coverage (in float) + required for calling a hit. Default: 0.80 + +--seqsero2_run : Run SeqSero2 tool. Default: false + +--seqsero2_t : '1' for interleaved paired-end reads, '2' + for separated paired-end reads, '3' for + single reads, '4' for genome assembly, '5' + for nanopore reads (fasta/fastq). Default: + 4 + +--seqsero2_m : Which workflow to apply, 'a'(raw reads + allele micro-assembly), 'k'(raw reads and + genome assembly k-mer). Default: k + +--seqsero2_c : SeqSero2 will only output serotype + prediction without the directory containing + log files. Default: false + +--seqsero2_s : SeqSero2 will not output header in + SeqSero_result.tsv. 
Default: false + +--mlst_run : Run MLST tool. Default: true + +--mlst_minid : DNA %identity of full allelle to consider ' + similar' [~]. Default: 95 + +--mlst_mincov : DNA %cov to report partial allele at all [?]. + Default: 10 + +--mlst_minscore : Minumum score out of 100 to match a scheme. + Default: 50 + +--abricate_run : Run ABRicate tool. Default: true + +--abricate_minid : Minimum DNA %identity. Defaut: 90 + +--abricate_mincov : Minimum DNA %coverage. Defaut: 80 + +--abricate_datadir : ABRicate databases folder. Defaut: /hpc/db/ + abricate/1.0.1/db + +Help options : + +--help : Display this message. +``` + +### **BETA** + +--- +The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations. diff -r 17890124001d -r 52045ea4679d 0.4.2/subworkflows/process_fastq.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/subworkflows/process_fastq.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,144 @@ +// Include any necessary methods and modules +include { stopNow; validateParamsForFASTQ } from "${params.routines}" +include { GEN_SAMPLESHEET } from "${params.modules}${params.fs}gen_samplesheet${params.fs}main" +include { SAMPLESHEET_CHECK } from "${params.modules}${params.fs}samplesheet_check${params.fs}main" +include { CAT_FASTQ } from "${params.modules}${params.fs}cat${params.fs}fastq${params.fs}main" +include { SEQKIT_SEQ } from "${params.modules}${params.fs}seqkit${params.fs}seq${params.fs}main" + +// Validate 4 required workflow parameters if +// FASTQ files are the input for the +// entry point. 
+validateParamsForFASTQ()
+
+// Start the subworkflow
+//
+// PROCESS_FASTQ takes no channel inputs; it is driven by params.input (a
+// directory of FASTQ files) or, failing that, params.metadata (a samplesheet
+// CSV). It emits:
+//   processed_reads : [ meta, reads ] tuples, one per sample id, with files of
+//                     the same sample concatenated and, when
+//                     params.fq_filter_by_len > 0, passed through SEQKIT_SEQ
+//   versions        : version outputs of the modules used here
+workflow PROCESS_FASTQ {
+    main:
+        versions = Channel.empty()
+        input_ch = Channel.empty()
+        reads = Channel.empty()
+
+        // --input wins over --metadata when both are given.
+        def input = file( (params.input ?: params.metadata) )
+
+        if (params.input) {
+            def fastq_files = []
+
+            if (params.fq_suffix == null) {
+                stopNow("We need to know what suffix the FASTQ files ends with inside the\n" +
+                    "directory. Please use the --fq_suffix option to indicate the file\n" +
+                    "suffix by which the files are to be collected to run the pipeline on.")
+            }
+
+            if (params.fq_strandedness == null) {
+                stopNow("We need to know if the FASTQ files inside the directory\n" +
+                    "are sequenced using stranded or non-stranded sequencing. This is generally\n" +
+                    "required if the sequencing experiment is RNA-SEQ. For almost all of the other\n" +
+                    "cases, you can probably use the --fq_strandedness unstranded option to indicate\n" +
+                    "that the reads are unstranded.")
+            }
+
+            if (params.fq_filename_delim == null || params.fq_filename_delim_idx == null) {
+                stopNow("We need to know the delimiter of the filename of the FASTQ files.\n" +
+                    "By default the filename delimiter is _ (underscore). This delimiter character\n" +
+                    "is used to split and assign a group name. The group name can be controlled by\n" +
+                    "using the --fq_filename_delim_idx option (1-based). For example, if the FASTQ\n" +
+                    "filename is WT_REP1_001.fastq, then to create a group WT, use the following\n" +
+                    "options: --fq_filename_delim _ --fq_filename_delim_idx 1")
+            }
+
+            if (!input.exists()) {
+                stopNow("The input directory,\n${params.input}\ndoes not exist!")
+            }
+
+            // Collect only the files that carry the requested suffix; the list is
+            // used solely to fail early when nothing matches.
+            input.eachFileRecurse { fq ->
+                if (fq.name.endsWith("${params.fq_suffix}")) {
+                    fastq_files << fq
+                }
+            }
+
+            if (fastq_files.isEmpty()) {
+                stopNow("The input directory,\n${params.input}\nis empty! or does not " +
+                    "have FASTQ files ending with the suffix: ${params.fq_suffix}")
+            }
+
+            GEN_SAMPLESHEET( Channel.fromPath(params.input, type: 'dir') )
+            GEN_SAMPLESHEET.out.csv.set{ input_ch }
+            versions.mix( GEN_SAMPLESHEET.out.versions )
+                .set { versions }
+        } else if (params.metadata) {
+            if (!input.exists()) {
+                stopNow("The metadata CSV file,\n${params.metadata}\ndoes not exist!")
+            }
+
+            if (input.size() <= 0) {
+                stopNow("The metadata CSV file,\n${params.metadata}\nis empty!")
+            }
+
+            Channel.fromPath(params.metadata, type: 'file')
+                .set { input_ch }
+        }
+
+        // One row per FASTQ record -> [ meta, files ], grouped by meta so that
+        // samples with several records can be concatenated.
+        SAMPLESHEET_CHECK( input_ch )
+            .csv
+            .splitCsv( header: true, sep: ',')
+            .map { create_fastq_channel(it) }
+            .groupTuple(by: [0])
+            .branch {
+                meta, fastq ->
+                    single   : fastq.size() == 1
+                        return [ meta, fastq.flatten() ]
+                    multiple : fastq.size() > 1
+                        return [ meta, fastq.flatten() ]
+            }
+            .set { reads }
+
+        CAT_FASTQ( reads.multiple )
+            .catted_reads
+            .mix( reads.single )
+            .set { processed_reads }
+
+        if (params.fq_filter_by_len.toInteger() > 0) {
+            SEQKIT_SEQ( processed_reads )
+                .fastx
+                .set { processed_reads }
+
+            versions.mix( SEQKIT_SEQ.out.versions.first().ifEmpty(null) )
+                .set { versions }
+        }
+
+        versions.mix(
+            SAMPLESHEET_CHECK.out.versions,
+            CAT_FASTQ.out.versions.first().ifEmpty(null)
+        )
+        .set { versions }
+
+    emit:
+        processed_reads
+        versions
+}
+
+// Function to get list of [ meta, [ fq1, fq2 ] ]
+//
+// row is one samplesheet record with the columns sample, fq1, fq2,
+// strandedness and single_end. The sample id is reduced to its first
+// params.fq_filename_delim_idx fields (split on params.fq_filename_delim) and
+// then cut at the first '.', if any. The run is stopped via stopNow when a
+// FASTQ file named in the row does not exist.
+def create_fastq_channel(LinkedHashMap row) {
+
+    def meta = [:]
+    meta.id = row.sample
+    meta.single_end = row.single_end.toBoolean()
+    meta.strandedness = row.strandedness
+
+    // String.split() interprets its argument as a regular expression, so the
+    // delimiter is quoted to keep characters such as '.' or '|' literal.
+    def delim = params.fq_filename_delim.toString()
+    def id_fields = meta.id.split(java.util.regex.Pattern.quote(delim))
+    // Do not ask for more fields than the name has; an index past the end
+    // would otherwise abort with an out-of-bounds error.
+    def last_field = Math.min(params.fq_filename_delim_idx.toInteger(), id_fields.size()) - 1
+    meta.id = id_fields[0..last_field].join(delim)
+    meta.id = (meta.id =~ /\./ ? meta.id.take(meta.id.indexOf('.')) : meta.id)
+
+    def array = []
+
+    if (!file(row.fq1).exists()) {
+        stopNow("Please check input metadata CSV. The following Read 1 FASTQ file does not exist!" +
+            "\n${row.fq1}")
+    }
+    if (meta.single_end) {
+        array = [ meta, [ file(row.fq1) ] ]
+    } else {
+        if (!file(row.fq2).exists()) {
+            stopNow("Please check input metadata CSV. The following Read 2 FASTQ file does not exist!" +
+                "\n${row.fq2}")
+        }
+        array = [ meta, [ file(row.fq1), file(row.fq2) ] ]
+    }
+    return array
+}
\ No newline at end of file
diff -r 17890124001d -r 52045ea4679d 0.4.2/workflows/centriflaken.nf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/0.4.2/workflows/centriflaken.nf Thu Jun 27 14:17:26 2024 -0400
@@ -0,0 +1,332 @@
+// Define any required imports for this specific workflow
+import java.nio.file.Paths
+import nextflow.file.FileHelper
+
+// Include any necessary methods
+include { \
+    summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \
+    addPadding; wrapUpHelp } from "${params.routines}"
+include { kraken2Help } from "${params.toolshelp}${params.fs}kraken2"
+include { centrifugeHelp } from "${params.toolshelp}${params.fs}centrifuge"
+include { flyeHelp } from "${params.toolshelp}${params.fs}flye"
+include { serotypefinderHelp } from "${params.toolshelp}${params.fs}serotypefinder"
+include { seqsero2Help } from "${params.toolshelp}${params.fs}seqsero2"
+include { mlstHelp } from "${params.toolshelp}${params.fs}mlst"
+include { abricateHelp } from "${params.toolshelp}${params.fs}abricate"
+
+// Exit if help requested before any subworkflows
+if (params.help) {
+    log.info help()
+    exit 0
+}
+
+// Include any necessary modules and subworkflows
+include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq"
+include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main"
+include { CENTRIFUGE_CLASSIFY } from "${params.modules}${params.fs}centrifuge${params.fs}classify${params.fs}main"
+include { CENTRIFUGE_PROCESS } from "${params.modules}${params.fs}centrifuge${params.fs}process${params.fs}main"
+include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main"
+include 
{ FLYE_ASSEMBLE } from "${params.modules}${params.fs}flye${params.fs}assemble${params.fs}main"
+include { KRAKEN2_CLASSIFY } from "${params.modules}${params.fs}kraken2${params.fs}classify${params.fs}main"
+include { KRAKEN2_EXTRACT_CONTIGS } from "${params.modules}${params.fs}kraken2${params.fs}extract_contigs${params.fs}main"
+include { SEROTYPEFINDER } from "${params.modules}${params.fs}serotypefinder${params.fs}main"
+include { SEQSERO2 } from "${params.modules}${params.fs}seqsero2${params.fs}main"
+include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main"
+include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main"
+include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main"
+include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main"
+include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main"
+include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
+
+
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    INPUTS AND ANY CHECKS FOR THE CENTRIFLAKEN WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Database locations as given by the params, and the ABRicate databases that
+// ABRICATE_RUN / ABRICATE_SUMMARY are driven with below.
+def kraken2_db_dir = file ( "${params.kraken2_db}" )
+def centrifuge_x = file ( "${params.centrifuge_x}" )
+def reads_platform = 0
+def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ]
+
+// Count how many of the mutually exclusive flye read-type flags are set.
+reads_platform += (params.flye_nano_raw ? 1 : 0)
+reads_platform += (params.flye_nano_corr ? 1 : 0)
+reads_platform += (params.flye_nano_hq ? 1 : 0)
+reads_platform += (params.flye_pacbio_raw ? 1 : 0)
+reads_platform += (params.flye_pacbio_corr ? 1 : 0)
+reads_platform += (params.flye_pacbio_hifi ? 1 : 0)
+
+// For centrifuge only the parent directory of the given path is checked
+// (presumably because --centrifuge_x is an index prefix, not a file -- confirm).
+if (!kraken2_db_dir.exists() || !centrifuge_x.getParent().exists()) {
+    stopNow("Please check if the following absolute paths are valid:\n" +
+        "${params.kraken2_db}\n${params.centrifuge_x}\n" +
+        "Cannot proceed further!")
+}
+
+// Exactly one read-type flag must be set; the message wording depends on
+// whether too many or none were given.
+if (reads_platform > 1 || reads_platform == 0) {
+    msg_0 = (reads_platform > 1 ? "only" : "at least")
+    stopNow("Please mention ${msg_0} one read platform for use with the flye assembler\n" +
+        "using any one of the following options:\n" +
+        "--flye_nano_raw\n--flye_nano_corr\n--flye_nano_hq\n" +
+        "--flye_pacbio_raw\n--flye_pacbio_corr\n--flye_pacbio_hifi")
+}
+
+// Both classifiers must target the same organism, so a run that overrides
+// only one of the two options stops here.
+if (params.centrifuge_extract_bug != params.kraken2_extract_bug) {
+    stopNow("Please make sure that the bug to be extracted is same\n" +
+        "for both --centrifuge_extract_bug and --kraken2_extract_bug options.")
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN THE CENTRIFLAKEN WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// CENTRIFLAKEN wires the modules in this order:
+//   PROCESS_FASTQ -> FASTQC + CENTRIFUGE_CLASSIFY -> CENTRIFUGE_PROCESS ->
+//   SEQKIT_GREP -> FLYE_ASSEMBLE -> KRAKEN2_CLASSIFY -> KRAKEN2_EXTRACT_CONTIGS ->
+//   SEROTYPEFINDER / SEQSERO2 / MLST / ABRICATE_RUN -> ABRICATE_SUMMARY ->
+//   TABLE_SUMMARY -> DUMP_SOFTWARE_VERSIONS -> MULTIQC
+//
+// NOTE(review): several .map closures below assign into `meta` in place
+// rather than building a new map. The same `meta` is also used as the key for
+// .join(); confirm that no other consumer sees the map while it is changed.
+workflow CENTRIFLAKEN {
+    main:
+        ch_asm_filtered_contigs = Channel.empty()
+        ch_mqc_custom_tbl = Channel.empty()
+
+        log.info summaryOfParams()
+
+        // Attach both database locations to every sample's meta map.
+        PROCESS_FASTQ()
+            .processed_reads
+            .map {
+                meta, fastq ->
+                    meta.centrifuge_x = params.centrifuge_x
+                    meta.kraken2_db = params.kraken2_db
+                    [meta, fastq]
+            }
+            .set { ch_processed_reads }
+
+        PROCESS_FASTQ
+            .out
+            .versions
+            .set { software_versions }
+
+        FASTQC ( ch_processed_reads )
+
+        CENTRIFUGE_CLASSIFY ( ch_processed_reads )
+
+        CENTRIFUGE_PROCESS (
+            CENTRIFUGE_CLASSIFY.out.report
+                .join( CENTRIFUGE_CLASSIFY.out.output )
+        )
+
+        // Pair each sample's reads with what CENTRIFUGE_PROCESS extracted for it.
+        ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted )
+            .set { ch_centrifuge_extracted }
+
+        SEQKIT_GREP ( ch_centrifuge_extracted )
+
+        FLYE_ASSEMBLE ( SEQKIT_GREP.out.fastx )
+
+        FLYE_ASSEMBLE
+            .out
+            .assembly
+            .set { ch_flye_assembly }
+
+        // Flag the records as assemblies for the downstream modules.
+        ch_flye_assembly
+            .map {
+                meta, fastq ->
+                    meta.is_assembly = true
+                    [meta, fastq]
+            }
+            .set { ch_flye_assembly }
+
+        // NOTE(review): the channel returned by ifEmpty is not assigned, so this
+        // statement does not appear to change what KRAKEN2_CLASSIFY receives -- confirm.
+        ch_flye_assembly.ifEmpty { [ false, false ] }
+
+        KRAKEN2_CLASSIFY ( ch_flye_assembly )
+
+        KRAKEN2_EXTRACT_CONTIGS (
+            ch_flye_assembly
+                .join( KRAKEN2_CLASSIFY.out.kraken_output ),
+            params.kraken2_extract_bug
+        )
+
+        // meta.organism is the first whitespace-separated word of the bug name,
+        // capitalized (e.g. 'Escherichia coli' -> 'Escherichia').
+        KRAKEN2_EXTRACT_CONTIGS
+            .out
+            .asm_filtered_contigs
+            .map {
+                meta, fastq ->
+                    meta.organism = params.kraken2_extract_bug.split(/\s+/)[0].capitalize()
+                    meta.serotypefinder_db = params.serotypefinder_db
+                    [meta, fastq]
+            }
+            .set { ch_asm_filtered_contigs }
+
+        // Both serotyping tools are invoked here unconditionally; whether each one
+        // actually runs is presumably decided inside the module -- confirm.
+        SEROTYPEFINDER ( ch_asm_filtered_contigs )
+
+        SEQSERO2 ( ch_asm_filtered_contigs )
+
+        MLST ( ch_asm_filtered_contigs )
+
+        ABRICATE_RUN (
+            ch_asm_filtered_contigs,
+            abricate_dbs
+        )
+
+        // Use the (constant) database list as the grouping key so that the
+        // results of all samples end up in a single tuple.
+        ABRICATE_RUN
+            .out
+            .abricated
+            .map { meta, abres -> [ abricate_dbs, abres ] }
+            .groupTuple(by: [0])
+            .map { it -> tuple ( it[0], it[1].flatten() ) }
+            .set { ch_abricated }
+
+        ABRICATE_SUMMARY ( ch_abricated )
+
+        // ABRICATE_SUMMARY.out.ecoli_vf.set { ch_abricate_summary_ecoli_vf }
+        // ch_abricate_summary_ecoli_vf.ifEmpty { [ false, false ] }
+
+        // Report files handed to MultiQC: centrifuge and kraken2 reports plus
+        // the FastQC zips, with the meta maps dropped.
+        CENTRIFUGE_CLASSIFY.out.kreport
+            .map { meta, kreport -> [ kreport ] }
+            .flatten()
+            .concat (
+                KRAKEN2_CLASSIFY.out.kraken_report
+                    .map { meta, kreport -> [ kreport ] }
+                    .flatten(),
+                FASTQC.out.zip
+                    .map { meta, zip -> [ zip ] }
+                    .flatten()
+            )
+            .set { ch_mqc_classify }
+
+        // Only one serotyping table is collected; SerotypeFinder takes
+        // precedence when both *_run params are true.
+        if (params.serotypefinder_run) {
+            SEROTYPEFINDER
+                .out
+                .serotyped
+                .map { meta, tsv -> [ 'serotypefinder', tsv ] }
+                .groupTuple(by: [0])
+                .map { it -> tuple ( it[0], it[1].flatten() ) }
+                .set { ch_mqc_custom_tbl }
+        } else if (params.seqsero2_run) {
+            SEQSERO2
+                .out
+                .serotyped
+                .map { meta, tsv -> [ 'seqsero2', tsv ] }
+                .groupTuple(by: [0])
+                .map { it -> tuple ( it[0], it[1].flatten() ) }
+                .set { ch_mqc_custom_tbl }
+        }
+
+        // Add one entry per ABRicate database, keyed by the first tuple element.
+        ch_mqc_custom_tbl
+            .concat (
+                ABRICATE_SUMMARY.out.ncbiamrplus.map{ it -> tuple ( it[0], it[1] )},
+                ABRICATE_SUMMARY.out.resfinder.map{ it -> tuple ( it[0], it[1] )},
+                ABRICATE_SUMMARY.out.megares.map{ it -> tuple ( it[0], it[1] )},
+                ABRICATE_SUMMARY.out.argannot.map{ it -> tuple ( it[0], it[1] )},
+            )
+            .groupTuple(by: [0])
+            .map { it -> [ it[0], it[1].flatten() ]}
+            .set { ch_mqc_custom_tbl }
+
+        TABLE_SUMMARY ( ch_mqc_custom_tbl )
+
+        // Merge every module's versions output into one de-duplicated YAML file.
+        DUMP_SOFTWARE_VERSIONS (
+            software_versions
+                .mix (
+                    FASTQC.out.versions,
+                    CENTRIFUGE_CLASSIFY.out.versions,
+                    CENTRIFUGE_PROCESS.out.versions,
+                    SEQKIT_GREP.out.versions,
+                    FLYE_ASSEMBLE.out.versions.ifEmpty(null),
+                    KRAKEN2_CLASSIFY.out.versions.ifEmpty(null),
+                    KRAKEN2_EXTRACT_CONTIGS.out.versions.ifEmpty(null),
+                    SEROTYPEFINDER.out.versions.ifEmpty(null),
+                    SEQSERO2.out.versions.ifEmpty(null),
+                    MLST.out.versions.ifEmpty(null),
+                    ABRICATE_RUN.out.versions.ifEmpty(null),
+                    ABRICATE_SUMMARY.out.versions.ifEmpty(null),
+                    TABLE_SUMMARY.out.versions.ifEmpty(null)
+                )
+                .unique()
+                .collectFile(name: 'collected_versions.yml')
+        )
+
+        // Everything MultiQC needs, gathered into a single list emission.
+        DUMP_SOFTWARE_VERSIONS
+            .out
+            .mqc_yml
+            .concat (
+                ch_mqc_classify,
+                TABLE_SUMMARY.out.mqc_yml
+            )
+            .collect()
+            .set { ch_multiqc }
+
+        MULTIQC ( ch_multiqc )
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow.onComplete {
+    if (workflow.success) {
+        // CREATE APPROPRIATE DIRECTORIES AND MOVE AS REQUESTED BY STAKEHOLDER(S)
+        //
+        // Nextflow's .moveTo will error out if directories contain files and it
+        // would be complex to include logic to skip directories
+        //
+        def final_intermediate_dir = "${params.output}${params.fs}${params.pipeline}-steps"
+        def final_results_dir = "${params.output}${params.fs}${params.pipeline}-results"
+        def kraken2_ext_contigs = file( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs", type: 'dir' )
+        def final_intermediate = file( final_intermediate_dir, type: 'dir' )
+        def final_results = file( final_results_dir, type: 'dir' 
) + def pipeline_output = file( params.output, type: 'dir' ) + + if ( !final_intermediate.exists() ) { + final_intermediate.mkdirs() + + FileHelper.visitFiles(Paths.get("${params.output}"), '*') { + if ( !(it.name ==~ /^(${params.cfsanpipename}|multiqc|\.nextflow|${workflow.workDir.name}|${params.pipeline}).*/) ) { + FileHelper.movePath( + it, Paths.get( "${final_intermediate_dir}${params.fs}${it.name}" ) + ) + } + } + } + + if ( kraken2_ext_contigs.exists() && !final_results.exists() ) { + final_results.mkdirs() + + FileHelper.movePath( + Paths.get( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs" ), + Paths.get( "${final_results_dir}${params.fs}kraken2_extract_contigs" ) + ) + } + + sendMail() + } +} + +workflow.onError { + sendMail() +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + HELPER METHODS FOR CENTRIFLAKEN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def help() { + + Map helptext = [:] + + helptext.putAll ( + fastqEntryPointHelp() + + kraken2Help(params).text + + centrifugeHelp(params).text + + flyeHelp(params).text + + serotypefinderHelp(params).text + + seqsero2Help(params).text + + mlstHelp(params).text + + abricateHelp(params).text + + wrapUpHelp() + ) + + return addPadding(helptext) +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/workflows/centriflaken_hy.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/workflows/centriflaken_hy.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,375 @@ +// Define any required imports for this specific workflow +import java.nio.file.Paths +import nextflow.file.FileHelper + +// Include any necessary methods +include { \ + summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \ + addPadding; wrapUpHelp } from "${params.routines}" +include { seqkitrmdupHelp } from "${params.toolshelp}${params.fs}seqkitrmdup" +include { kraken2Help } from 
"${params.toolshelp}${params.fs}kraken2" +include { centrifugeHelp } from "${params.toolshelp}${params.fs}centrifuge" +include { megahitHelp } from "${params.toolshelp}${params.fs}megahit" +include { spadesHelp } from "${params.toolshelp}${params.fs}spades" +include { serotypefinderHelp } from "${params.toolshelp}${params.fs}serotypefinder" +include { seqsero2Help } from "${params.toolshelp}${params.fs}seqsero2" +include { mlstHelp } from "${params.toolshelp}${params.fs}mlst" +include { abricateHelp } from "${params.toolshelp}${params.fs}abricate" + +// Exit if help requested before any subworkflows +if (params.help) { + log.info help() + exit 0 +} + +// Include any necessary modules and subworkflows +include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq" +include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main" +include { SEQKIT_RMDUP } from "${params.modules}${params.fs}seqkit${params.fs}rmdup${params.fs}main" +include { CENTRIFUGE_CLASSIFY } from "${params.modules}${params.fs}centrifuge${params.fs}classify${params.fs}main" +include { CENTRIFUGE_PROCESS } from "${params.modules}${params.fs}centrifuge${params.fs}process${params.fs}main" +include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main" +include { MEGAHIT_ASSEMBLE } from "${params.modules}${params.fs}megahit${params.fs}assemble${params.fs}main" +include { SPADES_ASSEMBLE } from "${params.modules}${params.fs}spades${params.fs}assemble${params.fs}main" +include { KRAKEN2_CLASSIFY } from "${params.modules}${params.fs}kraken2${params.fs}classify${params.fs}main" +include { KRAKEN2_EXTRACT_CONTIGS } from "${params.modules}${params.fs}kraken2${params.fs}extract_contigs${params.fs}main" +include { SEROTYPEFINDER } from "${params.modules}${params.fs}serotypefinder${params.fs}main" +include { SEQSERO2 } from "${params.modules}${params.fs}seqsero2${params.fs}main" +include { MLST } from 
"${params.modules}${params.fs}mlst${params.fs}main" +include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main" +include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main" +include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main" +include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main" +include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main" + + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + INPUTS AND ANY CHECKS FOR THE CENTRIFLAKEN-HY WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def kraken2_db_dir = file ( "${params.kraken2_db}" ) +def centrifuge_x = file ( "${params.centrifuge_x}" ) +def spades_custom_hmm = (params.spades_hmm ? file ( "${params.spades_hmm}" ) : false) +def reads_platform = 0 +def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ] + +reads_platform += (params.input ? 
1 : 0) + +// Both reference databases must be reachable before any process is scheduled. +if (!kraken2_db_dir.exists() || !centrifuge_x.getParent().exists()) { + stopNow("Please check if the following absolute paths are valid:\n" + + "${params.kraken2_db}\n${params.centrifuge_x}\n" + + "Cannot proceed further!") +} + +// The custom HMM path is optional; it is validated only when one was supplied. +if (spades_custom_hmm && !spades_custom_hmm.exists()) { + stopNow("Please check if the following SPAdes' custom HMM directory\n" + + "path is valid:\n${params.spades_hmm}\nCannot proceed further!") +} + +// reads_platform is incremented only when --input was supplied. +if (reads_platform < 1) { + stopNow("Please mention at least one absolute path to input folder which contains\n" + + "FASTQ files sequenced using the --input option.\n" + + "Ex: --input (Illumina or Generic short reads in FASTQ format)") +} + +if (params.centrifuge_extract_bug != params.kraken2_extract_bug) { + stopNow("Please make sure that the bug to be extracted is same\n" + + "for both --centrifuge_extract_bug and --kraken2_extract_bug options.") +} + +// The workflow below only defines ch_assembly inside the SPAdes or MEGAHIT branch, +// so at least one of the two assemblers has to be switched on. +if (!params.spades_run && !params.megahit_run) { + stopNow("Please enable at least one assembler using either the\n" + + "--megahit_run or --spades_run option.\nCannot proceed further!") +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN THE CENTRIFLAKEN-HY WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow CENTRIFLAKEN_HY { + main: + // Channels that are only filled in by optional branches start out empty. + ch_asm_filtered_contigs = Channel.empty() + ch_mqc_custom_tbl = Channel.empty() + ch_dummy = Channel.fromPath("${params.dummyfile}") + ch_dummy2 = Channel.fromPath("${params.dummyfile2}") + + log.info summaryOfParams() + + // Attach the database locations to each sample's meta map. + PROCESS_FASTQ() + .processed_reads + .map { + meta, fastq -> + meta.centrifuge_x = params.centrifuge_x + meta.kraken2_db = params.kraken2_db + [meta, fastq] + } + .set { ch_processed_reads } + + PROCESS_FASTQ + .out + .versions + .set { software_versions } + + FASTQC ( ch_processed_reads ) + + // Optional de-duplication replaces ch_processed_reads with the SEQKIT_RMDUP output. + if (params.seqkit_rmdup_run) { + SEQKIT_RMDUP ( ch_processed_reads ) + + SEQKIT_RMDUP + .out + .fastx + .set { ch_processed_reads } + + software_versions + .mix ( SEQKIT_RMDUP.out.versions.ifEmpty(null) ) + .set { software_versions } + } + + CENTRIFUGE_CLASSIFY ( ch_processed_reads ) + +
CENTRIFUGE_PROCESS ( + CENTRIFUGE_CLASSIFY.out.report + .join( CENTRIFUGE_CLASSIFY.out.output ) + ) + + ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted ) + .set { ch_centrifuge_extracted } + + SEQKIT_GREP ( ch_centrifuge_extracted ) + + // As of 06/02/2022, with the upcoming newer versions of NextFlow, we will be able to do + // allowNull: true for both input and output, but until then, we have to use dummy files. + // and work arounds. + // https://github.com/nextflow-io/nextflow/pull/2893 + if (params.spades_run) { + SPADES_ASSEMBLE ( + SEQKIT_GREP.out.fastx + .combine(ch_dummy) + .combine(ch_dummy2) + ) + + SPADES_ASSEMBLE + .out + .assembly + .set { ch_assembly } + + software_versions + .mix ( SPADES_ASSEMBLE.out.versions.ifEmpty(null) ) + .set { software_versions } + } else if (params.megahit_run) { + MEGAHIT_ASSEMBLE ( + SEQKIT_GREP.out.fastx + ) + + MEGAHIT_ASSEMBLE + .out + .assembly + .set { ch_assembly } + + software_versions + .mix ( MEGAHIT_ASSEMBLE.out.versions.ifEmpty(null) ) + .set { software_versions } + } + + ch_assembly + .map { + meta, fastq -> + meta.is_assembly = true + [meta, fastq] + } + .set { ch_assembly } + + ch_assembly.ifEmpty { [ false, false ] } + + KRAKEN2_CLASSIFY ( ch_assembly ) + + KRAKEN2_EXTRACT_CONTIGS ( + ch_assembly + .join( KRAKEN2_CLASSIFY.out.kraken_output ), + params.kraken2_extract_bug + ) + + KRAKEN2_EXTRACT_CONTIGS + .out + .asm_filtered_contigs + .map { + meta, fastq -> + meta.organism = params.kraken2_extract_bug.split(/\s+/)[0].capitalize() + meta.serotypefinder_db = params.serotypefinder_db + [meta, fastq] + } + .set { ch_asm_filtered_contigs } + + SEROTYPEFINDER ( ch_asm_filtered_contigs ) + + SEQSERO2 ( ch_asm_filtered_contigs ) + + MLST ( ch_asm_filtered_contigs ) + + ABRICATE_RUN ( + ch_asm_filtered_contigs, + abricate_dbs + ) + + ABRICATE_RUN + .out + .abricated + .map { meta, abres -> [ abricate_dbs, abres ] } + .groupTuple(by: [0]) + .map { it -> tuple ( it[0], it[1].flatten() ) } + .set { 
ch_abricated } + + ABRICATE_SUMMARY ( ch_abricated ) + + CENTRIFUGE_CLASSIFY.out.kreport + .map { meta, kreport -> [ kreport ] } + .flatten() + .concat ( + KRAKEN2_CLASSIFY.out.kraken_report + .map { meta, kreport -> [ kreport ] } + .flatten(), + FASTQC.out.zip + .map { meta, zip -> [ zip ] } + .flatten() + ) + .set { ch_mqc_classify } + + if (params.serotypefinder_run) { + SEROTYPEFINDER + .out + .serotyped + .map { meta, tsv -> [ 'serotypefinder', tsv ] } + .groupTuple(by: [0]) + .map { it -> tuple ( it[0], it[1].flatten() ) } + .set { ch_mqc_custom_tbl } + } else if (params.seqsero2_run) { + SEQSERO2 + .out + .serotyped + .map { meta, tsv -> [ 'seqsero2', tsv ] } + .groupTuple(by: [0]) + .map { it -> tuple ( it[0], it[1].flatten() ) } + .set { ch_mqc_custom_tbl } + } + + ch_mqc_custom_tbl + .concat ( + ABRICATE_SUMMARY.out.ncbiamrplus.map{ it -> tuple ( it[0], it[1] )}, + ABRICATE_SUMMARY.out.resfinder.map{ it -> tuple ( it[0], it[1] )}, + ABRICATE_SUMMARY.out.megares.map{ it -> tuple ( it[0], it[1] )}, + ABRICATE_SUMMARY.out.argannot.map{ it -> tuple ( it[0], it[1] )}, + ) + .groupTuple(by: [0]) + .map { it -> [ it[0], it[1].flatten() ]} + .set { ch_mqc_custom_tbl } + + TABLE_SUMMARY ( ch_mqc_custom_tbl ) + + DUMP_SOFTWARE_VERSIONS ( + software_versions + .mix ( + FASTQC.out.versions, + CENTRIFUGE_CLASSIFY.out.versions, + CENTRIFUGE_PROCESS.out.versions, + SEQKIT_GREP.out.versions, + KRAKEN2_CLASSIFY.out.versions.ifEmpty(null), + KRAKEN2_EXTRACT_CONTIGS.out.versions.ifEmpty(null), + SEROTYPEFINDER.out.versions.ifEmpty(null), + SEQSERO2.out.versions.ifEmpty(null), + MLST.out.versions.ifEmpty(null), + ABRICATE_RUN.out.versions.ifEmpty(null), + ABRICATE_SUMMARY.out.versions.ifEmpty(null), + TABLE_SUMMARY.out.versions.ifEmpty(null) + ) + .unique() + .collectFile(name: 'collected_versions.yml') + ) + + DUMP_SOFTWARE_VERSIONS + .out + .mqc_yml + .concat ( + ch_mqc_classify, + TABLE_SUMMARY.out.mqc_yml + ) + .collect() + .set { ch_multiqc } + + MULTIQC ( ch_multiqc ) 
+} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ON COMPLETE, MOVE OUTPUTS INTO THE -steps AND -results FOLDERS AND CALL sendMail() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (workflow.success) { + // CREATE APPROPRIATE DIRECTORIES AND MOVE AS REQUESTED BY STAKEHOLDER(S) + // + // Nextflow's .moveTo will error out if directories contain files and it + // would be complex to include logic to skip directories + // + def final_intermediate_dir = "${params.output}${params.fs}${params.pipeline}-steps" + def final_results_dir = "${params.output}${params.fs}${params.pipeline}-results" + def kraken2_ext_contigs = file( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs", type: 'dir' ) + def final_intermediate = file( final_intermediate_dir, type: 'dir' ) + def final_results = file( final_results_dir, type: 'dir' ) + + // First pass only: once the -steps folder exists nothing is moved again. + if ( !final_intermediate.exists() ) { + final_intermediate.mkdirs() + + // Entries whose name starts with the pipeline name, multiqc, .nextflow or the + // work directory name stay where they are; everything else goes under -steps. + FileHelper.visitFiles(Paths.get("${params.output}"), '*') { + if ( !(it.name ==~ /^(${params.cfsanpipename}|multiqc|\.nextflow|${workflow.workDir.name}|${params.pipeline}).*/) ) { + FileHelper.movePath( + it, Paths.get( "${final_intermediate_dir}${params.fs}${it.name}" ) + ) + } + } + } + + // Promote the extracted contigs from -steps to -results, again only once. + if ( kraken2_ext_contigs.exists() && !final_results.exists() ) { + final_results.mkdirs() + + FileHelper.movePath( + Paths.get( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs" ), + Paths.get( "${final_results_dir}${params.fs}kraken2_extract_contigs" ) + ) + } + + sendMail() + } +} + +// On failure no files are moved; only sendMail() is called. +workflow.onError { + sendMail() +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + HELPER METHODS FOR CENTRIFLAKEN-HY WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def help() { + + Map helptext = [:] + +
helptext.putAll ( + fastqEntryPointHelp() + + seqkitrmdupHelp(params).text + + kraken2Help(params).text + + centrifugeHelp(params).text + + megahitHelp(params).text + + spadesHelp(params).text + + serotypefinderHelp(params).text + + seqsero2Help(params).text + + mlstHelp(params).text + + abricateHelp(params).text + + wrapUpHelp() + ) + + return addPadding(helptext) +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/workflows/conf/centriflaken.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/workflows/conf/centriflaken.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,59 @@ +params { + workflow_blueprint_by = 'Narjol.Gonzalez-Escalona@fda.hhs.gov' + workflow_built_by = 'Kranti.Konganti@fda.hhs.gov' + workflow_version = '0.2.1' + centrifuge_x = '/tool-local-data/cfsan-centriflaken-db/0/centrifuge/2022-04-12/ab' + centrifuge_extract_bug = 'Escherichia coli' + centrifuge_save_aligned = false + centrifuge_save_unaligned = false + centrifuge_out_fmt_sam = false + centrifuge_ignore_quals = false + kraken2_db = '/tool-local-data/cfsan-centriflaken-db/0/kraken2/standard-210914' + kraken2_confidence = '0.0' + kraken2_quick = false + kraken2_use_mpa_style = false + kraken2_minimum_base_quality = '0' + kraken2_report_zero_counts = false + kraken2_report_minimizer_data = false + kraken2_use_names = true + kraken2_extract_bug = params.centrifuge_extract_bug + flye_pacbio_raw = false + flye_pacbio_corr = false + flye_pacbio_hifi = false + flye_nano_raw = true + flye_nano_corr = false + flye_nano_hq = false + flye_genome_size = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? '5m' : '5.5m') + flye_polish_iter = false + flye_min_overlap = false + flye_scaffold = false + flye_meta = true + ectyper_run = false + ectyper_perc_opid = 90 + ectyper_perc_hpid = 95 + ectyper_perc_opcov = 95 + ectyper_perc_hpcov = 50 + serotypefinder_run = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? 
false : true) + serotypefinder_db = '/tool-local-data/cfsan-centriflaken-db/0/serotypefinder/2.0.2' + serotypefinder_min_cov = 0.80 + serotypefinder_min_threshold = 0.85 + serotypefinder_x = true + seqsero2_run = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? true : false) + seqsero2_t = 4 + seqsero2_m = 'k' + seqsero2_c = false + seqsero2_s = false + mlst_run = true + mlst_minid = 95 + mlst_mincov = 10 + mlst_minscore = 50 + amrfinderplus_run = false + amrfinderplus_db = '/tool-local-data/cfsan-centriflaken-db/0/amrfinderplus/3.10.24/latest' + amrfinderplus_genes = true + abricate_run = true + abricate_datadir = '/tool-local-data/cfsan-centriflaken-db/0/abricate/1.0.1/db' + abricate_minid = 90 + abricate_mincov = 80 + abricate_summary_run = true + seqkit_grep_on = false +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/workflows/conf/centriflaken_hy.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/workflows/conf/centriflaken_hy.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,90 @@ +params { + workflow_blueprint_by = 'Narjol.Gonzalez-Escalona@fda.hhs.gov' + workflow_built_by = 'Kranti.Konganti@fda.hhs.gov' + workflow_version = '0.4.0' + seqkit_rmdup_run = false + seqkit_rmdup_n = false + seqkit_rmdup_s = true + seqkit_rmdup_d = false + seqkit_rmdup_D = false + seqkit_rmdup_P = false + seqkit_rmdup_i = false + centrifuge_x = '/tool-local-data/cfsan-centriflaken-db/0/centrifuge/2022-04-12/ab' + centrifuge_extract_bug = 'Escherichia coli' + centrifuge_save_aligned = false + centrifuge_save_unaligned = false + centrifuge_out_fmt_sam = false + centrifuge_ignore_quals = false + kraken2_db = '/tool-local-data/cfsan-centriflaken-db/0/kraken2/standard-210914' + kraken2_confidence = '0.0' + kraken2_quick = false + kraken2_use_mpa_style = false + kraken2_minimum_base_quality = '0' + kraken2_report_zero_counts = false + kraken2_report_minimizer_data = false + kraken2_use_names = true + kraken2_extract_bug = 
params.centrifuge_extract_bug + megahit_run = true + megahit_min_count = false + megahit_k_list = false + megahit_no_mercy = false + megahit_bubble_level = false + megahit_merge_level = false + megahit_prune_level = false + megahit_prune_depth = false + megahit_low_local_ratio = false + megahit_max_tip_len = false + megahit_no_local = false + megahit_kmin_1pass = false + megahit_preset = 'meta-sensitive' + megahit_mem_flag = 2 + megahit_min_contig_len = false + spades_run = false + spades_isolate = false + spades_sc = false + spades_meta = true + spades_bio = false + spades_corona = false + spades_rna = false + spades_plasmid = false + spades_metaviral = false + spades_metaplasmid = false + spades_rnaviral = false + spades_iontorrent = false + spades_only_assembler = false + spades_careful = false + spades_cov_cutoff = false + spades_k = false + spades_hmm = false + ectyper_run = false + ectyper_perc_opid = 90 + ectyper_perc_hpid = 95 + ectyper_perc_opcov = 95 + ectyper_perc_hpcov = 50 + serotypefinder_run = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? false : true) + serotypefinder_db = '/tool-local-data/cfsan-centriflaken-db/0/serotypefinder/2.0.2' + serotypefinder_min_cov = 0.80 + serotypefinder_min_threshold = 0.85 + serotypefinder_x = true + seqsero2_run = (params.centrifuge_extract_bug ==~ /(?i)Salmonella/ ? 
true : false) + seqsero2_t = 4 + seqsero2_m = 'k' + seqsero2_c = false + seqsero2_s = false + mlst_run = true + mlst_minid = 95 + mlst_mincov = 10 + mlst_minscore = 50 + amrfinderplus_run = false + amrfinderplus_db = '/tool-local-data/cfsan-centriflaken-db/0/amrfinderplus/3.10.24/latest' + amrfinderplus_genes = true + abricate_run = true + abricate_datadir = '/tool-local-data/cfsan-centriflaken-db/0/abricate/1.0.1/db' + abricate_minid = 90 + abricate_mincov = 80 + abricate_summary_run = true + seqkit_grep_on = false + fq_filter_by_len = 75 + fq_suffix = '_R1_001.fastq.gz' + fq2_suffix = '_R2_001.fastq.gz' +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/workflows/conf/nanofactory.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/workflows/conf/nanofactory.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,21 @@ +params { + workflow_author = "Rodney.Engelbach@fda.hhs.gov" + workflow_version = "0.4.1" + sample_sheet = "" + global_settings = "" + log_file = "" + log_level = "info" + mode = "" + verbose = false + disable_project_setup = false + setup_purge_existing = false + setup_fix_existing = true + setup_nocopy = false + setup_runtype = "" + guppy_threads = 4 + guppy_config = "" + merge_overwrite = false + mail_group = "stakeholders" + help = false + enable_module = "'nanofactory/current'" +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/workflows/conf/process/centriflaken.process.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/workflows/conf/process/centriflaken.process.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,111 @@ +process { + withName: 'SEQKIT_SEQ' { + ext.args = [ + params.fq_filter_by_len ? 
"-m ${params.fq_filter_by_len}" : '' + ].join(' ').trim() + } + + if (params.seqkit_grep_on) { + withName: 'SEQKIT_GREP' { + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}seqkitgrep.nf").seqkitgrepHelp(params).helpparams + ) + } + } + + withName: 'CENTRIFUGE_CLASSIFY' { + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}centrifuge.nf").centrifugeHelp(params).helpparams + ) + } + + withName: 'KRAKEN2_CLASSIFY' { + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}kraken2.nf").kraken2Help(params).helpparams + ) + } + + withName: 'FLYE_ASSEMBLE' { + errorStrategy = 'ignore' + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}flye.nf").flyeHelp(params).helpparams + ) + } + + if (params.ectyper_run) { + withName: 'ECTYPER' { + ext.when = params.ectyper_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}ectyper.nf").ectyperHelp(params).helpparams + ) + } + } + + withName: 'SEROTYPEFINDER' { + ext.when = params.serotypefinder_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}serotypefinder.nf").serotypefinderHelp(params).helpparams + ) + } + + withName: 'SEQSERO2' { + ext.when = params.seqsero2_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}seqsero2.nf").seqsero2Help(params).helpparams + ) + } + + withName: 'MLST' { + ext.when = params.mlst_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}mlst.nf").mlstHelp(params).helpparams + ) + } + + if (params.amrfinderplus_run) { + withName: 'AMRFINDERPLUS_RUN' { + ext.when = params.amrfinderplus_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}amrfinderplus.nf").amrfinderplusHelp(params).helpparams + ) + } + } + + withName: 'ABRICATE_RUN' { + ext.when = params.abricate_run + ext.args = addParamsToSummary( + 
loadThisFunction("${params.toolshelp}${params.fs}abricate.nf").abricateHelp(params).helpparams + ) + } + + withName: 'ABRICATE_SUMMARY' { + ext.when = params.abricate_summary_run + } +} + +// Method to instantiate a new function parser +// Need to refactor using ScriptParser... another day +// +// Parses the Groovy source at func_file with a fresh GroovyShell and returns the +// resulting script object, so that the methods it defines can be called on it. +def loadThisFunction (func_file) { + GroovyShell grvy_sh = new GroovyShell() + def func = grvy_sh.parse(new File ( func_file ) ) + return func +} + +// Method to add relevant final parameters to summary log +// +// params_to_add maps a parameter name to an entry exposing `cliflag` and `clivalue`. +// Entries whose clivalue is null, '' or '[:]' are dropped; the names of the remaining +// entries are appended to params.logtheseparams and the entries themselves are +// rendered as "cliflag clivalue" pairs joined by single spaces. +// An empty map yields an empty string, so that callers assigning the result to +// ext.args never receive a non-string value. +def addParamsToSummary(Map params_to_add = [:]) { + + if (!params_to_add.isEmpty()) { + def not_null_params_to_add = params_to_add.findAll { + it.value.clivalue != null && + it.value.clivalue != '[:]' && + it.value.clivalue != '' + } + + params.logtheseparams += not_null_params_to_add.keySet().toList() + + // Leading and trailing whitespace is stripped from every value before joining. + return not_null_params_to_add.collect { + "${it.value.cliflag} ${it.value.clivalue.toString().replaceAll(/(?:^\s+|\s+$)/, '')}" + }.join(' ').trim() + } + return '' +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/workflows/conf/process/centriflaken_hy.process.config --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/workflows/conf/process/centriflaken_hy.process.config Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,129 @@ +process { + withName: 'SEQKIT_SEQ' { + ext.args = [ + params.fq_filter_by_len ? 
"-m ${params.fq_filter_by_len}" : '' + ].join(' ').trim() + } + + if (params.seqkit_rmdup_run) { + withName: 'SEQKIT_RMDUP' { + ext.when = params.seqkit_rmdup_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}seqkitrmdup.nf").seqkitrmdupHelp(params).helpparams + ) + } + } + + if (params.seqkit_grep_on) { + withName: 'SEQKIT_GREP' { + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}seqkitgrep.nf").seqkitgrepHelp(params).helpparams + ) + } + } + + withName: 'CENTRIFUGE_CLASSIFY' { + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}centrifuge.nf").centrifugeHelp(params).helpparams + ) + } + + withName: 'KRAKEN2_CLASSIFY' { + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}kraken2.nf").kraken2Help(params).helpparams + ) + } + + withName: 'MEGAHIT_ASSEMBLE' { + ext.when = params.megahit_run + errorStrategy = 'ignore' + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}megahit.nf").megahitHelp(params).helpparams + ) + } + + withName: 'SPADES_ASSEMBLE' { + ext.when = params.spades_run + errorStrategy = 'ignore' + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}spades.nf").spadesHelp(params).helpparams + ) + } + + if (params.ectyper_run) { + withName: 'ECTYPER' { + ext.when = params.ectyper_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}ectyper.nf").ectyperHelp(params).helpparams + ) + } + } + + withName: 'SEROTYPEFINDER' { + ext.when = params.serotypefinder_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}serotypefinder.nf").serotypefinderHelp(params).helpparams + ) + } + + withName: 'SEQSERO2' { + ext.when = params.seqsero2_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}seqsero2.nf").seqsero2Help(params).helpparams + ) + } + + withName: 'MLST' { + 
ext.when = params.mlst_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}mlst.nf").mlstHelp(params).helpparams + ) + } + + if (params.amrfinderplus_run) { + withName: 'AMRFINDERPLUS_RUN' { + ext.when = params.amrfinderplus_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}amrfinderplus.nf").amrfinderplusHelp(params).helpparams + ) + } + } + + withName: 'ABRICATE_RUN' { + ext.when = params.abricate_run + ext.args = addParamsToSummary( + loadThisFunction("${params.toolshelp}${params.fs}abricate.nf").abricateHelp(params).helpparams + ) + } + + withName: 'ABRICATE_SUMMARY' { + ext.when = params.abricate_summary_run + } +} + +// Method to instantiate a new function parser +// Need to refactor using ScriptParser... another day +// +// Parses the Groovy source at func_file with a fresh GroovyShell and returns the +// resulting script object, so that the methods it defines can be called on it. +def loadThisFunction (func_file) { + GroovyShell grvy_sh = new GroovyShell() + def func = grvy_sh.parse(new File ( func_file ) ) + return func +} + +// Method to add relevant final parameters to summary log +// +// params_to_add maps a parameter name to an entry exposing `cliflag` and `clivalue`. +// Entries whose clivalue is null, '' or '[:]' are dropped; the names of the remaining +// entries are appended to params.logtheseparams and the entries themselves are +// rendered as "cliflag clivalue" pairs joined by single spaces. +// An empty map yields an empty string, so that callers assigning the result to +// ext.args never receive a non-string value. +def addParamsToSummary(Map params_to_add = [:]) { + + if (!params_to_add.isEmpty()) { + def not_null_params_to_add = params_to_add.findAll { + it.value.clivalue != null && + it.value.clivalue != '[:]' && + it.value.clivalue != '' + } + + params.logtheseparams += not_null_params_to_add.keySet().toList() + + // Leading and trailing whitespace is stripped from every value before joining. + return not_null_params_to_add.collect { + "${it.value.cliflag} ${it.value.clivalue.toString().replaceAll(/(?:^\s+|\s+$)/, '')}" + }.join(' ').trim() + } + return '' +} \ No newline at end of file diff -r 17890124001d -r 52045ea4679d 0.4.2/workflows/nanofactory.nf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/0.4.2/workflows/nanofactory.nf Thu Jun 27 14:17:26 2024 -0400 @@ -0,0 +1,185 @@ +// +// Start nanofactory workflow. Since this is a special +// case workflow wherein most of the bioinformatics +// tools are not used, there won't be any modules or +// subworkflows and therefore all the processes +// reside here. +// + +// Include any necessary methods. 
+include { addPadding; summaryOfParams; stopNow} \ + from "${params.routines}" + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + PROCESS DEFINITIONS FOR NANOFACTORY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +process SETPUBLISHDIR { + label 'process_femto' + module (params.enable_module ? params.enable_module : null) + conda (params.enable_conda ? params.enable_conda : null) + + input: + val options + + output: + stdout + + shell: + ''' + project_setup.py -s !{options.sample_sheet} \ + !{options.alt_settings} !{options.verbose} -b + ''' +} + +process PROJECTSETUP { + label 'process_femto' + publishDir "${publish_dir.trim()}", mode: 'copy', overwrite: false + module (params.enable_module ? params.enable_module : null) + conda (params.enable_conda ? params.enable_conda : null) + + input: + val options + val publish_dir + + output: + stdout + + script: + params.publish_dir = "${publish_dir.trim()}" + + shell: + ''' + project_setup.py -y -s !{options.sample_sheet} !{options.alt_settings} \ + !{options.purge} !{options.runtype} !{options.logfile} \ + !{options.loglevel} !{options.verbose} !{options.nocopy} \ + !{options.fix_existing} + + cat < original_source.txt + ''' +} + +process TRIMDEMUX { + label 'process_pico' + module (params.enable_module ? params.enable_module : null) + conda (params.enable_conda ? 
params.enable_conda : null) + cpus "${params.guppy_threads}" + + input: + val options + val original_source + + output: + path 'source.txt' + + shell: + ''' + trim_demux.py -s !{options.sample_sheet} !{options.verbose} \ + !{options.alt_settings} !{options.guppy_config} -t !{options.guppy_threads} + ''' +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + WORKFLOW ENTRY POINT +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Prints the help text when --help is given, stops when no sample sheet is supplied, +// and otherwise chains SETPUBLISHDIR -> PROJECTSETUP -> TRIMDEMUX, feeding each of them +// the option map built by setOptions(). +workflow NANOFACTORY { + + if ( params.help ) { + log.info help() + } else if ( params.sample_sheet == null || + params.sample_sheet.length() == 0 ) { + + log.info help() + stopNow("Please provide absolute path to a JSON formatted sample sheet using the\n" + + "--sample_sheet option.") + } else { + log.info summaryOfParams() + + // take: / main: section labels are only meaningful at the top level of a + // workflow body, so none are used inside this branch. + Channel + .from(setOptions()) + .set { options } + + SETPUBLISHDIR(options) + PROJECTSETUP(options, SETPUBLISHDIR.out) + TRIMDEMUX(options, PROJECTSETUP.out) + } +} + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + HELPER METHODS FOR NANOFACTORY WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Translates the nanofactory params into the ready-made command line fragments that +// the process shell blocks interpolate; a switch that is off becomes an empty string. +def setOptions() { + + Map options = [:] + + options['sample_sheet'] = "${params.sample_sheet}" + options['verbose'] = params.verbose ? "-v" : "" + options['alt_settings'] = params.global_settings ? "-c ${params.global_settings}" : "" + options['purge'] = params.setup_purge_existing ? "-p" : "" + options['logfile'] = params.log_file ? "-l ${params.log_file}" : "" + options['loglevel'] = params.log_level ? 
"--loglevel ${params.log_level}" : "" + options['nocopy'] = params.setup_nocopy ? "--nocopy" : "" + options['runtype'] = params.setup_runtype ? "-r ${params.setup_runtype}" : "" + options['fix_existing'] = params.setup_fix_existing ? "-f" : "" + options['guppy_config'] = params.guppy_config ? " -g ${params.guppy_config}" : "" + options['mode'] = params.mode ? "-m ${params.mode}" : "-m prod" + options['mail_group'] = params.mail_group ? "-g ${params.mail_group}" : "-g stakeholders" + options['guppy_threads'] = params.guppy_threads ? "${params.guppy_threads}" : 1 + // NOTE(review): a bare `pad` is not defined anywhere in this script; presumably the + // global params.pad was meant - confirm against the main pipeline configuration. + options['pad'] = params.pad ? params.pad.toInteger() : 0 + options['nocapitalize'] = true + + return options +} + +// Assembles the help entries for the nanofactory workflow and returns them formatted +// by addPadding(). +def help() { + + Map helptext = [:] + + helptext['help'] = true + helptext['nocapitalize'] = true + helptext['Workflow'] = "${params.pipeline}" + helptext['Author'] = "${params.workflow_author}" + helptext['Version'] = "${params.workflow_version}\n" + helptext['Usage'] = "cpipes --pipeline nanofactory [options]\n" + helptext['Required'] = "" + helptext['--sample_sheet'] = "The JSON-formatted sample sheet for this run. Normally provided by Pore Refiner.\n" + helptext['Other options'] = "" + helptext['--global_settings'] = "An alternate global settings file. If not present the installed default will be used." + helptext['--log_file'] = "Path and file name to a log file relative to the project directory (Default: 'logs/workflow.log')" + helptext['--log_level'] = "One of 'debug', 'info', 'warning', 'error', 'fatal' (Default: 'info')" + helptext['--mode'] = "Set the run mode. 
One of 'dev', 'test', 'stage', or 'prod' (Default: 'prod')" + helptext['--verbose'] = "Use to enable more verbose console output from each tool\n" + helptext['Project setup options'] = "" + helptext['--disable_project_setup'] = "Do not do project setup (Default: setup is enabled)" + helptext['--setup_purge_existing'] = "Before setting up the project area delete any existing files (Default: don't purge)" + helptext['--setup_nocopy'] = "During setup, do NOT copy the original data files to the scrach location (Default: copy)" + helptext['--setup_runtype'] = "Set things up for the indicated run type (Currently not used)" + helptext['--setup_runtype'] = "Set things up for the indicated run type (Currently not used)" + helptext['--enable_module'] = "Software environment module. Ex: --enable_module 'nanofactory/current'" + helptext['--enable_conda'] = "CONDA environment module. Ex: --enable_conda nanofactory\n" + helptext['Help options'] = "" + helptext['--help'] = "Display this message.\n" + + return addPadding(helptext) +} diff -r 17890124001d -r 52045ea4679d cfsan_centriflaken.xml --- a/cfsan_centriflaken.xml Sun Aug 28 00:37:10 2022 -0400 +++ b/cfsan_centriflaken.xml Thu Jun 27 14:17:26 2024 -0400 @@ -1,13 +1,16 @@ - + An automated pipeline to generate a MAG of interest (E.coli or Salmonella) and perform serotyping. - nextflow + nextflow graphviz nextflow -version