# HG changeset patch # User kkonganti # Date 1657556388 14400 # Node ID 30191f39a957f0c6f938d9971df99560eb9fa2d7 # Parent 0dd0ebe5cddf65c184d6ccfeabe7089930de0ae3 "planemo upload" diff -r 0dd0ebe5cddf -r 30191f39a957 0.2.1/README.md --- a/0.2.1/README.md Thu Jul 07 10:37:59 2022 -0400 +++ b/0.2.1/README.md Mon Jul 11 12:19:48 2022 -0400 @@ -20,7 +20,7 @@ Following is the example of how to run the `centriflaken` pipeline on the **CFSAN** raven cluster. ```bash -module load cpipes/0.1.0-test +module load cpipes/0.2.1 cpipes --pipeline centriflaken [options] ``` @@ -35,15 +35,14 @@ --pipeline centriflaken \ --input /path/to/fastq_pass_dir \ --output /path/to/where/output/should/go \ - --flye_nano_raw \ - --user_email Firt.Last@fda.hhs.gov \ + --user_email First.Last@fda.hhs.gov \ -profile raven ``` The above command would run the pipeline and store the output wherever the author of the workflow decided it to be and the **NEXTFLOW** reports are always stored in the current working directory from where `cpipes` is run. For example, for the above command, a directory called `CPIPES-centriflaken` would hold all the **NEXTFLOW** related logs, reports and trace files. -### **PRE ALPHA** +### **BETA** --- -This modular structure and flow is still in rapid development and may change depending on assessment of various computational topics and other considerations. +The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations. diff -r 0dd0ebe5cddf -r 30191f39a957 0.2.1/modules/kraken2/classify/main.nf --- a/0.2.1/modules/kraken2/classify/main.nf Thu Jul 07 10:37:59 2022 -0400 +++ b/0.2.1/modules/kraken2/classify/main.nf Mon Jul 11 12:19:48 2022 -0400 @@ -28,15 +28,15 @@ def readList = reads.collect{ it.toString() } def is_single_end = (meta.single_end || meta.is_assembly) ? true : false def paired = is_single_end ? "" : "--paired" - def classified = is_single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" - def unclassified = is_single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" + def classified = is_single_end ? "--classified-out ${prefix}.classified.fastq" : "--classified-out ${prefix}.classified#.fastq" + def unclassified = is_single_end ? "--unclassified-out ${prefix}.unclassified.fastq" : "--unclassified-out ${prefix}.unclassified#.fastq" args += (reads.getName().endsWith(".gz") ? ' --gzip-compressed ' : '') """ kraken2 \\ --db $db \\ --threads $task.cpus \\ - --unclassified-out $unclassified \\ - --classified-out $classified \\ + $unclassified \\ + $classified \\ --report ${prefix}.kraken2.report.txt \\ --output ${prefix}.kraken2.output.txt \\ $paired \\ diff -r 0dd0ebe5cddf -r 30191f39a957 0.2.1/nextflow.config --- a/0.2.1/nextflow.config Thu Jul 07 10:37:59 2022 -0400 +++ b/0.2.1/nextflow.config Mon Jul 11 12:19:48 2022 -0400 @@ -27,8 +27,8 @@ includeConfig "${pd}${fs}conf${fs}modules.config" // Nextflow runtime profiles -conda.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0/kondagac_cache' -singularity.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0/cingularitygac_cache' +conda.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0' +singularity.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0' profiles { standard { diff -r 0dd0ebe5cddf -r 30191f39a957 0.2.1/readme/centriflaken.md --- a/0.2.1/readme/centriflaken.md Thu Jul 07 10:37:59 2022 -0400 +++ b/0.2.1/readme/centriflaken.md Mon Jul 11 12:19:48 2022 -0400 @@ -273,5 +273,5 @@ ### **PRE ALPHA** --- -This modular structure and flow is still in rapid development and may change -depending on assessment of various computational topics and other considerations +The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations. + diff -r 0dd0ebe5cddf -r 30191f39a957 0.2.1/readme/centriflaken_hy.md --- a/0.2.1/readme/centriflaken_hy.md Thu Jul 07 10:37:59 2022 -0400 +++ b/0.2.1/readme/centriflaken_hy.md Mon Jul 11 12:19:48 2022 -0400 @@ -15,7 +15,7 @@ #### Workflow Usage ```bash -module load cpipes/0.2.0 +module load cpipes/0.2.1 cpipes --pipeline centriflaken_hy [options] ``` @@ -289,5 +289,5 @@ ### **PRE ALPHA** --- -This modular structure and flow is still in rapid development and may change -depending on assessment of various computational topics and other considerations +The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations. + diff -r 0dd0ebe5cddf -r 30191f39a957 0.2.1/workflows/centriflaken.nf --- a/0.2.1/workflows/centriflaken.nf Thu Jul 07 10:37:59 2022 -0400 +++ b/0.2.1/workflows/centriflaken.nf Mon Jul 11 12:19:48 2022 -0400 @@ -107,14 +107,14 @@ FASTQC ( ch_processed_reads ) - CENTRIFUGE_CLASSIFY( ch_processed_reads ) + CENTRIFUGE_CLASSIFY ( ch_processed_reads ) - CENTRIFUGE_PROCESS( + CENTRIFUGE_PROCESS ( CENTRIFUGE_CLASSIFY.out.report .join( CENTRIFUGE_CLASSIFY.out.output ) ) - ch_processed_reads.join( CENTRIFUGE_PROCESS.out.extracted ) + ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted ) .set { ch_centrifuge_extracted } SEQKIT_GREP ( ch_centrifuge_extracted ) @@ -128,7 +128,7 @@ ch_flye_assembly.ifEmpty { [ false, false ] } - KRAKEN2_CLASSIFY( ch_flye_assembly ) + KRAKEN2_CLASSIFY ( ch_flye_assembly ) KRAKEN2_EXTRACT_CONTIGS ( ch_flye_assembly @@ -213,7 +213,7 @@ .map { it -> [ it[0], it[1].flatten() ]} .set { ch_mqc_custom_tbl } - TABLE_SUMMARY( ch_mqc_custom_tbl ) + TABLE_SUMMARY ( ch_mqc_custom_tbl ) DUMP_SOFTWARE_VERSIONS ( software_versions @@ -246,7 +246,7 @@ .collect() .set { ch_multiqc } - MULTIQC( ch_multiqc ) + MULTIQC ( ch_multiqc ) } /* @@ -308,7 +308,7 @@ Map helptext = [:] - helptext.putAll( + helptext.putAll ( fastqEntryPointHelp() + kraken2Help(params).text + centrifugeHelp(params).text + diff -r 0dd0ebe5cddf -r 30191f39a957 0.2.1/workflows/centriflaken_hy.nf --- a/0.2.1/workflows/centriflaken_hy.nf Thu Jul 07 10:37:59 2022 -0400 +++ b/0.2.1/workflows/centriflaken_hy.nf Mon Jul 11 12:19:48 2022 -0400 @@ -108,14 +108,14 @@ FASTQC ( ch_processed_reads ) - CENTRIFUGE_CLASSIFY( ch_processed_reads ) + CENTRIFUGE_CLASSIFY ( ch_processed_reads ) - CENTRIFUGE_PROCESS( + CENTRIFUGE_PROCESS ( CENTRIFUGE_CLASSIFY.out.report .join( CENTRIFUGE_CLASSIFY.out.output ) ) - ch_processed_reads.join( CENTRIFUGE_PROCESS.out.extracted ) + ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted ) .set { ch_centrifuge_extracted } SEQKIT_GREP ( ch_centrifuge_extracted ) @@ -145,7 +145,7 @@ ch_spades_assembly.ifEmpty { [ false, false ] } - KRAKEN2_CLASSIFY( ch_spades_assembly ) + KRAKEN2_CLASSIFY ( ch_spades_assembly ) KRAKEN2_EXTRACT_CONTIGS ( ch_spades_assembly @@ -227,7 +227,7 @@ .map { it -> [ it[0], it[1].flatten() ]} .set { ch_mqc_custom_tbl } - TABLE_SUMMARY( ch_mqc_custom_tbl ) + TABLE_SUMMARY ( ch_mqc_custom_tbl ) DUMP_SOFTWARE_VERSIONS ( software_versions @@ -260,7 +260,7 @@ .collect() .set { ch_multiqc } - MULTIQC( ch_multiqc ) + MULTIQC ( ch_multiqc ) } /* @@ -322,7 +322,7 @@ Map helptext = [:] - helptext.putAll( + helptext.putAll ( fastqEntryPointHelp() + kraken2Help(params).text + centrifugeHelp(params).text + diff -r 0dd0ebe5cddf -r 30191f39a957 cfsan_centriflaken.xml --- a/cfsan_centriflaken.xml Thu Jul 07 10:37:59 2022 -0400 +++ b/cfsan_centriflaken.xml Mon Jul 11 12:19:48 2022 -0400 @@ -43,8 +43,7 @@ --fq_filename_delim '${fq_filename_delim}' --fq_filename_delim_idx $fq_filename_delim_idx --centrifuge_extract_bug '${centrifuge_extract_bug}' - -profile $runtime_profile - -resume; + -profile kondagac; mv './cpipes-output/${pipeline}-multiqc/multiqc_report.html' './multiqc_report.html'; mv './cpipes-output/${pipeline}-results/kraken2_extract_contigs' kraken2_extract_contigs; rm -rf ./cpipes-output; @@ -72,7 +71,8 @@ - + ^([0-9]*[.])?[0-9]+[kmg]?$ - + @@ -124,11 +124,11 @@ **Testing and Validation** -The pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads or long reads, generates MAGs and performs +The CPIPES - Centriflaken Nextflow pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads or long reads, generates MAGs and performs in silico-based analysis (i.e., virulence gene finding). Additionally, AMR gene finding analysis is also included in Centriflaken and performed on MAGs of interest. The final summary plots and tables can be downloaded from the provided MultiQC HTML report generated as part of the pipeline. The Centriflaken pipeline was validated with data from our previously published method (Maguire et al, 2021) and was able to replicate the detection -and classification of STECs for each sample. We tested the pipeline with nanopore data obtained from 21 additional enriched samples from +and classification of STECs for each sample. We tested the pipeline with Nanopore data obtained from 21 additional enriched samples from irrigation water and was able to perform the entire precision metagenomics analysis in less than 5 hours for all of them. All the original testing and validation was done on the command line on the CFSAN Raven2 HPC Cluster.