cfsan_centriflaken: changeset 47:30191f39a957
"planemo upload"
| author | kkonganti |
| --- | --- |
| date | Mon, 11 Jul 2022 12:19:48 -0400 |
| parents | 0dd0ebe5cddf |
| children | 8202a3cedcc4 |
| files | 0.2.1/README.md 0.2.1/modules/kraken2/classify/main.nf 0.2.1/nextflow.config 0.2.1/readme/centriflaken.md 0.2.1/readme/centriflaken_hy.md 0.2.1/workflows/centriflaken.nf 0.2.1/workflows/centriflaken_hy.nf cfsan_centriflaken.xml |
| diffstat | 8 files changed, 36 insertions(+), 37 deletions(-) |
--- a/0.2.1/README.md Thu Jul 07 10:37:59 2022 -0400
+++ b/0.2.1/README.md Mon Jul 11 12:19:48 2022 -0400
@@ -20,7 +20,7 @@
 Following is the example of how to run the `centriflaken` pipeline on the **CFSAN** raven cluster.
 
 ```bash
-module load cpipes/0.1.0-test
+module load cpipes/0.2.1
 cpipes --pipeline centriflaken [options]
 ```
 
@@ -35,15 +35,14 @@
   --pipeline centriflaken \
   --input /path/to/fastq_pass_dir \
   --output /path/to/where/output/should/go \
-  --flye_nano_raw \
-  --user_email Firt.Last@fda.hhs.gov \
+  --user_email First.Last@fda.hhs.gov \
   -profile raven
 ```
 
 The above command would run the pipeline and store the output wherever the author of the workflow decided it to be and the **NEXTFLOW** reports are always stored in the current working directory from where `cpipes` is run. For example, for the above command, a directory called `CPIPES-centriflaken` would hold all the **NEXTFLOW** related logs, reports and trace files.
 
-### **PRE ALPHA**
+### **BETA**
 
 ---
 
-This modular structure and flow is still in rapid development and may change depending on assessment of various computational topics and other considerations.
+The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations.
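Read together, the two README hunks above amount to an invocation along the following lines (a sketch assembled from the snippets in this changeset; the input/output paths and the email address are placeholders):

```bash
# Illustrative only: module version and options are taken from the updated
# README above; paths and the email address are placeholders.
module load cpipes/0.2.1

cpipes \
  --pipeline centriflaken \
  --input /path/to/fastq_pass_dir \
  --output /path/to/where/output/should/go \
  --user_email First.Last@fda.hhs.gov \
  -profile raven
```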
--- a/0.2.1/modules/kraken2/classify/main.nf Thu Jul 07 10:37:59 2022 -0400
+++ b/0.2.1/modules/kraken2/classify/main.nf Mon Jul 11 12:19:48 2022 -0400
@@ -28,15 +28,15 @@
     def readList = reads.collect{ it.toString() }
     def is_single_end = (meta.single_end || meta.is_assembly) ? true : false
     def paired = is_single_end ? "" : "--paired"
-    def classified = is_single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq"
-    def unclassified = is_single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
+    def classified = is_single_end ? "--classified-out ${prefix}.classified.fastq" : "--classified-out ${prefix}.classified#.fastq"
+    def unclassified = is_single_end ? "--unclassified-out ${prefix}.unclassified.fastq" : "--unclassified-out ${prefix}.unclassified#.fastq"
     args += (reads.getName().endsWith(".gz") ? ' --gzip-compressed ' : '')
     """
     kraken2 \\
         --db $db \\
         --threads $task.cpus \\
-        --unclassified-out $unclassified \\
-        --classified-out $classified \\
+        $unclassified \\
+        $classified \\
         --report ${prefix}.kraken2.report.txt \\
         --output ${prefix}.kraken2.output.txt \\
         $paired \\
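Moving the `--classified-out`/`--unclassified-out` flag names into the interpolated variables leaves the rendered command unchanged; for a paired-end sample it would look roughly like the sketch below (the sample prefix, thread count, database path, and read file names are made up for illustration; `#` is Kraken2's placeholder for the mate number in paired output file names):

```bash
# Rough sketch of the command the module template expands to for a
# hypothetical paired-end, gzip-compressed sample with prefix "sample1".
kraken2 \
    --db /path/to/kraken2_db \
    --threads 8 \
    --unclassified-out sample1.unclassified#.fastq \
    --classified-out sample1.classified#.fastq \
    --report sample1.kraken2.report.txt \
    --output sample1.kraken2.output.txt \
    --paired \
    --gzip-compressed \
    sample1_R1.fastq.gz sample1_R2.fastq.gz
```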
--- a/0.2.1/nextflow.config Thu Jul 07 10:37:59 2022 -0400
+++ b/0.2.1/nextflow.config Mon Jul 11 12:19:48 2022 -0400
@@ -27,8 +27,8 @@
 includeConfig "${pd}${fs}conf${fs}modules.config"
 
 // Nextflow runtime profiles
-conda.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0/kondagac_cache'
-singularity.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0/cingularitygac_cache'
+conda.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0'
+singularity.cacheDir = '/tool/tool-data/cfsan-centriflaken-db/0'
 
 profiles {
     standard {
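With this change both the conda and singularity engines cache into the same flat `/tool/tool-data/cfsan-centriflaken-db/0` directory. One quick way to confirm which cache directories a run will actually use is to print the resolved configuration (a sketch, assuming Nextflow is on the PATH and the command is run from the directory containing this `nextflow.config`; the `standard` profile name is taken from the hunk above):

```bash
# Print the resolved configuration for a profile and pull out the
# cacheDir settings.
nextflow config -profile standard | grep -i cachedir
```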
--- a/0.2.1/readme/centriflaken.md Thu Jul 07 10:37:59 2022 -0400
+++ b/0.2.1/readme/centriflaken.md Mon Jul 11 12:19:48 2022 -0400
@@ -273,5 +273,5 @@
 ### **PRE ALPHA**
 
 ---
-This modular structure and flow is still in rapid development and may change
-depending on assessment of various computational topics and other considerations
+The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations.
+
--- a/0.2.1/readme/centriflaken_hy.md Thu Jul 07 10:37:59 2022 -0400
+++ b/0.2.1/readme/centriflaken_hy.md Mon Jul 11 12:19:48 2022 -0400
@@ -15,7 +15,7 @@
 #### Workflow Usage
 
 ```bash
-module load cpipes/0.2.0
+module load cpipes/0.2.1
 cpipes --pipeline centriflaken_hy [options]
 ```
 
@@ -289,5 +289,5 @@
 ### **PRE ALPHA**
 
 ---
-This modular structure and flow is still in rapid development and may change
-depending on assessment of various computational topics and other considerations
+The development of the modular structure and flow is an ongoing effort and may change depending on assessment of various computational topics and other considerations.
+
--- a/0.2.1/workflows/centriflaken.nf Thu Jul 07 10:37:59 2022 -0400
+++ b/0.2.1/workflows/centriflaken.nf Mon Jul 11 12:19:48 2022 -0400
@@ -107,14 +107,14 @@
     FASTQC ( ch_processed_reads )
 
-    CENTRIFUGE_CLASSIFY( ch_processed_reads )
+    CENTRIFUGE_CLASSIFY ( ch_processed_reads )
 
-    CENTRIFUGE_PROCESS(
+    CENTRIFUGE_PROCESS (
         CENTRIFUGE_CLASSIFY.out.report
             .join( CENTRIFUGE_CLASSIFY.out.output )
     )
 
-    ch_processed_reads.join( CENTRIFUGE_PROCESS.out.extracted )
+    ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted )
         .set { ch_centrifuge_extracted }
 
     SEQKIT_GREP ( ch_centrifuge_extracted )
@@ -128,7 +128,7 @@
     ch_flye_assembly.ifEmpty { [ false, false ] }
 
-    KRAKEN2_CLASSIFY( ch_flye_assembly )
+    KRAKEN2_CLASSIFY ( ch_flye_assembly )
 
     KRAKEN2_EXTRACT_CONTIGS (
         ch_flye_assembly
@@ -213,7 +213,7 @@
         .map { it -> [ it[0], it[1].flatten() ]}
         .set { ch_mqc_custom_tbl }
 
-    TABLE_SUMMARY( ch_mqc_custom_tbl )
+    TABLE_SUMMARY ( ch_mqc_custom_tbl )
 
     DUMP_SOFTWARE_VERSIONS (
         software_versions
@@ -246,7 +246,7 @@
         .collect()
         .set { ch_multiqc }
 
-    MULTIQC( ch_multiqc )
+    MULTIQC ( ch_multiqc )
 }
 
 /*
@@ -308,7 +308,7 @@
     Map helptext = [:]
 
-    helptext.putAll(
+    helptext.putAll (
         fastqEntryPointHelp() +
         kraken2Help(params).text +
         centrifugeHelp(params).text +
--- a/0.2.1/workflows/centriflaken_hy.nf Thu Jul 07 10:37:59 2022 -0400
+++ b/0.2.1/workflows/centriflaken_hy.nf Mon Jul 11 12:19:48 2022 -0400
@@ -108,14 +108,14 @@
     FASTQC ( ch_processed_reads )
 
-    CENTRIFUGE_CLASSIFY( ch_processed_reads )
+    CENTRIFUGE_CLASSIFY ( ch_processed_reads )
 
-    CENTRIFUGE_PROCESS(
+    CENTRIFUGE_PROCESS (
         CENTRIFUGE_CLASSIFY.out.report
             .join( CENTRIFUGE_CLASSIFY.out.output )
     )
 
-    ch_processed_reads.join( CENTRIFUGE_PROCESS.out.extracted )
+    ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted )
        .set { ch_centrifuge_extracted }
 
     SEQKIT_GREP ( ch_centrifuge_extracted )
@@ -145,7 +145,7 @@
     ch_spades_assembly.ifEmpty { [ false, false ] }
 
-    KRAKEN2_CLASSIFY( ch_spades_assembly )
+    KRAKEN2_CLASSIFY ( ch_spades_assembly )
 
     KRAKEN2_EXTRACT_CONTIGS (
         ch_spades_assembly
@@ -227,7 +227,7 @@
         .map { it -> [ it[0], it[1].flatten() ]}
         .set { ch_mqc_custom_tbl }
 
-    TABLE_SUMMARY( ch_mqc_custom_tbl )
+    TABLE_SUMMARY ( ch_mqc_custom_tbl )
 
     DUMP_SOFTWARE_VERSIONS (
         software_versions
@@ -260,7 +260,7 @@
         .collect()
         .set { ch_multiqc }
 
-    MULTIQC( ch_multiqc )
+    MULTIQC ( ch_multiqc )
 }
 
 /*
@@ -322,7 +322,7 @@
     Map helptext = [:]
 
-    helptext.putAll(
+    helptext.putAll (
        fastqEntryPointHelp() +
        kraken2Help(params).text +
        centrifugeHelp(params).text +
--- a/cfsan_centriflaken.xml Thu Jul 07 10:37:59 2022 -0400
+++ b/cfsan_centriflaken.xml Mon Jul 11 12:19:48 2022 -0400
@@ -43,8 +43,7 @@
         --fq_filename_delim '${fq_filename_delim}'
         --fq_filename_delim_idx $fq_filename_delim_idx
         --centrifuge_extract_bug '${centrifuge_extract_bug}'
-        -profile $runtime_profile
-        -resume;
+        -profile kondagac;
         mv './cpipes-output/${pipeline}-multiqc/multiqc_report.html' './multiqc_report.html';
         mv './cpipes-output/${pipeline}-results/kraken2_extract_contigs' kraken2_extract_contigs;
         rm -rf ./cpipes-output;
@@ -72,7 +71,8 @@
             <option value="pacbio_hifi">PacBio HiFi reads (<1% error)</option>
         </param>
         <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the R1 FASTQ or Unpaired FASTQ"/>
-        <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ"/>
+        <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ"
+            help="THIS OPTION IS IGNORED IF THE INPUT READS ARE UNPAIRED/LONG READS."/>
         <param name="fq_filter_by_len" optional="true" value="" type="integer" label="Enter minimum read length to retain before starting the analysis"
            help="Keep this option empty to use default values. Default for centriflaken (long reads) is 4000 bp and for centriflaken_hy (short reads) is 75 bp)"/>
         <param name="fq_filename_delim" type="text" value="_" label="File name delimitor by which samples are grouped together (--fq_filename_delim)"
@@ -82,10 +82,10 @@
         <param name="genome_size" type="text" optional="true" value="5.5m" label="Estimated genome size" help="For example, 5m or 2.6g.">
            <validator type="regex" message="Genome size must be a float or integer, optionally followed by the a unit prefix (kmg)">^([0-9]*[.])?[0-9]+[kmg]?$</validator>
         </param>
-        <param name="runtime_profile" type="select" label="Run time profile">
+        <!-- <param name="runtime_profile" type="select" label="Run time profile">
             <option value="kondagac" selected="true">conda</option>
             <option value="cingularitygac">singularity</option>
-        </param>
+        </param> -->
     </inputs>
     <outputs>
         <data name="multiqc_report" format="html" label="${pipeline}: MultiQC Report on ${on_string}" from_work_dir="multiqc_report.html"/>
@@ -124,11 +124,11 @@
 **Testing and Validation**
 
-The pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads or long reads, generates MAGs and performs
+The CPIPES - Centriflaken Nextflow pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads or long reads, generates MAGs and performs
 in silico-based analysis (i.e., virulence gene finding). Additionally, AMR gene finding analysis is also included in Centriflaken and performed
 on MAGs of interest. The final summary plots and tables can be downloaded from the provided MultiQC HTML report generated as part of the pipeline.
 
 The Centriflaken pipeline was validated with data from our previously published method (Maguire et al, 2021) and was able to replicate the detection
-and classification of STECs for each sample. We tested the pipeline with nanopore data obtained from 21 additional enriched samples from
+and classification of STECs for each sample. We tested the pipeline with Nanopore data obtained from 21 additional enriched samples from
 irrigation water and was able to perform the entire precision metagenomics analysis in less than 5 hours for all of them. All the original testing
 and validation was done on the command line on the CFSAN Raven2 HPC Cluster.