annotate cfsan_bettercallsal.xml @ 16:b90e5a7a3d4f

"planemo upload"
author kkonganti
date Thu, 07 Sep 2023 15:22:10 -0400
parents 749faef1caa9
children 0e7a0053e4a6
rev   line source
kkonganti@11 1 <tool id="cfsan_bettercallsal" name="bettercallsal" version="0.6.1">
kkonganti@1 2 <description>An automated workflow to assign Salmonella serotype based on NCBI Pathogen Detection Project for Salmonella.</description>
kkonganti@0 3 <requirements> <!-- Package dependencies declared for this tool. -->
kkonganti@11 4 <requirement type="package" version="23.04">nextflow</requirement>
kkonganti@1 5 <requirement type="package" version="1.0.0">micromamba</requirement>
kkonganti@0 6 <requirement type="package">graphviz</requirement> <!-- NOTE(review): no version is pinned here, unlike the two packages above; consider pinning one. -->
kkonganti@0 7 </requirements>
kkonganti@0 8 <version_command>nextflow -version</version_command> <!-- Reports the nextflow version, not the version of the bettercallsal pipeline itself. -->
kkonganti@0 9 <command detect_errors="exit_code"><![CDATA[
## Stage every input FASTQ as a symlink under ./cpipes-input, run the cpipes
## bettercallsal pipeline on that folder, then keep only the MultiQC report
## and remove the pipeline output and work directories.
kkonganti@0 10 mkdir -p cpipes-input || exit 1;
kkonganti@0 11 pwd_path=\$(pwd);
kkonganti@0 12 #import re
kkonganti@0 13 #if (str($input_read_type_cond.input_read_type) == "single_long"):
kkonganti@0 14 #for _, $unpaired in enumerate($input_read_type_cond.input):
## Dataset names are user-controlled and end up inside a single-quoted shell
## word, so anything other than word characters, hyphen and dot becomes "_".
kkonganti@0 15 #set read1 = re.sub('[^\w\-\.]', '_', str($unpaired.name))
kkonganti@0 16 #if not str($unpaired.name).endswith(('.fastq', '.fastq.gz')):
kkonganti@0 17 #set read1_ext = re.sub('fastqsanger', 'fastq', str($unpaired.ext))
kkonganti@0 18 #set read1 = $read1 + str('.') + $read1_ext
kkonganti@0 19 #end if
kkonganti@0 20 ln -sf '$unpaired' './cpipes-input/$read1';
kkonganti@0 21 #end for
kkonganti@0 22 #elif (str($input_read_type_cond.input_read_type) == "paired"):
kkonganti@0 23 #for _, $pair in enumerate($input_read_type_cond.input_pair)
## Same sanitization as above, applied after the :forward / :reverse rename.
kkonganti@0 24 #set read_R1 = re.sub('[^\w\-\.]', '_', re.sub('\:forward', '_forward', str($pair.forward.name)))
kkonganti@0 25 #set read_R2 = re.sub('[^\w\-\.]', '_', re.sub('\:reverse', '_reverse', str($pair.reverse.name)))
kkonganti@0 26 #set read_R1_ext = re.sub('fastqsanger', 'fastq', str($pair.forward.ext))
kkonganti@0 27 #set read_R2_ext = re.sub('fastqsanger', 'fastq', str($pair.reverse.ext))
kkonganti@0 28 #if not str($pair.forward.name).endswith(('.fastq', '.fastq.gz')):
kkonganti@0 29 #set read_R1 = $read_R1 + str('.') + $read_R1_ext
kkonganti@0 30 #end if
kkonganti@0 31 #if not str($pair.reverse.name).endswith(('.fastq', '.fastq.gz')):
kkonganti@0 32 #set read_R2 = $read_R2 + str('.') + $read_R2_ext
kkonganti@0 33 #end if
kkonganti@0 34 ln -sf '$pair.forward' './cpipes-input/$read_R1';
kkonganti@0 35 ln -sf '$pair.reverse' './cpipes-input/$read_R2';
kkonganti@0 36 #end for
kkonganti@0 37 #end if
kkonganti@11 38 $__tool_directory__/0.6.1/cpipes
kkonganti@2 39 --pipeline bettercallsal
kkonganti@1 40 --input \${pwd_path}/cpipes-input
kkonganti@0 41 --output \${pwd_path}/cpipes-output
kkonganti@0 42 --fq_suffix '${input_read_type_cond.fq_suffix}'
kkonganti@1 43 #if (str($input_read_type_cond.input_read_type) == "single_long"):
kkonganti@1 44 --fq_single_end true
kkonganti@1 45 #elif (str($input_read_type_cond.input_read_type) == "paired"):
kkonganti@1 46 --fq_single_end false --fq2_suffix '${input_read_type_cond.fq2_suffix}'
kkonganti@0 47 #end if
kkonganti@1 48 --tuspy_n $tuspy_n
## The coverage threshold is only passed when sourmash is enabled; it is quoted
## like the other free-text parameters on this command line.
kkonganti@3 49 #if (str($sourmash_cond.run) == "true"):
kkonganti@4 50 --sfhpy_fcv '$sourmash_cond.sfhpy_fcv'
kkonganti@1 51 #end if
kkonganti@11 52 --bcs_db_mode $bcs_db_mode
kkonganti@1 53 --bcs_thresholds $bcs_thresholds
kkonganti@0 54 --fq_filename_delim '${fq_filename_delim}'
kkonganti@0 55 --fq_filename_delim_idx $fq_filename_delim_idx
kkonganti@0 56 -profile kondagac;
kkonganti@2 57 mv './cpipes-output/bettercallsal-multiqc/multiqc_report.html' './multiqc_report.html' > /dev/null 2>&1 || exit 1;
kkonganti@0 58 rm -rf ./cpipes-output > /dev/null 2>&1 || exit 1;
kkonganti@0 59 rm -rf ./work > /dev/null 2>&1 || exit 1
kkonganti@0 60 ]]></command>
kkonganti@0 61 <inputs>
kkonganti@0 62 <conditional name="input_read_type_cond"> <!-- Read layout selector; the command block branches on input_read_type to stage the files and to set fq_single_end. -->
kkonganti@0 63 <param name="input_read_type" type="select" label="Select the read collection type">
kkonganti@1 64 <option value="single_long" selected="true">Single-End short reads</option>
kkonganti@1 65 <option value="paired">Paired-End short reads</option>
kkonganti@0 66 </param>
kkonganti@0 67 <when value="single_long"> <!-- Flat list collection; each element is symlinked into cpipes-input as one file. -->
kkonganti@0 68 <param name="input" type="data_collection" collection_type="list" format="fastq,fastq.gz"
kkonganti@0 69 label="Dataset list of unpaired short reads or long reads" />
kkonganti@1 70 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the Single-End FASTQ"/>
kkonganti@0 71 </when>
kkonganti@0 72 <when value="paired"> <!-- list:paired collection; the forward and reverse members are symlinked as separate files and fq2_suffix is passed as well. -->
kkonganti@0 73 <param name="input_pair" type="data_collection" collection_type="list:paired" format="fastq,fastq.gz" label="List of Dataset pairs" />
kkonganti@5 74 <param name="fq_suffix" value="_R1_001.fastq.gz" type="text" label="Suffix of the R1 FASTQ"
kkonganti@5 75 help="For any data sets downloaded from NCBI into Galaxy, change this to _forward.fastq.gz suffix."/>
kkonganti@5 76 <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ"
kkonganti@6 77 help="For any data sets downloaded from NCBI into Galaxy, change this to _reverse.fastq.gz suffix."/>
kkonganti@0 78 </when>
kkonganti@0 79 </conditional>
kkonganti@11 80 <param name="bcs_db_mode" type="select" label="Select the database mode with bettercallsal"
kkonganti@11 81 help="Refer to `Database generation` section in our manuscript: https://doi.org/10.3389/fmicb.2023.1200983"> <!-- The selected value (snp or comp) is passed to the pipeline as the bcs_db_mode option. -->
kkonganti@11 82 <option value="snp" selected="true">per_snp_cluster</option>
kkonganti@11 83 <option value="comp">per_computed_type</option>
kkonganti@11 84 </param>
kkonganti@2 85 <param name="tuspy_n" value="10" type="integer" label="Enter the number of top unique serotypes to retain after initial MASH screen step"
kkonganti@1 86 help="The default value of 10 is suitable for almost all scenarios."/> <!-- Required on purpose: the command line always emits the tuspy_n option, so an empty value would leave the flag without an argument. -->
kkonganti@2 87 <param name="bcs_thresholds" type="select" label="Enter the type of base quality thresholds to be set with bettercallsal"
kkonganti@1 88 help="The default value sets the strictest thresholds, which tend to filter out most of the false positive hits."> <!-- The selected value (strict or relax) is passed to the pipeline as the bcs_thresholds option. -->
kkonganti@1 89 <option value="strict" selected="true">strict</option>
kkonganti@1 90 <option value="relax">relax</option>
kkonganti@1 91 </param>
kkonganti@1 92 <conditional name="sourmash_cond"> <!-- Toggles the sourmash filtering step; the command block only passes sfhpy_fcv when run is true. -->
kkonganti@1 93 <param name="run" type="select" label="Run sourmash"
kkonganti@2 94 help="Should sourmash be used for additional genome fraction filtering">
kkonganti@1 95 <option value="true" selected="true">yes</option>
kkonganti@1 96 <option value="false">no</option>
kkonganti@1 97 </param>
kkonganti@1 98 <when value="true"> <!-- The threshold is a fraction (0.1 is described as 10% below), so it is validated as a float between 0 and 1 instead of free text. -->
kkonganti@2 99 <param name="sfhpy_fcv" type="float" value="0.1" min="0" max="1" label="Enter the minimum coverage match with sourmash before a serotype hit is considered for further processing"
kkonganti@1 100 help="The default value is set at 10% coverage threshold."/>
kkonganti@1 101 </when>
kkonganti@1 102 <when value="false"> <!-- Placeholder so the parameter exists in both branches; its value is never placed on the command line. -->
kkonganti@2 103 <param name="sfhpy_fcv" type="select" label="Enter the minimum coverage match with sourmash before a serotype hit is considered for further processing"
kkonganti@8 104 help="THIS OPTION IS IGNORED IF SOURMASH TOOL IS NOT RUN.">
kkonganti@1 105 <option value="NA" selected="true">N/A</option>
kkonganti@1 106 </param>
kkonganti@1 107 </when>
kkonganti@1 108 </conditional>
kkonganti@0 109 <param name="fq_filename_delim" type="text" value="_" label="File name delimiter by which samples are grouped together (--fq_filename_delim)"
kkonganti@0 110 help="This is the delimiter by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)."/> <!-- Passed single-quoted on the command line because it is free text. -->
kkonganti@0 111 <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimiter index (--fq_filename_delim_idx)" /> <!-- Index used together with the delimiter above to pick the grouping word. -->
kkonganti@0 112 <!-- <param name="runtime_profile" type="select" label="Run time profile">
kkonganti@0 113 <option value="kondagac" selected="true">conda</option>
kkonganti@0 114 <option value="cingularitygac">singularity</option>
kkonganti@0 115 </param> -->
kkonganti@0 116 </inputs>
kkonganti@0 117 <outputs> <!-- Single output: the MultiQC HTML report, collected from multiqc_report.html in the job working directory where the command block moves it. -->
kkonganti@2 118 <data name="multiqc_report" format="html" label="bettercallsal: MultiQC Report on ${on_string}" from_work_dir="multiqc_report.html"/>
kkonganti@0 119 </outputs>
kkonganti@0 120 <tests>
kkonganti@0 121 <!--Test 01: long reads-->
kkonganti@0 122 <test expect_num_outputs="1"> <!-- The tool declares exactly one output (multiqc_report), so one output is expected. -->
kkonganti@0 123 <param name="input">
kkonganti@0 124 <collection type="list">
kkonganti@0 125 <element name="FAL11127.fastq.gz" value="FAL11127.fastq.gz" />
kkonganti@0 126 <element name="FAL11341.fastq.gz" value="FAL11341.fastq.gz" />
kkonganti@0 127 <element name="FAL11342.fastq.gz" value="FAL11342.fastq.gz" />
kkonganti@0 128 </collection>
kkonganti@0 129 </param>
kkonganti@0 130 <param name="fq_suffix" value=".fastq.gz"/>
kkonganti@0 131 <output name="multiqc_report" file="multiqc_report.html" ftype="html" compare="sim_size"/>
kkonganti@0 132 <!-- <output name="assembled_mags" file="FAL11127.assembly_filtered.contigs.fasta" ftype="fasta" compare="sim_size"/> -->
kkonganti@0 133 </test>
kkonganti@0 134 </tests>
kkonganti@0 135 <help><![CDATA[
kkonganti@0 136
kkonganti@0 137 .. class:: infomark
kkonganti@0 138
kkonganti@0 139 **Purpose**
kkonganti@0 140
kkonganti@1 141 bettercallsal is an automated workflow to assign Salmonella serotype based on NCBI Pathogen Detection Project for Salmonella.
kkonganti@1 142 It uses MASH to reduce the search space followed by additional genome filtering with sourmash. It then performs genome based
kkonganti@1 143 alignment with kma followed by count generation using salmon. This workflow can be used to analyze shotgun metagenomics
kkonganti@1 144 datasets, quasi-metagenomic datasets (enriched for Salmonella) and target enriched datasets (enriched with molecular baits specific for Salmonella)
kkonganti@1 145 and is especially useful when a sample contains a mixture of multiple serovars.
kkonganti@1 146
kkonganti@1 147 It is written in Nextflow and is part of the modular data analysis pipelines (CFSAN PIPELINES or CPIPES for short) at CFSAN.
kkonganti@1 148
kkonganti@0 149
kkonganti@0 150 ----
kkonganti@0 151
kkonganti@0 152 .. class:: infomark
kkonganti@0 153
kkonganti@0 154 **Testing and Validation**
kkonganti@0 155
kkonganti@1 156 The CPIPES - bettercallsal Nextflow pipeline has been wrapped to make it work in Galaxy. It takes either a paired or an unpaired list of short reads as input
kkonganti@16 157 and generates a MultiQC report in the final step. The pipeline has been tested on 2x300 bp MiSeq and 2x150 bp NextSeq simulated reads and has been shown to call multiple Salmonella serotypes with up to ~95% accuracy. The pipeline has also been tested on metagenomics data sets from Peach and Papaya outbreaks as discussed in our publication (https://www.frontiersin.org/articles/10.3389/fmicb.2023.1200983/full). All the original testing and validation was done on the command line on the CFSAN Raven2 HPC Cluster.
kkonganti@0 158
kkonganti@0 159
kkonganti@0 160 ----
kkonganti@0 161
kkonganti@0 162 .. class:: infomark
kkonganti@0 163
kkonganti@0 164 **Outputs**
kkonganti@0 165
kkonganti@7 166 The main output file is a:
kkonganti@0 167
kkonganti@0 168 ::
kkonganti@0 169
kkonganti@0 170 - MultiQC Report: Contains a brief summary report including any serotyping and AMR result tables.
kkonganti@0 171 Please note that due to MultiQC customizations, the preview (eye icon) will not
kkonganti@0 172 work within Galaxy for the MultiQC report. Please download the file by clicking
kkonganti@0 173 on the floppy icon and view it in your browser on your local desktop/workstation.
kkonganti@1 174 You can export the tables and plots from the downloaded MultiQC report.
kkonganti@0 175
kkonganti@0 176 ]]></help>
kkonganti@0 177 <citations> <!-- BibTeX entry for the 2023 bettercallsal article in Frontiers in Microbiology. -->
kkonganti@0 178 <citation type="bibtex">
kkonganti@11 179 @article{bettercallsal,
kkonganti@0 180 author = {Konganti, Kranti},
kkonganti@1 181 year = {2023},
kkonganti@11 182 month = {August},
kkonganti@1 183 title = {bettercallsal: better calling of Salmonella serotypes from enrichment cultures using shotgun metagenomic profiling and its application in an outbreak setting},
kkonganti@11 184 journal = {Frontiers in Microbiology},
kkonganti@11 185 doi = {10.3389/fmicb.2023.1200983},
kkonganti@11 186 url = {https://www.frontiersin.org/articles/10.3389/fmicb.2023.1200983/full}}
kkonganti@0 187 </citation>
kkonganti@0 188 </citations>
kkonganti@0 189 </tool>