comparison hfp_bettercallsal.xml @ 0:801b85b03a17 draft default tip

planemo upload
author galaxytrakr
date Thu, 28 May 2026 20:31:42 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:801b85b03a17
1 <tool id="hfp_bettercallsal_awsbatch" name="bettercallsal" version="1.0.0+awsbatch">
2 <description>An automated workflow to assign Salmonella serotype based on NCBI Pathogen Detection Project for Salmonella.</description> <!-- Short summary of what the tool does. -->
3 <requirements> <!-- Single runtime dependency: the Docker image named below. -->
4 <container type="docker">quay.io/galaxytrakr/mulled-v2-ebd88135862aa647eeae73d4d8e6ea8ec81245cd:v5.0</container>
5 </requirements>
6 <version_command>nextflow -version</version_command> <!-- Command whose output is used to report the Nextflow version. -->
7 <command detect_errors="exit_code"><![CDATA[ ## Stage the reads as symlinks, run cpipes, keep only the MultiQC report.
8 export MAMBA_ROOT_PREFIX="/server/galaxy/data/nextflow-micromamba-cache"; ## site-specific micromamba cache location
9 export NXF_HOME=\$(pwd)"/.nextflow-home";
10 input_path=\$(pwd)"/cpipes-input";
11 workdir_path=\$(pwd)"/work";
12 mkdir -p "\${input_path}" || exit 1; ## staging directory that receives one symlink per read file
13 #import re
14 #if (str($input_read_type_cond.input_read_type) == "single_long" or str($input_read_type_cond.input_read_type) == "long_long"):
15 #for $unpaired in $input_read_type_cond.input:
16 #set read1 = re.sub('[^A-Za-z0-9_.-]', '_', str($unpaired.name))
17 #if not str($unpaired.name).endswith(('.fastq', '.fastq.gz')):
18 #set read1_ext = re.sub('fastqsanger', 'fastq', str($unpaired.ext))
19 #set read1 = $read1 + str('.') + $read1_ext
20 #end if
21 ln -sf '$unpaired' "\${input_path}/$read1"; ## link name was sanitized above, so it is safe inside double quotes
22 #end for
23 #elif (str($input_read_type_cond.input_read_type) == "paired"):
24 #for $pair in $input_read_type_cond.input_pair:
25 #set read_R1 = re.sub('[^A-Za-z0-9_.-]', '_', re.sub('\:forward', '_forward', str($pair.forward.name)))
26 #set read_R2 = re.sub('[^A-Za-z0-9_.-]', '_', re.sub('\:reverse', '_reverse', str($pair.reverse.name)))
27 #set read_R1_ext = re.sub('fastqsanger', 'fastq', str($pair.forward.ext))
28 #set read_R2_ext = re.sub('fastqsanger', 'fastq', str($pair.reverse.ext))
29 #if not str($pair.forward.name).endswith(('.fastq', '.fastq.gz')):
30 #set read_R1 = $read_R1 + str('.') + $read_R1_ext
31 #end if
32 #if not str($pair.reverse.name).endswith(('.fastq', '.fastq.gz')):
33 #set read_R2 = $read_R2 + str('.') + $read_R2_ext
34 #end if
35 ln -sf '$pair.forward' "\${input_path}/$read_R1"; ## any character outside letters, digits, underscore, dot and hyphen became an underscore
36 ln -sf '$pair.reverse' "\${input_path}/$read_R2";
37 #end for
38 #end if
39 '$__tool_directory__/1.0.0/cpipes'
40 #if (str($input_read_type_cond.input_read_type) == "long_long"):
41 --pipeline bettercallsal_lr
42 #else
43 --pipeline bettercallsal
44 #end if
45 --input "\${input_path}"
46 --output cpipes-output
47 --fq_suffix '${input_read_type_cond.fq_suffix}'
48 #if (str($input_read_type_cond.input_read_type) == "long_long"):
49 --fq_single_end true
50 #elif (str($input_read_type_cond.input_read_type) == "single_long"):
51 --fq_single_end true
52 #elif (str($input_read_type_cond.input_read_type) == "paired"):
53 --fq_single_end false --fq2_suffix '${input_read_type_cond.fq2_suffix}'
54 #end if
55 #if str($tuspy_n).strip() not in ('', 'None'):
56 --tuspy_n $tuspy_n
57 #end if
58 ## Optional fields left blank are omitted entirely so a flag never swallows the next flag as its value.
59 #if ($bcs_thresholds != 'relax' and str($input_read_type_cond.input_read_type) != "long_long" and str($kma_id).strip() not in ('', 'None')):
60 --kmaalign_ID $kma_id
61 #end if
62 #if ($sourmash_cond.run == "true"):
63 --sfhpy_fcv $sourmash_cond.sfhpy_fcv
64 #end if
65 --bcs_db_mode $bcs_db_mode
66 --bcs_thresholds $bcs_thresholds
67 --fq_filename_delim '${fq_filename_delim}'
68 --fq_filename_delim_idx $fq_filename_delim_idx
69 -work-dir "\${workdir_path}"
70 -profile stdkondagac;
71 #if (str($input_read_type_cond.input_read_type) == "long_long"):
72 mv './cpipes-output/bettercallsal_lr-multiqc/CPIPES-Report_multiqc_report.html' './multiqc_report.html' || exit 1;
73 #else
74 mv './cpipes-output/bettercallsal-multiqc/CPIPES-Report_multiqc_report.html' './multiqc_report.html' || exit 1;
75 #end if
76 rm -rf ./cpipes-output || exit 1; ## the report has been moved out, so the rest of the pipeline output is discarded
77 rm -rf "\${workdir_path}" || exit 1;
78 ]]></command>
79 <inputs> <!-- Read collection selector first, then pipeline tuning options and MultiQC sample grouping. -->
80 <conditional name="input_read_type_cond"> <!-- Each branch exposes its own collection input and FASTQ suffix field(s). -->
81 <param name="input_read_type" type="select" label="Select the read collection type">
82 <option value="single_long" selected="true">Single-End short reads</option>
83 <option value="paired">Paired-End short reads</option>
84 <option value="long_long">Long reads</option>
85 </param>
86 <when value="single_long">
87 <param name="input" type="data_collection" collection_type="list" format="fastq,fastq.gz"
88 label="Dataset list of unpaired short reads" />
89 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the Single-End FASTQ"/>
90 </when>
91 <when value="long_long">
92 <param name="input" type="data_collection" collection_type="list" format="fastq,fastq.gz"
93 label="Dataset list of long reads" />
94 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the long read FASTQ"/>
95 </when>
96 <when value="paired">
97 <param name="input_pair" type="data_collection" collection_type="list:paired" format="fastq,fastq.gz" label="List of Dataset pairs" />
98 <param name="fq_suffix" value="_R1_001.fastq.gz" type="text" label="Suffix of the R1 FASTQ"
99 help="For any data sets downloaded from NCBI into Galaxy, change this to _forward.fastq.gz suffix."/>
100 <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ"
101 help="For any data sets downloaded from NCBI into Galaxy, change this to _reverse.fastq.gz suffix."/>
102 </when>
103 </conditional>
104 <param name="bcs_db_mode" type="select" label="Select the database mode with bettercallsal"
105 help="Refer to `Database generation` section in our manuscript: https://doi.org/10.3389/fmicb.2023.1200983">
106 <option value="snp" selected="true">per_snp_cluster</option>
107 <option value="comp">per_computed_type</option>
108 </param>
109 <param name="tuspy_n" optional="true" value="10" type="integer" min="1" label="Enter the number of top unique serotypes to retain after initial MASH screen step"
110 help="The default value of 10 is suitable for almost all scenarios."/>
111 <param name="bcs_thresholds" type="select" label="Enter the type of base quality thresholds to be set with bettercallsal"
112 help="The default value sets the strictest thresholds, which tend to filter out most of the false positive hits.">
113 <option value="strict" selected="true">strict</option>
114 <option value="relax">relax</option>
115 </param>
116 <param name="kma_id" optional="true" value="10.0" type="float" min="0" max="100" label="Enter the %ID threshold for KMA alignments of samples against genomes"
117 help="The default value of 10% works well for enrichment samples tested within FDA. The 'relax' preset for base quality thresholds automatically sets this value to 5%."/>
118 <conditional name="sourmash_cond"> <!-- The coverage threshold is only meaningful when sourmash is run. -->
119 <param name="run" type="select" label="Run sourmash"
120 help="Should sourmash be used for additional genome fraction filtering">
121 <option value="true" selected="true">yes</option>
122 <option value="false">no</option>
123 </param>
124 <when value="true">
125 <param name="sfhpy_fcv" type="float" value="0.1" min="0" label="Enter the minimum coverage match with sourmash before a serotype hit is considered for further processing"
126 help="The default value is set at 10% coverage threshold."/>
127 </when>
128 <when value="false"> <!-- Placeholder field; the command passes sfhpy_fcv only when run is "true". -->
129 <param name="sfhpy_fcv" type="select" label="Enter the minimum coverage match with sourmash before a serotype hit is considered for further processing"
130 help="THIS OPTION IS IGNORED IF SOURMASH TOOL IS NOT RUN.">
131 <option value="NA" selected="true">N/A</option>
132 </param>
133 </when>
134 </conditional>
135 <param name="fq_filename_delim" type="text" value="_" label="File name delimiter by which samples are grouped together (--fq_filename_delim)"
136 help="This is the delimiter by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)."/>
137 <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimiter index (--fq_filename_delim_idx)" />
138 <!-- <param name="runtime_profile" type="select" label="Run time profile">
139 <option value="kondagac" selected="true">conda</option>
140 <option value="cingularitygac">singularity</option>
141 </param> -->
142 </inputs>
143 <outputs> <!-- The only collected output: the MultiQC HTML report found in the job working directory. -->
144 <data name="multiqc_report" from_work_dir="multiqc_report.html" format="html" label="bettercallsal: MultiQC Report on ${on_string}"/>
145 </outputs>
146 <tests>
147 <!--Test 01: long reads; selects the long_long branch explicitly and expects the single declared output-->
148 <test expect_num_outputs="1">
149 <param name="input_read_type" value="long_long"/>
150 <param name="input">
151 <collection type="list">
152 <element name="FAL11127.fastq.gz" value="FAL11127.fastq.gz" />
153 <element name="FAL11341.fastq.gz" value="FAL11341.fastq.gz" />
154 <element name="FAL11342.fastq.gz" value="FAL11342.fastq.gz" />
155 </collection>
156 </param>
157 <param name="fq_suffix" value=".fastq.gz"/>
158 <output name="multiqc_report" file="multiqc_report.html" ftype="html" compare="sim_size"/>
159 </test>
160 </tests>
161 <help><![CDATA[
162
163 .. class:: infomark
164
165 **Purpose**
166
167 bettercallsal is an automated workflow to assign Salmonella serotype based on NCBI Pathogen Detection Project for Salmonella.
168 It uses MASH to reduce the search space followed by additional genome filtering with sourmash. It then performs genome based
169 alignment with kma followed by count generation using salmon. This workflow can be used to analyze shotgun metagenomics
170 datasets, quasi-metagenomic datasets (enriched for Salmonella) and target enriched datasets (enriched with molecular baits specific for Salmonella)
171 and is especially useful when a sample contains a mixture of multiple serovars.
172
173 It is written in Nextflow and is part of the modular data analysis pipelines (CFSAN PIPELINES or CPIPES for short) at CFSAN.
174
175
176 ----
177
178 .. class:: infomark
179
180 **Testing and Validation**
181
182 The CPIPES - bettercallsal Nextflow pipeline has been wrapped to make it work in Galaxy. It takes a list of unpaired short reads, paired short reads, or long reads as input
183 and generates a MultiQC report in the final step. The pipeline has been tested on 2x300 bp MiSeq and 2x150 bp NextSeq simulated reads and has been shown to call multiple
184 Salmonella serotypes with up to ~95% accuracy. The pipeline has also been tested on metagenomics data sets from Peach and Papaya outbreaks as discussed in
185 our publication (https://www.frontiersin.org/articles/10.3389/fmicb.2023.1200983/full). All the original testing and validation was
186 done on the command line on the CFSAN Raven2 HPC Cluster.
187
188
189 ----
190
191 .. class:: infomark
192
193 **Outputs**
194
195 The main output file is:
196
197 ::
198
199 - MultiQC Report: Contains a brief summary report including any serotyping and AMR result tables.
200 Please note that due to MultiQC customizations, the preview (eye icon) will not
201 work within Galaxy for the MultiQC report. Please download the file by clicking
202 on the floppy icon and view it in your browser on your local desktop/workstation.
203 You can export the tables and plots from the downloaded MultiQC report.
204
205 ]]></help>
206 <citations> <!-- BibTeX entry for the bettercallsal article in Frontiers in Microbiology (2023). -->
207 <citation type="bibtex">
208 @article{bettercallsal,
209 author = {Konganti, Kranti},
210 year = {2023},
211 month = {August},
212 title = {bettercallsal: better calling of Salmonella serotypes from enrichment cultures using shotgun metagenomic profiling and its application in an outbreak setting},
213 journal = {Frontiers in Microbiology},
214 doi = {10.3389/fmicb.2023.1200983},
215 url = {https://www.frontiersin.org/articles/10.3389/fmicb.2023.1200983/full}}
216 </citation>
217 </citations>
218 </tool>