comparison cfsan_bettercallsal.xml @ 17:0e7a0053e4a6

planemo upload
author kkonganti
date Mon, 15 Jul 2024 10:42:02 -0400
parents b90e5a7a3d4f
children 4b304d77bbfb
comparison
equal deleted inserted replaced
16:b90e5a7a3d4f 17:0e7a0053e4a6
1 <tool id="cfsan_bettercallsal" name="bettercallsal" version="0.6.1"> 1 <tool id="cfsan_bettercallsal" name="bettercallsal" version="0.7.0+galaxy23.1">
2 <description>An automated workflow to assign Salmonella serotype based on NCBI Pathogen Detection Project for Salmonella.</description> 2 <description>An automated workflow to assign Salmonella serotype based on NCBI Pathogen Detection Project for Salmonella.</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="23.04">nextflow</requirement> 4 <container type="docker">quay.io/biocontainers/nextflow:24.04.2--hdfd78af_0</container>
5 <requirement type="package" version="1.0.0">micromamba</requirement> 5 </requirements>
6 <requirement type="package">graphviz</requirement>
7 </requirements>
8 <version_command>nextflow -version</version_command> 6 <version_command>nextflow -version</version_command>
9 <command detect_errors="exit_code"><![CDATA[ 7 <command detect_errors="exit_code"><![CDATA[
10 mkdir -p cpipes-input || exit 1; 8 input_path=\$(pwd)"/cpipes-input";
11 pwd_path=\$(pwd); 9 mkdir -p "\${input_path}" || exit 1;
12 #import re 10 #import re
13 #if (str($input_read_type_cond.input_read_type) == "single_long"): 11 #if (str($input_read_type_cond.input_read_type) == "single_long"):
14 #for _, $unpaired in enumerate($input_read_type_cond.input): 12 #for _, $unpaired in enumerate($input_read_type_cond.input):
15 #set read1 = str($unpaired.name) 13 #set read1 = str($unpaired.name)
16 #if not str($unpaired.name).endswith(('.fastq', '.fastq.gz')): 14 #if not str($unpaired.name).endswith(('.fastq', '.fastq.gz')):
17 #set read1_ext = re.sub('fastqsanger', 'fastq', str($unpaired.ext)) 15 #set read1_ext = re.sub('fastqsanger', 'fastq', str($unpaired.ext))
18 #set read1 = str($unpaired.name) + str('.') + $read1_ext 16 #set read1 = str($unpaired.name) + str('.') + $read1_ext
19 #end if 17 #end if
20 ln -sf '$unpaired' './cpipes-input/$read1'; 18 ln -sf '$unpaired' "\${input_path}/$read1";
21 #end for 19 #end for
22 #elif (str($input_read_type_cond.input_read_type) == "paired"): 20 #elif (str($input_read_type_cond.input_read_type) == "paired"):
23 #for _, $pair in enumerate($input_read_type_cond.input_pair) 21 #for _, $pair in enumerate($input_read_type_cond.input_pair)
24 #set read_R1 = re.sub('\:forward', '_forward', str($pair.forward.name)) 22 #set read_R1 = re.sub('\:forward', '_forward', str($pair.forward.name))
25 #set read_R2 = re.sub('\:reverse', '_reverse', str($pair.reverse.name)) 23 #set read_R2 = re.sub('\:reverse', '_reverse', str($pair.reverse.name))
29 #set read_R1 = $read_R1 + str('.') + $read_R1_ext 27 #set read_R1 = $read_R1 + str('.') + $read_R1_ext
30 #end if 28 #end if
31 #if not str($pair.reverse.name).endswith(('.fastq', '.fastq.gz')): 29 #if not str($pair.reverse.name).endswith(('.fastq', '.fastq.gz')):
32 #set read_R2 = $read_R2 + str('.') + $read_R2_ext 30 #set read_R2 = $read_R2 + str('.') + $read_R2_ext
33 #end if 31 #end if
34 ln -sf '$pair.forward' './cpipes-input/$read_R1'; 32 ln -sf '$pair.forward' "\${input_path}/$read_R1";
35 ln -sf '$pair.reverse' './cpipes-input/$read_R2'; 33 ln -sf '$pair.reverse' "\${input_path}/$read_R2";
36 #end for 34 #end for
37 #end if 35 #end if
38 $__tool_directory__/0.6.1/cpipes 36 $__tool_directory__/0.7.0/cpipes
39 --pipeline bettercallsal 37 --pipeline bettercallsal
40 --input \${pwd_path}/cpipes-input 38 --input \${input_path}
41 --output \${pwd_path}/cpipes-output 39 --output \${pwd_path}/cpipes-output
42 --fq_suffix '${input_read_type_cond.fq_suffix}' 40 --fq_suffix '${input_read_type_cond.fq_suffix}'
43 #if (str($input_read_type_cond.input_read_type) == "single_long"): 41 #if (str($input_read_type_cond.input_read_type) == "single_long"):
44 --fq_single_end true 42 --fq_single_end true
45 #elif (str($input_read_type_cond.input_read_type) == "paired"): 43 #elif (str($input_read_type_cond.input_read_type) == "paired"):
47 #end if 45 #end if
48 --tuspy_n $tuspy_n 46 --tuspy_n $tuspy_n
49 #if ($sourmash_cond.run == "true"): 47 #if ($sourmash_cond.run == "true"):
50 --sfhpy_fcv $sourmash_cond.sfhpy_fcv 48 --sfhpy_fcv $sourmash_cond.sfhpy_fcv
51 #end if 49 #end if
50 #if ($bcs_thresholds != 'relax'):
51 --kmaalign_ID $kma_id
52 #end if
53 #if ($sourmash_cond.run == "true"):
54 --sfhpy_fcv $sourmash_cond.sfhpy_fcv
55 #end if
52 --bcs_db_mode $bcs_db_mode 56 --bcs_db_mode $bcs_db_mode
53 --bcs_thresholds $bcs_thresholds 57 --bcs_thresholds $bcs_thresholds
54 --fq_filename_delim '${fq_filename_delim}' 58 --fq_filename_delim '${fq_filename_delim}'
55 --fq_filename_delim_idx $fq_filename_delim_idx 59 --fq_filename_delim_idx $fq_filename_delim_idx
56 -profile kondagac; 60 -profile gxkubernetes;
57 mv './cpipes-output/bettercallsal-multiqc/multiqc_report.html' './multiqc_report.html' > /dev/null 2>&1 || exit 1; 61 mv './cpipes-output/bettercallsal-multiqc/multiqc_report.html' './multiqc_report.html' || exit 1;
58 rm -rf ./cpipes-output > /dev/null 2>&1 || exit 1; 62 rm -rf ./cpipes-output > || exit 1;
59 rm -rf ./work > /dev/null 2>&1 || exit 1 63 rm -rf ./work || exit 1
60 ]]></command> 64 ]]></command>
61 <inputs> 65 <inputs>
62 <conditional name="input_read_type_cond"> 66 <conditional name="input_read_type_cond">
63 <param name="input_read_type" type="select" label="Select the read collection type"> 67 <param name="input_read_type" type="select" label="Select the read collection type">
64 <option value="single_long" selected="true">Single-End short reads</option> 68 <option value="single_long" selected="true">Single-End short reads</option>
87 <param name="bcs_thresholds" type="select" label="Enter the type of base quality thresholds to be set with bettercallsal" 91 <param name="bcs_thresholds" type="select" label="Enter the type of base quality thresholds to be set with bettercallsal"
88 help="The default value sets the strictest thresholds that tend to filter out most of the false positive hits."> 92 help="The default value sets the strictest thresholds that tend to filter out most of the false positive hits.">
89 <option value="strict" selected="true">strict</option> 93 <option value="strict" selected="true">strict</option>
90 <option value="relax">relax</option> 94 <option value="relax">relax</option>
91 </param> 95 </param>
96 <param name="kma_id" optional="true" value="10.0" type="text" label="Enter the %ID threshold for KMA alignments of samples against genomes"
97 help="The default value of 10% works well for enrichment samples tested within the FDA. The 'relax' preset for base quality thresholds automatically sets this value to 5%."/>
92 <conditional name="sourmash_cond"> 98 <conditional name="sourmash_cond">
93 <param name="run" type="select" label="Run sourmash" 99 <param name="run" type="select" label="Run sourmash"
94 help="Should sourmash be used for additional genome fraction filtering"> 100 help="Should sourmash be used for additional genome fraction filtering">
95 <option value="true" selected="true">yes</option> 101 <option value="true" selected="true">yes</option>
96 <option value="false">no</option> 102 <option value="false">no</option>
152 .. class:: infomark 158 .. class:: infomark
153 159
154 **Testing and Validation** 160 **Testing and Validation**
155 161
156 The CPIPES - bettercallsal Nextflow pipeline has been wrapped to make it work in Galaxy. It takes either a paired or an unpaired list of short reads as input 162 The CPIPES - bettercallsal Nextflow pipeline has been wrapped to make it work in Galaxy. It takes either a paired or an unpaired list of short reads as input
157 and generates a MultiQC report in the final step. The pipeline has been tested on 2x300 bp MiSeq and 2x150 bp NextSeq simulated reads and has been shown to call multiple Salmonella serotypes with up to ~95% accuracy. The pipeline has also been tested on metagenomics data sets from Peach and Papaya outbreaks as discussed in our publication (https://www.frontiersin.org/articles/10.3389/fmicb.2023.1200983/full). All the original testing and validation was done on the command line on the CFSAN Raven2 HPC Cluster. 163 and generates a MultiQC report in the final step. The pipeline has been tested on 2x300 bp MiSeq and 2x150 bp NextSeq simulated reads and has been shown to call multiple
164 Salmonella serotypes with up to ~95% accuracy. The pipeline has also been tested on metagenomics data sets from Peach and Papaya outbreaks as discussed in
165 our publication (https://www.frontiersin.org/articles/10.3389/fmicb.2023.1200983/full). All the original testing and validation was
166 done on the command line on the CFSAN Raven2 HPC Cluster.
158 167
159 168
160 ---- 169 ----
161 170
162 .. class:: infomark 171 .. class:: infomark