Mercurial > repos > kkonganti > cfsan_cronology
comparison cfsan_cronology.xml @ 0:c8597e9e1a97
"planemo upload"
author | kkonganti |
---|---|
date | Mon, 27 Nov 2023 12:37:44 -0500 |
parents | |
children | c6327baca625 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c8597e9e1a97 |
---|---|
1 <tool id="cfsan_cronology" name="cronology" version="0.1.0"> | |
2 <description>An automated workflow for Cronobacter isolate assembly, sequence typing and traceback.</description> | |
<!-- Packages the wrapper asks for at run time; graphviz is requested without a version pin. --> | |
3 <requirements> | |
4 <requirement version="23.04" type="package">nextflow</requirement> | |
5 <requirement version="1.0.0" type="package">micromamba</requirement> | |
6 <requirement type="package">graphviz</requirement> | |
7 </requirements> | |
<!-- The reported tool version is taken from the Nextflow launcher. --> | |
8 <version_command>nextflow -version</version_command> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
## Stage the selected reads as symlinks in ./cpipes-input, run the cronology pipeline on that directory, | |
## then move the two fixed-name reports into the working directory. | |
10 mkdir -p cpipes-input || exit 1; | |
11 pwd_path=\$(pwd); | |
12 #import re | |
13 #if (str($input_read_type_cond.input_read_type) == "single_long"): | |
14 #for _, $unpaired in enumerate($input_read_type_cond.input): | |
15 #set read1 = str($unpaired.name) | |
## Element names without a FASTQ suffix get one built from the datatype extension (fastqsanger -> fastq). | |
16 #if not str($unpaired.name).endswith(('.fastq', '.fastq.gz')): | |
17 #set read1_ext = re.sub('fastqsanger', 'fastq', str($unpaired.ext)) | |
18 #set read1 = str($unpaired.name) + str('.') + $read1_ext | |
19 #end if | |
20 ln -sf '$unpaired' './cpipes-input/$read1'; | |
21 #end for | |
22 #elif (str($input_read_type_cond.input_read_type) == "paired"): | |
23 #for _, $pair in enumerate($input_read_type_cond.input_pair) | |
## ':forward' / ':reverse' in the element names are rewritten to '_forward' / '_reverse' for the link names. | |
24 #set read_R1 = re.sub('\:forward', '_forward', str($pair.forward.name)) | |
25 #set read_R2 = re.sub('\:reverse', '_reverse', str($pair.reverse.name)) | |
26 #set read_R1_ext = re.sub('fastqsanger', 'fastq', str($pair.forward.ext)) | |
27 #set read_R2_ext = re.sub('fastqsanger', 'fastq', str($pair.reverse.ext)) | |
28 #if not str($pair.forward.name).endswith(('.fastq', '.fastq.gz')): | |
29 #set read_R1 = $read_R1 + str('.') + $read_R1_ext | |
30 #end if | |
31 #if not str($pair.reverse.name).endswith(('.fastq', '.fastq.gz')): | |
32 #set read_R2 = $read_R2 + str('.') + $read_R2_ext | |
33 #end if | |
34 ln -sf '$pair.forward' './cpipes-input/$read_R1'; | |
35 ln -sf '$pair.reverse' './cpipes-input/$read_R2'; | |
36 #end for | |
37 #end if | |
## The launcher path is quoted so a tool directory containing spaces still resolves. | |
38 '$__tool_directory__/0.1.0/cpipes' | |
39 --pipeline cronology | |
40 --input \${pwd_path}/cpipes-input | |
41 --output \${pwd_path}/cpipes-output | |
42 --fq_suffix '${input_read_type_cond.fq_suffix}' | |
43 #if (str($input_read_type_cond.input_read_type) == "single_long"): | |
44 --fq_single_end true | |
45 #elif (str($input_read_type_cond.input_read_type) == "paired"): | |
46 --fq_single_end false --fq2_suffix '${input_read_type_cond.fq2_suffix}' | |
47 #end if | |
## Optional parameters are forwarded only when a value was supplied, so an empty field cannot | |
## make its flag swallow the option that follows it. The free-text accession is quoted. | |
#if str($refgenome).strip() not in ('', 'None'): | |
48 --ref_acc '$refgenome' | |
#end if | |
#if str($tuspy_n).strip() not in ('', 'None'): | |
49 --tuspy_n $tuspy_n | |
#end if | |
50 --fq_filename_delim '${fq_filename_delim}' | |
51 --fq_filename_delim_idx $fq_filename_delim_idx | |
## Stop on a failed pipeline run so the mv steps below cannot mask it. | |
52 -profile kondagac || exit 1; | |
## Outputs declared with from_work_dir are looked up under these names in the working directory. | |
53 mv './cpipes-output/cronology-multiqc/multiqc_report.html' './multiqc_report.html' > /dev/null 2>&1 || exit 1; | |
54 mv './cpipes-output/mashtree/hitsTree.dnd' './hitsTree.dnd' > /dev/null 2>&1 || exit 1; | |
55 ]]></command> | |
56 <inputs> | |
<!-- Read layout selector: each branch supplies its own collection input and FASTQ suffix parameter(s). --> | |
57 <conditional name="input_read_type_cond"> | |
58 <param name="input_read_type" type="select" label="Select the read collection type"> | |
59 <option value="single_long" selected="true">Single-End short reads or long reads</option> | |
60 <option value="paired">Paired-End short reads</option> | |
61 </param> | |
62 <when value="single_long"> | |
63 <param name="input" type="data_collection" collection_type="list" format="fastq,fastq.gz" | |
64 label="Dataset list of unpaired short reads or long reads" /> | |
65 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the Single-End FASTQ"/> | |
66 </when> | |
67 <when value="paired"> | |
68 <param name="input_pair" type="data_collection" collection_type="list:paired" format="fastq,fastq.gz" label="List of Dataset pairs" /> | |
69 <param name="fq_suffix" value="_R1_001.fastq.gz" type="text" label="Suffix of the R1 FASTQ" | |
70 help="For any data sets downloaded from NCBI into Galaxy, change this to _forward.fastq.gz suffix."/> | |
71 <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ" | |
72 help="For any data sets downloaded from NCBI into Galaxy, change this to _reverse.fastq.gz suffix."/> | |
73 </when> | |
74 </conditional> | |
<!-- Markup inside an attribute value must be entity-escaped; a literal '<' is not well-formed there. --> | |
75 <param name="refgenome" optional="true" value="GCF_003516125" type="text" | |
76 label="NCBI reference genome accession" | |
77 help="Is the reference genome other than &lt;i&gt;Cronobacter sakazakii&lt;/i&gt;? Reference genome FASTA is used as a model for gene prediction. DO NOT ENTER THE DECIMAL PART (Ex: GCF_003516125.1)." /> | |
<!-- The help text quotes the same default as the value attribute (10). --> | |
78 <param name="tuspy_n" optional="true" value="10" type="integer" label="Enter the number of top unique hits to retain after initial MASH screen step" | |
79 help="These hits will be used to build a genome distance based tree for your experiment run. Default value of 10 is suitable for almost all scenarios."/> | |
80 <param name="fq_filename_delim" type="text" value="_" label="File name delimiter by which samples are grouped together (--fq_filename_delim)" | |
81 help="This is the delimiter by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)."/> | |
82 <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimiter index (--fq_filename_delim_idx)" /> | |
83 </inputs> | |
84 <outputs> | |
<!-- Two single datasets picked up from the working directory by fixed file name. --> | |
85 <data name="multiqc_report" format="html" label="cronology: MultiQC Report on ${on_string}" from_work_dir="multiqc_report.html"/> | |
<!-- The tree uses Galaxy's "newick" datatype extension. --> | |
86 <data name="mashtree" format="newick" label="cronology: Genome distance based tree on ${on_string}" from_work_dir="hitsTree.dnd"/> | |
<!-- Discovered collections: the named group in each pattern is entity-escaped because a literal '<' is not allowed in an attribute value. --> | |
87 <collection name="itol_metadata" type="list" label="cronology: iTOL Metadata: ${on_string}"> | |
88 <discover_datasets pattern="(?P&lt;name&gt;.*)\.txt" ext="txt" match_relative_path="true" directory="./cpipes-output/cat_unique"/> | |
89 </collection> | |
90 <collection name="gene_models" type="list" label="cronology: Predicted gene models: ${on_string}"> | |
91 <discover_datasets pattern="(?P&lt;name&gt;.*)\.gff" ext="gff" match_relative_path="true" recurse="true" directory="./cpipes-output/prokka"/> | |
92 </collection> | |
<!-- Files matching *.fa are registered under Galaxy's "fasta" datatype extension. --> | |
93 <collection name="assemblies" type="list" label="cronology: Polished genome assemblies: ${on_string}"> | |
94 <discover_datasets pattern="(?P&lt;name&gt;.*)\.fa" ext="fasta" match_relative_path="true" directory="./cpipes-output/polypolish"/> | |
95 </collection> | |
96 </outputs> | |
97 <tests> | |
98 <!-- Test 01: list collection of three long-read FASTQ files, default read type --> | |
99 <test expect_num_outputs="2"> | |
100 <param name="input"> | |
101 <collection type="list"> | |
102 <element value="FAL11127.fastq.gz" name="FAL11127.fastq.gz" /> | |
103 <element value="FAL11341.fastq.gz" name="FAL11341.fastq.gz" /> | |
104 <element value="FAL11342.fastq.gz" name="FAL11342.fastq.gz" /> | |
105 </collection> | |
106 </param> | |
107 <param value=".fastq.gz" name="fq_suffix"/> | |
108 <output ftype="html" compare="sim_size" name="multiqc_report" file="multiqc_report.html"/> | |
109 <!-- Disabled check: <output name="assembled_mags" file="FAL11127.assembly_filtered.contigs.fasta" ftype="fasta" compare="sim_size"/> --> | |
110 </test> | |
111 </tests> | |
112 <help><![CDATA[ | |
113 | |
114 .. class:: infomark | |
115 | |
116 **Purpose** | |
117 | |
118 cronology is an automated workflow to assign Salmonella serotype based on NCBI Pathogen Detection Project for Salmonella. | |
119 It uses MASH to reduce the search space followed by additional genome filtering with sourmash. It then performs genome based | |
120 alignment with kma followed by count generation using salmon. This workflow can be used to analyze shotgun metagenomics | |
121 datasets, quasi-metagenomic datasets (enriched for Salmonella) and target enriched datasets (enriched with molecular baits specific for Salmonella) | |
122 and is especially useful in a case where a sample is of multi-serovar mixture. | |
123 | |
124 It is written in Nextflow and is part of the modular data analysis pipelines (CFSAN PIPELINES or CPIPES for short) at CFSAN. | |
125 | |
126 | |
127 ---- | |
128 | |
129 .. class:: infomark | |
130 | |
131 **Testing and Validation** | |
132 | |
133 The CPIPES - cronology Nextflow pipeline has been wrapped to make it work in Galaxy. It takes in either paired or unpaired short reads list as an input | |
134 and generates a MultiQC report in the final step. The pipeline has been tested on 2x300 bp MiSeq and 2x150 bp NextSeq simulated reads and has been shown to call multiple | |
135 Salmonella serotypes with up to ~95% accuracy. The pipeline has also been tested on metagenomics data sets from Peach and Papaya outbreaks as discussed in | |
136 our publication (https://www.frontiersin.org/articles/10.3389/fmicb.2023.1200983/full). All the original testing and validation was | |
137 done on the command line on the CFSAN Raven2 HPC Cluster. | |
138 | |
139 | |
140 ---- | |
141 | |
142 .. class:: infomark | |
143 | |
144 **Outputs** | |
145 | |
146 The main output file is: | |
147 | |
148 :: | |
149 | |
150 - MultiQC Report: Contains a brief summary report including any serotyping and AMR result tables. | |
151 Please note that due to MultiQC customizations, the preview (eye icon) will not | |
152 work within Galaxy for the MultiQC report. Please download the file by clicking | |
153 on the floppy icon and view it in your browser on your local desktop/workstation. | |
154 You can export the tables and plots from the downloaded MultiQC report. | |
155 | |
156 ]]></help> | |
157 <citations> | |
158 <citation type="bibtex"> | |
159 @article{cronology, | |
160 author = {Konganti, Kranti}, | |
161 year = {2023}, | |
162 month = {August}, | |
163 title = {bettercallsal: better calling of Salmonella serotypes from enrichment cultures using shotgun metagenomic profiling and its application in an outbreak setting}, | |
164 journal = {Frontiers in Microbiology}, | |
165 doi = {10.3389/fmicb.2023.1200983}, | |
166 url = {https://www.frontiersin.org/articles/10.3389/fmicb.2023.1200983/full}} | |
167 </citation> | |
168 </citations> | |
169 </tool> |