comparison hfp_cronology.xml @ 0:9e8b1c747a6a draft default tip

planemo upload
author galaxytrakr
date Fri, 29 May 2026 13:32:17 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9e8b1c747a6a
1 <tool id="hfp_cronology_awsbatch" name="cronology" version="0.2.0+awsbatch"> <!-- Galaxy wrapper whose command section runs the cpipes launcher with the "cronology" pipeline. -->
2 <description>An automated workflow for Cronobacter isolate assembly, sequence typing and traceback.</description>
3 <requirements> <!-- The only declared requirement is one Docker image pinned to the v5.0 tag. -->
4 <container type="docker">quay.io/galaxytrakr/mulled-v2-ebd88135862aa647eeae73d4d8e6ea8ec81245cd:v5.0</container>
5 </requirements>
6 <version_command>nextflow -version</version_command> <!-- Reports the Nextflow version, not the version of the cronology pipeline itself. -->
7 <command detect_errors="exit_code"><![CDATA[
8 export MAMBA_ROOT_PREFIX="/server/galaxy/data/nextflow-micromamba-cache";
9 export NXF_HOME=\$(pwd)"/.nextflow-home";
10 mkdir -p cpipes-input || exit 1;
11 echo -e 'We attempted to create a tree and upload to microreact.org. The following is the log of that attempt\nand contains the URL if it was successful.\n\n' > upload_to_microreact.txt || exit 1;
12 pwd_path=\$(pwd);
13 #import re
14 ## Symlink every input into ./cpipes-input, appending a FASTQ extension when the dataset name does not already end in one.
15 #if (str($input_read_type_cond.input_read_type) == "single_long"):
16 #for $unpaired in $input_read_type_cond.input:
17 #set read1 = str($unpaired.name)
18 #if not str($unpaired.name).endswith(('.fastq', '.fastq.gz')):
19 #set read1 = str($unpaired.name) + str('.') + re.sub('fastqsanger', 'fastq', str($unpaired.ext))
20 #end if
21 ln -sf '$unpaired' './cpipes-input/$read1';
22 #end for
23 #elif (str($input_read_type_cond.input_read_type) == "paired"):
24 #for $pair in $input_read_type_cond.input_pair:
25 ## A ":forward" / ":reverse" marker in the dataset name is rewritten to "_forward" / "_reverse" before linking.
26 #set read_R1 = re.sub(':forward', '_forward', str($pair.forward.name))
27 #set read_R2 = re.sub(':reverse', '_reverse', str($pair.reverse.name))
28 #if not str($pair.forward.name).endswith(('.fastq', '.fastq.gz')):
29 #set read_R1 = $read_R1 + str('.') + re.sub('fastqsanger', 'fastq', str($pair.forward.ext))
30 #end if
31 #if not str($pair.reverse.name).endswith(('.fastq', '.fastq.gz')):
32 #set read_R2 = $read_R2 + str('.') + re.sub('fastqsanger', 'fastq', str($pair.reverse.ext))
33 #end if
34 ln -sf '$pair.forward' './cpipes-input/$read_R1';
35 ln -sf '$pair.reverse' './cpipes-input/$read_R2';
36 #end for
37 #end if
38 ## Optional values are passed only when set, so an empty field cannot swallow the next flag. NOTE(review): confirm cpipes has defaults for omitted flags.
39 $__tool_directory__/0.2.0/cpipes --pipeline cronology
40 --input \${pwd_path}/cpipes-input --output \${pwd_path}/cpipes-output
41 --fq_suffix '${input_read_type_cond.fq_suffix}'
42 #if (str($input_read_type_cond.input_read_type) == "single_long"):
43 --fq_single_end true
44 #elif (str($input_read_type_cond.input_read_type) == "paired"):
45 --fq_single_end false --fq2_suffix '${input_read_type_cond.fq2_suffix}'
46 #end if
47 #if str($refgenome).strip() not in ('', 'None'):
48 --ref_acc '$refgenome'
49 #end if
50 #if str($tuspy_n).strip() not in ('', 'None'):
51 --tuspy_n $tuspy_n
52 #end if
53 --fq_filename_delim '${fq_filename_delim}' --fq_filename_delim_idx $fq_filename_delim_idx -profile stdkondagac;
54 mv './cpipes-output/cronology-multiqc/multiqc_report.html' './multiqc_report.html' || exit 1;
55 mv './cpipes-output/mashtree/hitsTree.dnd' './hitsTree.newick' || exit 1;
56 cat ./cpipes-output/upload_microreact/microreact_url.txt >> upload_to_microreact.txt || exit 1;
57 ]]></command>
58 <inputs>
59 <conditional name="input_read_type_cond"> <!-- Read layout selector; each branch declares its own collection input and FASTQ suffix parameter(s). -->
60 <param name="input_read_type" type="select" label="Select the read collection type">
61 <option value="single_long" selected="true">Single-End short reads</option>
62 <option value="paired">Paired-End short reads</option>
63 </param>
64 <when value="single_long">
65 <param name="input" type="data_collection" collection_type="list" format="fastq,fastq.gz"
66 label="Dataset list of unpaired short reads or long reads" />
67 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the Single-End FASTQ"/>
68 </when>
69 <when value="paired">
70 <param name="input_pair" type="data_collection" collection_type="list:paired" format="fastq,fastq.gz" label="List of Dataset pairs" />
71 <param name="fq_suffix" value="_R1_001.fastq.gz" type="text" label="Suffix of the R1 FASTQ"
72 help="For any data sets downloaded from NCBI into Galaxy, change this to _forward.fastq.gz suffix."/>
73 <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ"
74 help="For any data sets downloaded from NCBI into Galaxy, change this to _reverse.fastq.gz suffix."/>
75 </when>
76 </conditional>
77 <param name="refgenome" optional="true" value="GCF_003516125" type="text"
78 label="NCBI reference genome accession"
79 help="Is the reference genome other than Cronobacter sakazakii? Reference genome FASTA is used as a model for gene prediction. DO NOT ENTER THE DECIMAL PART (Ex: enter GCF_003516125, not GCF_003516125.1)." />
80 <param name="tuspy_n" optional="true" value="5" type="integer" label="Enter the number of top unique hits to retain after initial MASH screen step"
81 help="These hits will be used to build a genome distance based tree for your experiment run. Default value of 5 is suitable for almost all scenarios."/>
82 <param name="fq_filename_delim" type="text" value="_" label="File name delimiter by which samples are grouped together (--fq_filename_delim)"
83 help="This is the delimiter by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)."/>
84 <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimiter index (--fq_filename_delim_idx)" /> <!-- Index of the delimiter-separated name part used for grouping, as described in the help of fq_filename_delim. -->
85 </inputs>
86 <outputs> <!-- Three fixed datasets picked up from the job working directory, then three list collections discovered under ./cpipes-output. -->
87 <data name="multiqc_report" from_work_dir="multiqc_report.html" format="html" label="cronology: MultiQC Report on ${on_string}"/>
88 <data name="mashtree" from_work_dir="hitsTree.newick" format="nhx" label="cronology: Genome distance based tree on ${on_string}"/>
89 <data name="microreact" from_work_dir="upload_to_microreact.txt" format="txt" label="cronology: Tree result from microreact.org ${on_string}"/>
90 <collection name="itol_metadata" label="cronology: iTOL Metadata: ${on_string}" type="list"> <!-- One element per *.txt file in cat_unique. -->
91 <discover_datasets directory="./cpipes-output/cat_unique" pattern="(?P&lt;name&gt;.*)\.txt" ext="txt" match_relative_path="true" />
92 </collection>
93 <collection name="gene_models" label="cronology: Predicted gene models: ${on_string}" type="list"> <!-- Searches the prokka directory recursively for *.gff files. -->
94 <discover_datasets directory="./cpipes-output/prokka" recurse="true" pattern=".*\/(?P&lt;name&gt;.*)\.gff" ext="gff" match_relative_path="true" />
95 </collection>
96 <collection name="assemblies" label="cronology: Polished genome assemblies: ${on_string}" type="list"> <!-- NOTE(review): confirm "fa" is a registered datatype on the target Galaxy; stock Galaxy names it "fasta". -->
97 <discover_datasets directory="./cpipes-output/polypolish" pattern="(?P&lt;name&gt;.*)\.fa" ext="fa" match_relative_path="true" />
98 </collection>
99 </outputs>
100 <tests>
101 <!--Test 01: long reads-->
102 <test expect_num_outputs="6"> <!-- The tool declares 3 data outputs plus 3 output collections, none of them filtered. -->
103 <param name="input">
104 <collection type="list">
105 <element name="FAL11127.fastq.gz" value="FAL11127.fastq.gz" />
106 <element name="FAL11341.fastq.gz" value="FAL11341.fastq.gz" />
107 <element name="FAL11342.fastq.gz" value="FAL11342.fastq.gz" />
108 </collection>
109 </param>
110 <param name="fq_suffix" value=".fastq.gz"/>
111 <output name="multiqc_report" file="multiqc_report.html" ftype="html" compare="sim_size"/>
112 <!-- <output name="assembled_mags" file="FAL11127.assembly_filtered.contigs.fasta" ftype="fasta" compare="sim_size"/> -->
113 </test>
114 </tests>
115 <help><![CDATA[
116
117 .. class:: infomark
118
119 **Purpose**
120
121 cronology is an automated workflow for Cronobacter isolate assembly,
122 sequence typing and traceback. The workflow version 0.2.0 takes in single-end
123 or paired-end Illumina short read data, performs QC using fastp, assembly and polishing using shovill and polypolish,
124 and whole genome distance based clustering using mashtree based on NCBI Pathogen Detection DB for Cronobacter.
125
126 It is written in Nextflow and is part of the modular data analysis pipelines (CFSAN PIPELINES or CPIPES for short) at CFSAN.
127
128
129 ----
130
131 .. class:: infomark
132
133 **Testing and Validation**
134
135 The CPIPES - cronology Nextflow pipeline has been wrapped to make it work in Galaxy.
136 All the testing has been done on the command line on the CFSAN Raven2 HPC Cluster.
137
138
139 ----
140
141 .. class:: infomark
142
143 **Outputs**
144
145 The main output files are:
146
147 ::
148
149 - MultiQC Report: Contains a brief summary report including any serotyping and AMR result tables.
150 Please note that due to MultiQC customizations, the preview (eye icon) will not
151 work within Galaxy for the MultiQC report. Please download the file by clicking
152 on the floppy icon and view it in your browser on your local desktop/workstation.
153 You can export the tables and plots from the downloaded MultiQC report.
154 - Polished de novo assemblies (FASTA) for each sample.
155 - Genome annotations (GFF) for each sample.
156 - Whole genome distance based clustering tree (Newick).
157 - Additional metadata useful for uploading the Newick tree into iTOL.
158 - A https://microreact.org URL stored in a TXT file. An attempt will be made to upload the tree (Newick)
159 to https://microreact.org and if successful, the URL will be retrieved and stored in this TXT file.
160
161 ]]></help>
162 <citations> <!-- Single BibTeX entry; the journal is given as "Unpublished" and the DOI field is a placeholder. -->
163 <citation type="bibtex">
164 @article{cronology,
165 author = {Konganti, Kranti},
166 year = {2024},
167 month = {May},
168 title = {cronology: An automated workflow for Cronobacter isolate assembly, sequence typing and traceback},
169 journal = {Unpublished},
170 doi = {xx.xxxx/xxxxx.2024.xxxxxxxxxx},
171 url = {https://github.com/CFSAN-Biostatistics/cronology}}
172 </citation>
173 </citations>
174 </tool>