comparison hfp_nowayout.xml @ 0:3c767f9cfd88 draft default tip

planemo upload
author galaxytrakr
date Fri, 29 May 2026 13:37:56 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3c767f9cfd88
1 <tool id="hfp_nowayout_awsbatch" name="nowayout" version="0.5.0+awsbatch">
2 <description>An automated workflow to identify Mitochondrial reads and classify Eukaryotes.</description>
3 <requirements> <!-- Docker image requested for running this tool's command. -->
4 <container type="docker">quay.io/galaxytrakr/mulled-v2-ebd88135862aa647eeae73d4d8e6ea8ec81245cd:v5.0</container>
5 </requirements>
6 <version_command>nextflow -version</version_command> <!-- NOTE(review): presumably nextflow is provided by the container above; confirm. -->
7 <command detect_errors="exit_code"><![CDATA[ ## Symlink the selected reads into ./cpipes-input under names reduced to [A-Za-z0-9_.-] (so a name ending in :forward ends in _forward), run the cpipes nowayout pipeline, then move the two HTML reports into the current directory.
8 export MAMBA_ROOT_PREFIX="/server/galaxy/data/nextflow-micromamba-cache";
9 export NXF_HOME=\$(pwd)"/.nextflow-home";
10 input_path=\$(pwd)"/cpipes-input";
11 mkdir -p "\${input_path}" || exit 1;
12 #import re
13 #if (str($input_read_type_cond.input_read_type) == "single_long"):
14 #for _, $unpaired in enumerate($input_read_type_cond.input):
15 #set read1 = re.sub('[^A-Za-z0-9_.-]', '_', str($unpaired.name))
16 #if not str($unpaired.name).endswith(('.fastq', '.fastq.gz')):
17 #set read1_ext = re.sub('fastqsanger', 'fastq', str($unpaired.ext))
18 #set read1 = $read1 + str('.') + $read1_ext
19 #end if
20 ln -sf '$unpaired' "\${input_path}/$read1";
21 #end for
22 #elif (str($input_read_type_cond.input_read_type) == "paired"):
23 #for _, $pair in enumerate($input_read_type_cond.input_pair)
24 #set read_R1 = re.sub('[^A-Za-z0-9_.-]', '_', str($pair.forward.name))
25 #set read_R2 = re.sub('[^A-Za-z0-9_.-]', '_', str($pair.reverse.name))
26 #set read_R1_ext = re.sub('fastqsanger', 'fastq', str($pair.forward.ext))
27 #set read_R2_ext = re.sub('fastqsanger', 'fastq', str($pair.reverse.ext))
28 #if not str($pair.forward.name).endswith(('.fastq', '.fastq.gz')):
29 #set read_R1 = $read_R1 + str('.') + $read_R1_ext
30 #end if
31 #if not str($pair.reverse.name).endswith(('.fastq', '.fastq.gz')):
32 #set read_R2 = $read_R2 + str('.') + $read_R2_ext
33 #end if
34 ln -sf '$pair.forward' "\${input_path}/$read_R1";
35 ln -sf '$pair.reverse' "\${input_path}/$read_R2";
36 #end for
37 #end if
38 $__tool_directory__/0.5.0/cpipes
39 --pipeline nowayout
40 --input "\${input_path}"
41 --output cpipes-output
42 --fq_suffix '${input_read_type_cond.fq_suffix}'
43 #if (str($input_read_type_cond.input_read_type) == "single_long"):
44 --fq_single_end true
45 #elif (str($input_read_type_cond.input_read_type) == "paired"):
46 --fq_single_end false --fq2_suffix '${input_read_type_cond.fq2_suffix}'
47 #end if
48 --db_mode $nowo_db_mode
49 --nowo_thresholds $nowo_thresholds
50 --fq_filename_delim '${fq_filename_delim}'
51 --fq_filename_delim_idx $fq_filename_delim_idx
52 -profile stdkondagac || exit 1; ## Stop here when the pipeline fails, so leftover partial outputs are never collected and reported as a successful run.
53 mv './cpipes-output/nowayout-multiqc/CPIPES-Report_multiqc_report.html' './multiqc_report.html' || exit 1;
54 if [ -e './cpipes-output/krona_ktimporttext/CPIPES_nowayout_krona.html' ]; then mv './cpipes-output/krona_ktimporttext/CPIPES_nowayout_krona.html' './CPIPES_nowayout_krona.html'; else echo '<html><h1>No mitochondrial reads detected in any of the samples</h1></html>' > './CPIPES_nowayout_krona.html'; fi;
55 rm -rf ./cpipes-output || exit 1;
56 rm -rf ./work || exit 1;
57 ]]></command>
58 <inputs> <!-- The read-type conditional decides which collection and suffix parameters are shown; every parameter here is passed to cpipes by the command template. -->
59 <conditional name="input_read_type_cond">
60 <param name="input_read_type" type="select" label="Select the read collection type">
61 <option value="single_long" selected="true">Single-End short reads</option>
62 <option value="paired">Paired-End short reads</option>
63 </param>
64 <when value="single_long">
65 <param name="input" type="data_collection" collection_type="list" format="fastq,fastq.gz"
66 label="Dataset list of unpaired short reads or long reads" />
67 <param name="fq_suffix" value=".fastq.gz" type="text" label="Suffix of the Single-End FASTQ"/>
68 </when>
69 <when value="paired">
70 <param name="input_pair" type="data_collection" collection_type="list:paired" format="fastq,fastq.gz" label="List of Dataset pairs" />
71 <param name="fq_suffix" value="_R1_001.fastq.gz" type="text" label="Suffix of the R1 FASTQ"
72 help="For any data sets downloaded from NCBI into Galaxy, change this to _forward.fastq.gz suffix."/>
73 <param name="fq2_suffix" value="_R2_001.fastq.gz" type="text" label="Suffix of the R2 FASTQ"
74 help="For any data sets downloaded from NCBI into Galaxy, change this to _reverse.fastq.gz suffix."/>
75 </when>
76 </conditional>
77 <param name="nowo_db_mode" type="select" label="Select the database with nowayout"
78 help="Please see below about different databases.">
79 <option value="mitomine2" selected="true">mitomine2</option>
80 <option value="mitomine">mitomine</option>
81 <option value="cytox1">cytox1</option>
82 <option value="voucher">voucher</option>
83 <option value="ganoderma">ganoderma</option>
84 <option value="listeria">listeria</option>
85 </param>
86 <param name="nowo_thresholds" type="select" label="Enter the type of base quality thresholds to be set with nowayout"
87 help="The default value sets the strictest thresholds that tend to filter out most of the false positive hits.">
88 <option value="strict" selected="true">strict</option>
89 <option value="relax">relax</option>
90 </param>
91 <param name="fq_filename_delim" type="text" value="_" label="File name delimiter by which samples are grouped together (--fq_filename_delim)"
92 help="This is the delimiter by which samples are grouped together to display in the final MultiQC report. For example, if your input data sets are mango_replicate1.fastq.gz, mango_replicate2.fastq.gz, orange_replicate1_maryland.fastq.gz, orange_replicate2_maryland.fastq.gz, then to create 2 samples mango and orange, the value for --fq_filename_delim would be _ (underscore) and the value for --fq_filename_delim_idx would be 1, since you want to group by the first word (i.e. mango or orange) after splitting the filename based on _ (underscore)."/>
93 <param name="fq_filename_delim_idx" type="integer" value="1" label="File name delimiter index (--fq_filename_delim_idx)" />
94 </inputs>
95 <outputs> <!-- Both reports are collected from the files the command leaves in its working directory. -->
96 <data name="krona_chart" from_work_dir="CPIPES_nowayout_krona.html" format="html" label="nowayout: Krona Chart on ${on_string}"/>
97 <data name="multiqc_report" from_work_dir="multiqc_report.html" format="html" label="nowayout: MultiQC Report on ${on_string}"/>
98 </outputs>
99 <tests>
100 <!-- Test 01: long reads supplied as a list collection; the read type is left at its default (single_long) and both HTML outputs are checked. -->
101 <test expect_num_outputs="2">
102 <param name="input">
103 <collection type="list">
104 <element name="FAL11127.fastq.gz" value="FAL11127.fastq.gz" />
105 <element name="FAL11341.fastq.gz" value="FAL11341.fastq.gz" />
106 <element name="FAL11342.fastq.gz" value="FAL11342.fastq.gz" />
107 </collection>
108 </param>
109 <param name="fq_suffix" value=".fastq.gz"/>
110 <output name="multiqc_report" file="multiqc_report.html" ftype="html" compare="sim_size"/>
111 <output name="krona_chart" ftype="html"><assert_contents><has_text text="&lt;html"/></assert_contents></output>
112 </test>
113 </tests>
114 <help><![CDATA[
115
116 .. class:: infomark
117
118 **Purpose**
119
120 nowayout is a mitochondrial metagenomics classifier for Eukaryotes.
121 It uses a custom kma database to identify mitochondrial reads and
122 performs read classification followed by further read classification
123 reinforcement using sourmash.
124
125 It is written in Nextflow and is part of the modular data analysis pipelines (CFSAN PIPELINES or CPIPES for short) at HFP.
126
127
128 ----
129
130 .. class:: infomark
131
132 **Databases**
133
134 - *mitomine2*: Big database that works in almost all scenarios.
135 - *cytox1*: Collection of only non-redundant COXI genes from NCBI.
136 - *voucher*: Collection of only non-redundant voucher sequences from NCBI.
137 - *ganoderma*: Collection of only non-redundant mtDNA sequences of Ganoderma fungi.
138 - *listeria*: Collection of organelle sequences and other rRNA genes for Listeria.
139
140
141 ----
142
143 .. class:: infomark
144
145 **Testing and Validation**
146
147 The CPIPES - nowayout Nextflow pipeline has been wrapped to make it work in Galaxy.
148 It takes a list of either paired or unpaired short reads as input and generates a MultiQC report
149 which contains relative abundances in the context of the number of mitochondrial reads identified. It also
150 generates a Krona chart for each sample. The pipeline has been tested on multiple internal insect
151 mixture samples. All the original testing and validation was done on the command line on the
152 HFP Reedling HPC Cluster.
153
154
155 ----
156
157 .. class:: infomark
158
159 **Please note**
160
161 - *nowayout* only works on Illumina short reads (paired or unpaired).
162 - *nowayout* uses a custom kma database named *mitomine*.
163 - The custom database will be incrementally augmented and refined over time.
164 - *mitomine* stats:
165 - Contains ~ 2.93M non-redundant mitochondrial and voucher sequences.
166 - Represents ~ 717K unique species.
167 - Other databases are also available but will be seldom updated.
168
169 ----
170
171 .. class:: infomark
172
173 **Outputs**
174
175 The main output file is:
176
177 ::
178
179 - MultiQC Report: Contains a brief summary report including individual Mitochondrial reads identified
180 per sample and relative abundances in context of the total number of Mitochondrial reads
181 identified.
182
183 Please note that due to MultiQC customizations, the preview (eye icon) will not
184 work within Galaxy for the MultiQC report. Please download the file by clicking
185 on the floppy icon and view it in your browser on your local desktop/workstation.
186 You can export the tables and plots from the downloaded MultiQC report.
187
188 ]]></help>
189 <citations> <!-- Placeholder BibTeX entry; the journal field marks the manuscript as in preparation. -->
190 <citation type="bibtex">
191 @article{nowayout,
192 author = {Konganti, Kranti},
193 year = {2025},
194 month = {May},
195 title = {nowayout: An automated mitochondrial read classifier for Eukaryotes.},
196 journal = {Manuscript in preparation},
197 doi = {10.3389/xxxxxxxxxxxxxxxxxx},
198 url = {https://xxxxxxx/articles/10.3389/xxxxxxxxxxxx/full}}
199 </citation>
200 </citations>
201 </tool>