comparison metaphlan_gt.xml @ 0:a7bd05df0bea draft default tip

planemo upload commit e485da3e1b9eb674a52948a00a3328c1a3cc5ffa
author estrain
date Fri, 13 Mar 2026 12:06:00 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a7bd05df0bea
1 <tool id="metaphlan_gt" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy4" profile="@PROFILE@">
2 <description>to profile the composition of microbial communities</description>
3 <macros>
4 <import>macros.xml</import>
5
<!-- Reusable "tax_lev" conditional: chooses the taxonomic level of the relative abundance report. Only the "All taxonomic levels" choice (value "a") offers the extra split_levels switch; every other level has an empty branch. -->
6 <xml name="tax_lev">
7 <conditional name="tax_lev">
8 <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output">
9 <option value="a" selected="true">All taxonomic levels</option>
10 <option value="k">Kingdoms only</option>
11 <option value="p">Phyla only</option>
12 <option value="c">Classes only</option>
13 <option value="o">Orders only</option>
14 <option value="f">Families only</option>
15 <option value="g">Genera only</option>
16 <option value="s">Species only</option>
17 </param>
18 <when value="a">
19 <param name="split_levels" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate a report for each taxonomic level?" help="It will be in addition to the default output"/>
20 </when>
21 <when value="k"/>
22 <when value="p"/>
23 <when value="c"/>
24 <when value="o"/>
25 <when value="f"/>
26 <when value="g"/>
27 <when value="s"/>
28 </conditional>
29 </xml>
<!-- Datatypes accepted by every raw read input parameter of this tool. -->
30 <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz</token>
31 </macros>
<!-- NOTE(review): the "requirements" macro is not defined in this file; presumably it comes from the imported macros.xml. Confirm there. -->
32 <expand macro="requirements"/>
<!-- Command used to report the version of the wrapped metaphlan executable. -->
33 <version_command>metaphlan -v</version_command>
34 <command detect_errors="aggressive"><![CDATA[
## Overview of the generated shell command:
##   1. raw reads only: decompress or link the inputs and derive $file_path and $ext
##   2. database from history only: build a bowtie2 index and convert the JSON metadata to a pickle
##   3. run metaphlan
##   4. optionally split the report per taxonomic level and/or reformat it for Krona
35 #if $inputs.in.selector == "raw"
36 #if $inputs.in.raw_in.selector == "single"
37 #set full_ext=$inputs.in.raw_in.in.datatype.file_ext
38 #if $full_ext.endswith("gz")
39 #set $file_path="in"
40 zcat '$inputs.in.raw_in.in' > '$file_path' &&
41 #else if $full_ext.endswith("bz2")
42 #set $file_path="in"
43 bzcat '$inputs.in.raw_in.in' > '$file_path' &&
44 #else
45 #set $file_path="'%s'" % $inputs.in.raw_in.in
46 #end if
47 #else if $inputs.in.raw_in.selector == "multiple"
48 #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext
49 #set file_path=""
50 #set sep=""
51 #for $i, $f in enumerate($inputs.in.raw_in.in)
52 #if $f.datatype.file_ext != $full_ext
53 echo "Different datatypes for input files" &&
54 exit 1
55 #end if
56 #if $full_ext.endswith("gz")
57 #set fp="input_%s" % ($i)
58 zcat '$f' > '$fp' &&
59 #else if $full_ext.endswith("bz2")
60 #set fp="input_%s" % ($i)
61 bzcat '$f' > '$fp' &&
62 #else
63 #set fp=$f
64 #end if
## each file is appended as its own single-quoted word, with the comma separator inside the quotes
65 #set $file_path+="'%s%s'" % ($sep, $fp)
66 #set $sep=","
67 #end for
68 #else if $inputs.in.raw_in.selector == "paired"
69 #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext
70 #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext
71 echo "Different datatypes for input paired-end files" &&
72 exit 1
73 #end if
74 #if $full_ext.endswith("gz")
75 zcat '$inputs.in.raw_in.in_f' > 'in_f' &&
76 zcat '$inputs.in.raw_in.in_r' > 'in_r' &&
77 #else if $full_ext.endswith("bz2")
78 bzcat '$inputs.in.raw_in.in_f' > 'in_f' &&
79 bzcat '$inputs.in.raw_in.in_r' > 'in_r' &&
80 #else:
81 ln -s '$inputs.in.raw_in.in_f' 'in_f' &&
82 ln -s '$inputs.in.raw_in.in_r' 'in_r' &&
83 #end if
84 ## paired data has by default no special treatment, i.e. it is given as comma separated list
85 ## except iff paired subsampling where -1 and -2 must be used
86 #if $subsample.selector == 'paired'
87 #set file_path="-1 in_f -2 in_r"
88 #else
89 #set file_path="in_f,in_r"
90 #end if
91 #else if $inputs.in.raw_in.selector == "paired_collection"
92 #set full_ext=$inputs.in.raw_in.in.forward.ext
93 #if $full_ext != $inputs.in.raw_in.in.reverse.ext
94 echo "Different datatypes for input paired-end files" &&
95 exit 1
96 #end if
97 #if $full_ext.endswith("gz")
98 zcat '$inputs.in.raw_in.in.forward' > 'in_f' &&
99 zcat '$inputs.in.raw_in.in.reverse' > 'in_r' &&
100 #else if $full_ext.endswith("bz2")
101 bzcat '$inputs.in.raw_in.in.forward' > 'in_f' &&
102 bzcat '$inputs.in.raw_in.in.reverse' > 'in_r' &&
103 #else
104 ln -s '$inputs.in.raw_in.in.forward' 'in_f' &&
105 ln -s '$inputs.in.raw_in.in.reverse' 'in_r' &&
106 #end if
107 #if $subsample.selector == 'paired'
108 #set file_path="-1 in_f -2 in_r"
109 #else
110 #set file_path="in_f,in_r"
111 #end if
112 #end if
113
## reduce the Galaxy datatype extension to the value handed to --input_type
114 #if $full_ext.startswith("fastq")
115 #set ext='fastq'
116 #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2"))
117 #set ext='fasta'
118 #else
119 #set ext=$full_ext
120 #end if
121 #end if
122
## database supplied from the history: build the bowtie2 index and the pickled metadata under ref_db/
123 #if $inputs.db.db_selector == "history"
124 mkdir 'ref_db' &&
125 bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db' &&
126 python
127 '$__tool_directory__/customizemetadata.py'
128 transform_json_to_pkl
129 --json '$inputs.db.mpa_pkl'
130 --pkl 'ref_db/custom_db.pkl' &&
131 #end if
132
133 metaphlan
134 #if $inputs.in.selector == "raw"
135 $file_path
136 --input_type '$ext'
137 --read_min_len $inputs.in.read_min_len
138 --bt2_ps '$inputs.in.mapping.bt2_ps'
139 --min_mapq_val $inputs.in.mapping.min_mapq_val
140 #if $ext == "sam"
141 --nreads \$(cat '$file_path' | grep -c -v '^@')
142 #end if
143 #else
144 '$inputs.in.in'
145 --input_type '$inputs.in.selector'
146 #if $inputs.in.selector == "sam"
147 --nreads \$(cat '$inputs.in.in' | grep -c -v '^@')
148 #end if
149 #end if
150 #if $inputs.db.db_selector == "cached"
151 --bowtie2db '$inputs.db.cached_db.fields.path'
152 --index '$inputs.db.cached_db.fields.dbkey'
153 #else
154 --bowtie2db 'ref_db/'
155 --index 'custom_db'
156 #end if
157 -t '$analysis.analysis_type.t'
158 #if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
159 --tax_lev '$analysis.analysis_type.tax_lev.tax_lev'
160 #else if $analysis.analysis_type.t == "clade_specific_strain_tracker"
161 --clade '$analysis.analysis_type.clade'
162 #if str($analysis.analysis_type.min_ab) != ''
163 --min_ab $analysis.analysis_type.min_ab
164 #end if
165 #else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != ''
## a single dollar sign: a doubled one would leave a literal dollar in front of the number
166 --nreads $analysis.analysis_type.nreads
167 #else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != ''
168 --pres_th $analysis.analysis_type.pres_th
169 #end if
170 --min_cu_len $analysis.min_cu_len
171 #if str($analysis.min_alignment_len) != ''
172 --min_alignment_len $analysis.min_alignment_len
173 #end if
174 #if 'add_viruses' in $analysis.organism_profiling
175 --add_viruses
176 #end if
177 #if 'ignore_eukaryotes' in $analysis.organism_profiling
178 --ignore_eukaryotes
179 #end if
180 #if 'ignore_bacteria' in $analysis.organism_profiling
181 --ignore_bacteria
182 #end if
183 #if 'ignore_archaea' in $analysis.organism_profiling
184 --ignore_archaea
185 #end if
186 --stat $analysis.stat
187 --stat_q $analysis.stat_q
188 --perc_nonzero $analysis.perc_nonzero
189 #if $analysis.ignore_markers
190 --ignore_markers '$analysis.ignore_markers'
191 #end if
192 $analysis.avoid_disqm
193 --sample_id_key '$out.sample_id_key'
194 --sample_id '$out.sample_id'
195 $out.use_group_representative
196 $out.legacy_output
197 $out.CAMI_format_output
198 $out.unclassified_estimation
199 -o '$output_file'
200 --bowtie2out 'bowtie2out'
201 -s 'sam_output_file'
202 --biom '$biom_output_file'
203 --nproc \${GALAXY_SLOTS:-4}
204 #if $viral_analysis.profile_vsc
205 $viral_analysis.profile_vsc
206 --vsc_out '$vcs_breath_coverage'
207 --vsc_breadth $viral_analysis.vsc_breadth
208 #end if
209
210 #if $subsample.selector != "no"
211 #if $subsample.selector == "single"
212 --subsampling $subsample.subsampling
213 #else
214 --subsampling_paired $subsample.subsampling_paired
215 #end if
216 $subsample.mapping_subsampling
217 #if $subsample.subsampling_seed
218 --subsampling_seed $subsample.subsampling_seed
219 #end if
220 --subsampling_output subsampled.out
221 #end if
222
## the hidden "test" parameter keeps metaphlan offline unless a test overrides its default
223 #if $test == "false"
224 --offline
225 #end if
226
227
228 #if $analysis.analysis_type.t in ['rel_ab', 'rel_ab_w_read_stats']
229 #if $analysis.analysis_type.tax_lev.tax_lev == 'a' and $analysis.analysis_type.tax_lev.split_levels
230 &&
231 mkdir 'split_levels'
232 &&
233 python '$__tool_directory__/formatoutput.py'
234 split_levels
235 --metaphlan_output '$output_file'
236 --outdir 'split_levels'
237 $out.legacy_output
238 #end if
239 #end if
240
241 #if $out.krona_output
242 &&
243 python '$__tool_directory__/formatoutput.py'
244 format_for_krona
245 --metaphlan_output '$output_file'
246 --krona_output '$krona_output_file'
247 #end if
248 ]]></command>
249 <inputs>
<!-- Read input (raw FASTA/FASTQ reads, an external SAM file, or the intermediary mapping file of a previous run) and the marker database (entry of a cached data table, or FASTA plus JSON metadata taken from the history). -->
250 <section name="inputs" title="Inputs" expanded="true">
251 <conditional name="in">
252 <param name="selector" type="select" label="Input(s)">
253 <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option>
254 <option value="sam">Externally BowTie2-mapped SAM file</option>
255 <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option>
256 </param>
257 <when value="raw">
<!-- Layout of the raw reads: one file, several single-end files, a paired collection, or two separate paired files. -->
258 <conditional name="raw_in">
259 <param name="selector" type="select" label="Fasta/FastQ file(s) with microbiota reads">
260 <option value="single" selected="true">One single-end file</option>
261 <option value="multiple">Multiple single-end files</option>
262 <option value="paired_collection">Paired-end collection</option>
263 <option value="paired">Paired-end files</option>
264 </param>
265 <when value="single">
266 <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with microbiota reads"/>
267 </when>
268 <when value="multiple">
269 <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ files with microbiota reads" multiple="true"/>
270 </when>
271 <when value="paired_collection">
272 <param name="in" type="data_collection" format="@FILE_FORMATS@" label="Paired-end Fasta/FastQ file with microbiota reads" collection_type="paired"/>
273 </when>
274 <when value="paired">
275 <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with microbiota reads"/>
276 <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with microbiota reads"/>
277 </when>
278 </conditional>
279 <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/>
280 <section name="mapping" title="Mapping" expanded="true">
281 <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files">
282 <option value="sensitive">Sensitive</option>
283 <option value="very-sensitive" selected="true">Very sensitive</option>
284 <option value="sensitive-local">Sensitive local</option>
285 <option value="very-sensitive-local">Very sensitive local</option>
286 </param>
287 <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/>
288 </section>
289 </when>
290 <when value="sam">
291 <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/>
292 </when>
293 <when value="bowtie2out">
294 <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" help="File needs to be generated with MetaPhlAn versions &gt;3.0"/>
295 </when>
296 </conditional>
<!-- Database source: "cached" lists data table entries whose column 4 equals @IDX_VERSION@; "history" takes a FASTA file and its JSON metadata. -->
297 <conditional name="db">
298 <param name="db_selector" type="select" label="Database with clade-specific marker genes">
299 <option value="cached" selected="true">Locally cached</option>
300 <option value="history">From history</option>
301 </param>
302 <when value="cached">
303 <param name="cached_db" type="select" label="Cached database with clade-specific marker genes">
304 <options from_data_table="@IDX_DATA_TABLE@">
305 <filter type="static_value" column="4" value="@IDX_VERSION@"/>
306 <validator message="No compatible MetaPhlAn database is available" type="no_options"/>
307 </options>
308 </param>
309 </when>
310 <when value="history">
311 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
312 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/>
313 </when>
314 </conditional>
315 </section>
<!-- Analysis options: the analysis type (with its type-specific parameters), marker and alignment thresholds, the organism groups to add or ignore, and the statistic used to turn marker abundances into clade abundances. -->
316 <section name="analysis" title="Analysis" expanded="true">
317 <conditional name="analysis_type">
318 <param argument="-t" type="select" label="Type of analysis to perform">
319 <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option>
320 <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota in terms of relative abundances and estimate the number of reads coming from each clade</option>
321 <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option>
322 <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option>
323 <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option>
324 <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when &gt; 0.0 and normalized by microbiota size if number of reads is specified)</option>
325 <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option>
326 <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option>
327 </param>
328 <when value="rel_ab">
329 <expand macro="tax_lev"/>
330 </when>
331 <when value="rel_ab_w_read_stats">
332 <expand macro="tax_lev"/>
333 </when>
334 <when value="reads_map"/>
335 <when value="clade_profiles"/>
336 <when value="clade_specific_strain_tracker">
337 <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" help="Markers are also extracted for subclades"/>
338 <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/>
339 </when>
340 <when value="marker_ab_table">
341 <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/>
342 </when>
343 <when value="marker_counts"/>
344 <when value="marker_pres_table">
345 <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/>
346 </when>
347 </conditional>
348 <param argument="--min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/>
349 <param argument="--min_alignment_len" type="integer" optional="true" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/>
<!-- Each selected value is turned into the metaphlan flag of the same name by the command template. -->
350 <param name="organism_profiling" type="select" optional="true" label="Organisms to profile" multiple="true">
351 <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option>
352 <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option>
353 <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option>
354 <option value="ignore_archaea">Ignore archaea organisms (ignore_archaea)</option>
355 </param>
356 <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances">
357 <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option>
358 <option value="avg_l">avg_l: Average of length-normalized marker counts</option>
359 <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option>
360 <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option>
361 <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option>
362 <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option>
363 <option value="med">med: Median of length-normalized marker counts</option>
364 </param>
365 <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/>
366 <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/>
367 <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/>
<!-- NOTE(review): this switch is checked by default, i.e. disambiguation is deactivated by default, while the help text recommends keeping it. Confirm the intended default. -->
368 <param argument="--avoid_disqm" type="boolean" truevalue="--avoid_disqm" falsevalue="" checked="true" label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?" help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/>
369 </section>
<!-- Optional read subsampling: either a number of reads ("single") or a number of paired reads ("paired"). Both branches also expand the subsample_common macro. NOTE(review): that macro is not defined in this file; presumably it comes from macros.xml. -->
370 <conditional name="subsample">
371 <param name="selector" type="select" label="Subsample" help="Subsampling only works for fastq input">
372 <option value="no">No</option>
373 <option value="single">Yes: specify number of reads</option>
374 <option value="paired">Yes: specify number of paired reads</option>
375 </param>
376 <when value="no"/>
377 <when value="single">
378 <param argument="--subsampling" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of reads to be considered"/>
379 <expand macro="subsample_common"/>
380 </when>
381 <when value="paired">
382 <param argument="--subsampling_paired" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of paired reads to be considered. For N there will be floor(N/2) reads selected from the forward and reverse reads each."/>
383 <expand macro="subsample_common"/>
384 </when>
385 </conditional>
<!-- Optional viral sequence cluster (VSC) profiling. The selected value is itself the command line flag (empty when disabled); the breadth threshold is only offered when profiling is enabled. -->
386 <conditional name="viral_analysis">
387 <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach">
388 <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option>
389 <option value="" selected="true">No</option>
390 </param>
391 <when value="--profile_vsc">
392 <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/>
393 </when>
394 <when value=""/>
395 </conditional>
<!-- Output options: sample identification, report format switches (each boolean expands to its metaphlan flag when checked, to nothing otherwise) and an optional Krona-formatted table. -->
396 <section name="out" title="Outputs" expanded="true">
397 <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/>
398 <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/>
399 <param argument="--use_group_representative" type="boolean" truevalue="--use_group_representative" falsevalue="" checked="false" label="Use a species as representative for species groups?"/>
400 <param argument="--legacy-output" type="boolean" truevalue="--legacy-output" falsevalue="" checked="false" label="Old MetaPhlAn2 two columns output?"/>
401 <param argument="--CAMI_format_output" type="boolean" truevalue="--CAMI_format_output" falsevalue="" checked="false" label="Report the profiling using the CAMI output format?"/>
402 <param argument="--unclassified_estimation" type="boolean" truevalue="--unclassified_estimation" falsevalue="" checked="false" label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>
<!-- Not a metaphlan flag: when true the command runs formatoutput.py format_for_krona on the main report. -->
403 <param name="krona_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output for Krona?"/>
404 </section>
405 <!-- Hidden switch read by the command template: with the default value "false" metaphlan is run with its offline flag. Enabling this in tests will allow metaphlan to download reference data (we do this only with the smallish TOY DB). -->
406 <param name="test" type="hidden" value="false"/>
407 </inputs>
408 <outputs>
<!-- The abundance table and the BIOM file carry no filter; every other output below is kept only when its filter expression is true. -->
409 <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances"/>
410 <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file"/>
<!-- One element per file found in split_levels/, the directory filled by formatoutput.py split_levels. -->
411 <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels">
412 <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="split_levels/" format="tabular"/>
413 <filter>analysis['analysis_type']['t'] in ['rel_ab', 'rel_ab_w_read_stats'] and analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']</filter>
414 </collection>
415 <data name="krona_output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances for Krona">
416 <filter>out['krona_output']</filter>
417 </data>
<!-- Written by metaphlan through its vsc_out option when VSC profiling is enabled. -->
418 <data name="vcs_breath_coverage" format="tabular" label="${tool.name} on ${on_string}: VSCs breadth-of-coverage">
419 <filter>viral_analysis['profile_vsc']</filter>
420 </data>
<!-- Subsampled reads are collected from the working directory: subsampled.out for single subsampling, subsampled.R1.out and subsampled.R2.out for paired subsampling. -->
421 <data name="subsample_single" format="fastqsanger" from_work_dir="subsampled.out" label="${tool.name} on ${on_string}: Subsampled reads">
422 <filter>subsample['selector'] == 'single'</filter>
423 </data>
424 <collection name="subsample_paired" type="paired" label="${tool.name} on ${on_string}: Subsampled paired reads">
425 <data name="forward" format="fastqsanger" from_work_dir="subsampled.R1.out"/>
426 <data name="reverse" format="fastqsanger" from_work_dir="subsampled.R2.out"/>
427 <filter>subsample['selector'] == 'paired'</filter>
428 </collection>
429 </outputs>
430 <tests>
431 <!-- Single uncompressed FASTA file (no_taxon_input.fasta), cached db, split levels and Krona output enabled: the report must contain UNCLASSIFIED, no bacterial clade may appear, and every per-level table is expected to have a single line -->
432 <test expect_num_outputs="6">
433 <section name="inputs">
434 <conditional name="in">
435 <param name="selector" value="raw"/>
436 <conditional name="raw_in">
437 <param name="selector" value="single"/>
438 <param name="in" value="no_taxon_input.fasta"/>
439 </conditional>
440 <param name="read_min_len" value="70"/>
441 <section name="mapping">
442 <param name="bt2_ps" value="sensitive"/>
443 <param name="min_mapq_val" value="5"/>
444 </section>
445 </conditional>
446 <conditional name="db">
447 <param name="db_selector" value="cached"/>
448 <param name="cached_db" value="test-db-20210409"/>
449 </conditional>
450 </section>
451 <section name="analysis">
452 <conditional name="analysis_type">
453 <param name="t" value="rel_ab"/>
454 <conditional name="tax_lev">
455 <param name="tax_lev" value="a"/>
456 <param name="split_levels" value="true"/>
457 </conditional>
458 </conditional>
459 <param name="min_cu_len" value="2000"/>
460 <param name="organism_profiling" value="add_viruses"/>
461 <param name="stat" value="avg_g"/>
462 <param name="stat_q" value="0.2"/>
463 <param name="perc_nonzero" value="0.33"/>
464 <param name="avoid_disqm" value="true"/>
465 </section>
466 <section name="out">
467 <param name="sample_id_key" value="SampleID"/>
468 <param name="sample_id" value="Metaphlan_Analysis"/>
469 <param name="use_group_representative" value="false"/>
470 <param name="legacy_output" value="false"/>
471 <param name="CAMI_format_output" value="false"/>
472 <param name="unclassified_estimation" value="false"/>
473 <param name="krona_output" value="true"/>
474 </section>
475 <output name="output_file" ftype="tabular">
476 <assert_contents>
477 <has_text text="UNCLASSIFIED"/>
478 </assert_contents>
479 </output>
480 <output name="biom_output_file" ftype="biom1">
481 <assert_contents>
482 <not_has_text text="k__Bacteria"/>
483 <not_has_text text="p__Actinobacteria"/>
484 </assert_contents>
485 </output>
486 <output_collection name="levels" type="list">
487 <element name="all" ftype="tabular">
488 <assert_contents>
489 <has_text text="class"/>
490 <has_n_columns n="17"/>
491 <has_n_lines n="1"/>
492 </assert_contents>
493 </element>
494 <element name="class" ftype="tabular">
495 <assert_contents>
496 <has_text text="class_id"/>
497 <not_has_text text="phylum_id"/>
498 <has_n_columns n="3"/>
499 <has_n_lines n="1"/>
500 </assert_contents>
501 </element>
502 <element name="family" ftype="tabular">
503 <assert_contents>
504 <has_text text="family_id"/>
505 <not_has_text text="order"/>
506 <has_n_columns n="3"/>
507 <has_n_lines n="1"/>
508 </assert_contents>
509 </element>
510 <element name="genus" ftype="tabular">
511 <assert_contents>
512 <has_text text="genus_id"/>
513 <not_has_text text="family"/>
514 <has_n_columns n="3"/>
515 <has_n_lines n="1"/>
516 </assert_contents>
517 </element>
518 <element name="kingdom" ftype="tabular">
519 <assert_contents>
520 <has_text text="kingdom_id"/>
521 <has_n_columns n="3"/>
522 <has_n_lines n="1"/>
523 </assert_contents>
524 </element>
525 <element name="order" ftype="tabular">
526 <assert_contents>
527 <has_text text="order_id"/>
528 <not_has_text text="class_id"/>
529 <has_n_columns n="3"/>
530 <has_n_lines n="1"/>
531 </assert_contents>
532 </element>
533 <element name="phylum" ftype="tabular">
534 <assert_contents>
535 <has_text text="phylum_id"/>
536 <not_has_text text="kingdom_id"/>
537 <has_n_columns n="3"/>
538 <has_n_lines n="1"/>
539 </assert_contents>
540 </element>
541 <element name="species" ftype="tabular">
542 <assert_contents>
543 <has_text text="species_id"/>
544 <not_has_text text="genus"/>
545 <has_n_columns n="3"/>
546 <has_n_lines n="1"/>
547 </assert_contents>
548 </element>
549 <element name="strains" ftype="tabular">
550 <assert_contents>
551 <has_text text="strains_id"/>
552 <not_has_text text="species_id"/>
553 <has_n_columns n="3"/>
554 <has_n_lines n="1"/>
555 </assert_contents>
556 </element>
557 </output_collection>
558 <output name="krona_output_file" ftype="tabular">
559 <assert_contents>
560 <not_has_text text="k__Bacteria"/>
561 <has_n_lines n="1" delta="1"/>
562 <has_size value="1" delta="1"/>
563 </assert_contents>
564 </output>
<!-- The hidden test parameter keeps its default here, so nothing may be downloaded. -->
565 <assert_stderr>
566 <has_text text="Downloading" negate="true"/>
567 </assert_stderr>
568 </test>
569 <!-- Single GZ file (SRS014464-Anterior_nares.fasta.gz), Cached db, split levels and Krona output enabled: checks known taxa in the report, the BIOM file, each per-level table and the Krona table -->
570 <test expect_num_outputs="6">
571 <section name="inputs">
572 <conditional name="in">
573 <param name="selector" value="raw"/>
574 <conditional name="raw_in">
575 <param name="selector" value="single"/>
576 <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/>
577 </conditional>
578 <param name="read_min_len" value="70"/>
579 <section name="mapping">
580 <param name="bt2_ps" value="sensitive"/>
581 <param name="min_mapq_val" value="5"/>
582 </section>
583 </conditional>
584 <conditional name="db">
585 <param name="db_selector" value="cached"/>
586 <param name="cached_db" value="test-db-20210409"/>
587 </conditional>
588 </section>
589 <section name="analysis">
590 <conditional name="analysis_type">
591 <param name="t" value="rel_ab"/>
592 <conditional name="tax_lev">
593 <param name="tax_lev" value="a"/>
594 <param name="split_levels" value="true"/>
595 </conditional>
596 </conditional>
597 <param name="min_cu_len" value="2000"/>
598 <param name="organism_profiling" value="add_viruses"/>
599 <param name="stat" value="avg_g"/>
600 <param name="stat_q" value="0.2"/>
601 <param name="perc_nonzero" value="0.33"/>
602 <param name="avoid_disqm" value="true"/>
603 </section>
604 <section name="out">
605 <param name="sample_id_key" value="SampleID"/>
606 <param name="sample_id" value="Metaphlan_Analysis"/>
607 <param name="use_group_representative" value="false"/>
608 <param name="legacy_output" value="false"/>
609 <param name="CAMI_format_output" value="false"/>
610 <param name="unclassified_estimation" value="false"/>
611 <param name="krona_output" value="true"/>
612 </section>
<!-- Reference files are compared by size only (sim_size); the exact content checks are the has_text assertions. -->
613 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
614 <assert_contents>
615 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
616 </assert_contents>
617 </output>
618 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
619 <assert_contents>
620 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
621 </assert_contents>
622 </output>
623 <output_collection name="levels" type="list">
624 <element name="all" ftype="tabular">
625 <assert_contents>
626 <has_text text="Gammaproteobacteria"/>
627 <has_text text="Corynebacterium accolens"/>
628 <has_n_columns n="17"/>
629 </assert_contents>
630 </element>
631 <element name="class" ftype="tabular">
632 <assert_contents>
633 <has_text text="class_id"/>
634 <not_has_text text="phylum_id"/>
635 <has_text text="Actinobacteria"/>
636 <has_n_columns n="3"/>
637 </assert_contents>
638 </element>
639 <element name="family" ftype="tabular">
640 <assert_contents>
641 <has_text text="family_id"/>
642 <not_has_text text="order"/>
643 <has_text text="Propionibacteriaceae"/>
644 <has_n_columns n="3"/>
645 </assert_contents>
646 </element>
647 <element name="genus" ftype="tabular">
648 <assert_contents>
649 <has_text text="genus_id"/>
650 <not_has_text text="family"/>
651 <has_text text="Cutibacterium"/>
652 <has_n_columns n="3"/>
653 </assert_contents>
654 </element>
655 <element name="kingdom" ftype="tabular">
656 <assert_contents>
657 <has_text text="kingdom_id"/>
658 <has_text text="Bacteria"/>
659 <has_n_columns n="3"/>
660 </assert_contents>
661 </element>
662 <element name="order" ftype="tabular">
663 <assert_contents>
664 <has_text text="order_id"/>
665 <not_has_text text="class_id"/>
666 <has_text text="Propionibacteriales"/>
667 <has_n_columns n="3"/>
668 </assert_contents>
669 </element>
670 <element name="phylum" ftype="tabular">
671 <assert_contents>
672 <has_text text="phylum_id"/>
673 <not_has_text text="kingdom_id"/>
674 <has_text text="Firmicutes"/>
675 <has_n_columns n="3"/>
676 </assert_contents>
677 </element>
678 <element name="species" ftype="tabular">
679 <assert_contents>
680 <has_text text="species_id"/>
681 <not_has_text text="genus"/>
682 <has_text text="Corynebacterium accolens"/>
683 <has_n_columns n="3"/>
684 </assert_contents>
685 </element>
686 <element name="strains" ftype="tabular">
687 <assert_contents>
688 <has_text text="strains_id"/>
689 <not_has_text text="species_id"/>
690 <has_n_columns n="3"/>
691 </assert_contents>
692 </element>
693 </output_collection>
694 <output name="krona_output_file" ftype="tabular">
695 <assert_contents>
696 <not_has_text text="k__Bacteria"/>
697 <has_text text="Corynebacterium accolens"/>
698 <has_n_columns n="9"/>
699 </assert_contents>
700 </output>
701 <assert_stderr>
702 <has_text text="Downloading" negate="true"/>
703 </assert_stderr>
704 </test>
705 <!-- Multiple GZ files (the same gzipped FASTA given twice), Local db built from history datasets (test-db.fasta and test-db.json); split levels and Krona output disabled -->
706 <test expect_num_outputs="4">
707 <section name="inputs">
708 <conditional name="in">
709 <param name="selector" value="raw"/>
710 <conditional name="raw_in">
711 <param name="selector" value="multiple"/>
712 <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/>
713 </conditional>
714 <param name="read_min_len" value="70"/>
715 <section name="mapping">
716 <param name="bt2_ps" value="sensitive"/>
717 <param name="min_mapq_val" value="5"/>
718 </section>
719 </conditional>
720 <conditional name="db">
721 <param name="db_selector" value="history"/>
722 <param name="bowtie2db" value="test-db.fasta"/>
723 <param name="mpa_pkl" value="test-db.json"/>
724 </conditional>
725 </section>
726 <section name="analysis">
727 <conditional name="analysis_type">
728 <param name="t" value="rel_ab"/>
729 <conditional name="tax_lev">
730 <param name="tax_lev" value="a"/>
731 <param name="split_levels" value="false"/>
732 </conditional>
733 </conditional>
734 <param name="min_cu_len" value="2000"/>
735 <param name="organism_profiling" value="add_viruses"/>
736 <param name="stat" value="avg_g"/>
737 <param name="stat_q" value="0.2"/>
738 <param name="perc_nonzero" value="0.33"/>
739 <param name="avoid_disqm" value="true"/>
740 </section>
741 <section name="out">
742 <param name="sample_id_key" value="SampleID"/>
743 <param name="sample_id" value="Metaphlan_Analysis"/>
744 <param name="use_group_representative" value="false"/>
745 <param name="legacy_output" value="false"/>
746 <param name="CAMI_format_output" value="false"/>
747 <param name="unclassified_estimation" value="false"/>
748 <param name="krona_output" value="false"/>
749 </section>
<!-- Besides a known species lineage, the report must contain the three column headers checked below. -->
750 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
751 <assert_contents>
752 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
753 <has_text text="relative_abundance"/>
754 <has_text text="NCBI_tax_id"/>
755 <has_text text="clade_name"/>
756 </assert_contents>
757 </output>
758 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
759 <assert_contents>
760 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
761 </assert_contents>
762 </output>
763 <assert_stderr>
764 <has_text text="Downloading" negate="true"/>
765 </assert_stderr>
766 </test>
767 <!-- Paired GZ file, Cached db (note: subsample_paired and its forward and reverse elements are counted separately, because they are all statically defined) -->
768 <test expect_num_outputs="7">
769 <section name="inputs">
770 <conditional name="in">
771 <param name="selector" value="raw"/>
772 <conditional name="raw_in">
773 <param name="selector" value="paired"/>
774 <param name="in_f" value="SRS014464-Anterior_nares.fastq.gz"/>
775 <param name="in_r" value="SRS014464-Anterior_nares.fastq.gz"/>
776 </conditional>
777 <param name="read_min_len" value="70"/>
778 <section name="mapping">
779 <param name="bt2_ps" value="sensitive"/>
780 <param name="min_mapq_val" value="5"/>
781 </section>
782 </conditional>
783 <conditional name="db">
784 <param name="db_selector" value="cached"/>
785 <param name="cached_db" value="test-db-20210409"/>
786 </conditional>
787 </section>
788 <section name="analysis">
789 <conditional name="analysis_type">
790 <param name="t" value="rel_ab"/>
791 <conditional name="tax_lev">
792 <param name="tax_lev" value="a"/>
793 <param name="split_levels" value="false"/>
794 </conditional>
795 </conditional>
796 <param name="min_cu_len" value="2000"/>
797 <param name="organism_profiling" value="add_viruses"/>
798 <param name="stat" value="avg_g"/>
799 <param name="stat_q" value="0.2"/>
800 <param name="perc_nonzero" value="0.33"/>
801 <param name="avoid_disqm" value="true"/>
802 </section>
803 <conditional name="subsample">
804 <param name="selector" value="paired"/>
805 <param name="subsampling_paired" value="20257"/>
806 <param name="subsampling_seed" value="42"/>
807 </conditional>
808 <section name="out">
809 <param name="sample_id_key" value="SampleID"/>
810 <param name="sample_id" value="Metaphlan_Analysis"/>
811 <param name="use_group_representative" value="false"/>
812 <param name="legacy_output" value="false"/>
813 <param name="CAMI_format_output" value="false"/>
814 <param name="unclassified_estimation" value="false"/>
815 <param name="krona_output" value="false"/>
816 </section>
817 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
818 <assert_contents>
819 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
820 <has_text text="relative_abundance"/>
821 <has_text text="NCBI_tax_id"/>
822 <has_text text="clade_name"/>
823 </assert_contents>
824 </output>
825 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
826 <assert_contents>
827 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
828 </assert_contents>
829 </output>
830 <output_collection name="subsample_paired" type="paired">
831 <element name="forward">
832 <assert_contents>
833 <has_line_matching expression="^@.*" n="10128"/>
834 </assert_contents>
835 </element>
836 <element name="reverse">
837 <assert_contents>
838 <has_line_matching expression="^@.*" n="10128"/>
839 </assert_contents>
840 </element>
841 </output_collection>
842 <assert_stderr>
843 <has_text text="Downloading" negate="true"/>
844 </assert_stderr>
845 </test>
846 <!-- Paired GZ file as collection, Cached db (note: subsample_paired and its forward and reverse elements are counted separately, because they are all statically defined) -->
847 <test expect_num_outputs="7">
848 <section name="inputs">
849 <conditional name="in">
850 <param name="selector" value="raw"/>
851 <conditional name="raw_in">
852 <param name="selector" value="paired_collection"/>
853 <param name="in">
854 <collection type="paired" name="pair">
855 <element name="forward" value="SRS014464-Anterior_nares.fastq.gz"/>
856 <element name="reverse" value="SRS014464-Anterior_nares.fastq.gz"/>
857 </collection>
858 </param>
859 </conditional>
860 <param name="read_min_len" value="70"/>
861 <section name="mapping">
862 <param name="bt2_ps" value="sensitive"/>
863 <param name="min_mapq_val" value="5"/>
864 </section>
865 </conditional>
866 <conditional name="db">
867 <param name="db_selector" value="cached"/>
868 <param name="cached_db" value="test-db-20210409"/>
869 </conditional>
870 </section>
871 <section name="analysis">
872 <conditional name="analysis_type">
873 <param name="t" value="rel_ab"/>
874 <conditional name="tax_lev">
875 <param name="tax_lev" value="a"/>
876 <param name="split_levels" value="false"/>
877 </conditional>
878 </conditional>
879 <param name="min_cu_len" value="2000"/>
880 <param name="organism_profiling" value="add_viruses"/>
881 <param name="stat" value="avg_g"/>
882 <param name="stat_q" value="0.2"/>
883 <param name="perc_nonzero" value="0.33"/>
884 <param name="avoid_disqm" value="true"/>
885 </section>
886 <conditional name="subsample">
887 <param name="selector" value="paired"/>
888 <param name="subsampling_paired" value="20257"/>
889 <param name="subsampling_seed" value="42"/>
890 </conditional>
891 <section name="out">
892 <param name="sample_id_key" value="SampleID"/>
893 <param name="sample_id" value="Metaphlan_Analysis"/>
894 <param name="use_group_representative" value="false"/>
895 <param name="legacy_output" value="false"/>
896 <param name="CAMI_format_output" value="false"/>
897 <param name="unclassified_estimation" value="false"/>
898 <param name="krona_output" value="false"/>
899 </section>
900 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
901 <assert_contents>
902 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
903 <has_text text="relative_abundance"/>
904 <has_text text="NCBI_tax_id"/>
905 <has_text text="clade_name"/>
906 </assert_contents>
907 </output>
908 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
909 <assert_contents>
910 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
911 </assert_contents>
912 </output>
913 <output_collection name="subsample_paired" type="paired">
914 <element name="forward">
915 <assert_contents>
916 <has_line_matching expression="^@.*" n="10128"/>
917 </assert_contents>
918 </element>
919 <element name="reverse">
920 <assert_contents>
921 <has_line_matching expression="^@.*" n="10128"/>
922 </assert_contents>
923 </element>
924 </output_collection>
925 <assert_stderr>
926 <has_text text="Downloading" negate="true"/>
927 </assert_stderr>
928 </test>
929 <!-- Paired fastq file as collection, Cached db -->
930 <test expect_num_outputs="4">
931 <section name="inputs">
932 <conditional name="in">
933 <param name="selector" value="raw"/>
934 <conditional name="raw_in">
935 <param name="selector" value="paired_collection"/>
936 <param name="in">
937 <collection type="paired" name="pair">
938 <element name="forward" value="SRS014464-Anterior_nares_mini.fastq" />
939 <element name="reverse" value="SRS014464-Anterior_nares_mini.fastq" />
940 </collection>
941 </param>
942 </conditional>
943 <param name="read_min_len" value="70"/>
944 <section name="mapping">
945 <param name="bt2_ps" value="sensitive"/>
946 <param name="min_mapq_val" value="5"/>
947 </section>
948 </conditional>
949 <conditional name="db">
950 <param name="db_selector" value="cached"/>
951 <param name="cached_db" value="test-db-20210409"/>
952 </conditional>
953 </section>
954 <section name="analysis">
955 <conditional name="analysis_type">
956 <param name="t" value="rel_ab"/>
957 <conditional name="tax_lev">
958 <param name="tax_lev" value="a"/>
959 <param name="split_levels" value="false"/>
960 </conditional>
961 </conditional>
962 <param name="min_cu_len" value="2000"/>
963 <param name="organism_profiling" value="add_viruses"/>
964 <param name="stat" value="avg_g"/>
965 <param name="stat_q" value="0.2"/>
966 <param name="perc_nonzero" value="0.33"/>
967 <param name="avoid_disqm" value="true"/>
968 </section>
969 <conditional name="subsample">
970 <param name="selector" value="no"/>
971 </conditional>
972 <section name="out">
973 <param name="sample_id_key" value="SampleID"/>
974 <param name="sample_id" value="Metaphlan_Analysis"/>
975 <param name="use_group_representative" value="false"/>
976 <param name="legacy_output" value="false"/>
977 <param name="CAMI_format_output" value="false"/>
978 <param name="unclassified_estimation" value="false"/>
979 <param name="krona_output" value="false"/>
980 </section>
981 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
982 <assert_contents>
983 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
984 <has_text text="relative_abundance"/>
985 <has_text text="NCBI_tax_id"/>
986 <has_text text="clade_name"/>
987 </assert_contents>
988 </output>
989 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
990 <assert_contents>
991 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
992 </assert_contents>
993 </output>
994 <assert_stderr>
995 <has_text text="Downloading" negate="true"/>
996 </assert_stderr>
997 </test>
998 <!-- SAM, cached DB -->
999 <test expect_num_outputs="2">
1000 <section name="inputs">
1001 <conditional name="in">
1002 <param name="selector" value="sam"/>
1003 <param name="in" value="SRS014464-Anterior_nares.sam"/>
1004 </conditional>
1005 <conditional name="db">
1006 <param name="db_selector" value="cached"/>
1007 <param name="cached_db" value="test-db-20210409"/>
1008 </conditional>
1009 </section>
1010 <section name="analysis">
1011 <conditional name="analysis_type">
1012 <param name="t" value="rel_ab"/>
1013 <conditional name="tax_lev">
1014 <param name="tax_lev" value="a"/>
1015 <param name="split_levels" value="false"/>
1016 </conditional>
1017 </conditional>
1018 <param name="min_cu_len" value="2000"/>
1019 <param name="organism_profiling" value="add_viruses"/>
1020 <param name="stat" value="avg_g"/>
1021 <param name="stat_q" value="0.2"/>
1022 <param name="perc_nonzero" value="0.33"/>
1023 <param name="avoid_disqm" value="true"/>
1024 </section>
1025 <section name="out">
1026 <param name="sample_id_key" value="SampleID"/>
1027 <param name="sample_id" value="Metaphlan_Analysis"/>
1028 <param name="use_group_representative" value="false"/>
1029 <param name="legacy_output" value="false"/>
1030 <param name="CAMI_format_output" value="false"/>
1031 <param name="unclassified_estimation" value="false"/>
1032 <param name="krona_output" value="false"/>
1033 </section>
1034 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
1035 <assert_contents>
1036 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
1037 <has_text text="relative_abundance"/>
1038 <has_text text="NCBI_tax_id"/>
1039 <has_text text="clade_name"/>
1040 </assert_contents>
1041 </output>
1042 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
1043 <assert_contents>
1044 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
1045 </assert_contents>
1046 </output>
1047 <assert_stderr>
1048 <has_text text="Downloading" negate="true"/>
1049 </assert_stderr>
1050 </test>
1051 <!-- bowtie2out, cached DB -->
1052 <test expect_num_outputs="2">
1053 <section name="inputs">
1054 <conditional name="in">
1055 <param name="selector" value="bowtie2out"/>
1056 <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/>
1057 </conditional>
1058 <conditional name="db">
1059 <param name="db_selector" value="cached"/>
1060 <param name="cached_db" value="test-db-20210409"/>
1061 </conditional>
1062 </section>
1063 <section name="analysis">
1064 <conditional name="analysis_type">
1065 <param name="t" value="rel_ab"/>
1066 <conditional name="tax_lev">
1067 <param name="tax_lev" value="a"/>
1068 <param name="split_levels" value="false"/>
1069 </conditional>
1070 </conditional>
1071 <param name="min_cu_len" value="2000"/>
1072 <param name="organism_profiling" value="add_viruses"/>
1073 <param name="stat" value="avg_g"/>
1074 <param name="stat_q" value="0.2"/>
1075 <param name="perc_nonzero" value="0.33"/>
1076 <param name="avoid_disqm" value="true"/>
1077 </section>
1078 <section name="out">
1079 <param name="sample_id_key" value="SampleID"/>
1080 <param name="sample_id" value="Metaphlan_Analysis"/>
1081 <param name="use_group_representative" value="false"/>
1082 <param name="legacy_output" value="false"/>
1083 <param name="CAMI_format_output" value="false"/>
1084 <param name="unclassified_estimation" value="false"/>
1085 <param name="krona_output" value="false"/>
1086 </section>
1087 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
1088 <assert_contents>
1089 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
1090 <has_text text="relative_abundance"/>
1091 <has_text text="NCBI_tax_id"/>
1092 <has_text text="clade_name"/>
1093 </assert_contents>
1094 </output>
1095 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
1096 <assert_contents>
1097 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
1098 </assert_contents>
1099 </output>
1100 <assert_stderr>
1101 <has_text text="Downloading" negate="true"/>
1102 </assert_stderr>
1103 </test>
1104 <!-- Single FASTA file, Cached db -->
1105 <test expect_num_outputs="6">
1106 <section name="inputs">
1107 <conditional name="in">
1108 <param name="selector" value="raw"/>
1109 <conditional name="raw_in">
1110 <param name="selector" value="single"/>
1111 <param name="in" value="SRS014464-Anterior_nares.fasta"/>
1112 </conditional>
1113 <param name="read_min_len" value="70"/>
1114 <section name="mapping">
1115 <param name="bt2_ps" value="sensitive"/>
1116 <param name="min_mapq_val" value="5"/>
1117 </section>
1118 </conditional>
1119 <conditional name="db">
1120 <param name="db_selector" value="cached"/>
1121 <param name="cached_db" value="test-db-20210409"/>
1122 </conditional>
1123 </section>
1124 <section name="analysis">
1125 <conditional name="analysis_type">
1126 <param name="t" value="rel_ab"/>
1127 <conditional name="tax_lev">
1128 <param name="tax_lev" value="a"/>
1129 <param name="split_levels" value="true"/>
1130 </conditional>
1131 </conditional>
1132 <param name="min_cu_len" value="2000"/>
1133 <param name="organism_profiling" value="add_viruses"/>
1134 <param name="stat" value="avg_g"/>
1135 <param name="stat_q" value="0.2"/>
1136 <param name="perc_nonzero" value="0.33"/>
1137 <param name="ignore_markers" value="marker.txt"/>
1138 <param name="avoid_disqm" value="true"/>
1139 </section>
1140 <section name="out">
1141 <param name="sample_id_key" value="SampleID"/>
1142 <param name="sample_id" value="Metaphlan_Analysis"/>
1143 <param name="use_group_representative" value="false"/>
1144 <param name="legacy_output" value="true"/>
1145 <param name="CAMI_format_output" value="false"/>
1146 <param name="unclassified_estimation" value="false"/>
1147 <param name="krona_output" value="true"/>
1148 </section>
1149 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
1150 <assert_contents>
1151 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
1152 <has_text text="SampleID"/>
1153 <has_text text="Metaphlan_Analysis"/>
1154 </assert_contents>
1155 </output>
1156 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
1157 <assert_contents>
1158 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
1159 </assert_contents>
1160 </output>
1161 <output_collection name="levels" type="list">
1162 <element name="all" ftype="tabular">
1163 <assert_contents>
1164 <has_text text="Gammaproteobacteria"/>
1165 <has_text text="Corynebacterium accolens"/>
1166 <has_n_columns n="9"/>
1167 </assert_contents>
1168 </element>
1169 <element name="class" ftype="tabular">
1170 <assert_contents>
1171 <has_text text="class"/>
1172 <has_text text="Actinobacteria"/>
1173 <has_n_columns n="2"/>
1174 </assert_contents>
1175 </element>
1176 <element name="family" ftype="tabular">
1177 <assert_contents>
1178 <has_text text="family"/>
1179 <has_text text="Propionibacteriaceae"/>
1180 <has_n_columns n="2"/>
1181 </assert_contents>
1182 </element>
1183 <element name="genus" ftype="tabular">
1184 <assert_contents>
1185 <has_text text="genus"/>
1186 <has_text text="Cutibacterium"/>
1187 <has_n_columns n="2"/>
1188 </assert_contents>
1189 </element>
1190 <element name="kingdom" ftype="tabular">
1191 <assert_contents>
1192 <has_text text="kingdom"/>
1193 <has_text text="Bacteria"/>
1194 <has_n_columns n="2"/>
1195 </assert_contents>
1196 </element>
1197 <element name="order" ftype="tabular">
1198 <assert_contents>
1199 <has_text text="order"/>
1200 <has_text text="Propionibacteriales"/>
1201 <has_n_columns n="2"/>
1202 </assert_contents>
1203 </element>
1204 <element name="phylum" ftype="tabular">
1205 <assert_contents>
1206 <has_text text="phylum"/>
1207 <has_text text="Firmicutes"/>
1208 <has_n_columns n="2"/>
1209 </assert_contents>
1210 </element>
1211 <element name="species" ftype="tabular">
1212 <assert_contents>
1213 <has_text text="species"/>
1214 <has_text text="Corynebacterium accolens"/>
1215 <has_n_columns n="2"/>
1216 </assert_contents>
1217 </element>
1218 <element name="strains" ftype="tabular">
1219 <assert_contents>
1220 <has_text text="strains"/>
1221 <has_n_columns n="2"/>
1222 </assert_contents>
1223 </element>
1224 </output_collection>
1225 <output name="krona_output_file" ftype="tabular">
1226 <assert_contents>
1227 <not_has_text text="k__Bacteria"/>
1228 <has_text text="Corynebacterium accolens"/>
1229 <has_n_columns n="9"/>
1230 </assert_contents>
1231 </output>
1232 <assert_stderr>
1233 <has_text text="Downloading" negate="true"/>
1234 </assert_stderr>
1235 </test>
1236 <!-- Check a non-default analysis mode
1237 and viral analysis -->
1238 <test expect_num_outputs="6">
1239 <section name="inputs">
1240 <conditional name="in">
1241 <param name="selector" value="raw"/>
1242 <conditional name="raw_in">
1243 <param name="selector" value="single"/>
1244 <param name="in" value="SRS014464-Anterior_nares.fastq.gz"/>
1245 </conditional>
1246 </conditional>
1247 <conditional name="db">
1248 <param name="db_selector" value="cached"/>
1249 <param name="cached_db" value="mpa_vJan21_TOY_CHOCOPhlAnSGB"/>
1250 </conditional>
1251 </section>
1252 <section name="analysis">
1253 <conditional name="analysis_type">
1254 <param name="t" value="marker_ab_table"/>
1255 </conditional>
1256 </section>
1257 <conditional name="viral_analysis">
1258 <param name="profile_vsc" value="--profile_vsc"/>
1259 </conditional>
1260 <conditional name="subsample">
1261 <param name="selector" value="single"/>
1262 <param name="subsampling" value="10000"/>
1263 <param name="subsampling_seed" value="42"/>
1264 </conditional>
1265 <param name="test" value="true"/>
1266 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
1267 <assert_contents>
1268 <has_text text="SGB7017__MKDPKOFL_00679"/>
1269 <has_text text="SampleID"/>
1270 <has_text text="Metaphlan_Analysis"/>
1271 </assert_contents>
1272 </output>
1273 <output name="subsample_single">
1274 <assert_contents>
1275 <has_text text="@" n="10000"/>
1276 </assert_contents>
1277 </output>
1278 <!-- reference data empty -> empty output -->
1279 <output name="vcs_breath_coverage" ftype="tabular">
1280 <assert_contents>
1281 <has_size size="0"/>
1282 </assert_contents>
1283 </output>
1284 <assert_command>
1285 <has_text text="--profile_vsc"/>
1286 <has_text text="--vsc_breadth 0.75"/>
1287 <has_text text="--vsc_out"/>
1288 </assert_command>
1289 <assert_stderr>
1290 <has_text text="Downloading"/>
1291 <!-- due to test=true and the absence of the TOY reference DB Metaphlan will download to ~10MB-->
1292 <has_text text="No reads aligning to VSC markers"/>
1293 </assert_stderr>
1294 </test>
1295 </tests>
1296 <help><![CDATA[
1297 What it does
1298 ============
1299
1300 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria,
1301 Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) at species-level.
1302
1303 MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes
1304 (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:
1305
1306 - unambiguous taxonomic assignments;
1307 - accurate estimation of organismal relative abundance;
1308 - species-level resolution for bacteria, archaea, eukaryotes and viruses;
1309 - strain identification and tracking;
1310 - orders of magnitude speedups compared to existing methods;
1311 - microbiota strain-level population genomics.
1312
1313 MetaPhlAn clade-abundance estimation
1314 ------------------------------------
1315
1316 The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species and
1317 strains in particular cases) present in the microbiota obtained from a microbiome sample and their
1318 relative abundance.
1319
1320 Marker level analysis
1321 ---------------------
1322
1323 MetaPhlAn introduces the capability of characterizing organisms at the strain level using non-aggregated
1324 marker information. This capability comes in several slightly different flavours and
1325 is a way to perform strain tracking and comparison across multiple samples.
1326
1327 Usually, MetaPhlAn is first run with default parameters for the type of analysis to profile the
1328 species present in the community, and then a strain-level profiling can be performed to zoom-in on
1329 specific species of interest. This operation can be performed quickly as it exploits the bowtie2out
1330 intermediate file saved during the execution of the default analysis type.
1331
1332 Inputs
1333 ======
1334
1335 MetaPhlAn takes as input either:
1336
1337 - one or several sequence files in FASTA or FASTQ format (compressed or not)
1338 - a BowTie2 produced SAM file
1339 - an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run
1340
1341 It also needs the reference database, which can be locally installed or customized using the dedicated tools.
1342
1343 Outputs
1344 =======
1345
1346 The main output is a tab-separated file with the predicted taxon relative abundances.
1347
1348 It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.
1349
1350
1351 More help and use cases
1352 =======================
1353
1354 To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_.
1355
1356 .. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Basic-Usage
1357
1358 ]]></help>
1359 <expand macro="citations"/>
1360 </tool>