Mercurial > repos > estrain > metaphlan_gt

diff metaphlan_gt.xml @ 0:a7bd05df0bea draft default tip
planemo upload commit e485da3e1b9eb674a52948a00a3328c1a3cc5ffa
author: estrain
date: Fri, 13 Mar 2026 12:06:00 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaphlan_gt.xml	Fri Mar 13 12:06:00 2026 +0000
@@ -0,0 +1,1360 @@
+<tool id="metaphlan_gt" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy4" profile="@PROFILE@">
+    <description>to profile the composition of microbial communities</description>
+    <macros>
+        <!-- Shared definitions come from macros.xml; presumably this is where the tokens and
+             macros not defined in this file (e.g. "requirements", "subsample_common") live. -->
+        <import>macros.xml</import>
+        
+        <!-- tax_lev: selector for the taxonomic level of the relative abundance output.
+             Only the "a" (all levels) branch carries an extra parameter, split_levels,
+             which requests one additional report per taxonomic level.
+             Expanded in the rel_ab and rel_ab_w_read_stats analysis branches. -->
+        <xml name="tax_lev">
+            <conditional name="tax_lev">
+                <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output">
+                    <option value="a" selected="true">All taxonomic levels</option>
+                    <option value="k">Kingdoms only</option>
+                    <option value="p">Phyla only</option>
+                    <option value="c">Classes only</option>
+                    <option value="o">Orders only</option>
+                    <option value="f">Families only</option>
+                    <option value="g">Genera only</option>
+                    <option value="s">Species only</option>
+                </param>
+                <when value="a">
+                    <param name="split_levels" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate a report for each taxonomic level?" help="It will be in addition to the default output"/>
+                </when>
+                <!-- single-level selections have no additional parameters -->
+                <when value="k"/>
+                <when value="p"/>
+                <when value="c"/>
+                <when value="o"/>
+                <when value="f"/>
+                <when value="g"/>
+                <when value="s"/>
+            </conditional>
+        </xml>
+        <!-- Datatypes accepted by every raw read input of this tool. -->
+        <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz</token>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command>metaphlan -v</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+## Stage the raw read inputs.
+## Sets two template variables consumed by the metaphlan call further down:
+##   file_path - the positional input argument (single path, comma separated list,
+##               or "-1 in_f -2 in_r" for paired subsampling)
+##   ext       - the uncompressed input type passed to --input_type
+## gz/bz2 compressed inputs are decompressed into the job working directory first.
+#if $inputs.in.selector == "raw"
+    #if $inputs.in.raw_in.selector == "single"
+        #set full_ext=$inputs.in.raw_in.in.datatype.file_ext
+        #if $full_ext.endswith("gz")
+            #set $file_path="in"
+            zcat '$inputs.in.raw_in.in' > '$file_path' &&
+        #else if $full_ext.endswith("bz2")
+            #set $file_path="in"
+            bzcat '$inputs.in.raw_in.in' > '$file_path' &&
+        #else
+            ## uncompressed input is used in place; the path is quoted here
+            #set $file_path="'%s'" % $inputs.in.raw_in.in
+        #end if
+    #else if $inputs.in.raw_in.selector == "multiple"
+        #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext
+        #set file_path=""
+        #set sep=""
+        #for $i, $f in enumerate($inputs.in.raw_in.in)
+            ## every file must have the datatype of the first one
+            #if $f.datatype.file_ext != $full_ext
+            ## "exit 1" is terminated with ";" so that the commands emitted after
+            ## this point are not parsed as extra arguments of exit
+            echo "Different datatypes for input files" >&2 && exit 1 ;
+            #end if
+            #if $full_ext.endswith("gz")
+                #set fp="input_%s" % ($i)
+                zcat '$f' > '$fp' &&
+            #else if $full_ext.endswith("bz2")
+                #set fp="input_%s" % ($i)
+                bzcat '$f' > '$fp' &&
+            #else
+                #set fp=$f
+            #end if
+            ## each piece is quoted separately ('a'',b'); the shell concatenates
+            ## the adjacent quoted strings into one comma separated argument
+            #set $file_path+="'%s%s'" % ($sep, $fp)
+            #set $sep=","
+        #end for
+    #else if $inputs.in.raw_in.selector == "paired"
+        #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext
+        #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext
+            echo "Different datatypes for input paired-end files" >&2 && exit 1 ;
+        #end if
+        #if $full_ext.endswith("gz")
+            zcat '$inputs.in.raw_in.in_f' > 'in_f' &&
+            zcat '$inputs.in.raw_in.in_r' > 'in_r' &&
+        #else if $full_ext.endswith("bz2")
+            bzcat '$inputs.in.raw_in.in_f' > 'in_f' &&
+            bzcat '$inputs.in.raw_in.in_r' > 'in_r' &&
+        #else
+            ln -s '$inputs.in.raw_in.in_f' 'in_f' &&
+            ln -s '$inputs.in.raw_in.in_r' 'in_r' &&
+        #end if
+        ## paired data has by default no special treatment, i.e. it is given as comma separated list
+        ## except iff paired subsampling where -1 and -2 must be used
+        #if $subsample.selector == 'paired'
+            #set file_path="-1 in_f -2 in_r"
+        #else
+            #set file_path="in_f,in_r"
+        #end if
+    #else if $inputs.in.raw_in.selector == "paired_collection"
+        #set full_ext=$inputs.in.raw_in.in.forward.ext
+        #if $full_ext != $inputs.in.raw_in.in.reverse.ext
+            echo "Different datatypes for input paired-end files" >&2 && exit 1 ;
+        #end if
+        #if $full_ext.endswith("gz")
+            zcat '$inputs.in.raw_in.in.forward' > 'in_f' &&
+            zcat '$inputs.in.raw_in.in.reverse' > 'in_r' &&
+        #else if $full_ext.endswith("bz2")
+            bzcat '$inputs.in.raw_in.in.forward' > 'in_f' &&
+            bzcat '$inputs.in.raw_in.in.reverse' > 'in_r' &&
+        #else
+            ln -s '$inputs.in.raw_in.in.forward' 'in_f' &&
+            ln -s '$inputs.in.raw_in.in.reverse' 'in_r' &&
+        #end if
+        ## same argument layout as for the "paired" selector above
+        #if $subsample.selector == 'paired'
+            #set file_path="-1 in_f -2 in_r"
+        #else
+            #set file_path="in_f,in_r"
+        #end if
+    #end if
+
+    ## reduce the Galaxy datatype extension to the uncompressed input type
+    #if $full_ext.startswith("fastq")
+        #set ext='fastq'
+    #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2"))
+        #set ext='fasta'
+    #else
+        #set ext=$full_ext
+    #end if
+#end if
+
+## Database taken from the history: build a bowtie2 index named custom_db inside
+## ref_db/ and convert the JSON metadata into the matching custom_db.pkl.
+#if $inputs.db.db_selector == "history"
+    mkdir 'ref_db'
+    && bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db'
+    && python '$__tool_directory__/customizemetadata.py' transform_json_to_pkl --json '$inputs.db.mpa_pkl' --pkl 'ref_db/custom_db.pkl'
+    &&
+#end if
+
+## Main MetaPhlAn invocation. The positional input comes first, followed by the
+## database, analysis, output, viral profiling and subsampling options.
+metaphlan
+#if $inputs.in.selector == "raw"
+    ## file_path and ext were prepared by the input staging block above
+    $file_path
+    --input_type '$ext'
+    --read_min_len $inputs.in.read_min_len
+    --bt2_ps '$inputs.in.mapping.bt2_ps'
+    --min_mapq_val $inputs.in.mapping.min_mapq_val
+    #if $ext == "sam"
+        --nreads \$(cat '$file_path' | grep -c -v '^@')
+    #end if
+#else
+    '$inputs.in.in'
+    --input_type '$inputs.in.selector'
+    #if $inputs.in.selector == "sam"
+        ## number of reads = SAM lines that are not header lines
+        --nreads \$(cat '$inputs.in.in' | grep -c -v '^@')
+    #end if
+#end if
+#if $inputs.db.db_selector == "cached"
+    --bowtie2db '$inputs.db.cached_db.fields.path'
+    --index '$inputs.db.cached_db.fields.dbkey'
+#else
+    ## index built from the history datasets before this call
+    --bowtie2db 'ref_db/'
+    --index 'custom_db'
+#end if
+    -t '$analysis.analysis_type.t'
+#if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
+    --tax_lev '$analysis.analysis_type.tax_lev.tax_lev'
+#else if $analysis.analysis_type.t == "clade_specific_strain_tracker"
+    --clade '$analysis.analysis_type.clade'
+    #if str($analysis.analysis_type.min_ab) != ''
+    --min_ab $analysis.analysis_type.min_ab
+    #end if
+#else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != ''
+    ## single "$": a doubled "$$" would emit a literal "$" in front of the value
+    ## and let the shell expand it as a positional parameter
+    --nreads $analysis.analysis_type.nreads
+#else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != ''
+    --pres_th $analysis.analysis_type.pres_th
+#end if
+    --min_cu_len $analysis.min_cu_len
+#if str($analysis.min_alignment_len) != ''
+    --min_alignment_len $analysis.min_alignment_len
+#end if
+## organism_profiling is a multi-select; each selected value maps to one flag
+#if 'add_viruses' in $analysis.organism_profiling
+    --add_viruses
+#end if
+#if 'ignore_eukaryotes' in $analysis.organism_profiling
+    --ignore_eukaryotes
+#end if
+#if 'ignore_bacteria' in $analysis.organism_profiling
+    --ignore_bacteria
+#end if
+#if 'ignore_archaea' in $analysis.organism_profiling
+    --ignore_archaea
+#end if
+    --stat $analysis.stat
+    --stat_q $analysis.stat_q
+    --perc_nonzero $analysis.perc_nonzero
+#if $analysis.ignore_markers
+    --ignore_markers '$analysis.ignore_markers'
+#end if
+    ## boolean parameters render as their flag or as an empty string
+    $analysis.avoid_disqm
+    --sample_id_key '$out.sample_id_key'
+    --sample_id '$out.sample_id'
+    $out.use_group_representative
+    $out.legacy_output
+    $out.CAMI_format_output
+    $out.unclassified_estimation
+    -o '$output_file'
+    --bowtie2out 'bowtie2out'
+    -s 'sam_output_file'
+    --biom '$biom_output_file'
+    --nproc \${GALAXY_SLOTS:-4}
+#if $viral_analysis.profile_vsc
+    $viral_analysis.profile_vsc
+    --vsc_out '$vcs_breath_coverage'
+    --vsc_breadth $viral_analysis.vsc_breadth
+#end if
+
+#if $subsample.selector != "no"
+    #if $subsample.selector == "single"
+        --subsampling $subsample.subsampling
+    #else
+        --subsampling_paired $subsample.subsampling_paired
+    #end if
+    $subsample.mapping_subsampling
+    #if $subsample.subsampling_seed
+        --subsampling_seed $subsample.subsampling_seed
+    #end if
+    --subsampling_output subsampled.out
+#end if
+
+## the hidden "test" parameter is "false" outside of tool tests
+#if $test == "false"
+    --offline
+#end if
+
+
+## Optional post-processing of the main profile with formatoutput.py.
+## 1) one report per taxonomic level, written to split_levels/
+#if $analysis.analysis_type.t == 'rel_ab' or $analysis.analysis_type.t == 'rel_ab_w_read_stats'
+    #if $analysis.analysis_type.tax_lev.tax_lev == 'a' and $analysis.analysis_type.tax_lev.split_levels
+        && mkdir 'split_levels'
+        && python '$__tool_directory__/formatoutput.py' split_levels
+            --metaphlan_output '$output_file' --outdir 'split_levels' $out.legacy_output
+    #end if
+#end if
+## 2) table formatted for Krona
+#if $out.krona_output
+    && python '$__tool_directory__/formatoutput.py' format_for_krona
+        --metaphlan_output '$output_file' --krona_output '$krona_output_file'
+#end if
+    ]]></command>
+    <inputs>
+        <!-- Inputs: what to profile (conditional "in") and which marker database to use (conditional "db"). -->
+        <section name="inputs" title="Inputs" expanded="true">
+            <!-- in: raw reads, an externally produced SAM file, or the intermediary
+                 mapping file of a previous MetaPhlAn run. -->
+            <conditional name="in">
+                <param name="selector" type="select" label="Input(s)">
+                    <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option>
+                    <option value="sam">Externally BowTie2-mapped SAM file</option>
+                    <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option>
+                </param>
+                <when value="raw">
+                    <!-- raw_in: layout of the read files; the command template stages each layout differently. -->
+                    <conditional name="raw_in">
+                        <param name="selector" type="select" label="Fasta/FastQ file(s) with microbiota reads">
+                            <option value="single" selected="true">One single-end file</option>
+                            <option value="multiple">Multiple single-end files</option>
+                            <option value="paired_collection">Paired-end collection</option>
+                            <option value="paired">Paired-end files</option>
+                        </param>
+                        <when value="single">
+                            <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with microbiota reads"/>
+                        </when>
+                        <when value="multiple">
+                            <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ files with microbiota reads" multiple="true"/>
+                        </when>
+                        <when value="paired_collection">
+                            <param name="in" type="data_collection" format="@FILE_FORMATS@" label="Paired-end Fasta/FastQ file with microbiota reads" collection_type="paired"/>
+                        </when>
+                        <when value="paired">
+                            <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with microbiota reads"/>
+                            <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with microbiota reads"/>
+                        </when>
+                    </conditional>
+                    <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/>
+                    <!-- Mapping options are only offered (and only passed on the command line) for raw reads. -->
+                    <section name="mapping" title="Mapping" expanded="true">
+                        <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files">
+                            <option value="sensitive">Sensitive</option>
+                            <option value="very-sensitive" selected="true">Very sensitive</option>
+                            <option value="sensitive-local">Sensitive local</option>
+                            <option value="very-sensitive-local">Very sensitive local</option>
+                        </param>
+                        <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/>
+                    </section>
+                </when>
+                <when value="sam">
+                    <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/>
+                </when>
+                <when value="bowtie2out">
+                    <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" help="File needs to be generated with MetaPhlAn versions &gt;3.0"/>
+                </when>
+            </conditional>
+            <!-- db: a database from the data table, or a fasta + JSON metadata pair from
+                 the history (the command template builds the index from these at job time). -->
+            <conditional name="db">
+                <param name="db_selector" type="select" label="Database with clade-specific marker genes">
+                    <option value="cached" selected="true">Locally cached</option>
+                    <option value="history">From history</option>
+                </param>
+                <when value="cached">
+                    <param name="cached_db" type="select" label="Cached database with clade-specific marker genes">
+                        <options from_data_table="@IDX_DATA_TABLE@">
+                            <!-- only list data table entries whose column 4 equals @IDX_VERSION@ -->
+                            <filter type="static_value" column="4" value="@IDX_VERSION@"/>
+                            <validator message="No compatible MetaPhlAn database is available" type="no_options"/>
+                        </options>
+                    </param>
+                </when>
+                <when value="history">
+                    <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
+                    <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/>
+                </when>
+            </conditional>
+        </section>
+        <!-- Analysis: type of analysis (with its type-specific options) and the
+             parameters controlling how marker abundances become clade abundances. -->
+        <section name="analysis" title="Analysis" expanded="true">
+            <conditional name="analysis_type">
+                <param argument="-t" type="select" label="Type of analysis to perform">
+                    <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option>
+                    <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota in terms of relative abundances and estimate the number of reads coming from each clade</option>
+                    <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option>
+                    <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option>
+                    <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option>
+                    <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when &gt; 0.0 and normalized by microbiota size if number of reads is specified)</option>
+                    <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option>
+                    <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option>
+                </param>
+                <!-- both relative abundance analyses share the tax_lev macro -->
+                <when value="rel_ab">
+                    <expand macro="tax_lev"/>
+                </when>
+                <when value="rel_ab_w_read_stats">
+                    <expand macro="tax_lev"/>
+                </when>
+                <when value="reads_map"/>
+                <when value="clade_profiles"/>
+                <when value="clade_specific_strain_tracker">
+                    <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" help="Markers are also extracted for subclades"/>
+                    <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/>
+                </when>
+                <when value="marker_ab_table">
+                    <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/>
+                </when>
+                <when value="marker_counts"/>
+                <when value="marker_pres_table">
+                    <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/>
+                </when>
+            </conditional>
+            <param argument="--min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/>
+            <param argument="--min_alignment_len" type="integer" optional="true" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/>
+            <!-- Each selected value is turned into the flag of the same name by the command template. -->
+            <param name="organism_profiling" type="select" optional="true" label="Organisms to profile" multiple="true">
+                <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option>
+                <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option>
+                <option value="ignore_bacteria">Ignore bacterial organisms (ignore_bacteria)</option>
+                <option value="ignore_archaea">Ignore archaeal organisms (ignore_archaea)</option>
+            </param>
+            <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances">
+                <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option>
+                <option value="avg_l">avg_l: Average of length-normalized marker counts</option>
+                <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option>
+                <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option>
+                <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option>
+                <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option>
+                <option value="med">med: Median of length-normalized marker counts</option>
+            </param>
+            <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/>
+            <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/>
+            <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/>
+            <!-- NOTE(review): checked by default, so the disambiguation is switched off by default
+                 although the help text recommends keeping it; confirm this default is intended. -->
+            <param argument="--avoid_disqm" type="boolean" truevalue="--avoid_disqm" falsevalue="" checked="true" label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?" help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/>
+        </section>
+        <!-- Optional read subsampling. "single" and "paired" select the number of
+             (paired) reads to keep; both branches share the subsample_common macro,
+             which is not defined in this file. -->
+        <conditional name="subsample">
+            <param name="selector" type="select" label="Subsample" help="Subsampling only works for fastq input">
+                <option value="no">No</option>
+                <option value="single">Yes: specify number of reads</option>
+                <option value="paired">Yes: specify number of paired reads</option>
+            </param>
+            <when value="no"/>
+            <when value="single">
+                <param argument="--subsampling" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of reads to be considered"/>
+                <expand macro="subsample_common"/>
+            </when>
+            <when value="paired">
+                <param argument="--subsampling_paired" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of paired reads to be considered. For N there will be floor(N/2) reads selected from the forward and reverse reads each."/>
+                <expand macro="subsample_common"/>
+            </when>
+        </conditional>
+        <!-- Viral profiling. The select value is itself the command line flag (or the
+             empty string for "No"); the command template emits it verbatim and uses its
+             truthiness to decide whether the VSC options are added. -->
+        <conditional name="viral_analysis">
+            <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach">
+                <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option>
+                <option value="" selected="true">No</option>
+            </param>
+            <when value="--profile_vsc">
+                <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/>
+            </when>
+            <when value=""/>
+        </conditional>
+        <!-- Output options. The boolean parameters with a flag as truevalue are emitted
+             verbatim by the command template; krona_output only toggles the extra
+             Krona formatting step and its output dataset. -->
+        <section name="out" title="Outputs" expanded="true">
+            <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/>
+            <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/>
+            <param argument="--use_group_representative" type="boolean" truevalue="--use_group_representative" falsevalue="" checked="false" label="Use a species as representative for species groups?"/>
+            <param argument="--legacy-output" type="boolean" truevalue="--legacy-output" falsevalue="" checked="false" label="Old MetaPhlAn2 two columns output?"/>
+            <param argument="--CAMI_format_output" type="boolean" truevalue="--CAMI_format_output" falsevalue="" checked="false" label="Report the profiling using the CAMI output format?"/>
+            <param argument="--unclassified_estimation" type="boolean" truevalue="--unclassified_estimation" falsevalue="" checked="false" label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>
+            <param name="krona_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output for Krona?"/>
+        </section>
+        <!-- Hidden switch for tool tests. While it keeps its default value "false" the command
+             template adds the offline flag to the metaphlan call; setting it to another value in
+             a test allows metaphlan to download reference data (we do this only with the
+             smallish TOY DB). -->
+        <param name="test" type="hidden" value="false"/>
+    </inputs>
+    <outputs>
+        <!-- Always produced: the main profile and its BIOM representation. -->
+        <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances"/>
+        <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file"/>
+        <!-- One element per file found in split_levels/; the filter mirrors the condition
+             under which the command template creates that directory. -->
+        <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="split_levels/" format="tabular"/>
+            <filter>analysis['analysis_type']['t'] in ['rel_ab', 'rel_ab_w_read_stats'] and analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']</filter>
+        </collection>
+        <data name="krona_output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances for Krona">
+            <filter>out['krona_output']</filter>
+        </data>
+        <!-- Only kept when viral profiling is enabled (non-empty profile_vsc value). -->
+        <data name="vcs_breath_coverage" format="tabular" label="${tool.name} on ${on_string}: VSCs breadth-of-coverage">
+            <filter>viral_analysis['profile_vsc']</filter>
+        </data>
+        <!-- Subsampled reads are collected from the working directory; single-end
+             subsampling yields one dataset, paired subsampling a paired collection. -->
+        <data name="subsample_single" format="fastqsanger" from_work_dir="subsampled.out" label="${tool.name} on ${on_string}: Subsampled reads">
+            <filter>subsample['selector'] == 'single'</filter>
+        </data>
+        <collection name="subsample_paired" type="paired" label="${tool.name} on ${on_string}: Subsampled paired reads">
+            <data name="forward" format="fastqsanger" from_work_dir="subsampled.R1.out"/>
+            <data name="reverse" format="fastqsanger" from_work_dir="subsampled.R2.out"/>
+            <filter>subsample['selector'] == 'paired'</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Single uncompressed FASTA file (no_taxon_input.fasta), cached db.
+             rel_ab over all levels with split_levels and Krona output enabled; the
+             assertions expect an UNCLASSIFIED entry, no bacterial taxa, and one-line
+             per-level reports. -->
+        <test expect_num_outputs="6">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <param name="selector" value="single"/>
+                        <param name="in" value="no_taxon_input.fasta"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="true"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unclassified_estimation" value="false"/>
+                <param name="krona_output" value="true"/>
+            </section>
+            <output name="output_file" ftype="tabular">
+                <assert_contents>
+                    <has_text text="UNCLASSIFIED"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1">
+                <assert_contents>
+                    <not_has_text text="k__Bacteria"/>
+                    <not_has_text text="p__Actinobacteria"/>
+                </assert_contents>
+            </output>
+            <!-- each per-level report is expected to consist of a single line -->
+            <output_collection name="levels" type="list">
+                <element name="all" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="class"/>
+                        <has_n_columns n="17"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="class" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="class_id"/>
+                        <not_has_text text="phylum_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="family" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="family_id"/>
+                        <not_has_text text="order"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="genus" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="genus_id"/>
+                        <not_has_text text="family"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="kingdom" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="kingdom_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="order" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="order_id"/>
+                        <not_has_text text="class_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="phylum" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="phylum_id"/>
+                        <not_has_text text="kingdom_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="species" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="species_id"/>
+                        <not_has_text text="genus"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="strains" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="strains_id"/>
+                        <not_has_text text="species_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="krona_output_file" ftype="tabular">
+                <assert_contents>
+                    <not_has_text text="k__Bacteria"/>
+                    <has_n_lines n="1" delta="1"/>
+                    <has_size value="1" delta="1"/>
+                </assert_contents>
+            </output>
+            <!-- the hidden "test" parameter is left at its default, so nothing may be downloaded -->
+            <assert_stderr>
+                <has_text text="Downloading" negate="true"/>
+            </assert_stderr>
+        </test>
+        <!-- Single GZ file, Cached db.
+             Input: the single raw read file "SRS014464-Anterior_nares.fasta.gz",
+             profiled against the cached database "test-db-20210409" with t=rel_ab,
+             tax_lev=a, split_levels=true and krona_output=true; 6 outputs are expected.
+             Checks: the abundance table and the BIOM file are compared by size
+             (sim_size) with their reference files and must contain the given lineage
+             strings; the "levels" list collection is checked element by element
+             (all, class, family, genus, kingdom, order, phylum, species, strains):
+             3 columns each, 17 for "all"; the Krona table must have 9 columns, contain
+             "Corynebacterium accolens" and must not contain "k__Bacteria"; stderr must
+             not contain "Downloading" (presumably a guard against the database being
+             fetched at test time; confirm against the command section). -->
+        <test expect_num_outputs="6">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <param name="selector" value="single"/>
+                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="true"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unclassified_estimation" value="false"/>
+                <param name="krona_output" value="true"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+            <output_collection name="levels" type="list">
+                <element name="all" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="Gammaproteobacteria"/>
+                        <has_text text="Corynebacterium accolens"/>
+                        <has_n_columns n="17"/>
+                    </assert_contents>
+                </element>
+                <element name="class" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="class_id"/>
+                        <not_has_text text="phylum_id"/>
+                        <has_text text="Actinobacteria"/>
+                        <has_n_columns n="3"/>
+                    </assert_contents>
+                </element>
+                <element name="family" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="family_id"/>
+                        <not_has_text text="order"/>
+                        <has_text text="Propionibacteriaceae"/>
+                        <has_n_columns n="3"/>
+                    </assert_contents>
+                </element>
+                <element name="genus" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="genus_id"/>
+                        <not_has_text text="family"/>
+                        <has_text text="Cutibacterium"/>
+                        <has_n_columns n="3"/>
+                    </assert_contents>
+                </element>
+                <element name="kingdom" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="kingdom_id"/>
+                        <has_text text="Bacteria"/>
+                        <has_n_columns n="3"/>
+                    </assert_contents>
+                </element>
+                <element name="order" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="order_id"/>
+                        <not_has_text text="class_id"/>
+                        <has_text text="Propionibacteriales"/>
+                        <has_n_columns n="3"/>
+                    </assert_contents>
+                </element>
+                <element name="phylum" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="phylum_id"/>
+                        <not_has_text text="kingdom_id"/>
+                        <has_text text="Firmicutes"/>
+                        <has_n_columns n="3"/>
+                    </assert_contents>
+                </element>
+                <element name="species" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="species_id"/>
+                        <not_has_text text="genus"/>
+                        <has_text text="Corynebacterium accolens"/>
+                        <has_n_columns n="3"/>
+                    </assert_contents>
+                </element>
+                <element name="strains" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="strains_id"/>
+                        <not_has_text text="species_id"/>
+                        <has_n_columns n="3"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="krona_output_file" ftype="tabular">
+                <assert_contents>
+                    <not_has_text text="k__Bacteria"/>
+                    <has_text text="Corynebacterium accolens"/>
+                    <has_n_columns n="9"/>
+                </assert_contents>
+            </output>
+            <assert_stderr>
+                <has_text text="Downloading" negate="true"/>
+            </assert_stderr>
+        </test>
+        <!-- Multiple GZ files, Local db.
+             Input: raw reads with selector "multiple"; the same file
+             "SRS014464-Anterior_nares.fasta.gz" is supplied twice. The database comes
+             from the history (bowtie2db "test-db.fasta", mpa_pkl "test-db.json").
+             split_levels and krona_output are both false; 4 outputs are expected, of
+             which only the abundance table and the BIOM file are asserted here.
+             Checks: both files are compared by size (sim_size) with their reference
+             files and must contain the given lineage strings; the abundance table
+             must also contain "relative_abundance", "NCBI_tax_id" and "clade_name";
+             stderr must not contain "Downloading". -->
+        <test expect_num_outputs="4">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <param name="selector" value="multiple"/>
+                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="history"/>
+                    <param name="bowtie2db" value="test-db.fasta"/>
+                    <param name="mpa_pkl" value="test-db.json"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="false"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unclassified_estimation" value="false"/>
+                <param name="krona_output" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+            <assert_stderr>
+                <has_text text="Downloading" negate="true"/>
+            </assert_stderr>
+        </test>
+        <!-- Paired GZ file, Cached db (note: subsample_paired and the included forward
+             and reverse reads are counted separately, because they are all statically
+             defined).
+             Input: raw paired reads; "SRS014464-Anterior_nares.fastq.gz" is used for
+             both in_f and in_r, profiled against the cached database
+             "test-db-20210409". Subsampling is enabled (selector "paired",
+             subsampling_paired=20257, subsampling_seed=42); 7 outputs are expected.
+             Checks: the abundance table and the BIOM file are compared by size
+             (sim_size) with their reference files and must contain the given lineage
+             string; the "subsample_paired" paired collection must have exactly 10128
+             lines matching "^@.*" in each of its forward and reverse elements; stderr
+             must not contain "Downloading". -->
+        <test expect_num_outputs="7">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <param name="selector" value="paired"/>
+                        <param name="in_f" value="SRS014464-Anterior_nares.fastq.gz"/>
+                        <param name="in_r" value="SRS014464-Anterior_nares.fastq.gz"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="false"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <conditional name="subsample">
+                <param name="selector" value="paired"/>
+                <param name="subsampling_paired" value="20257"/>
+                <param name="subsampling_seed" value="42"/>
+            </conditional>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unclassified_estimation" value="false"/>
+                <param name="krona_output" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                </assert_contents>
+            </output>
+            <output_collection name="subsample_paired" type="paired">
+                <element name="forward">
+                    <assert_contents>
+                        <has_line_matching expression="^@.*" n="10128"/>
+                    </assert_contents>
+                </element>
+                <element name="reverse">
+                    <assert_contents>
+                        <has_line_matching expression="^@.*" n="10128"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <assert_stderr>
+                <has_text text="Downloading" negate="true"/>
+            </assert_stderr>
+        </test>
+        <!-- Paired GZ file as collection, Cached db (note: subsample_paired and the
+             included forward and reverse reads are counted separately, because they
+             are all statically defined).
+             Input: raw reads given as a paired collection named "pair" whose forward
+             and reverse elements are both "SRS014464-Anterior_nares.fastq.gz",
+             profiled against the cached database "test-db-20210409". Subsampling is
+             enabled (selector "paired", subsampling_paired=20257,
+             subsampling_seed=42); 7 outputs are expected.
+             Checks: the abundance table and the BIOM file are compared by size
+             (sim_size) with their reference files and must contain the given lineage
+             string; the "subsample_paired" paired collection must have exactly 10128
+             lines matching "^@.*" in each of its forward and reverse elements; stderr
+             must not contain "Downloading". -->
+        <test expect_num_outputs="7">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <param name="selector" value="paired_collection"/>
+                        <param name="in">
+                            <collection type="paired" name="pair">
+                                <element name="forward" value="SRS014464-Anterior_nares.fastq.gz"/>
+                                <element name="reverse" value="SRS014464-Anterior_nares.fastq.gz"/>
+                            </collection>
+                        </param>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="false"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <conditional name="subsample">
+                <param name="selector" value="paired"/>
+                <param name="subsampling_paired" value="20257"/>
+                <param name="subsampling_seed" value="42"/>
+            </conditional>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unclassified_estimation" value="false"/>
+                <param name="krona_output" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                </assert_contents>
+            </output>
+            <output_collection name="subsample_paired" type="paired">
+                <element name="forward">
+                    <assert_contents>
+                        <has_line_matching expression="^@.*" n="10128"/>
+                    </assert_contents>
+                </element>
+                <element name="reverse">
+                    <assert_contents>
+                        <has_line_matching expression="^@.*" n="10128"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <assert_stderr>
+                <has_text text="Downloading" negate="true"/>
+            </assert_stderr>
+        </test>
+        <!-- Paired fastq file as collection, Cached db.
+             Input: raw reads given as a paired collection named "pair" whose forward
+             and reverse elements are both "SRS014464-Anterior_nares_mini.fastq",
+             profiled against the cached database "test-db-20210409". Subsampling is
+             explicitly turned off (selector "no"); 4 outputs are expected, of which
+             only the abundance table and the BIOM file are asserted here.
+             Checks: both files are compared by size (sim_size) with their reference
+             files and must contain the given lineage string; the abundance table
+             must also contain "relative_abundance", "NCBI_tax_id" and "clade_name";
+             stderr must not contain "Downloading". -->
+        <test expect_num_outputs="4">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <param name="selector" value="paired_collection"/>
+                        <param name="in">
+                            <collection type="paired" name="pair">
+                                <element name="forward" value="SRS014464-Anterior_nares_mini.fastq" />
+                                <element name="reverse" value="SRS014464-Anterior_nares_mini.fastq" />
+                            </collection>
+                        </param>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="false"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <conditional name="subsample">
+                <param name="selector" value="no"/>
+            </conditional>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unclassified_estimation" value="false"/>
+                <param name="krona_output" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                </assert_contents>
+            </output>
+            <assert_stderr>
+                <has_text text="Downloading" negate="true"/>
+            </assert_stderr>
+        </test>
+        <!-- SAM, cached DB.
+             Input: the file "SRS014464-Anterior_nares.sam" with input selector "sam"
+             (no raw-read or mapping parameters are set), profiled against the cached
+             database "test-db-20210409"; 2 outputs are expected.
+             Checks: the abundance table and the BIOM file are compared by size
+             (sim_size) with their reference files and must contain the given lineage
+             strings; the abundance table must also contain "relative_abundance",
+             "NCBI_tax_id" and "clade_name"; stderr must not contain "Downloading". -->
+        <test expect_num_outputs="2">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="sam"/>
+                    <param name="in" value="SRS014464-Anterior_nares.sam"/>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="false"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unclassified_estimation" value="false"/>
+                <param name="krona_output" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+            <assert_stderr>
+                <has_text text="Downloading" negate="true"/>
+            </assert_stderr>
+        </test>
+        <!-- bowtie2out, cached DB.
+             Input: the file "SRS014464-Anterior_nares-bowtie2out.tabular" with input
+             selector "bowtie2out" (no raw-read or mapping parameters are set),
+             profiled against the cached database "test-db-20210409"; 2 outputs are
+             expected.
+             Checks: the abundance table and the BIOM file are compared by size
+             (sim_size) with their reference files and must contain the given lineage
+             strings; the abundance table must also contain "relative_abundance",
+             "NCBI_tax_id" and "clade_name"; stderr must not contain "Downloading". -->
+        <test expect_num_outputs="2">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="bowtie2out"/>
+                    <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="false"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unclassified_estimation" value="false"/>
+                <param name="krona_output" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+            <assert_stderr>
+                <has_text text="Downloading" negate="true"/>
+            </assert_stderr>
+        </test>
+        <!-- Single FASTA file, Cached db -->
+        <test expect_num_outputs="6">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <param name="selector" value="single"/>
+                        <param name="in" value="SRS014464-Anterior_nares.fasta"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="true"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="ignore_markers" value="marker.txt"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="true"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unclassified_estimation" value="false"/>
+                <param name="krona_output" value="true"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="SampleID"/>
+                    <has_text text="Metaphlan_Analysis"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+            <output_collection name="levels" type="list">
+                <element name="all" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="Gammaproteobacteria"/>
+                        <has_text text="Corynebacterium accolens"/>
+                        <has_n_columns n="9"/>
+                    </assert_contents>
+                </element>
+                <element name="class" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="class"/>
+                        <has_text text="Actinobacteria"/>
+                        <has_n_columns n="2"/>
+                    </assert_contents>
+                </element>
+                <element name="family" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="family"/>
+                        <has_text text="Propionibacteriaceae"/>
+                        <has_n_columns n="2"/>
+                    </assert_contents>
+                </element>
+                <element name="genus" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="genus"/>
+                        <has_text text="Cutibacterium"/>
+                        <has_n_columns n="2"/>
+                    </assert_contents>
+                </element>
+                <element name="kingdom" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="kingdom"/>
+                        <has_text text="Bacteria"/>
+                        <has_n_columns n="2"/>
+                    </assert_contents>
+                </element>
+                <element name="order" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="order"/>
+                        <has_text text="Propionibacteriales"/>
+                        <has_n_columns n="2"/>
+                    </assert_contents>
+                </element>
+                <element name="phylum" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="phylum"/>
+                        <has_text text="Firmicutes"/>
+                        <has_n_columns n="2"/>
+                    </assert_contents>
+                </element>
+                <element name="species" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="species"/>
+                        <has_text text="Corynebacterium accolens"/>
+                        <has_n_columns n="2"/>
+                    </assert_contents>
+                </element>
+                <element name="strains" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="strains"/>
+                        <has_n_columns n="2"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="krona_output_file" ftype="tabular">
+                <assert_contents>
+                    <not_has_text text="k__Bacteria"/>
+                    <has_text text="Corynebacterium accolens"/>
+                    <has_n_columns n="9"/>
+                </assert_contents>
+            </output>
+            <assert_stderr>
+                <has_text text="Downloading" negate="true"/>
+            </assert_stderr>
+        </test>
+        <!-- Check a non-default analysis mode (t=marker_ab_table) together with
+             viral analysis (profile_vsc) and subsampling (10000, seed 42) -->
+        <test expect_num_outputs="6">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <param name="selector" value="single"/>
+                        <param name="in" value="SRS014464-Anterior_nares.fastq.gz"/>
+                    </conditional>
+                </conditional>
+                <conditional name="db">
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="mpa_vJan21_TOY_CHOCOPhlAnSGB"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="marker_ab_table"/>
+                </conditional>
+            </section>
+            <conditional name="viral_analysis">
+                <param name="profile_vsc" value="--profile_vsc"/>
+            </conditional>
+            <conditional name="subsample">
+                <param name="selector" value="single"/>
+                <param name="subsampling" value="10000"/>
+                <param name="subsampling_seed" value="42"/>
+            </conditional>
+            <param name="test" value="true"/>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="SGB7017__MKDPKOFL_00679"/>
+                    <has_text text="SampleID"/>
+                    <has_text text="Metaphlan_Analysis"/>
+                </assert_contents>
+            </output>
+            <output name="subsample_single">
+                <assert_contents>
+                    <has_text text="@" n="10000"/>
+                </assert_contents>
+            </output>
+            <!-- The reference data is empty, so this output is expected to be empty (size 0) -->
+            <output name="vcs_breath_coverage" ftype="tabular">
+                <assert_contents>
+                    <has_size size="0"/>
+                </assert_contents>
+            </output>
+            <assert_command>
+                <has_text text="--profile_vsc"/>
+                <has_text text="--vsc_breadth 0.75"/>
+                <has_text text="--vsc_out"/>
+            </assert_command>
+            <assert_stderr>
+                <has_text text="Downloading"/>
+                <!-- Because test=true is set and the TOY reference DB is absent, MetaPhlAn downloads data (up to ~10MB), hence "Downloading" above -->
+                <has_text text="No reads aligning to VSC markers"/>
+            </assert_stderr>
+        </test>
+    </tests>
+    <help><![CDATA[
+What it does
+============
+
+MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, 
+Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) at species-level. 
+
+MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes 
+(~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:
+
+- unambiguous taxonomic assignments;
+- accurate estimation of organismal relative abundance;
+- species-level resolution for bacteria, archaea, eukaryotes and viruses;
+- strain identification and tracking;
+- orders of magnitude speedups compared to existing methods;
+- microbiota strain-level population genomics.
+
+MetaPhlAn clade-abundance estimation
+------------------------------------
+
+The basic usage of MetaPhlAn consists of identifying the clades (from phyla to species and 
+strains in particular cases) present in the microbiota obtained from a microbiome sample and 
+estimating their relative abundance.
+
+Marker level analysis
+---------------------
+
+MetaPhlAn introduces the capability of characterizing organisms at the strain level using 
+non-aggregated marker information. This capability comes in several slightly different flavours, 
+which provide a way to perform strain tracking and comparison across multiple samples.
+
+Usually, MetaPhlAn is first run with the default parameters for the type of analysis to profile the 
+species present in the community, and then a strain-level profiling can be performed to zoom in on 
+specific species of interest. This operation can be performed quickly as it exploits the bowtie2out 
+intermediate file saved during the execution of the default analysis type.
+
+Inputs
+======
+
+MetaPhlAn takes as input either:
+
+- one or several sequence files in FASTA or FASTQ format (compressed or not)
+- a SAM file produced by Bowtie2
+- an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run
+
+It also needs the reference database, which can be locally installed or customized using the dedicated tools.
+
+Outputs
+=======
+
+The main output is a tab-separated file with the predicted taxon relative abundances.
+
+It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.
+
+
+More help and use cases
+=======================
+
+To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_.
+
+.. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Basic-Usage
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
author	estrain
date	Fri, 13 Mar 2026 12:06:00 +0000
parents
children