Mercurial > repos > estrain > metaphlan_gt
diff metaphlan_gt.xml @ 0:a7bd05df0bea draft default tip
planemo upload commit e485da3e1b9eb674a52948a00a3328c1a3cc5ffa
| author | estrain |
|---|---|
| date | Fri, 13 Mar 2026 12:06:00 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metaphlan_gt.xml Fri Mar 13 12:06:00 2026 +0000 @@ -0,0 +1,1360 @@ +<tool id="metaphlan_gt" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy4" profile="@PROFILE@"> + <description>to profile the composition of microbial communities</description> + <macros> + <import>macros.xml</import> + + <xml name="tax_lev"> + <conditional name="tax_lev"> + <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output"> + <option value="a" selected="true">All taxonomic levels</option> + <option value="k">Kingdoms only</option> + <option value="p">Phyla only</option> + <option value="c">Classes only</option> + <option value="o">Orders only</option> + <option value="f">Families only</option> + <option value="g">Genera only</option> + <option value="s">Species only</option> + </param> + <when value="a"> + <param name="split_levels" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate a report for each taxonomic level?" 
help="It will be in addition to the default output"/> + </when> + <when value="k"/> + <when value="p"/> + <when value="c"/> + <when value="o"/> + <when value="f"/> + <when value="g"/> + <when value="s"/> + </conditional> + </xml> + <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz</token> + </macros> + <expand macro="requirements"/> + <version_command>metaphlan -v</version_command> + <command detect_errors="aggressive"><![CDATA[ +#if $inputs.in.selector == "raw" + #if $inputs.in.raw_in.selector == "single" + #set full_ext=$inputs.in.raw_in.in.datatype.file_ext + #if $full_ext.endswith("gz") + #set $file_path="in" + zcat '$inputs.in.raw_in.in' > '$file_path' && + #else if $full_ext.endswith("bz2") + #set $file_path="in" + bzcat '$inputs.in.raw_in.in' > '$file_path' && + #else + #set $file_path="'%s'" % $inputs.in.raw_in.in + #end if + #else if $inputs.in.raw_in.selector == "multiple" + #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext + #set file_path="" + #set sep="" + #for $i, $f in enumerate($inputs.in.raw_in.in) + #if $f.datatype.file_ext != $full_ext + echo "Different datatypes for input files" && + exit 1 + #end if + #if $full_ext.endswith("gz") + #set fp="input_%s" % ($i) + zcat '$f' > '$fp' && + #else if $full_ext.endswith("bz2") + #set fp="input_%s" % ($i) + bzcat '$f' > '$fp' && + #else + #set fp=$f + #end if + #set $file_path+="'%s%s'" % ($sep, $fp) + #set $sep="," + #end for + #else if $inputs.in.raw_in.selector == "paired" + #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext + #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext + echo "Different datatypes for input paired-end files" && + exit 1 + #end if + #if $full_ext.endswith("gz") + zcat '$inputs.in.raw_in.in_f' > 'in_f' && + zcat '$inputs.in.raw_in.in_r' > 'in_r' && + #else if $full_ext.endswith("bz2") + bzcat '$inputs.in.raw_in.in_f' > 'in_f' && + bzcat '$inputs.in.raw_in.in_r' > 'in_r' && + #else: + ln -s '$inputs.in.raw_in.in_f' 'in_f' && + ln -s 
'$inputs.in.raw_in.in_r' 'in_r' && + #end if + ## paired data has by default no special treatment, i.e. it is given as comma separated list + ## except iff paired subsampling where -1 and -2 must be used + #if $subsample.selector == 'paired' + #set file_path="-1 in_f -2 in_r" + #else + #set file_path="in_f,in_r" + #end if + #else if $inputs.in.raw_in.selector == "paired_collection" + #set full_ext=$inputs.in.raw_in.in.forward.ext + #if $full_ext != $inputs.in.raw_in.in.reverse.ext + echo "Different datatypes for input paired-end files" && + exit 1 + #end if + #if $full_ext.endswith("gz") + zcat '$inputs.in.raw_in.in.forward' > 'in_f' && + zcat '$inputs.in.raw_in.in.reverse' > 'in_r' && + #else if $full_ext.endswith("bz2") + bzcat '$inputs.in.raw_in.in.forward' > 'in_f' && + bzcat '$inputs.in.raw_in.in.reverse' > 'in_r' && + #else + ln -s '$inputs.in.raw_in.in.forward' 'in_f' && + ln -s '$inputs.in.raw_in.in.reverse' 'in_r' && + #end if + #if $subsample.selector == 'paired' + #set file_path="-1 in_f -2 in_r" + #else + #set file_path="in_f,in_r" + #end if + #end if + + #if $full_ext.startswith("fastq") + #set ext='fastq' + #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2")) + #set ext='fasta' + #else + #set ext=$full_ext + #end if +#end if + +#if $inputs.db.db_selector == "history" +mkdir 'ref_db' && +bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db' && +python + '$__tool_directory__/customizemetadata.py' + transform_json_to_pkl + --json '$inputs.db.mpa_pkl' + --pkl 'ref_db/custom_db.pkl' && +#end if + +metaphlan +#if $inputs.in.selector == "raw" + $file_path + --input_type '$ext' + --read_min_len $inputs.in.read_min_len + --bt2_ps '$inputs.in.mapping.bt2_ps' + --min_mapq_val $inputs.in.mapping.min_mapq_val + #if $ext == "sam" + --nreads \$(cat '$file_path' | grep -c -v '^@') + #end if +#else + '$inputs.in.in' + --input_type '$inputs.in.selector' + #if $inputs.in.selector == "sam" + --nreads \$(cat '$inputs.in.in' | grep -c 
-v '^@') + #end if +#end if +#if $inputs.db.db_selector == "cached" + --bowtie2db '$inputs.db.cached_db.fields.path' + --index '$inputs.db.cached_db.fields.dbkey' +#else + --bowtie2db 'ref_db/' + --index 'custom_db' +#end if + -t '$analysis.analysis_type.t' +#if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats" + --tax_lev '$analysis.analysis_type.tax_lev.tax_lev' +#else if $analysis.analysis_type.t == "clade_specific_strain_tracker" + --clade '$analysis.analysis_type.clade' + #if str($analysis.analysis_type.min_ab) != '' + --min_ab $analysis.analysis_type.min_ab + #end if +#else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != '' + --nreads $analysis.analysis_type.nreads +#else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != '' + --pres_th $analysis.analysis_type.pres_th +#end if + --min_cu_len $analysis.min_cu_len +#if str($analysis.min_alignment_len) != '' + --min_alignment_len $analysis.min_alignment_len +#end if +#if 'add_viruses' in $analysis.organism_profiling + --add_viruses +#end if +#if 'ignore_eukaryotes' in $analysis.organism_profiling + --ignore_eukaryotes +#end if +#if 'ignore_bacteria' in $analysis.organism_profiling + --ignore_bacteria +#end if +#if 'ignore_archaea' in $analysis.organism_profiling + --ignore_archaea +#end if + --stat $analysis.stat + --stat_q $analysis.stat_q + --perc_nonzero $analysis.perc_nonzero +#if $analysis.ignore_markers + --ignore_markers '$analysis.ignore_markers' +#end if + $analysis.avoid_disqm + --sample_id_key '$out.sample_id_key' + --sample_id '$out.sample_id' + $out.use_group_representative + $out.legacy_output + $out.CAMI_format_output + $out.unclassified_estimation + -o '$output_file' + --bowtie2out 'bowtie2out' + -s 'sam_output_file' + --biom '$biom_output_file' + --nproc \${GALAXY_SLOTS:-4} +#if $viral_analysis.profile_vsc + $viral_analysis.profile_vsc + --vsc_out 
'$vcs_breath_coverage' + --vsc_breadth $viral_analysis.vsc_breadth +#end if + +#if $subsample.selector != "no" + #if $subsample.selector == "single" + --subsampling $subsample.subsampling + #else + --subsampling_paired $subsample.subsampling_paired + #end if + $subsample.mapping_subsampling + #if $subsample.subsampling_seed + --subsampling_seed $subsample.subsampling_seed + #end if + --subsampling_output subsampled.out +#end if + +#if $test == "false" + --offline +#end if + + +#if $analysis.analysis_type.t in ['rel_ab', 'rel_ab_w_read_stats'] + #if $analysis.analysis_type.tax_lev.tax_lev == 'a' and $analysis.analysis_type.tax_lev.split_levels + && + mkdir 'split_levels' + && + python '$__tool_directory__/formatoutput.py' + split_levels + --metaphlan_output '$output_file' + --outdir 'split_levels' + $out.legacy_output + #end if +#end if + +#if $out.krona_output +&& +python '$__tool_directory__/formatoutput.py' + format_for_krona + --metaphlan_output '$output_file' + --krona_output '$krona_output_file' +#end if + ]]></command> + <inputs> + <section name="inputs" title="Inputs" expanded="true"> + <conditional name="in"> + <param name="selector" type="select" label="Input(s)"> + <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option> + <option value="sam">Externally BowTie2-mapped SAM file</option> + <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option> + </param> + <when value="raw"> + <conditional name="raw_in"> + <param name="selector" type="select" label="Fasta/FastQ file(s) with microbiota reads"> + <option value="single" selected="true">One single-end file</option> + <option value="multiple">Multiple single-end files</option> + <option value="paired_collection">Paired-end collection</option> + <option value="paired">Paired-end files</option> + </param> + <when value="single"> + <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with 
microbiota reads"/> + </when> + <when value="multiple"> + <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ files with microbiota reads" multiple="true"/> + </when> + <when value="paired_collection"> + <param name="in" type="data_collection" format="@FILE_FORMATS@" label="Paired-end Fasta/FastQ file with microbiota reads" collection_type="paired"/> + </when> + <when value="paired"> + <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with microbiota reads"/> + <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with microbiota reads"/> + </when> + </conditional> + <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/> + <section name="mapping" title="Mapping" expanded="true"> + <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files"> + <option value="sensitive">Sensitive</option> + <option value="very-sensitive" selected="true">Very sensitive</option> + <option value="sensitive-local">Sensitive local</option> + <option value="very-sensitive-local">Very sensitive local</option> + </param> + <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/> + </section> + </when> + <when value="sam"> + <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/> + </when> + <when value="bowtie2out"> + <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" help="File needs to be generated with MetaPhlAn versions >3.0"/> + </when> + </conditional> + <conditional name="db"> + <param name="db_selector" type="select" label="Database with clade-specific marker genes"> + <option value="cached" 
selected="true">Locally cached</option> + <option value="history">From history</option> + </param> + <when value="cached"> + <param name="cached_db" type="select" label="Cached database with clade-specific marker genes"> + <options from_data_table="@IDX_DATA_TABLE@"> + <filter type="static_value" column="4" value="@IDX_VERSION@"/> + <validator message="No compatible MetaPhlAn database is available" type="no_options"/> + </options> + </param> + </when> + <when value="history"> + <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/> + <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/> + </when> + </conditional> + </section> + <section name="analysis" title="Analysis" expanded="true"> + <conditional name="analysis_type"> + <param argument="-t" type="select" label="Type of analysis to perform"> + <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option> + <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota in terms of relative abundances and estimate the number of reads coming from each clade</option> + <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option> + <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option> + <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option> + <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by microbiota size if number of reads is specified)</option> + <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option> + <option value="marker_pres_table">marker_pres_table: List of markers 
present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option> + </param> + <when value="rel_ab"> + <expand macro="tax_lev"/> + </when> + <when value="rel_ab_w_read_stats"> + <expand macro="tax_lev"/> + </when> + <when value="reads_map"/> + <when value="clade_profiles"/> + <when value="clade_specific_strain_tracker"> + <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" help="Markers are also extracted for subclades"/> + <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/> + </when> + <when value="marker_ab_table"> + <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/> + </when> + <when value="marker_counts"/> + <when value="marker_pres_table"> + <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/> + </when> + </conditional> + <param argument="--min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/> + <param argument="--min_alignment_len" type="integer" optional="true" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/> + <param name="organism_profiling" type="select" optional="true" label="Organisms to profile" multiple="true"> + <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option> + <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option> + <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option> + <option value="ignore_archaea">Ignore archaea organisms 
(ignore_archaea)</option> + </param> + <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances"> + <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option> + <option value="avg_l">avg_l: Average of length-normalized marker counts</option> + <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option> + <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option> + <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option> + <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option> + <option value="med">med: Median of length-normalized marker counts</option> + </param> + <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/> + <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/> + <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/> + <param argument="--avoid_disqm" type="boolean" truevalue="--avoid_disqm" falsevalue="" checked="true" label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?" 
help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/> + </section> + <conditional name="subsample"> + <param name="selector" type="select" label="Subsample" help="Subsampling only works for fastq input"> + <option value="no">No</option> + <option value="single">Yes: specify number of reads</option> + <option value="paired">Yes: specify number of paired reads</option> + </param> + <when value="no"/> + <when value="single"> + <param argument="--subsampling" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of reads to be considered"/> + <expand macro="subsample_common"/> + </when> + <when value="paired"> + <param argument="--subsampling_paired" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of paired reads to be considered. For N there will be floor(N/2) reads selected from the forward and reverse reads each."/> + <expand macro="subsample_common"/> + </when> + </conditional> + <conditional name="viral_analysis"> + <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach"> + <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option> + <option value="" selected="true">No</option> + </param> + <when value="--profile_vsc"> + <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/> + </when> + <when value=""/> + </conditional> + <section name="out" title="Outputs" expanded="true"> + <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/> + <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/> + <param argument="--use_group_representative" type="boolean" truevalue="--use_group_representative" falsevalue="" checked="false" label="Use a species as 
representative for species groups?"/> + <param argument="--legacy-output" type="boolean" truevalue="--legacy-output" falsevalue="" checked="false" label="Old MetaPhlAn2 two columns output?"/> + <param argument="--CAMI_format_output" type="boolean" truevalue="--CAMI_format_output" falsevalue="" checked="false" label="Report the profiling using the CAMI output format?"/> + <param argument="--unclassified_estimation" type="boolean" truevalue="--unclassified_estimation" falsevalue="" checked="false" label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/> + <param name="krona_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output for Krona?"/> + </section> + <!-- enabling this in tests will allow metaphlan to download reference data (we do this only with the smallish TOY DB) --> + <param name="test" type="hidden" value="false"/> + </inputs> + <outputs> + <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances"/> + <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file"/> + <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels"> + <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="split_levels/" format="tabular"/> + <filter>analysis['analysis_type']['t'] in ['rel_ab', 'rel_ab_w_read_stats'] and analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']</filter> + </collection> + <data name="krona_output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances for Krona"> + <filter>out['krona_output']</filter> + </data> + <data name="vcs_breath_coverage" format="tabular" label="${tool.name} on ${on_string}: VSCs breadth-of-coverage"> + <filter>viral_analysis['profile_vsc']</filter> + </data> + <data 
name="subsample_single" format="fastqsanger" from_work_dir="subsampled.out" label="${tool.name} on ${on_string}: Subsampled reads"> + <filter>subsample['selector'] == 'single'</filter> + </data> + <collection name="subsample_paired" type="paired" label="${tool.name} on ${on_string}: Subsampled paired reads"> + <data name="forward" format="fastqsanger" from_work_dir="subsampled.R1.out"/> + <data name="reverse" format="fastqsanger" from_work_dir="subsampled.R2.out"/> + <filter>subsample['selector'] == 'paired'</filter> + </collection> + </outputs> + <tests> + <!-- Single uncompressed FASTA file, Cached db --> + <test expect_num_outputs="6"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <param name="selector" value="single"/> + <param name="in" value="no_taxon_input.fasta"/> + </conditional> + <param name="read_min_len" value="70"/> + <section name="mapping"> + <param name="bt2_ps" value="sensitive"/> + <param name="min_mapq_val" value="5"/> + </section> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="true"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + 
<param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="true"/> + </section> + <output name="output_file" ftype="tabular"> + <assert_contents> + <has_text text="UNCLASSIFIED"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1"> + <assert_contents> + <not_has_text text="k__Bacteria"/> + <not_has_text text="p__Actinobacteria"/> + </assert_contents> + </output> + <output_collection name="levels" type="list"> + <element name="all" ftype="tabular"> + <assert_contents> + <has_text text="class"/> + <has_n_columns n="17"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="class" ftype="tabular"> + <assert_contents> + <has_text text="class_id"/> + <not_has_text text="phylum_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="family" ftype="tabular"> + <assert_contents> + <has_text text="family_id"/> + <not_has_text text="order"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="genus" ftype="tabular"> + <assert_contents> + <has_text text="genus_id"/> + <not_has_text text="family"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="kingdom" ftype="tabular"> + <assert_contents> + <has_text text="kingdom_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="order" ftype="tabular"> + <assert_contents> + <has_text text="order_id"/> + <not_has_text text="class_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="phylum" ftype="tabular"> + <assert_contents> + <has_text text="phylum_id"/> + <not_has_text text="kingdom_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="species" ftype="tabular"> + <assert_contents> + <has_text text="species_id"/> + <not_has_text text="genus"/> + <has_n_columns 
n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="strains" ftype="tabular"> + <assert_contents> + <has_text text="strains_id"/> + <not_has_text text="species_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + </output_collection> + <output name="krona_output_file" ftype="tabular"> + <assert_contents> + <not_has_text text="k__Bacteria"/> + <has_n_lines n="1" delta="1"/> + <has_size value="1" delta="1"/> + </assert_contents> + </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- Single GZ file, Cached db --> + <test expect_num_outputs="6"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <param name="selector" value="single"/> + <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/> + </conditional> + <param name="read_min_len" value="70"/> + <section name="mapping"> + <param name="bt2_ps" value="sensitive"/> + <param name="min_mapq_val" value="5"/> + </section> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="true"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" 
value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="true"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> + </assert_contents> + </output> + <output_collection name="levels" type="list"> + <element name="all" ftype="tabular"> + <assert_contents> + <has_text text="Gammaproteobacteria"/> + <has_text text="Corynebacterium accolens"/> + <has_n_columns n="17"/> + </assert_contents> + </element> + <element name="class" ftype="tabular"> + <assert_contents> + <has_text text="class_id"/> + <not_has_text text="phylum_id"/> + <has_text text="Actinobacteria"/> + <has_n_columns n="3"/> + </assert_contents> + </element> + <element name="family" ftype="tabular"> + <assert_contents> + <has_text text="family_id"/> + <not_has_text text="order"/> + <has_text text="Propionibacteriaceae"/> + <has_n_columns n="3"/> + </assert_contents> + </element> + <element name="genus" ftype="tabular"> + <assert_contents> + <has_text text="genus_id"/> + <not_has_text text="family"/> + <has_text text="Cutibacterium"/> + <has_n_columns n="3"/> + </assert_contents> + </element> + <element name="kingdom" ftype="tabular"> + <assert_contents> + <has_text text="kingdom_id"/> + <has_text text="Bacteria"/> + <has_n_columns n="3"/> + </assert_contents> + </element> + <element name="order" ftype="tabular"> + <assert_contents> + <has_text 
text="order_id"/> + <not_has_text text="class_id"/> + <has_text text="Propionibacteriales"/> + <has_n_columns n="3"/> + </assert_contents> + </element> + <element name="phylum" ftype="tabular"> + <assert_contents> + <has_text text="phylum_id"/> + <not_has_text text="kingdom_id"/> + <has_text text="Firmicutes"/> + <has_n_columns n="3"/> + </assert_contents> + </element> + <element name="species" ftype="tabular"> + <assert_contents> + <has_text text="species_id"/> + <not_has_text text="genus"/> + <has_text text="Corynebacterium accolens"/> + <has_n_columns n="3"/> + </assert_contents> + </element> + <element name="strains" ftype="tabular"> + <assert_contents> + <has_text text="strains_id"/> + <not_has_text text="species_id"/> + <has_n_columns n="3"/> + </assert_contents> + </element> + </output_collection> + <output name="krona_output_file" ftype="tabular"> + <assert_contents> + <not_has_text text="k__Bacteria"/> + <has_text text="Corynebacterium accolens"/> + <has_n_columns n="9"/> + </assert_contents> + </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- Multiple GZ file, Local db--> + <test expect_num_outputs="4"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <param name="selector" value="multiple"/> + <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/> + </conditional> + <param name="read_min_len" value="70"/> + <section name="mapping"> + <param name="bt2_ps" value="sensitive"/> + <param name="min_mapq_val" value="5"/> + </section> + </conditional> + <conditional name="db"> + <param name="db_selector" value="history"/> + <param name="bowtie2db" value="test-db.fasta"/> + <param name="mpa_pkl" value="test-db.json"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param 
name="tax_lev" value="a"/> + <param name="split_levels" value="false"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="false"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="relative_abundance"/> + <has_text text="NCBI_tax_id"/> + <has_text text="clade_name"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> + </assert_contents> + </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- Paired GZ file, Cached db (note: subsample_paired and the included forward and reverse reads are counted separately, because they are all statically defined) --> + <test expect_num_outputs="7"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <param name="selector" value="paired"/> + <param name="in_f" 
value="SRS014464-Anterior_nares.fastq.gz"/> + <param name="in_r" value="SRS014464-Anterior_nares.fastq.gz"/> + </conditional> + <param name="read_min_len" value="70"/> + <section name="mapping"> + <param name="bt2_ps" value="sensitive"/> + <param name="min_mapq_val" value="5"/> + </section> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="false"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <conditional name="subsample"> + <param name="selector" value="paired"/> + <param name="subsampling_paired" value="20257"/> + <param name="subsampling_seed" value="42"/> + </conditional> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="false"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="relative_abundance"/> + <has_text text="NCBI_tax_id"/> + <has_text text="clade_name"/> + </assert_contents> + </output> 
+ <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + </assert_contents> + </output> + <output_collection name="subsample_paired" type="paired"> + <element name="forward"> + <assert_contents> + <has_line_matching expression="^@.*" n="10128"/> + </assert_contents> + </element> + <element name="reverse"> + <assert_contents> + <has_line_matching expression="^@.*" n="10128"/> + </assert_contents> + </element> + </output_collection> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- Paired GZ file as collection, Cached db (note sumsample_paired and the included forward and reverse reads are counted separatelym because they are all statically defined) --> + <test expect_num_outputs="7"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <param name="selector" value="paired_collection"/> + <param name="in"> + <collection type="paired" name="pair"> + <element name="forward" value="SRS014464-Anterior_nares.fastq.gz"/> + <element name="reverse" value="SRS014464-Anterior_nares.fastq.gz"/> + </collection> + </param> + </conditional> + <param name="read_min_len" value="70"/> + <section name="mapping"> + <param name="bt2_ps" value="sensitive"/> + <param name="min_mapq_val" value="5"/> + </section> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="false"/> + </conditional> + </conditional> + <param 
name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <conditional name="subsample"> + <param name="selector" value="paired"/> + <param name="subsampling_paired" value="20257"/> + <param name="subsampling_seed" value="42"/> + </conditional> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="false"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="relative_abundance"/> + <has_text text="NCBI_tax_id"/> + <has_text text="clade_name"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + </assert_contents> + </output> + <output_collection name="subsample_paired" type="paired"> + <element name="forward"> + <assert_contents> + <has_line_matching expression="^@.*" n="10128"/> + </assert_contents> + </element> + <element name="reverse"> + <assert_contents> + <has_line_matching expression="^@.*" n="10128"/> + </assert_contents> + </element> + </output_collection> + <assert_stderr> + <has_text text="Downloading" 
negate="true"/> + </assert_stderr> + </test> + <!-- Paired fastq file as collection, Cached db --> + <test expect_num_outputs="4"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <param name="selector" value="paired_collection"/> + <param name="in"> + <collection type="paired" name="pair"> + <element name="forward" value="SRS014464-Anterior_nares_mini.fastq" /> + <element name="reverse" value="SRS014464-Anterior_nares_mini.fastq" /> + </collection> + </param> + </conditional> + <param name="read_min_len" value="70"/> + <section name="mapping"> + <param name="bt2_ps" value="sensitive"/> + <param name="min_mapq_val" value="5"/> + </section> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="false"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <conditional name="subsample"> + <param name="selector" value="no"/> + </conditional> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="false"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" 
compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="relative_abundance"/> + <has_text text="NCBI_tax_id"/> + <has_text text="clade_name"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + </assert_contents> + </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- SAM, cached DB --> + <test expect_num_outputs="2"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="sam"/> + <param name="in" value="SRS014464-Anterior_nares.sam"/> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="false"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param 
name="krona_output" value="false"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="relative_abundance"/> + <has_text text="NCBI_tax_id"/> + <has_text text="clade_name"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> + </assert_contents> + </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- bowtie2out, cached DB --> + <test expect_num_outputs="2"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="bowtie2out"/> + <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="false"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param 
name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="false"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="relative_abundance"/> + <has_text text="NCBI_tax_id"/> + <has_text text="clade_name"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> + </assert_contents> + </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- Single FASTA file, Cached db --> + <test expect_num_outputs="6"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <param name="selector" value="single"/> + <param name="in" value="SRS014464-Anterior_nares.fasta"/> + </conditional> + <param name="read_min_len" value="70"/> + <section name="mapping"> + <param name="bt2_ps" value="sensitive"/> + <param name="min_mapq_val" value="5"/> + </section> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="true"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param 
name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="ignore_markers" value="marker.txt"/> + <param name="avoid_disqm" value="true"/> + </section> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="true"/> + <param name="CAMI_format_output" value="false"/> + <param name="unclassified_estimation" value="false"/> + <param name="krona_output" value="true"/> + </section> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="SampleID"/> + <has_text text="Metaphlan_Analysis"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> + <assert_contents> + <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> + </assert_contents> + </output> + <output_collection name="levels" type="list"> + <element name="all" ftype="tabular"> + <assert_contents> + <has_text text="Gammaproteobacteria"/> + <has_text text="Corynebacterium accolens"/> + <has_n_columns n="9"/> + </assert_contents> + </element> + <element name="class" ftype="tabular"> + <assert_contents> + <has_text text="class"/> + <has_text text="Actinobacteria"/> + <has_n_columns n="2"/> + </assert_contents> + </element> + <element name="family" ftype="tabular"> + <assert_contents> + <has_text text="family"/> + <has_text text="Propionibacteriaceae"/> + <has_n_columns n="2"/> + </assert_contents> + 
</element> + <element name="genus" ftype="tabular"> + <assert_contents> + <has_text text="genus"/> + <has_text text="Cutibacterium"/> + <has_n_columns n="2"/> + </assert_contents> + </element> + <element name="kingdom" ftype="tabular"> + <assert_contents> + <has_text text="kingdom"/> + <has_text text="Bacteria"/> + <has_n_columns n="2"/> + </assert_contents> + </element> + <element name="order" ftype="tabular"> + <assert_contents> + <has_text text="order"/> + <has_text text="Propionibacteriales"/> + <has_n_columns n="2"/> + </assert_contents> + </element> + <element name="phylum" ftype="tabular"> + <assert_contents> + <has_text text="phylum"/> + <has_text text="Firmicutes"/> + <has_n_columns n="2"/> + </assert_contents> + </element> + <element name="species" ftype="tabular"> + <assert_contents> + <has_text text="species"/> + <has_text text="Corynebacterium accolens"/> + <has_n_columns n="2"/> + </assert_contents> + </element> + <element name="strains" ftype="tabular"> + <assert_contents> + <has_text text="strains"/> + <has_n_columns n="2"/> + </assert_contents> + </element> + </output_collection> + <output name="krona_output_file" ftype="tabular"> + <assert_contents> + <not_has_text text="k__Bacteria"/> + <has_text text="Corynebacterium accolens"/> + <has_n_columns n="9"/> + </assert_contents> + </output> + <assert_stderr> + <has_text text="Downloading" negate="true"/> + </assert_stderr> + </test> + <!-- Check a non-default analysis mode (marker_ab_table) + and viral analysis (profile_vsc), with single-end subsampling and the TOY cached db --> + <test expect_num_outputs="6"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <param name="selector" value="single"/> + <param name="in" value="SRS014464-Anterior_nares.fastq.gz"/> + </conditional> + </conditional> + <conditional name="db"> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="mpa_vJan21_TOY_CHOCOPhlAnSGB"/> + </conditional> + </section> + <section name="analysis"> + <conditional 
name="analysis_type"> + <param name="t" value="marker_ab_table"/> + </conditional> + </section> + <conditional name="viral_analysis"> + <param name="profile_vsc" value="--profile_vsc"/> + </conditional> + <conditional name="subsample"> + <param name="selector" value="single"/> + <param name="subsampling" value="10000"/> + <param name="subsampling_seed" value="42"/> + </conditional> + <param name="test" value="true"/> + <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> + <assert_contents> + <has_text text="SGB7017__MKDPKOFL_00679"/> + <has_text text="SampleID"/> + <has_text text="Metaphlan_Analysis"/> + </assert_contents> + </output> + <output name="subsample_single"> + <assert_contents> + <has_text text="@" n="10000"/> + </assert_contents> + </output> + <!-- reference data empty -> empty output --> + <output name="vcs_breath_coverage" ftype="tabular"> + <assert_contents> + <has_size size="0"/> + </assert_contents> + </output> + <assert_command> + <has_text text="--profile_vsc"/> + <has_text text="--vsc_breadth 0.75"/> + <has_text text="--vsc_out"/> + </assert_command> + <assert_stderr> + <has_text text="Downloading"/> + <!-- due to test=true and the absence of the TOY reference DB Metaphlan will download to ~10MB--> + <has_text text="No reads aligning to VSC markers"/> + </assert_stderr> + </test> + </tests> + <help><![CDATA[ +What it does +============ + +MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, +Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) at species-level. 
+ +MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes +(~99,500 bacterial and archaeal and ~500 eukaryotic), allowing: + +- unambiguous taxonomic assignments; +- accurate estimation of organismal relative abundance; +- species-level resolution for bacteria, archaea, eukaryotes and viruses; +- strain identification and tracking; +- orders of magnitude speedups compared to existing methods; +- microbiota strain-level population genomics. + +MetaPhlAn clade-abundance estimation +------------------------------------ + +The basic usage of MetaPhlAn consists of identifying the clades (from phyla to species and +strains in particular cases) present in the microbiota obtained from a microbiome sample and +estimating their relative abundance. + +Marker level analysis +--------------------- + +MetaPhlAn introduces the capability of characterizing organisms at the strain level using +non-aggregated marker information. This capability comes in several slightly different flavours and +is a way to perform strain tracking and comparison across multiple samples. + +Usually, MetaPhlAn is first run with default parameters for the type of analysis to profile the +species present in the community, and then strain-level profiling can be performed to zoom in on +specific species of interest. This operation can be performed quickly as it exploits the bowtie2out +intermediate file saved during the execution of the default analysis type. + +Inputs +====== + +MetaPhlAn takes as input either: + +- one or several sequence files in FASTA or FASTQ format (compressed or not) +- a Bowtie2-produced SAM file +- an intermediate mapping file (bowtie2out) of the microbiota generated by a previous MetaPhlAn run + +It also needs the reference database, which can be locally installed or customized using the dedicated tools. + +Outputs +======= + +The main output is a tab-separated file with the predicted taxon relative abundances. 
+ +It also generates a BIOM file and some intermediate files (SAM and bowtie2out) if sequence files are given as inputs. + + +More help and use cases +======================= + +To get more information about MetaPhlAn usage and use cases, please refer to the `MetaPhlAn documentation`_. + +.. _MetaPhlAn documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Basic-Usage + + ]]></help> + <expand macro="citations"/> +</tool>
