Mercurial > repos > estrain > metaphlan_gt
comparison metaphlan_gt.xml @ 0:a7bd05df0bea draft default tip
planemo upload commit e485da3e1b9eb674a52948a00a3328c1a3cc5ffa
| author | estrain |
|---|---|
| date | Fri, 13 Mar 2026 12:06:00 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a7bd05df0bea |
|---|---|
| 1 <tool id="metaphlan_gt" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy4" profile="@PROFILE@"> | |
| 2 <description>to profile the composition of microbial communities</description> | |
| 3 <macros> <!-- Shared definitions: the macros.xml import plus the macro and token local to this tool --> | |
| 4 <import>macros.xml</import> <!-- NOTE(review): presumably supplies the "requirements" and "subsample_common" macros and the @TOOL_VERSION@ / @PROFILE@ tokens used in this file; confirm in macros.xml --> | |
| 5 <!-- Taxonomic-level selector, expanded under both the rel_ab and rel_ab_w_read_stats analysis types --> | |
| 6 <xml name="tax_lev"> | |
| 7 <conditional name="tax_lev"> | |
| 8 <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output"> | |
| 9 <option value="a" selected="true">All taxonomic levels</option> | |
| 10 <option value="k">Kingdoms only</option> | |
| 11 <option value="p">Phyla only</option> | |
| 12 <option value="c">Classes only</option> | |
| 13 <option value="o">Orders only</option> | |
| 14 <option value="f">Families only</option> | |
| 15 <option value="g">Genera only</option> | |
| 16 <option value="s">Species only</option> | |
| 17 </param> | |
| 18 <when value="a"> <!-- split_levels is offered only when all taxonomic levels are reported --> | |
| 19 <param name="split_levels" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate a report for each taxonomic level?" help="It will be in addition to the default output"/> | |
| 20 </when> | |
| 21 <when value="k"/> | |
| 22 <when value="p"/> | |
| 23 <when value="c"/> | |
| 24 <when value="o"/> | |
| 25 <when value="f"/> | |
| 26 <when value="g"/> | |
| 27 <when value="s"/> | |
| 28 </conditional> | |
| 29 </xml> | |
| 30 <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz</token> <!-- Datatypes accepted by the raw-read input parameters --> | |
| 31 </macros> | |
| 32 <expand macro="requirements"/> | |
| 33 <version_command>metaphlan -v</version_command> | |
| 34 <command detect_errors="aggressive"><![CDATA[ | |
| 35 #if $inputs.in.selector == "raw" | |
| 36 #if $inputs.in.raw_in.selector == "single" | |
| 37 #set full_ext=$inputs.in.raw_in.in.datatype.file_ext | |
| 38 #if $full_ext.endswith("gz") | |
| 39 #set $file_path="in" | |
| 40 zcat '$inputs.in.raw_in.in' > '$file_path' && | |
| 41 #else if $full_ext.endswith("bz2") | |
| 42 #set $file_path="in" | |
| 43 bzcat '$inputs.in.raw_in.in' > '$file_path' && | |
| 44 #else | |
| 45 #set $file_path="'%s'" % $inputs.in.raw_in.in | |
| 46 #end if | |
| 47 #else if $inputs.in.raw_in.selector == "multiple" | |
| 48 #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext | |
| 49 #set file_path="" | |
| 50 #set sep="" | |
| 51 #for $i, $f in enumerate($inputs.in.raw_in.in) | |
| 52 #if $f.datatype.file_ext != $full_ext | |
| 53 echo "Different datatypes for input files" && | |
| 54 exit 1 | |
| 55 #end if | |
| 56 #if $full_ext.endswith("gz") | |
| 57 #set fp="input_%s" % ($i) | |
| 58 zcat '$f' > '$fp' && | |
| 59 #else if $full_ext.endswith("bz2") | |
| 60 #set fp="input_%s" % ($i) | |
| 61 bzcat '$f' > '$fp' && | |
| 62 #else | |
| 63 #set fp=$f | |
| 64 #end if | |
| 65 #set $file_path+="'%s%s'" % ($sep, $fp) | |
| 66 #set $sep="," | |
| 67 #end for | |
| 68 #else if $inputs.in.raw_in.selector == "paired" | |
| 69 #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext | |
| 70 #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext | |
| 71 echo "Different datatypes for input paired-end files" && | |
| 72 exit 1 | |
| 73 #end if | |
| 74 #if $full_ext.endswith("gz") | |
| 75 zcat '$inputs.in.raw_in.in_f' > 'in_f' && | |
| 76 zcat '$inputs.in.raw_in.in_r' > 'in_r' && | |
| 77 #else if $full_ext.endswith("bz2") | |
| 78 bzcat '$inputs.in.raw_in.in_f' > 'in_f' && | |
| 79 bzcat '$inputs.in.raw_in.in_r' > 'in_r' && | |
| 80 #else | |
| 81 ln -s '$inputs.in.raw_in.in_f' 'in_f' && | |
| 82 ln -s '$inputs.in.raw_in.in_r' 'in_r' && | |
| 83 #end if | |
| 84 ## paired data has by default no special treatment, i.e. it is given as comma separated list | |
| 85 ## except iff paired subsampling where -1 and -2 must be used | |
| 86 #if $subsample.selector == 'paired' | |
| 87 #set file_path="-1 in_f -2 in_r" | |
| 88 #else | |
| 89 #set file_path="in_f,in_r" | |
| 90 #end if | |
| 91 #else if $inputs.in.raw_in.selector == "paired_collection" | |
| 92 #set full_ext=$inputs.in.raw_in.in.forward.ext | |
| 93 #if $full_ext != $inputs.in.raw_in.in.reverse.ext | |
| 94 echo "Different datatypes for input paired-end files" && | |
| 95 exit 1 | |
| 96 #end if | |
| 97 #if $full_ext.endswith("gz") | |
| 98 zcat '$inputs.in.raw_in.in.forward' > 'in_f' && | |
| 99 zcat '$inputs.in.raw_in.in.reverse' > 'in_r' && | |
| 100 #else if $full_ext.endswith("bz2") | |
| 101 bzcat '$inputs.in.raw_in.in.forward' > 'in_f' && | |
| 102 bzcat '$inputs.in.raw_in.in.reverse' > 'in_r' && | |
| 103 #else | |
| 104 ln -s '$inputs.in.raw_in.in.forward' 'in_f' && | |
| 105 ln -s '$inputs.in.raw_in.in.reverse' 'in_r' && | |
| 106 #end if | |
| 107 #if $subsample.selector == 'paired' | |
| 108 #set file_path="-1 in_f -2 in_r" | |
| 109 #else | |
| 110 #set file_path="in_f,in_r" | |
| 111 #end if | |
| 112 #end if | |
| 113 ## Collapse the Galaxy datatype to the value passed as input_type: any fastq* becomes fastq, compressed fasta becomes fasta | |
| 114 #if $full_ext.startswith("fastq") | |
| 115 #set ext='fastq' | |
| 116 #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2")) | |
| 117 #set ext='fasta' | |
| 118 #else | |
| 119 #set ext=$full_ext | |
| 120 #end if | |
| 121 #end if | |
| 122 ## History database: build a Bowtie2 index as ref_db/custom_db and convert the JSON metadata into ref_db/custom_db.pkl | |
| 123 #if $inputs.db.db_selector == "history" | |
| 124 mkdir 'ref_db' && | |
| 125 bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db' && | |
| 126 python | |
| 127 '$__tool_directory__/customizemetadata.py' | |
| 128 transform_json_to_pkl | |
| 129 --json '$inputs.db.mpa_pkl' | |
| 130 --pkl 'ref_db/custom_db.pkl' && | |
| 131 #end if | |
| 132 ## Main MetaPhlAn call; input, database and analysis arguments are assembled from the selected options | |
| 133 metaphlan | |
| 134 #if $inputs.in.selector == "raw" | |
| 135 $file_path | |
| 136 --input_type '$ext' | |
| 137 --read_min_len $inputs.in.read_min_len | |
| 138 --bt2_ps '$inputs.in.mapping.bt2_ps' | |
| 139 --min_mapq_val $inputs.in.mapping.min_mapq_val | |
| 140 #if $ext == "sam" | |
| 141 --nreads \$(cat '$file_path' | grep -c -v '^@') | |
| 142 #end if | |
| 143 #else | |
| 144 '$inputs.in.in' | |
| 145 --input_type '$inputs.in.selector' | |
| 146 #if $inputs.in.selector == "sam" | |
| 147 --nreads \$(cat '$inputs.in.in' | grep -c -v '^@') | |
| 148 #end if | |
| 149 #end if | |
| 150 #if $inputs.db.db_selector == "cached" | |
| 151 --bowtie2db '$inputs.db.cached_db.fields.path' | |
| 152 --index '$inputs.db.cached_db.fields.dbkey' | |
| 153 #else | |
| 154 --bowtie2db 'ref_db/' | |
| 155 --index 'custom_db' | |
| 156 #end if | |
| 157 -t '$analysis.analysis_type.t' | |
| 158 #if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats" | |
| 159 --tax_lev '$analysis.analysis_type.tax_lev.tax_lev' | |
| 160 #else if $analysis.analysis_type.t == "clade_specific_strain_tracker" | |
| 161 --clade '$analysis.analysis_type.clade' | |
| 162 #if str($analysis.analysis_type.min_ab) != '' | |
| 163 --min_ab $analysis.analysis_type.min_ab | |
| 164 #end if | |
| 165 #else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != '' | |
| 166 --nreads $analysis.analysis_type.nreads | |
| 167 #else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != '' | |
| 168 --pres_th $analysis.analysis_type.pres_th | |
| 169 #end if | |
| 170 --min_cu_len $analysis.min_cu_len | |
| 171 #if str($analysis.min_alignment_len) != '' | |
| 172 --min_alignment_len $analysis.min_alignment_len | |
| 173 #end if | |
| 174 #if 'add_viruses' in $analysis.organism_profiling | |
| 175 --add_viruses | |
| 176 #end if | |
| 177 #if 'ignore_eukaryotes' in $analysis.organism_profiling | |
| 178 --ignore_eukaryotes | |
| 179 #end if | |
| 180 #if 'ignore_bacteria' in $analysis.organism_profiling | |
| 181 --ignore_bacteria | |
| 182 #end if | |
| 183 #if 'ignore_archaea' in $analysis.organism_profiling | |
| 184 --ignore_archaea | |
| 185 #end if | |
| 186 --stat $analysis.stat | |
| 187 --stat_q $analysis.stat_q | |
| 188 --perc_nonzero $analysis.perc_nonzero | |
| 189 #if $analysis.ignore_markers | |
| 190 --ignore_markers '$analysis.ignore_markers' | |
| 191 #end if | |
| 192 $analysis.avoid_disqm | |
| 193 --sample_id_key '$out.sample_id_key' | |
| 194 --sample_id '$out.sample_id' | |
| 195 $out.use_group_representative | |
| 196 $out.legacy_output | |
| 197 $out.CAMI_format_output | |
| 198 $out.unclassified_estimation | |
| 199 -o '$output_file' | |
| 200 --bowtie2out 'bowtie2out' | |
| 201 -s 'sam_output_file' | |
| 202 --biom '$biom_output_file' | |
| 203 --nproc \${GALAXY_SLOTS:-4} | |
| 204 #if $viral_analysis.profile_vsc | |
| 205 $viral_analysis.profile_vsc | |
| 206 --vsc_out '$vcs_breath_coverage' | |
| 207 --vsc_breadth $viral_analysis.vsc_breadth | |
| 208 #end if | |
| 209 ## Optional read subsampling (single or paired); the subsampled reads are written under the subsampled.out name | |
| 210 #if $subsample.selector != "no" | |
| 211 #if $subsample.selector == "single" | |
| 212 --subsampling $subsample.subsampling | |
| 213 #else | |
| 214 --subsampling_paired $subsample.subsampling_paired | |
| 215 #end if | |
| 216 $subsample.mapping_subsampling | |
| 217 #if $subsample.subsampling_seed | |
| 218 --subsampling_seed $subsample.subsampling_seed | |
| 219 #end if | |
| 220 --subsampling_output subsampled.out | |
| 221 #end if | |
| 222 ## The hidden "test" parameter defaults to "false", so regular runs are always started offline | |
| 223 #if $test == "false" | |
| 224 --offline | |
| 225 #end if | |
| 226 ## Post-processing steps below are chained onto the metaphlan call with "&&" | |
| 227 ## Run formatoutput.py split_levels into split_levels/, the directory the "levels" output collection discovers | |
| 228 #if $analysis.analysis_type.t in ['rel_ab', 'rel_ab_w_read_stats'] | |
| 229 #if $analysis.analysis_type.tax_lev.tax_lev == 'a' and $analysis.analysis_type.tax_lev.split_levels | |
| 230 && | |
| 231 mkdir 'split_levels' | |
| 232 && | |
| 233 python '$__tool_directory__/formatoutput.py' | |
| 234 split_levels | |
| 235 --metaphlan_output '$output_file' | |
| 236 --outdir 'split_levels' | |
| 237 $out.legacy_output | |
| 238 #end if | |
| 239 #end if | |
| 240 ## Optional Krona table: formatoutput.py format_for_krona writes to the krona_output_file dataset | |
| 241 #if $out.krona_output | |
| 242 && | |
| 243 python '$__tool_directory__/formatoutput.py' | |
| 244 format_for_krona | |
| 245 --metaphlan_output '$output_file' | |
| 246 --krona_output '$krona_output_file' | |
| 247 #end if | |
| 248 ]]></command> | |
| 249 <inputs> | |
| 250 <section name="inputs" title="Inputs" expanded="true"> <!-- Read input (raw reads, SAM or a previous mapping file) and the marker database to profile against --> | |
| 251 <conditional name="in"> | |
| 252 <param name="selector" type="select" label="Input(s)"> | |
| 253 <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option> | |
| 254 <option value="sam">Externally BowTie2-mapped SAM file</option> | |
| 255 <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option> | |
| 256 </param> | |
| 257 <when value="raw"> <!-- Raw reads: choose the file layout, then the read-length filter and Bowtie2 mapping settings --> | |
| 258 <conditional name="raw_in"> | |
| 259 <param name="selector" type="select" label="Fasta/FastQ file(s) with microbiota reads"> | |
| 260 <option value="single" selected="true">One single-end file</option> | |
| 261 <option value="multiple">Multiple single-end files</option> | |
| 262 <option value="paired_collection">Paired-end collection</option> | |
| 263 <option value="paired">Paired-end files</option> | |
| 264 </param> | |
| 265 <when value="single"> | |
| 266 <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with microbiota reads"/> | |
| 267 </when> | |
| 268 <when value="multiple"> | |
| 269 <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ files with microbiota reads" multiple="true"/> | |
| 270 </when> | |
| 271 <when value="paired_collection"> | |
| 272 <param name="in" type="data_collection" format="@FILE_FORMATS@" label="Paired-end Fasta/FastQ file with microbiota reads" collection_type="paired"/> | |
| 273 </when> | |
| 274 <when value="paired"> | |
| 275 <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with microbiota reads"/> | |
| 276 <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with microbiota reads"/> | |
| 277 </when> | |
| 278 </conditional> | |
| 279 <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/> | |
| 280 <section name="mapping" title="Mapping" expanded="true"> | |
| 281 <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files"> | |
| 282 <option value="sensitive">Sensitive</option> | |
| 283 <option value="very-sensitive" selected="true">Very sensitive</option> | |
| 284 <option value="sensitive-local">Sensitive local</option> | |
| 285 <option value="very-sensitive-local">Very sensitive local</option> | |
| 286 </param> | |
| 287 <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/> | |
| 288 </section> | |
| 289 </when> | |
| 290 <when value="sam"> | |
| 291 <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/> | |
| 292 </when> | |
| 293 <when value="bowtie2out"> | |
| 294 <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" help="File needs to be generated with MetaPhlAn versions >3.0"/> | |
| 295 </when> | |
| 296 </conditional> | |
| 297 <conditional name="db"> <!-- Marker database: a cached data-table entry, or a FASTA plus JSON metadata from the history that the command indexes at run time --> | |
| 298 <param name="db_selector" type="select" label="Database with clade-specific marker genes"> | |
| 299 <option value="cached" selected="true">Locally cached</option> | |
| 300 <option value="history">From history</option> | |
| 301 </param> | |
| 302 <when value="cached"> | |
| 303 <param name="cached_db" type="select" label="Cached database with clade-specific marker genes"> | |
| 304 <options from_data_table="@IDX_DATA_TABLE@"> | |
| 305 <filter type="static_value" column="4" value="@IDX_VERSION@"/> | |
| 306 <validator message="No compatible MetaPhlAn database is available" type="no_options"/> | |
| 307 </options> | |
| 308 </param> | |
| 309 </when> | |
| 310 <when value="history"> | |
| 311 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/> | |
| 312 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/> | |
| 313 </when> | |
| 314 </conditional> | |
| 315 </section> | |
| 316 <section name="analysis" title="Analysis" expanded="true"> <!-- Analysis type plus the abundance-estimation settings passed to metaphlan --> | |
| 317 <conditional name="analysis_type"> <!-- Each analysis type exposes only the extra options the command emits for it --> | |
| 318 <param argument="-t" type="select" label="Type of analysis to perform"> | |
| 319 <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option> | |
| 320 <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota in terms of relative abundances and estimate the number of reads coming from each clade</option> | |
| 321 <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option> | |
| 322 <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option> | |
| 323 <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option> | |
| 324 <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by microbiota size if number of reads is specified)</option> | |
| 325 <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option> | |
| 326 <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option> | |
| 327 </param> | |
| 328 <when value="rel_ab"> | |
| 329 <expand macro="tax_lev"/> | |
| 330 </when> | |
| 331 <when value="rel_ab_w_read_stats"> | |
| 332 <expand macro="tax_lev"/> | |
| 333 </when> | |
| 334 <when value="reads_map"/> | |
| 335 <when value="clade_profiles"/> | |
| 336 <when value="clade_specific_strain_tracker"> | |
| 337 <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" help="Markers are also extracted for subclades"/> | |
| 338 <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/> | |
| 339 </when> | |
| 340 <when value="marker_ab_table"> | |
| 341 <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/> | |
| 342 </when> | |
| 343 <when value="marker_counts"/> | |
| 344 <when value="marker_pres_table"> | |
| 345 <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/> | |
| 346 </when> | |
| 347 </conditional> | |
| 348 <param argument="--min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/> | |
| 349 <param argument="--min_alignment_len" type="integer" optional="true" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/> | |
| 350 <param name="organism_profiling" type="select" optional="true" label="Organisms to profile" multiple="true"> <!-- Every selected value is emitted by the command as a flag of the same name --> | |
| 351 <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option> | |
| 352 <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option> | |
| 353 <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option> | |
| 354 <option value="ignore_archaea">Ignore archaea organisms (ignore_archaea)</option> | |
| 355 </param> | |
| 356 <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances"> | |
| 357 <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option> | |
| 358 <option value="avg_l">avg_l: Average of length-normalized marker counts</option> | |
| 359 <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option> | |
| 360 <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option> | |
| 361 <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option> | |
| 362 <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option> | |
| 363 <option value="med">med: Median of length-normalized marker counts</option> | |
| 364 </param> | |
| 365 <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/> | |
| 366 <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/> | |
| 367 <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/> | |
| 368 <param argument="--avoid_disqm" type="boolean" truevalue="--avoid_disqm" falsevalue="" checked="true" label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?" help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/> | |
| 369 </section> | |
| 370 <conditional name="subsample"> <!-- Optional read subsampling; choosing "paired" also makes the command pass paired inputs as -1/-2 instead of a comma-separated list --> | |
| 371 <param name="selector" type="select" label="Subsample" help="Subsampling only works for fastq input"> | |
| 372 <option value="no">No</option> | |
| 373 <option value="single">Yes: specify number of reads</option> | |
| 374 <option value="paired">Yes: specify number of paired reads</option> | |
| 375 </param> | |
| 376 <when value="no"/> | |
| 377 <when value="single"> | |
| 378 <param argument="--subsampling" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of reads to be considered"/> | |
| 379 <expand macro="subsample_common"/> <!-- NOTE(review): not defined in this file; presumably declares mapping_subsampling and subsampling_seed, which the command reads - confirm in macros.xml --> | |
| 380 </when> | |
| 381 <when value="paired"> | |
| 382 <param argument="--subsampling_paired" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of paired reads to be considered. For N there will be floor(N/2) reads selected from the forward and reverse reads each."/> | |
| 383 <expand macro="subsample_common"/> | |
| 384 </when> | |
| 385 </conditional> | |
| 386 <conditional name="viral_analysis"> <!-- VSC profiling: the selected option value is emitted verbatim on the command line and, when non-empty, the command also adds the vsc_out and vsc_breadth arguments --> | |
| 387 <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach"> | |
| 388 <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option> | |
| 389 <option value="" selected="true">No</option> | |
| 390 </param> | |
| 391 <when value="--profile_vsc"> | |
| 392 <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/> | |
| 393 </when> | |
| 394 <when value=""/> | |
| 395 </conditional> | |
| 396 <section name="out" title="Outputs" expanded="true"> <!-- Output formatting: the flag booleans place their truevalue directly on the command line, while krona_output only gates a post-processing step --> | |
| 397 <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/> | |
| 398 <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/> | |
| 399 <param argument="--use_group_representative" type="boolean" truevalue="--use_group_representative" falsevalue="" checked="false" label="Use a species as representative for species groups?"/> | |
| 400 <param argument="--legacy-output" type="boolean" truevalue="--legacy-output" falsevalue="" checked="false" label="Old MetaPhlAn2 two columns output?"/> <!-- read by the command as out.legacy_output, also forwarded to formatoutput.py split_levels --> | |
| 401 <param argument="--CAMI_format_output" type="boolean" truevalue="--CAMI_format_output" falsevalue="" checked="false" label="Report the profiling using the CAMI output format?"/> | |
| 402 <param argument="--unclassified_estimation" type="boolean" truevalue="--unclassified_estimation" falsevalue="" checked="false" label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/> | |
| 403 <param name="krona_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output for Krona?"/> | |
| 404 </section> | |
| 405 <!-- setting this to anything other than "false" in a test drops the offline flag, which allows metaphlan to download reference data (we do this only with the smallish TOY DB) --> | |
| 406 <param name="test" type="hidden" value="false"/> | |
| 407 </inputs> | |
| 408 <outputs> <!-- Two unconditional datasets (profile and BIOM) plus outputs filtered on the options that produce them --> | |
| 409 <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances"/> | |
| 410 <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file"/> | |
| 411 <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels"> <!-- Collected from split_levels/, which the command only creates under the same condition as the filter below --> | |
| 412 <discover_datasets pattern="(?P<designation>.+)" directory="split_levels/" format="tabular"/> | |
| 413 <filter>analysis['analysis_type']['t'] in ['rel_ab', 'rel_ab_w_read_stats'] and analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']</filter> | |
| 414 </collection> | |
| 415 <data name="krona_output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances for Krona"> | |
| 416 <filter>out['krona_output']</filter> | |
| 417 </data> | |
| 418 <data name="vcs_breath_coverage" format="tabular" label="${tool.name} on ${on_string}: VSCs breadth-of-coverage"> <!-- NOTE(review): the name looks like a misspelling of "vsc_breadth_coverage"; it must stay in sync with the vsc_out argument in the command --> | |
| 419 <filter>viral_analysis['profile_vsc']</filter> | |
| 420 </data> | |
| 421 <data name="subsample_single" format="fastqsanger" from_work_dir="subsampled.out" label="${tool.name} on ${on_string}: Subsampled reads"> | |
| 422 <filter>subsample['selector'] == 'single'</filter> | |
| 423 </data> | |
| 424 <collection name="subsample_paired" type="paired" label="${tool.name} on ${on_string}: Subsampled paired reads"> <!-- NOTE(review): assumes metaphlan derives the R1/R2 file names from the subsampled.out name given on the command line; confirm --> | |
| 425 <data name="forward" format="fastqsanger" from_work_dir="subsampled.R1.out"/> | |
| 426 <data name="reverse" format="fastqsanger" from_work_dir="subsampled.R2.out"/> | |
| 427 <filter>subsample['selector'] == 'paired'</filter> | |
| 428 </collection> | |
| 429 </outputs> | |
| 430 <tests> | |
| 431 <!-- Single uncompressed FASTA file (no_taxon_input.fasta), Cached db --> | |
| 432 <test expect_num_outputs="6"> | |
| 433 <section name="inputs"> | |
| 434 <conditional name="in"> | |
| 435 <param name="selector" value="raw"/> | |
| 436 <conditional name="raw_in"> | |
| 437 <param name="selector" value="single"/> | |
| 438 <param name="in" value="no_taxon_input.fasta"/> | |
| 439 </conditional> | |
| 440 <param name="read_min_len" value="70"/> | |
| 441 <section name="mapping"> | |
| 442 <param name="bt2_ps" value="sensitive"/> | |
| 443 <param name="min_mapq_val" value="5"/> | |
| 444 </section> | |
| 445 </conditional> | |
| 446 <conditional name="db"> | |
| 447 <param name="db_selector" value="cached"/> | |
| 448 <param name="cached_db" value="test-db-20210409"/> | |
| 449 </conditional> | |
| 450 </section> | |
| 451 <section name="analysis"> | |
| 452 <conditional name="analysis_type"> | |
| 453 <param name="t" value="rel_ab"/> | |
| 454 <conditional name="tax_lev"> | |
| 455 <param name="tax_lev" value="a"/> | |
| 456 <param name="split_levels" value="true"/> | |
| 457 </conditional> | |
| 458 </conditional> | |
| 459 <param name="min_cu_len" value="2000"/> | |
| 460 <param name="organism_profiling" value="add_viruses"/> | |
| 461 <param name="stat" value="avg_g"/> | |
| 462 <param name="stat_q" value="0.2"/> | |
| 463 <param name="perc_nonzero" value="0.33"/> | |
| 464 <param name="avoid_disqm" value="true"/> | |
| 465 </section> | |
| 466 <section name="out"> | |
| 467 <param name="sample_id_key" value="SampleID"/> | |
| 468 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 469 <param name="use_group_representative" value="false"/> | |
| 470 <param name="legacy_output" value="false"/> | |
| 471 <param name="CAMI_format_output" value="false"/> | |
| 472 <param name="unclassified_estimation" value="false"/> | |
| 473 <param name="krona_output" value="true"/> | |
| 474 </section> | |
| 475 <output name="output_file" ftype="tabular"> | |
| 476 <assert_contents> | |
| 477 <has_text text="UNCLASSIFIED"/> | |
| 478 </assert_contents> | |
| 479 </output> | |
| 480 <output name="biom_output_file" ftype="biom1"> | |
| 481 <assert_contents> | |
| 482 <not_has_text text="k__Bacteria"/> | |
| 483 <not_has_text text="p__Actinobacteria"/> | |
| 484 </assert_contents> | |
| 485 </output> | |
| 486 <output_collection name="levels" type="list"> | |
| 487 <element name="all" ftype="tabular"> | |
| 488 <assert_contents> | |
| 489 <has_text text="class"/> | |
| 490 <has_n_columns n="17"/> | |
| 491 <has_n_lines n="1"/> | |
| 492 </assert_contents> | |
| 493 </element> | |
| 494 <element name="class" ftype="tabular"> | |
| 495 <assert_contents> | |
| 496 <has_text text="class_id"/> | |
| 497 <not_has_text text="phylum_id"/> | |
| 498 <has_n_columns n="3"/> | |
| 499 <has_n_lines n="1"/> | |
| 500 </assert_contents> | |
| 501 </element> | |
| 502 <element name="family" ftype="tabular"> | |
| 503 <assert_contents> | |
| 504 <has_text text="family_id"/> | |
| 505 <not_has_text text="order"/> | |
| 506 <has_n_columns n="3"/> | |
| 507 <has_n_lines n="1"/> | |
| 508 </assert_contents> | |
| 509 </element> | |
| 510 <element name="genus" ftype="tabular"> | |
| 511 <assert_contents> | |
| 512 <has_text text="genus_id"/> | |
| 513 <not_has_text text="family"/> | |
| 514 <has_n_columns n="3"/> | |
| 515 <has_n_lines n="1"/> | |
| 516 </assert_contents> | |
| 517 </element> | |
| 518 <element name="kingdom" ftype="tabular"> | |
| 519 <assert_contents> | |
| 520 <has_text text="kingdom_id"/> | |
| 521 <has_n_columns n="3"/> | |
| 522 <has_n_lines n="1"/> | |
| 523 </assert_contents> | |
| 524 </element> | |
| 525 <element name="order" ftype="tabular"> | |
| 526 <assert_contents> | |
| 527 <has_text text="order_id"/> | |
| 528 <not_has_text text="class_id"/> | |
| 529 <has_n_columns n="3"/> | |
| 530 <has_n_lines n="1"/> | |
| 531 </assert_contents> | |
| 532 </element> | |
| 533 <element name="phylum" ftype="tabular"> | |
| 534 <assert_contents> | |
| 535 <has_text text="phylum_id"/> | |
| 536 <not_has_text text="kingdom_id"/> | |
| 537 <has_n_columns n="3"/> | |
| 538 <has_n_lines n="1"/> | |
| 539 </assert_contents> | |
| 540 </element> | |
| 541 <element name="species" ftype="tabular"> | |
| 542 <assert_contents> | |
| 543 <has_text text="species_id"/> | |
| 544 <not_has_text text="genus"/> | |
| 545 <has_n_columns n="3"/> | |
| 546 <has_n_lines n="1"/> | |
| 547 </assert_contents> | |
| 548 </element> | |
| 549 <element name="strains" ftype="tabular"> | |
| 550 <assert_contents> | |
| 551 <has_text text="strains_id"/> | |
| 552 <not_has_text text="species_id"/> | |
| 553 <has_n_columns n="3"/> | |
| 554 <has_n_lines n="1"/> | |
| 555 </assert_contents> | |
| 556 </element> | |
| 557 </output_collection> | |
| 558 <output name="krona_output_file" ftype="tabular"> | |
| 559 <assert_contents> | |
| 560 <not_has_text text="k__Bacteria"/> | |
| 561 <has_n_lines n="1" delta="1"/> | |
| 562 <has_size value="1" delta="1"/> | |
| 563 </assert_contents> | |
| 564 </output> | |
| 565 <assert_stderr> | |
| 566 <has_text text="Downloading" negate="true"/> | |
| 567 </assert_stderr> | |
| 568 </test> | |
| 569 <!-- Single GZ file, Cached db --> | |
| 570 <test expect_num_outputs="6"> | |
| 571 <section name="inputs"> | |
| 572 <conditional name="in"> | |
| 573 <param name="selector" value="raw"/> | |
| 574 <conditional name="raw_in"> | |
| 575 <param name="selector" value="single"/> | |
| 576 <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/> | |
| 577 </conditional> | |
| 578 <param name="read_min_len" value="70"/> | |
| 579 <section name="mapping"> | |
| 580 <param name="bt2_ps" value="sensitive"/> | |
| 581 <param name="min_mapq_val" value="5"/> | |
| 582 </section> | |
| 583 </conditional> | |
| 584 <conditional name="db"> | |
| 585 <param name="db_selector" value="cached"/> | |
| 586 <param name="cached_db" value="test-db-20210409"/> | |
| 587 </conditional> | |
| 588 </section> | |
| 589 <section name="analysis"> | |
| 590 <conditional name="analysis_type"> | |
| 591 <param name="t" value="rel_ab"/> | |
| 592 <conditional name="tax_lev"> | |
| 593 <param name="tax_lev" value="a"/> | |
| 594 <param name="split_levels" value="true"/> | |
| 595 </conditional> | |
| 596 </conditional> | |
| 597 <param name="min_cu_len" value="2000"/> | |
| 598 <param name="organism_profiling" value="add_viruses"/> | |
| 599 <param name="stat" value="avg_g"/> | |
| 600 <param name="stat_q" value="0.2"/> | |
| 601 <param name="perc_nonzero" value="0.33"/> | |
| 602 <param name="avoid_disqm" value="true"/> | |
| 603 </section> | |
| 604 <section name="out"> | |
| 605 <param name="sample_id_key" value="SampleID"/> | |
| 606 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 607 <param name="use_group_representative" value="false"/> | |
| 608 <param name="legacy_output" value="false"/> | |
| 609 <param name="CAMI_format_output" value="false"/> | |
| 610 <param name="unclassified_estimation" value="false"/> | |
| 611 <param name="krona_output" value="true"/> | |
| 612 </section> | |
| 613 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 614 <assert_contents> | |
| 615 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 616 </assert_contents> | |
| 617 </output> | |
| 618 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 619 <assert_contents> | |
| 620 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 621 </assert_contents> | |
| 622 </output> | |
| 623 <output_collection name="levels" type="list"> | |
| 624 <element name="all" ftype="tabular"> | |
| 625 <assert_contents> | |
| 626 <has_text text="Gammaproteobacteria"/> | |
| 627 <has_text text="Corynebacterium accolens"/> | |
| 628 <has_n_columns n="17"/> | |
| 629 </assert_contents> | |
| 630 </element> | |
| 631 <element name="class" ftype="tabular"> | |
| 632 <assert_contents> | |
| 633 <has_text text="class_id"/> | |
| 634 <not_has_text text="phylum_id"/> | |
| 635 <has_text text="Actinobacteria"/> | |
| 636 <has_n_columns n="3"/> | |
| 637 </assert_contents> | |
| 638 </element> | |
| 639 <element name="family" ftype="tabular"> | |
| 640 <assert_contents> | |
| 641 <has_text text="family_id"/> | |
| 642 <not_has_text text="order"/> | |
| 643 <has_text text="Propionibacteriaceae"/> | |
| 644 <has_n_columns n="3"/> | |
| 645 </assert_contents> | |
| 646 </element> | |
| 647 <element name="genus" ftype="tabular"> | |
| 648 <assert_contents> | |
| 649 <has_text text="genus_id"/> | |
| 650 <not_has_text text="family"/> | |
| 651 <has_text text="Cutibacterium"/> | |
| 652 <has_n_columns n="3"/> | |
| 653 </assert_contents> | |
| 654 </element> | |
| 655 <element name="kingdom" ftype="tabular"> | |
| 656 <assert_contents> | |
| 657 <has_text text="kingdom_id"/> | |
| 658 <has_text text="Bacteria"/> | |
| 659 <has_n_columns n="3"/> | |
| 660 </assert_contents> | |
| 661 </element> | |
| 662 <element name="order" ftype="tabular"> | |
| 663 <assert_contents> | |
| 664 <has_text text="order_id"/> | |
| 665 <not_has_text text="class_id"/> | |
| 666 <has_text text="Propionibacteriales"/> | |
| 667 <has_n_columns n="3"/> | |
| 668 </assert_contents> | |
| 669 </element> | |
| 670 <element name="phylum" ftype="tabular"> | |
| 671 <assert_contents> | |
| 672 <has_text text="phylum_id"/> | |
| 673 <not_has_text text="kingdom_id"/> | |
| 674 <has_text text="Firmicutes"/> | |
| 675 <has_n_columns n="3"/> | |
| 676 </assert_contents> | |
| 677 </element> | |
| 678 <element name="species" ftype="tabular"> | |
| 679 <assert_contents> | |
| 680 <has_text text="species_id"/> | |
| 681 <not_has_text text="genus"/> | |
| 682 <has_text text="Corynebacterium accolens"/> | |
| 683 <has_n_columns n="3"/> | |
| 684 </assert_contents> | |
| 685 </element> | |
| 686 <element name="strains" ftype="tabular"> | |
| 687 <assert_contents> | |
| 688 <has_text text="strains_id"/> | |
| 689 <not_has_text text="species_id"/> | |
| 690 <has_n_columns n="3"/> | |
| 691 </assert_contents> | |
| 692 </element> | |
| 693 </output_collection> | |
| 694 <output name="krona_output_file" ftype="tabular"> | |
| 695 <assert_contents> | |
| 696 <not_has_text text="k__Bacteria"/> | |
| 697 <has_text text="Corynebacterium accolens"/> | |
| 698 <has_n_columns n="9"/> | |
| 699 </assert_contents> | |
| 700 </output> | |
| 701 <assert_stderr> | |
| 702 <has_text text="Downloading" negate="true"/> | |
| 703 </assert_stderr> | |
| 704 </test> | |
| 705 <!-- Multiple GZ files, local db (from history) --> | |
| 706 <test expect_num_outputs="4"> | |
| 707 <section name="inputs"> | |
| 708 <conditional name="in"> | |
| 709 <param name="selector" value="raw"/> | |
| 710 <conditional name="raw_in"> | |
| 711 <param name="selector" value="multiple"/> | |
| 712 <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/> | |
| 713 </conditional> | |
| 714 <param name="read_min_len" value="70"/> | |
| 715 <section name="mapping"> | |
| 716 <param name="bt2_ps" value="sensitive"/> | |
| 717 <param name="min_mapq_val" value="5"/> | |
| 718 </section> | |
| 719 </conditional> | |
| 720 <conditional name="db"> | |
| 721 <param name="db_selector" value="history"/> | |
| 722 <param name="bowtie2db" value="test-db.fasta"/> | |
| 723 <param name="mpa_pkl" value="test-db.json"/> | |
| 724 </conditional> | |
| 725 </section> | |
| 726 <section name="analysis"> | |
| 727 <conditional name="analysis_type"> | |
| 728 <param name="t" value="rel_ab"/> | |
| 729 <conditional name="tax_lev"> | |
| 730 <param name="tax_lev" value="a"/> | |
| 731 <param name="split_levels" value="false"/> | |
| 732 </conditional> | |
| 733 </conditional> | |
| 734 <param name="min_cu_len" value="2000"/> | |
| 735 <param name="organism_profiling" value="add_viruses"/> | |
| 736 <param name="stat" value="avg_g"/> | |
| 737 <param name="stat_q" value="0.2"/> | |
| 738 <param name="perc_nonzero" value="0.33"/> | |
| 739 <param name="avoid_disqm" value="true"/> | |
| 740 </section> | |
| 741 <section name="out"> | |
| 742 <param name="sample_id_key" value="SampleID"/> | |
| 743 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 744 <param name="use_group_representative" value="false"/> | |
| 745 <param name="legacy_output" value="false"/> | |
| 746 <param name="CAMI_format_output" value="false"/> | |
| 747 <param name="unclassified_estimation" value="false"/> | |
| 748 <param name="krona_output" value="false"/> | |
| 749 </section> | |
| 750 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 751 <assert_contents> | |
| 752 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 753 <has_text text="relative_abundance"/> | |
| 754 <has_text text="NCBI_tax_id"/> | |
| 755 <has_text text="clade_name"/> | |
| 756 </assert_contents> | |
| 757 </output> | |
| 758 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 759 <assert_contents> | |
| 760 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 761 </assert_contents> | |
| 762 </output> | |
| 763 <assert_stderr> | |
| 764 <has_text text="Downloading" negate="true"/> | |
| 765 </assert_stderr> | |
| 766 </test> | |
| 767 <!-- Paired GZ file, Cached db (note: subsample_paired and the included forward and reverse reads are counted separately, because they are all statically defined) --> | |
| 768 <test expect_num_outputs="7"> | |
| 769 <section name="inputs"> | |
| 770 <conditional name="in"> | |
| 771 <param name="selector" value="raw"/> | |
| 772 <conditional name="raw_in"> | |
| 773 <param name="selector" value="paired"/> | |
| 774 <param name="in_f" value="SRS014464-Anterior_nares.fastq.gz"/> | |
| 775 <param name="in_r" value="SRS014464-Anterior_nares.fastq.gz"/> | |
| 776 </conditional> | |
| 777 <param name="read_min_len" value="70"/> | |
| 778 <section name="mapping"> | |
| 779 <param name="bt2_ps" value="sensitive"/> | |
| 780 <param name="min_mapq_val" value="5"/> | |
| 781 </section> | |
| 782 </conditional> | |
| 783 <conditional name="db"> | |
| 784 <param name="db_selector" value="cached"/> | |
| 785 <param name="cached_db" value="test-db-20210409"/> | |
| 786 </conditional> | |
| 787 </section> | |
| 788 <section name="analysis"> | |
| 789 <conditional name="analysis_type"> | |
| 790 <param name="t" value="rel_ab"/> | |
| 791 <conditional name="tax_lev"> | |
| 792 <param name="tax_lev" value="a"/> | |
| 793 <param name="split_levels" value="false"/> | |
| 794 </conditional> | |
| 795 </conditional> | |
| 796 <param name="min_cu_len" value="2000"/> | |
| 797 <param name="organism_profiling" value="add_viruses"/> | |
| 798 <param name="stat" value="avg_g"/> | |
| 799 <param name="stat_q" value="0.2"/> | |
| 800 <param name="perc_nonzero" value="0.33"/> | |
| 801 <param name="avoid_disqm" value="true"/> | |
| 802 </section> | |
| 803 <conditional name="subsample"> | |
| 804 <param name="selector" value="paired"/> | |
| 805 <param name="subsampling_paired" value="20257"/> | |
| 806 <param name="subsampling_seed" value="42"/> | |
| 807 </conditional> | |
| 808 <section name="out"> | |
| 809 <param name="sample_id_key" value="SampleID"/> | |
| 810 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 811 <param name="use_group_representative" value="false"/> | |
| 812 <param name="legacy_output" value="false"/> | |
| 813 <param name="CAMI_format_output" value="false"/> | |
| 814 <param name="unclassified_estimation" value="false"/> | |
| 815 <param name="krona_output" value="false"/> | |
| 816 </section> | |
| 817 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 818 <assert_contents> | |
| 819 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 820 <has_text text="relative_abundance"/> | |
| 821 <has_text text="NCBI_tax_id"/> | |
| 822 <has_text text="clade_name"/> | |
| 823 </assert_contents> | |
| 824 </output> | |
| 825 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 826 <assert_contents> | |
| 827 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 828 </assert_contents> | |
| 829 </output> | |
| 830 <output_collection name="subsample_paired" type="paired"> | |
| 831 <element name="forward"> | |
| 832 <assert_contents> | |
| 833 <has_line_matching expression="^@.*" n="10128"/> | |
| 834 </assert_contents> | |
| 835 </element> | |
| 836 <element name="reverse"> | |
| 837 <assert_contents> | |
| 838 <has_line_matching expression="^@.*" n="10128"/> | |
| 839 </assert_contents> | |
| 840 </element> | |
| 841 </output_collection> | |
| 842 <assert_stderr> | |
| 843 <has_text text="Downloading" negate="true"/> | |
| 844 </assert_stderr> | |
| 845 </test> | |
| 846 <!-- Paired GZ file as collection, Cached db (note: subsample_paired and the included forward and reverse reads are counted separately, because they are all statically defined) --> | |
| 847 <test expect_num_outputs="7"> | |
| 848 <section name="inputs"> | |
| 849 <conditional name="in"> | |
| 850 <param name="selector" value="raw"/> | |
| 851 <conditional name="raw_in"> | |
| 852 <param name="selector" value="paired_collection"/> | |
| 853 <param name="in"> | |
| 854 <collection type="paired" name="pair"> | |
| 855 <element name="forward" value="SRS014464-Anterior_nares.fastq.gz"/> | |
| 856 <element name="reverse" value="SRS014464-Anterior_nares.fastq.gz"/> | |
| 857 </collection> | |
| 858 </param> | |
| 859 </conditional> | |
| 860 <param name="read_min_len" value="70"/> | |
| 861 <section name="mapping"> | |
| 862 <param name="bt2_ps" value="sensitive"/> | |
| 863 <param name="min_mapq_val" value="5"/> | |
| 864 </section> | |
| 865 </conditional> | |
| 866 <conditional name="db"> | |
| 867 <param name="db_selector" value="cached"/> | |
| 868 <param name="cached_db" value="test-db-20210409"/> | |
| 869 </conditional> | |
| 870 </section> | |
| 871 <section name="analysis"> | |
| 872 <conditional name="analysis_type"> | |
| 873 <param name="t" value="rel_ab"/> | |
| 874 <conditional name="tax_lev"> | |
| 875 <param name="tax_lev" value="a"/> | |
| 876 <param name="split_levels" value="false"/> | |
| 877 </conditional> | |
| 878 </conditional> | |
| 879 <param name="min_cu_len" value="2000"/> | |
| 880 <param name="organism_profiling" value="add_viruses"/> | |
| 881 <param name="stat" value="avg_g"/> | |
| 882 <param name="stat_q" value="0.2"/> | |
| 883 <param name="perc_nonzero" value="0.33"/> | |
| 884 <param name="avoid_disqm" value="true"/> | |
| 885 </section> | |
| 886 <conditional name="subsample"> | |
| 887 <param name="selector" value="paired"/> | |
| 888 <param name="subsampling_paired" value="20257"/> | |
| 889 <param name="subsampling_seed" value="42"/> | |
| 890 </conditional> | |
| 891 <section name="out"> | |
| 892 <param name="sample_id_key" value="SampleID"/> | |
| 893 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 894 <param name="use_group_representative" value="false"/> | |
| 895 <param name="legacy_output" value="false"/> | |
| 896 <param name="CAMI_format_output" value="false"/> | |
| 897 <param name="unclassified_estimation" value="false"/> | |
| 898 <param name="krona_output" value="false"/> | |
| 899 </section> | |
| 900 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 901 <assert_contents> | |
| 902 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 903 <has_text text="relative_abundance"/> | |
| 904 <has_text text="NCBI_tax_id"/> | |
| 905 <has_text text="clade_name"/> | |
| 906 </assert_contents> | |
| 907 </output> | |
| 908 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 909 <assert_contents> | |
| 910 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 911 </assert_contents> | |
| 912 </output> | |
| 913 <output_collection name="subsample_paired" type="paired"> | |
| 914 <element name="forward"> | |
| 915 <assert_contents> | |
| 916 <has_line_matching expression="^@.*" n="10128"/> | |
| 917 </assert_contents> | |
| 918 </element> | |
| 919 <element name="reverse"> | |
| 920 <assert_contents> | |
| 921 <has_line_matching expression="^@.*" n="10128"/> | |
| 922 </assert_contents> | |
| 923 </element> | |
| 924 </output_collection> | |
| 925 <assert_stderr> | |
| 926 <has_text text="Downloading" negate="true"/> | |
| 927 </assert_stderr> | |
| 928 </test> | |
| 929 <!-- Paired fastq file as collection, Cached db --> | |
| 930 <test expect_num_outputs="4"> | |
| 931 <section name="inputs"> | |
| 932 <conditional name="in"> | |
| 933 <param name="selector" value="raw"/> | |
| 934 <conditional name="raw_in"> | |
| 935 <param name="selector" value="paired_collection"/> | |
| 936 <param name="in"> | |
| 937 <collection type="paired" name="pair"> | |
| 938 <element name="forward" value="SRS014464-Anterior_nares_mini.fastq" /> | |
| 939 <element name="reverse" value="SRS014464-Anterior_nares_mini.fastq" /> | |
| 940 </collection> | |
| 941 </param> | |
| 942 </conditional> | |
| 943 <param name="read_min_len" value="70"/> | |
| 944 <section name="mapping"> | |
| 945 <param name="bt2_ps" value="sensitive"/> | |
| 946 <param name="min_mapq_val" value="5"/> | |
| 947 </section> | |
| 948 </conditional> | |
| 949 <conditional name="db"> | |
| 950 <param name="db_selector" value="cached"/> | |
| 951 <param name="cached_db" value="test-db-20210409"/> | |
| 952 </conditional> | |
| 953 </section> | |
| 954 <section name="analysis"> | |
| 955 <conditional name="analysis_type"> | |
| 956 <param name="t" value="rel_ab"/> | |
| 957 <conditional name="tax_lev"> | |
| 958 <param name="tax_lev" value="a"/> | |
| 959 <param name="split_levels" value="false"/> | |
| 960 </conditional> | |
| 961 </conditional> | |
| 962 <param name="min_cu_len" value="2000"/> | |
| 963 <param name="organism_profiling" value="add_viruses"/> | |
| 964 <param name="stat" value="avg_g"/> | |
| 965 <param name="stat_q" value="0.2"/> | |
| 966 <param name="perc_nonzero" value="0.33"/> | |
| 967 <param name="avoid_disqm" value="true"/> | |
| 968 </section> | |
| 969 <conditional name="subsample"> | |
| 970 <param name="selector" value="no"/> | |
| 971 </conditional> | |
| 972 <section name="out"> | |
| 973 <param name="sample_id_key" value="SampleID"/> | |
| 974 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 975 <param name="use_group_representative" value="false"/> | |
| 976 <param name="legacy_output" value="false"/> | |
| 977 <param name="CAMI_format_output" value="false"/> | |
| 978 <param name="unclassified_estimation" value="false"/> | |
| 979 <param name="krona_output" value="false"/> | |
| 980 </section> | |
| 981 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 982 <assert_contents> | |
| 983 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 984 <has_text text="relative_abundance"/> | |
| 985 <has_text text="NCBI_tax_id"/> | |
| 986 <has_text text="clade_name"/> | |
| 987 </assert_contents> | |
| 988 </output> | |
| 989 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 990 <assert_contents> | |
| 991 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 992 </assert_contents> | |
| 993 </output> | |
| 994 <assert_stderr> | |
| 995 <has_text text="Downloading" negate="true"/> | |
| 996 </assert_stderr> | |
| 997 </test> | |
| 998 <!-- SAM, cached DB --> | |
| 999 <test expect_num_outputs="2"> | |
| 1000 <section name="inputs"> | |
| 1001 <conditional name="in"> | |
| 1002 <param name="selector" value="sam"/> | |
| 1003 <param name="in" value="SRS014464-Anterior_nares.sam"/> | |
| 1004 </conditional> | |
| 1005 <conditional name="db"> | |
| 1006 <param name="db_selector" value="cached"/> | |
| 1007 <param name="cached_db" value="test-db-20210409"/> | |
| 1008 </conditional> | |
| 1009 </section> | |
| 1010 <section name="analysis"> | |
| 1011 <conditional name="analysis_type"> | |
| 1012 <param name="t" value="rel_ab"/> | |
| 1013 <conditional name="tax_lev"> | |
| 1014 <param name="tax_lev" value="a"/> | |
| 1015 <param name="split_levels" value="false"/> | |
| 1016 </conditional> | |
| 1017 </conditional> | |
| 1018 <param name="min_cu_len" value="2000"/> | |
| 1019 <param name="organism_profiling" value="add_viruses"/> | |
| 1020 <param name="stat" value="avg_g"/> | |
| 1021 <param name="stat_q" value="0.2"/> | |
| 1022 <param name="perc_nonzero" value="0.33"/> | |
| 1023 <param name="avoid_disqm" value="true"/> | |
| 1024 </section> | |
| 1025 <section name="out"> | |
| 1026 <param name="sample_id_key" value="SampleID"/> | |
| 1027 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 1028 <param name="use_group_representative" value="false"/> | |
| 1029 <param name="legacy_output" value="false"/> | |
| 1030 <param name="CAMI_format_output" value="false"/> | |
| 1031 <param name="unclassified_estimation" value="false"/> | |
| 1032 <param name="krona_output" value="false"/> | |
| 1033 </section> | |
| 1034 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 1035 <assert_contents> | |
| 1036 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 1037 <has_text text="relative_abundance"/> | |
| 1038 <has_text text="NCBI_tax_id"/> | |
| 1039 <has_text text="clade_name"/> | |
| 1040 </assert_contents> | |
| 1041 </output> | |
| 1042 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 1043 <assert_contents> | |
| 1044 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 1045 </assert_contents> | |
| 1046 </output> | |
| 1047 <assert_stderr> | |
| 1048 <has_text text="Downloading" negate="true"/> | |
| 1049 </assert_stderr> | |
| 1050 </test> | |
| 1051 <!-- bowtie2out, cached DB --> | |
| 1052 <test expect_num_outputs="2"> | |
| 1053 <section name="inputs"> | |
| 1054 <conditional name="in"> | |
| 1055 <param name="selector" value="bowtie2out"/> | |
| 1056 <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/> | |
| 1057 </conditional> | |
| 1058 <conditional name="db"> | |
| 1059 <param name="db_selector" value="cached"/> | |
| 1060 <param name="cached_db" value="test-db-20210409"/> | |
| 1061 </conditional> | |
| 1062 </section> | |
| 1063 <section name="analysis"> | |
| 1064 <conditional name="analysis_type"> | |
| 1065 <param name="t" value="rel_ab"/> | |
| 1066 <conditional name="tax_lev"> | |
| 1067 <param name="tax_lev" value="a"/> | |
| 1068 <param name="split_levels" value="false"/> | |
| 1069 </conditional> | |
| 1070 </conditional> | |
| 1071 <param name="min_cu_len" value="2000"/> | |
| 1072 <param name="organism_profiling" value="add_viruses"/> | |
| 1073 <param name="stat" value="avg_g"/> | |
| 1074 <param name="stat_q" value="0.2"/> | |
| 1075 <param name="perc_nonzero" value="0.33"/> | |
| 1076 <param name="avoid_disqm" value="true"/> | |
| 1077 </section> | |
| 1078 <section name="out"> | |
| 1079 <param name="sample_id_key" value="SampleID"/> | |
| 1080 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 1081 <param name="use_group_representative" value="false"/> | |
| 1082 <param name="legacy_output" value="false"/> | |
| 1083 <param name="CAMI_format_output" value="false"/> | |
| 1084 <param name="unclassified_estimation" value="false"/> | |
| 1085 <param name="krona_output" value="false"/> | |
| 1086 </section> | |
| 1087 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
| 1088 <assert_contents> | |
| 1089 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 1090 <has_text text="relative_abundance"/> | |
| 1091 <has_text text="NCBI_tax_id"/> | |
| 1092 <has_text text="clade_name"/> | |
| 1093 </assert_contents> | |
| 1094 </output> | |
| 1095 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 1096 <assert_contents> | |
| 1097 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 1098 </assert_contents> | |
| 1099 </output> | |
| 1100 <assert_stderr> | |
| 1101 <has_text text="Downloading" negate="true"/> | |
| 1102 </assert_stderr> | |
| 1103 </test> | |
| 1104 <!-- Single FASTA file, Cached db --> | |
| 1105 <test expect_num_outputs="6"> | |
| 1106 <section name="inputs"> | |
| 1107 <conditional name="in"> | |
| 1108 <param name="selector" value="raw"/> | |
| 1109 <conditional name="raw_in"> | |
| 1110 <param name="selector" value="single"/> | |
| 1111 <param name="in" value="SRS014464-Anterior_nares.fasta"/> | |
| 1112 </conditional> | |
| 1113 <param name="read_min_len" value="70"/> | |
| 1114 <section name="mapping"> | |
| 1115 <param name="bt2_ps" value="sensitive"/> | |
| 1116 <param name="min_mapq_val" value="5"/> | |
| 1117 </section> | |
| 1118 </conditional> | |
| 1119 <conditional name="db"> | |
| 1120 <param name="db_selector" value="cached"/> | |
| 1121 <param name="cached_db" value="test-db-20210409"/> | |
| 1122 </conditional> | |
| 1123 </section> | |
| 1124 <section name="analysis"> | |
| 1125 <conditional name="analysis_type"> | |
| 1126 <param name="t" value="rel_ab"/> | |
| 1127 <conditional name="tax_lev"> | |
| 1128 <param name="tax_lev" value="a"/> | |
| 1129 <param name="split_levels" value="true"/> | |
| 1130 </conditional> | |
| 1131 </conditional> | |
| 1132 <param name="min_cu_len" value="2000"/> | |
| 1133 <param name="organism_profiling" value="add_viruses"/> | |
| 1134 <param name="stat" value="avg_g"/> | |
| 1135 <param name="stat_q" value="0.2"/> | |
| 1136 <param name="perc_nonzero" value="0.33"/> | |
| 1137 <param name="ignore_markers" value="marker.txt"/> | |
| 1138 <param name="avoid_disqm" value="true"/> | |
| 1139 </section> | |
| 1140 <section name="out"> | |
| 1141 <param name="sample_id_key" value="SampleID"/> | |
| 1142 <param name="sample_id" value="Metaphlan_Analysis"/> | |
| 1143 <param name="use_group_representative" value="false"/> | |
| 1144 <param name="legacy_output" value="true"/> | |
| 1145 <param name="CAMI_format_output" value="false"/> | |
| 1146 <param name="unclassified_estimation" value="false"/> | |
| 1147 <param name="krona_output" value="true"/> | |
| 1148 </section> | |
| 1149 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> | |
| 1150 <assert_contents> | |
| 1151 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
| 1152 <has_text text="SampleID"/> | |
| 1153 <has_text text="Metaphlan_Analysis"/> | |
| 1154 </assert_contents> | |
| 1155 </output> | |
| 1156 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
| 1157 <assert_contents> | |
| 1158 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
| 1159 </assert_contents> | |
| 1160 </output> | |
| 1161 <output_collection name="levels" type="list"> | |
| 1162 <element name="all" ftype="tabular"> | |
| 1163 <assert_contents> | |
| 1164 <has_text text="Gammaproteobacteria"/> | |
| 1165 <has_text text="Corynebacterium accolens"/> | |
| 1166 <has_n_columns n="9"/> | |
| 1167 </assert_contents> | |
| 1168 </element> | |
| 1169 <element name="class" ftype="tabular"> | |
| 1170 <assert_contents> | |
| 1171 <has_text text="class"/> | |
| 1172 <has_text text="Actinobacteria"/> | |
| 1173 <has_n_columns n="2"/> | |
| 1174 </assert_contents> | |
| 1175 </element> | |
| 1176 <element name="family" ftype="tabular"> | |
| 1177 <assert_contents> | |
| 1178 <has_text text="family"/> | |
| 1179 <has_text text="Propionibacteriaceae"/> | |
| 1180 <has_n_columns n="2"/> | |
| 1181 </assert_contents> | |
| 1182 </element> | |
| 1183 <element name="genus" ftype="tabular"> | |
| 1184 <assert_contents> | |
| 1185 <has_text text="genus"/> | |
| 1186 <has_text text="Cutibacterium"/> | |
| 1187 <has_n_columns n="2"/> | |
| 1188 </assert_contents> | |
| 1189 </element> | |
| 1190 <element name="kingdom" ftype="tabular"> | |
| 1191 <assert_contents> | |
| 1192 <has_text text="kingdom"/> | |
| 1193 <has_text text="Bacteria"/> | |
| 1194 <has_n_columns n="2"/> | |
| 1195 </assert_contents> | |
| 1196 </element> | |
| 1197 <element name="order" ftype="tabular"> | |
| 1198 <assert_contents> | |
| 1199 <has_text text="order"/> | |
| 1200 <has_text text="Propionibacteriales"/> | |
| 1201 <has_n_columns n="2"/> | |
| 1202 </assert_contents> | |
| 1203 </element> | |
| 1204 <element name="phylum" ftype="tabular"> | |
| 1205 <assert_contents> | |
| 1206 <has_text text="phylum"/> | |
| 1207 <has_text text="Firmicutes"/> | |
| 1208 <has_n_columns n="2"/> | |
| 1209 </assert_contents> | |
| 1210 </element> | |
| 1211 <element name="species" ftype="tabular"> | |
| 1212 <assert_contents> | |
| 1213 <has_text text="species"/> | |
| 1214 <has_text text="Corynebacterium accolens"/> | |
| 1215 <has_n_columns n="2"/> | |
| 1216 </assert_contents> | |
| 1217 </element> | |
| 1218 <element name="strains" ftype="tabular"> | |
| 1219 <assert_contents> | |
| 1220 <has_text text="strains"/> | |
| 1221 <has_n_columns n="2"/> | |
| 1222 </assert_contents> | |
| 1223 </element> | |
| 1224 </output_collection> | |
| 1225 <output name="krona_output_file" ftype="tabular"> | |
| 1226 <assert_contents> | |
| 1227 <not_has_text text="k__Bacteria"/> | |
| 1228 <has_text text="Corynebacterium accolens"/> | |
| 1229 <has_n_columns n="9"/> | |
| 1230 </assert_contents> | |
| 1231 </output> | |
| 1232 <assert_stderr> | |
| 1233 <has_text text="Downloading" negate="true"/> | |
| 1234 </assert_stderr> | |
| 1235 </test> | |
| 1236 <!-- Check a non-default analysis mode | |
| 1237 and viral analysis --> | |
| 1238 <test expect_num_outputs="6"> | |
| 1239 <section name="inputs"> | |
| 1240 <conditional name="in"> | |
| 1241 <param name="selector" value="raw"/> | |
| 1242 <conditional name="raw_in"> | |
| 1243 <param name="selector" value="single"/> | |
| 1244 <param name="in" value="SRS014464-Anterior_nares.fastq.gz"/> | |
| 1245 </conditional> | |
| 1246 </conditional> | |
| 1247 <conditional name="db"> | |
| 1248 <param name="db_selector" value="cached"/> | |
| 1249 <param name="cached_db" value="mpa_vJan21_TOY_CHOCOPhlAnSGB"/> | |
| 1250 </conditional> | |
| 1251 </section> | |
| 1252 <section name="analysis"> | |
| 1253 <conditional name="analysis_type"> | |
| 1254 <param name="t" value="marker_ab_table"/> | |
| 1255 </conditional> | |
| 1256 </section> | |
| 1257 <conditional name="viral_analysis"> | |
| 1258 <param name="profile_vsc" value="--profile_vsc"/> | |
| 1259 </conditional> | |
| 1260 <conditional name="subsample"> | |
| 1261 <param name="selector" value="single"/> | |
| 1262 <param name="subsampling" value="10000"/> | |
| 1263 <param name="subsampling_seed" value="42"/> | |
| 1264 </conditional> | |
| 1265 <param name="test" value="true"/> | |
| 1266 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> | |
| 1267 <assert_contents> | |
| 1268 <has_text text="SGB7017__MKDPKOFL_00679"/> | |
| 1269 <has_text text="SampleID"/> | |
| 1270 <has_text text="Metaphlan_Analysis"/> | |
| 1271 </assert_contents> | |
| 1272 </output> | |
| 1273 <output name="subsample_single"> | |
| 1274 <assert_contents> | |
| 1275 <has_text text="@" n="10000"/> | |
| 1276 </assert_contents> | |
| 1277 </output> | |
| 1278 <!-- reference data empty -> empty output --> | |
| 1279 <output name="vcs_breath_coverage" ftype="tabular"> | |
| 1280 <assert_contents> | |
| 1281 <has_size size="0"/> | |
| 1282 </assert_contents> | |
| 1283 </output> | |
| 1284 <assert_command> | |
| 1285 <has_text text="--profile_vsc"/> | |
| 1286 <has_text text="--vsc_breadth 0.75"/> | |
| 1287 <has_text text="--vsc_out"/> | |
| 1288 </assert_command> | |
| 1289 <assert_stderr> | |
| 1290 <has_text text="Downloading"/> | |
| 1291 <!-- due to test=true and the absence of the TOY reference DB, MetaPhlAn will download about 10MB --> | |
| 1292 <has_text text="No reads aligning to VSC markers"/> | |
| 1293 </assert_stderr> | |
| 1294 </test> | |
| 1295 </tests> | |
| 1296 <help><![CDATA[ | |
| 1297 What it does | |
| 1298 ============ | |
| 1299 | |
| 1300 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, | |
| 1301 Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) at the species level. | |
| 1302 | |
| 1303 MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes | |
| 1304 (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing: | |
| 1305 | |
| 1306 - unambiguous taxonomic assignments; | |
| 1307 - accurate estimation of organismal relative abundance; | |
| 1308 - species-level resolution for bacteria, archaea, eukaryotes and viruses; | |
| 1309 - strain identification and tracking; | |
| 1310 - orders of magnitude speedups compared to existing methods; | |
| 1311 - microbiota strain-level population genomics. | |
| 1312 | |
| 1313 MetaPhlAn clade-abundance estimation | |
| 1314 ------------------------------------ | |
| 1315 | |
| 1316 The basic usage of MetaPhlAn consists of identifying the clades (from phyla to species, and | |
| 1317 strains in particular cases) present in the microbiota obtained from a microbiome sample, and estimating their | |
| 1318 relative abundance. | |
| 1319 | |
| 1320 Marker level analysis | |
| 1321 --------------------- | |
| 1322 | |
| 1323 MetaPhlAn introduces the capability of characterizing organisms at the strain level using non-aggregated | |
| 1324 marker information. This capability comes in several slightly different flavours and | |
| 1325 is a way to perform strain tracking and comparison across multiple samples. | |
| 1326 | |
| 1327 Usually, MetaPhlAn is first run with default parameters for the type of analysis to profile the | |
| 1328 species present in the community, and then a strain-level profiling can be performed to zoom-in on | |
| 1329 specific species of interest. This operation can be performed quickly as it exploits the bowtie2out | |
| 1330 intermediate file saved during the execution of the default analysis type. | |
| 1331 | |
| 1332 Inputs | |
| 1333 ====== | |
| 1334 | |
| 1335 MetaPhlAn takes as input either: | |
| 1336 | |
| 1337 - one or several sequence files in FASTA or FASTQ format (compressed or not) | |
| 1338 - a BowTie2 produced SAM file | |
| 1339 - an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run | |
| 1340 | |
| 1341 It also needs the reference database, which can be locally installed or customized using the dedicated tools. | |
| 1342 | |
| 1343 Outputs | |
| 1344 ======= | |
| 1345 | |
| 1346 The main output is a tab-separated file with the predicted taxon relative abundances. | |
| 1347 | |
| 1348 It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs. | |
| 1349 | |
| 1350 | |
| 1351 More help and use cases | |
| 1352 ======================= | |
| 1353 | |
| 1354 To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_. | |
| 1355 | |
| 1356 .. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Basic-Usage | |
| 1357 | |
| 1358 ]]></help> | |
| 1359 <expand macro="citations"/> | |
| 1360 </tool> |
