Mercurial > repos > estrain > gtdbtk_classify_wf
annotate gtdbtk_classify_wf.xml @ 2:bca71d3aa1bf draft default tip
planemo upload commit 21086bad5baa43e0d616b6942ded72a70840495d
| author | estrain |
|---|---|
| date | Fri, 13 Mar 2026 11:56:54 +0000 |
| parents | 353347ef2386 |
| children |
| rev | line source |
|---|---|
|
0
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
1 <tool id="gtdbtk_classify_wf" name="GTDB-Tk Classify genomes" version="0.1.3" profile="24"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
2 <description>by placement in GTDB reference tree</description> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
3 <requirements> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
4 <requirement type="package" version="2.5.2">gtdbtk</requirement> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
5 </requirements> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
6 <command detect_errors="exit_code"><![CDATA[ |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
7 #import re |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
8 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
9 mkdir input_dir && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
10 mkdir output_dir && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
11 ## GTDB-Tk can process both *.fna and *.fna.gz files, but everything is decompressed here to simplify the workflow |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
12 #for $i in $input: |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
13 #set cleaned = re.sub(r'(\.(fna|fasta|fa))?(\.gz)?\Z', '', $i.element_identifier) |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
14 #set final_name = cleaned + '.fna' |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
15 #if $i.ext in ['fasta.gz']: |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
16 gunzip -c '${i}' > input_dir/'${final_name}' && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
17 #else: |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
18 ln -s '${i}' input_dir/'${final_name}' && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
19 #end if |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
20 #end for |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
21 export GTDBTK_DATA_PATH='$gtdbtk_db.fields.path' && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
22 gtdbtk classify_wf |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
23 --genome_dir input_dir |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
24 --out_dir output_dir |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
25 --mash_db '$gtdbtk_db.fields.path' |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
26 --cpus \${GALAXY_SLOTS:-4} |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
27 --min_perc_aa $advanced.min_perc_aa --min_af $advanced.min_af $advanced.force |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
28 #if str($advanced.output_process_log) == 'yes': |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
29 && cat output_dir/gtdbtk.warnings.log output_dir/gtdbtk.log > '$process_log' |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
30 #end if |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
31 ]]></command> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
32 <inputs> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
33 <param name="input" type="data" format="fasta,fasta.gz" multiple="true" label="Fasta (Genome) files"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
34 <param name="gtdbtk_db" type="select" label="GTDB-Tk database"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
35 <options from_data_table="gtdbtk_database_versioned"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
36 <validator type="no_options" message="No locally cached GTDB-Tk database is available"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
37 </options> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
38 </param> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
39 <section name="advanced" title="Advanced options"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
40 <param argument="--min_perc_aa" type="integer" min="0" max="100" value="10" label="Exclude genomes that do not have at least this percentage of AA in the MSA" help="Inclusive bound"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
41 <param argument="--force" type="boolean" truevalue="--force" falsevalue="" checked="false" label="Continue processing if an error occurs on a single genome?"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
42 <param argument="--min_af" type="float" min="0" max="1" value="0.65" label="Minimum alignment fraction to consider closest genome"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
43 <param name="output_process_log" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Output process log file?"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
44 </section> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
45 </inputs> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
46 <outputs> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
47 <data name="process_log" format="txt" label="${tool.name} on ${on_string} (process log)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
48 <filter>advanced['output_process_log']</filter> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
49 </data> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
50 <collection name="output_align" type="list" format="fasta.gz" label="${tool.name} on ${on_string} (align)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
51 <discover_datasets pattern="(?P<designation>.+)\.fasta.gz" ext="fasta.gz" directory="output_dir/align"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
52 </collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
53 <collection name="output_identfy" type="list" format="tsv" label="${tool.name} on ${on_string} (identify)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
54 <discover_datasets pattern="(?P<designation>.+)\.tsv" ext="tsv" directory="output_dir/identify"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
55 </collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
56 <collection name="output_classify" type="list" format="newick" label="${tool.name} on ${on_string} (classify)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
57 <discover_datasets pattern="(?P<designation>.+)\.tree" ext="newick" directory="output_dir/classify"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
58 </collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
59 <collection name="output_summary" type="list" format="tsv" label="${tool.name} on ${on_string} (summary)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
60 <discover_datasets pattern="(?P<designation>.+)\.tsv" ext="tsv" directory="output_dir"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
61 </collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
62 </outputs> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
63 <tests> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
64 <!-- The commented test here is valid if we could store the GTDB-Tk database --> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
65 <!-- |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
66 <test expect_num_outputs="4"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
67 <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
68 <param name="gtdbtk_db" value="gtdbtk214"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
69 <output_collection name="output_summary" type="list" count="1"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
70 <element name="gtdbtk.ar53.summary" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
71 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
72 <has_text text="user_genome"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
73 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
74 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
75 </output_collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
76 <output_collection name="output_identfy" type="list" count="4"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
77 <element name="gtdbtk.ar53.markers_summary" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
78 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
79 <has_text text="number_unique_genes"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
80 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
81 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
82 <element name="gtdbtk.bac120.markers_summary" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
83 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
84 <has_text text="genome_1_fna_gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
85 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
86 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
87 <element name="gtdbtk.failed_genomes" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
88 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
89 <has_size value="0"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
90 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
91 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
92 <element name="gtdbtk.translation_table_summary" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
93 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
94 <has_text text="genome_1_fna_gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
95 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
96 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
97 </output_collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
98 <output_collection name="output_classify" type="list" count="1"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
99 <element name="gtdbtk.ar53.classify" ftype="newick"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
100 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
101 <has_text text="GB_GCA_"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
102 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
103 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
104 </output_collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
105 <output_collection name="output_align" type="list" count="2"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
106 <element name="gtdbtk.ar53.msa" ftype="fasta.gz" decompress="true"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
107 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
108 <has_text text="GB_GCA_000008085"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
109 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
110 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
111 <element name="gtdbtk.ar53.user_msa" ftype="fasta.gz" decompress="true"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
112 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
113 <has_text text="genome_1_fna_gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
114 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
115 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
116 </output_collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
117 </test> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
118 --> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
119 <!-- GTDB-Tk databases are far too large to test currently --> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
120 <test expect_failure="true"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
121 <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
122 <param name="gtdbtk_db" value="gtdbtk214"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
123 <assert_stderr> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
124 <has_text text="Fatal error: Exit code 1"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
125 </assert_stderr> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
126 </test> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
127 </tests> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
128 <help><![CDATA[ |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
129 **What it does** |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
130 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
131 GTDB-Tk is a software toolkit for assigning objective taxonomic classifications to bacterial and archaeal genomes |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
132 based on the Genome Taxonomy Database (GTDB). It is designed to work with recent advances that allow hundreds or |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
133 thousands of metagenome-assembled genomes (MAGs) to be obtained directly from environmental samples. It can also |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
134 be applied to isolate and single-cell genomes. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
135 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
136 This tool accepts one or more fasta (genome) files and determines taxonomic classification of genomes by |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
137 maximum-likelihood (ML) placement. The classification workflow consists of three steps: identify, align, and |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
138 classify. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
139 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
140 The identify step calls genes using Prodigal, and uses HMM models and the HMMER package to identify the 120 bacterial |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
141 and 53 archaeal marker genes used for phylogenetic inference. Multiple sequence alignments (MSA) are obtained by |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
142 aligning marker genes to their respective HMM model. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
143 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
144 The align step concatenates the aligned marker genes and filters the concatenated MSA to approximately 5,000 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
145 amino acids. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
146 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
147 Finally, the classify step uses pplacer to find the maximum-likelihood placement of each genome in the GTDB-Tk |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
148 reference tree. GTDB-Tk classifies each genome based on its placement in the reference tree, its relative evolutionary |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
149 divergence, and/or average nucleotide identity (ANI) to reference genomes. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
150 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
151 Results can be impacted by a lack of marker genes or contamination. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
152 ]]></help> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
153 <citations><citation type="doi">10.1093/bioinformatics/btac672</citation></citations> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
154 </tool> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
155 |
