Mercurial > repos > estrain > gtdbtk_classify_wf
annotate gtdbtk_classify_wf.xml @ 1:353347ef2386 draft
planemo upload commit 28a309b5b4f23853603771d856d2557aa261e43e
| author | estrain |
|---|---|
| date | Thu, 12 Mar 2026 20:16:30 +0000 |
| parents | a20cd9311046 |
| children | bca71d3aa1bf |
| rev | line source |
|---|---|
|
0
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
1 <tool id="gtdbtk_classify_wf" name="GTDB-Tk Classify genomes" version="0.1.3" profile="24"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
2 <description>by placement in GTDB reference tree</description> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
3 <requirements> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
4 <requirement type="package" version="2.5.2">gtdbtk</requirement> |
|
1
353347ef2386
planemo upload commit 28a309b5b4f23853603771d856d2557aa261e43e
estrain
parents:
0
diff
changeset
|
5 |
|
0
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
6 </requirements> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
7 <command detect_errors="exit_code"><![CDATA[ |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
8 #import re |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
9 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
10 mkdir input_dir && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
11 mkdir output_dir && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
12 ## GTDB-Tk can process *.fna and *.fna.gz, but we unzip everything to simplify the workflow |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
13 #for $i in $input: |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
14 #set cleaned = re.sub(r'(?:\.(?:fna|fasta|fa))?(?:\.gz)?\Z', '', str($i.element_identifier)) |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
15 #set final_name = cleaned + '.fna' |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
16 #if $i.ext in ['fasta.gz']: |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
17 gunzip -c '${i}' > input_dir/'${final_name}' && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
18 #else: |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
19 ln -s '${i}' input_dir/'${final_name}' && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
20 #end if |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
21 #end for |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
22 export GTDBTK_DATA_PATH='$gtdbtk_db.fields.path' && |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
23 gtdbtk classify_wf |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
24 --genome_dir input_dir |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
25 --out_dir output_dir |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
26 --mash_db $gtdbtk_db.fields.path |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
27 --cpus \${GALAXY_SLOTS:-4} |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
28 --min_perc_aa $advanced.min_perc_aa --min_af $advanced.min_af $advanced.force |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
29 #if str($advanced.output_process_log) == 'yes': |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
30 && cat output_dir/gtdbtk.warnings.log output_dir/gtdbtk.log > '$process_log' |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
31 #end if |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
32 ]]></command> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
33 <inputs> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
34 <param name="input" type="data" format="fasta,fasta.gz" multiple="true" label="Fasta (Genome) files"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
35 <param name="gtdbtk_db" type="select" label="GTDB-Tk database"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
36 <options from_data_table="gtdbtk_database_versioned"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
37 <validator type="no_options" message="No locally cached GTDB-Tk database is available"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
38 </options> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
39 </param> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
40 <section name="advanced" title="Advanced options"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
41 <param argument="--min_perc_aa" type="integer" min="0" max="100" value="10" label="Exclude genomes that do not have at least this percentage of AA in the MSA" help="Inclusive bound"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
42 <param argument="--force" type="boolean" truevalue="--force" falsevalue="" checked="false" label="Continue processing if an error occurs on a single genome?"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
43 <param argument="--min_af" type="float" min="0" max="1" value="0.65" label="Minimum alignment fraction to consider closest genome"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
44 <param name="output_process_log" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Output process log file?"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
45 </section> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
46 </inputs> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
47 <outputs> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
48 <data name="process_log" format="txt" label="${tool.name} on ${on_string} (process log)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
49 <filter>advanced['output_process_log']</filter> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
50 </data> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
51 <collection name="output_align" type="list" format="fasta.gz" label="${tool.name} on ${on_string} (align)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
52 <discover_datasets pattern="(?P<designation>.+)\.fasta.gz" ext="fasta.gz" directory="output_dir/align"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
53 </collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
54 <collection name="output_identfy" type="list" format="tsv" label="${tool.name} on ${on_string} (identify)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
55 <discover_datasets pattern="(?P<designation>.+)\.tsv" ext="tsv" directory="output_dir/identify"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
56 </collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
57 <collection name="output_classify" type="list" format="newick" label="${tool.name} on ${on_string} (classify)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
58 <discover_datasets pattern="(?P<designation>.+)\.tree" ext="newick" directory="output_dir/classify"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
59 </collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
60 <collection name="output_summary" type="list" format="tsv" label="${tool.name} on ${on_string} (summary)"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
61 <discover_datasets pattern="(?P<designation>.+)\.tsv" ext="tsv" directory="output_dir"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
62 </collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
63 </outputs> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
64 <tests> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
65 <!-- The commented test here is valid if we could store the GTDB-Tk database --> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
66 <!-- |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
67 <test expect_num_outputs="4"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
68 <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
69 <param name="gtdbtk_db" value="gtdbtk214"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
70 <output_collection name="output_summary" type="list" count="1"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
71 <element name="gtdbtk.ar53.summary" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
72 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
73 <has_text text="user_genome"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
74 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
75 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
76 </output_collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
77 <output_collection name="output_identfy" type="list" count="4"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
78 <element name="gtdbtk.ar53.markers_summary" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
79 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
80 <has_text text="number_unique_genes"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
81 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
82 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
83 <element name="gtdbtk.bac120.markers_summary" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
84 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
85 <has_text text="genome_1_fna_gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
86 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
87 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
88 <element name="gtdbtk.failed_genomes" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
89 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
90 <has_size value="0"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
91 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
92 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
93 <element name="gtdbtk.translation_table_summary" ftype="tsv"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
94 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
95 <has_text text="genome_1_fna_gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
96 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
97 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
98 </output_collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
99 <output_collection name="output_classify" type="list" count="1"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
100 <element name="gtdbtk.ar53.classify" ftype="newick"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
101 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
102 <has_text text="GB_GCA_"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
103 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
104 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
105 </output_collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
106 <output_collection name="output_align" type="list" count="2"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
107 <element name="gtdbtk.ar53.msa" ftype="fasta.gz" decompress="true"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
108 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
109 <has_text text="GB_GCA_000008085"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
110 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
111 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
112 <element name="gtdbtk.ar53.user_msa" ftype="fasta.gz" decompress="true"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
113 <assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
114 <has_text text="genome_1_fna_gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
115 </assert_contents> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
116 </element> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
117 </output_collection> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
118 </test> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
119 --> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
120 <!-- GTDB-Tk databases are far too large to test currently --> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
121 <test expect_failure="true"> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
122 <param name="input" value="genome_1.fna.gz" ftype="fasta.gz"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
123 <param name="gtdbtk_db" value="gtdbtk214"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
124 <assert_stderr> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
125 <has_text text="Fatal error: Exit code 1"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
126 </assert_stderr> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
127 </test> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
128 </tests> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
129 <help><![CDATA[ |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
130 **What it does** |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
131 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
132 GTDB-Tk is a software toolkit for assigning objective taxonomic classifications to bacterial and archaeal genomes |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
133 based on the Genome Taxonomy Database (GTDB). It is designed to work with recent advances that allow hundreds or |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
134 thousands of metagenome-assembled genomes (MAGs) to be obtained directly from environmental samples. It can also |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
135 be applied to isolate and single-cell genomes. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
136 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
137 This tool accepts one or more fasta (genome) files and determines taxonomic classification of genomes by |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
138 maximum-likelihood (ML) placement. The classification workflow consists of three steps: identify, align, and |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
139 classify. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
140 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
141 The identify step calls genes using Prodigal, and uses HMM models and the HMMER package to identify the 120 bacterial |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
142 and 53 archaeal marker genes used for phylogenetic inference. Multiple sequence alignments (MSA) are obtained by |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
143 aligning marker genes to their respective HMM model. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
144 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
145 The align step concatenates the aligned marker genes and filters the concatenated MSA to approximately 5,000 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
146 amino acids. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
147 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
148 Finally, the classify step uses pplacer to find the maximum-likelihood placement of each genome in the GTDB-Tk |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
149 reference tree. GTDB-Tk classifies each genome based on its placement in the reference tree, its relative evolutionary |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
150 divergence, and/or average nucleotide identity (ANI) to reference genomes. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
151 |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
152 Results can be impacted by a lack of marker genes or contamination. |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
153 ]]></help> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
154 <expand macro="citations"/> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
155 </tool> |
|
a20cd9311046
planemo upload commit bdb45cf3a98e21f5002866b6789a1457f521bf5d
estrain
parents:
diff
changeset
|
156 |
