Mercurial > repos > jpayne > quast_select
changeset 0:c36a89d3a351 tip
planemo upload
author | jpayne |
---|---|
date | Wed, 07 Feb 2018 16:37:42 -0500 |
parents | |
children | |
files | quast-select.xml quast_select.py test-data/combined_table.tsv test-data/sample1.fasta test-data/sample2.fasta test-data/sample3.fasta test-data/sample4.fasta |
diffstat | 7 files changed, 94 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- quast-select.xml: Galaxy tool wrapper around quast_select.py -->
<tool id="quast-select" name="Select Best" version="0.1.0" profile="16.10">
    <description>assembly based on a combined QUAST table</description>
    <requirements>
        <requirement type="package">python</requirement>
    </requirements>
    <!--
        Command outline:
          1. Symlink every assembly of the collection into the working
             directory under its element identifier, so that the name
             printed by quast_select.py is also a file name here.
          2. Run quast_select.py on the combined table; it prints the
             identifier of the winning assembly.
          3. Copy that file to the tool output.
        Every interpolated value is quoted: most criterion values contain
        spaces, '#', parentheses or '>', which the shell would otherwise
        split or interpret instead of passing through as one argument.
    -->
    <command detect_errors="exit_code"><![CDATA[
        #for $asm in $coll
            ln -s '$asm' './$asm.element_identifier' &&
        #end for
        cp "./\$(python '$__tool_directory__/quast_select.py' '$table' '$criterion')" '$output'
    ]]></command>
    <inputs>
        <param type="data" format="tsv" name="table" label="Combined QUAST output (from table_union)"/>
        <param type="select" label="Select best assembly by..." name="criterion">
            <option value="N50">Longest N50</option>
            <option value="Largest contig">Longest single contig</option>
            <option value="# contigs">Fewest contigs</option>
            <option value="# contigs (>= 1000 bp)">Fewest contigs of length 1kbp or longer</option>
            <option value="Total length">Total assembly length</option>
            <option value="Total length (>= 1000 bp)">Total assembly length of contigs longer than 1kbp</option>
        </param>
        <param type="data_collection" collection_type="list" format="fasta" name="coll" label="Collection of FASTA assemblies" />
    </inputs>
    <outputs>
        <data format="fasta" name="output" label="Best assembly by ${criterion}" />
    </outputs>
    <tests>
        <test>
            <param name="table" value="combined_table.tsv" />
            <param name="coll" >
                <collection type="list">
                    <element name="sample1" value="sample1.fasta" />
                    <element name="sample2" value="sample2.fasta" />
                    <element name="sample3" value="sample3.fasta" />
                    <element name="sample4" value="sample4.fasta" />
                </collection>
            </param>
            <param name="criterion" value="N50"/>
            <output name="output" value="sample1.fasta" />
        </test>
    </tests>
    <help><![CDATA[
        Pick the best assembly from a collection of assemblies and a combined QUAST report.
    ]]></help>
</tool>
"""quast_select.py: print the name of the best assembly in a QUAST table.

Usage:
    quast_select.py <combined_table.tsv> <criterion>

The table is tab-separated with one metric per line and one assembly per
column; the line whose first cell is ``Assembly`` carries the assembly
names.  ``<criterion>`` is the name of the metric to compare.  Metrics
whose name contains ``#`` are counts, so the smallest value wins; for
every other metric the largest value wins.
"""
from __future__ import print_function

import csv
from operator import lt, gt
import sys


def pick(rows, key, reverse=False):
    """Return the ``'Assembly'`` value of the best row for metric *key*.

    rows    -- list of dicts, one per assembly, each holding *key* and
               ``'Assembly'``.
    key     -- name of the metric to compare.
    reverse -- False selects the smallest value, True the largest.

    When several rows tie, the one that comes first in *rows* wins.
    Raises IndexError when *rows* is empty and KeyError when a row has no
    entry for *key*.
    """
    if not rows:
        # Same exception type that indexing an empty sorted list raised.
        raise IndexError("no assemblies to choose from")
    # min()/max() return the first extreme item, which is the same row a
    # stable (optionally reversed) sort would put at index 0.
    choose = max if reverse else min
    return choose(rows, key=lambda r: r[key])['Assembly']


def int_or_str(token):
    """Return *token* as an int when it parses as one, else as a str."""
    try:
        return int(token)
    except ValueError:
        return str(token)


def load_rows(path):
    """Read the table at *path* and return one dict per assembly.

    QUAST tables have sample info as columns, so the table is transposed:
    the first column (the metric names) supplies the dict keys and every
    following column becomes one dict of ``int_or_str``-converted cells.
    Returns an empty list when the file has no lines.
    """
    # The "U" mode flag was removed in Python 3.11; on Python 3 plain
    # text mode already translates newlines the way "rU" used to.
    mode = "r" if sys.version_info[0] >= 3 else "rU"
    with open(path, mode) as handle:
        reader = csv.reader(handle, delimiter='\t', dialect='excel')
        columns = list(zip(*reader))
    if not columns:
        return []
    header = columns[0]
    return [
        {name: int_or_str(cell) for name, cell in zip(header, column)}
        for column in columns[1:]
    ]


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    argv -- argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.stderr.write("usage: quast_select.py <table.tsv> <criterion>\n")
        return 2
    path = args[0]
    # Metric names such as "Largest contig" contain spaces; re-join them
    # in case the caller passed the criterion unquoted.
    compared = " ".join(args[1:])
    if "#" in compared:
        reverse = False  # if it's a count, we want the fewest
    else:
        reverse = True  # otherwise it's a length and we want the longest
    print(pick(load_rows(path), compared, reverse))
    return 0


if __name__ == '__main__':
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/combined_table.tsv Wed Feb 07 16:37:42 2018 -0500 @@ -0,0 +1,14 @@ +Assembly sample1 sample2 sample3 sample4 +# N's per 100 kbp 0.00 0.00 0.00 0.00 +# contigs 15 26 25 18 +# contigs (>= 0 bp) 15 26 25 18 +# contigs (>= 1000 bp) 12 17 20 13 +GC (%) 49.67 50.22 49.81 49.62 +L50 4 8 8 5 +L75 7 14 15 9 +Largest contig 9036 4811 5055 5138 +N50 4026 1934 1668 3114 +N75 3428 1371 1217 1833 +Total length 42889 42188 41537 41859 +Total length (>= 0 bp) 42889 42188 41537 41859 +Total length (>= 1000 bp) 40450 35624 37562 37621
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample1.fasta Wed Feb 07 16:37:42 2018 -0500 @@ -0,0 +1,2 @@ +>sample1 +AAAA \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample2.fasta Wed Feb 07 16:37:42 2018 -0500 @@ -0,0 +1,2 @@ +>sample2 +TTTT \ No newline at end of file