Mercurial > repos > jpayne > table_ops
changeset 0:f1f2497301d3
planemo upload
author | jpayne |
---|---|
date | Mon, 08 Jan 2018 11:19:54 -0500 |
parents | |
children | 9c8237621723 |
files | table-sort.py table-sort.xml table-union.py table-union.xml test-data/combined.tsv test-data/dingbat.tsv test-data/loki.tsv test-data/sort_test_multiple.txt test-data/sort_test_multiple_sorted.txt test-data/sorted.tsv |
diffstat | 10 files changed, 122 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python3
"""table-sort.py: sort a tab-separated table by one or more named columns.

The table is read from standard input and the sorted table is written to
standard output:

    table-sort.py HEADER [HEADER ...] < table.tsv > sorted.tsv
"""

import csv
import sys


def main(headers):
    """Sort the table on stdin by ``headers`` and write it to stdout.

    Args:
        headers: Column names to sort on, in priority order. Names that are
            not columns of the table are tolerated as long as at least one
            supplied name is; a missing or empty cell sorts as "".

    Raises:
        ValueError: If none of ``headers`` names a column of the input
            table. This includes an empty input (no header row) and an
            empty ``headers`` list.
    """
    rows = csv.DictReader(sys.stdin, dialect='excel-tab')
    # DictReader.fieldnames is None when the input has no header row at all
    # (e.g. empty stdin); treat that as "no columns" so the check below
    # reports the problem instead of failing with a TypeError.
    fieldnames = rows.fieldnames or []
    if not any(str(header) in fieldnames for header in headers):
        raise ValueError("Couldn't find any of supplied headers ({}) in the table.".format(','.join(['"{}"'.format(header) for header in headers])))
    items = list(rows)
    # Cells are compared as text (the csv module yields strings), so the
    # ordering is lexicographic, not numeric. `or ""` keeps None (short rows
    # or unknown headers) comparable with real strings.
    items.sort(key=lambda d: [d.get(h) or "" for h in headers])
    wr = csv.DictWriter(sys.stdout, dialect='excel-tab', fieldnames=fieldnames)
    wr.writeheader()
    wr.writerows(items)
    sys.stdout.flush()


if __name__ == '__main__':
    main(sys.argv[1:])
<!-- table-sort.xml: Galaxy tool wrapper around table-sort.py. -->
<tool id="table-sort" name="Sort tabular dataset" version="0.1.0">
    <description>on specified header or headers</description>
    <requirements>
    </requirements>
    <!-- The table is piped to the script on stdin. Paths are single-quoted so
         they survive shell word splitting; $headers is deliberately left
         unquoted so each space-delimited header becomes its own argument. -->
    <command detect_errors="exit_code"><![CDATA[
        cat '$table' | '$__tool_directory__/table-sort.py' $headers > '$sorted'
    ]]></command>
    <inputs>
        <param name="table" format="tsv" label="Table in TSV format to sort." type="data" />
        <param name="headers" type="text" label="Space-delimited list of headers on which to sort." />
    </inputs>
    <outputs>
        <data name="sorted" format="tsv" label="Sorted table" />
    </outputs>
    <tests>
        <!-- Single sort key. -->
        <test>
            <param name="table" value="combined.tsv" ftype="tsv" />
            <param name="headers" value="flavor" />
            <output name="sorted" value="sorted.tsv"/>
        </test>
        <!-- Two sort keys, applied in the order given. -->
        <test>
            <param name="table" value="sort_test_multiple.txt" ftype="tsv" />
            <param name="headers" value="A D" />
            <output name="sorted" value="sort_test_multiple_sorted.txt"/>
        </test>
    </tests>
    <help><![CDATA[
        Sort a table, ascending, by one or more columns, identified by their header names.
    ]]></help>

    <citations>
    </citations>
</tool>
#!/usr/bin/env python3
"""table-union.py: concatenate tab-separated tables into one table.

The output header is the union of the input headers, in order of first
appearance; the combined table is written to standard output:

    table-union.py TABLE [TABLE ...] > combined.tsv
"""

import csv
import sys


def main(files):
    """Concatenate the tables in ``files`` and write the result to stdout.

    Args:
        files: Paths of tab-separated files, each starting with a header
            row. Rows are emitted in input order; a row that lacks one of
            the combined columns gets an empty cell for it. A file with no
            header row (an empty file) contributes nothing.
    """
    header = []
    seen = set()  # fields already in `header`, for O(1) membership tests
    items = []
    for fi in files:
        # newline='' is how the csv module expects files to be opened. The
        # former 'rU' mode was removed in Python 3.11 and makes open() raise
        # ValueError there.
        with open(fi, 'r', newline='') as table:
            rows = csv.DictReader(table, dialect='excel-tab')
            # fieldnames is None for a file without a header row.
            for field in rows.fieldnames or []:
                if field not in seen:
                    seen.add(field)
                    header.append(field)
            # Consume the reader while the file is still open.
            items.extend(rows)
    wr = csv.DictWriter(sys.stdout, dialect='excel-tab', fieldnames=header)
    wr.writeheader()
    wr.writerows(items)


if __name__ == '__main__':
    main(sys.argv[1:])
<!-- table-union.xml: Galaxy tool wrapper around table-union.py. -->
<tool id="table-union" name="Merge tabular datasets" version="0.1.0">
    <requirements>
    </requirements>
    <!-- Every dataset in the input collection is passed to the script as one
         single-quoted path argument; the combined table is captured from stdout. -->
    <command detect_errors="exit_code"><![CDATA[
        '$__tool_directory__/table-union.py'
        #for $table in $tables
            '$table'
        #end for
        > '$combined_table'
    ]]></command>
    <inputs>
        <param name="tables" type="data_collection" format="tsv" collection_type="list" />
    </inputs>
    <outputs>
        <data name="combined_table" format="tsv" label="Combined table" />
    </outputs>

    <tests>
        <!-- Two tables that share some columns and each add one of their own. -->
        <test>
            <param name="tables">
                <collection type="list">
                    <element name="1" value="dingbat.tsv" />
                    <element name="2" value="loki.tsv" />
                </collection>
            </param>
            <output name="combined_table" value="combined.tsv" />
        </test>
    </tests>
    <help><![CDATA[
        Concatenate a collection of TSV files while preserving their rows and taking the union of their headers.
    ]]></help>

    <citations>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/combined.tsv Mon Jan 08 11:19:54 2018 -0500 @@ -0,0 +1,3 @@ +name flavor color size +Dingbat strawberry red +Loki chocolate massive
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dingbat.tsv Mon Jan 08 11:19:54 2018 -0500 @@ -0,0 +1,2 @@ +name flavor color +Dingbat strawberry red
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/loki.tsv Mon Jan 08 11:19:54 2018 -0500 @@ -0,0 +1,2 @@ +name flavor size +Loki chocolate massive
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_test_multiple.txt Mon Jan 08 11:19:54 2018 -0500 @@ -0,0 +1,1 @@ +A B C D 1 1 2 3 1 1 2 2 1 1 1 1 2 2 2 2 \ No newline at end of file