changeset 0:f1f2497301d3

planemo upload
author jpayne
date Mon, 08 Jan 2018 11:19:54 -0500
parents
children 9c8237621723
files table-sort.py table-sort.xml table-union.py table-union.xml test-data/combined.tsv test-data/dingbat.tsv test-data/loki.tsv test-data/sort_test_multiple.txt test-data/sort_test_multiple_sorted.txt test-data/sorted.tsv
diffstat 10 files changed, 122 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/table-sort.py	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+
+import csv
+import sys
+
+def main(headers):
+	rows = csv.DictReader(sys.stdin, delimiter='\t', dialect='excel-tab')
+	if not any([str(header) in rows.fieldnames for header in headers]):
+		raise ValueError("Couldn't find any of supplied headers ({}) in the table.".format(','.join(['"{}"'.format(header) for header in headers])))
+	items = list(rows)
+	items.sort(key=lambda d: [d.get(h) or "" for h in headers])
+	wr = csv.DictWriter(sys.stdout, dialect='excel-tab', fieldnames=rows.fieldnames)
+	wr.writeheader()
+	wr.writerows(items)
+	sys.stdout.flush()
+
+if __name__ == '__main__':
+	main(sys.argv[1:])
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/table-sort.xml	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,33 @@
+<tool id="table-sort" name="Sort tabular dataset" version="0.1.0">
+    <description>on specified header or headers</description>
+    <requirements>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        cat $table | $__tool_directory__/table-sort.py $headers > $sorted
+    ]]></command>
+    <inputs>
+        <param name="table" format="tsv" label="Table in TSV format to sort." type="data" />
+        <param name="headers" type="text" label="Space-delimited list of headers on which to sort." />
+    </inputs>
+    <outputs>
+        <data name="sorted" format="tsv" label="Sorted table" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="table" value="combined.tsv" ftype="tsv" />
+            <param name="headers" value="flavor" />
+            <output name="sorted" value="sorted.tsv"/>
+        </test>
+        <test>
+            <param name="table" value="sort_test_multiple.txt" ftype="tsv" />
+            <param name="headers" value="A D" />
+            <output name="sorted" value="sort_test_multiple_sorted.txt"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Sort the rows of a table in ascending order by one or more columns, identified by their header names.
+    ]]></help>
+
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/table-union.py	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+
+import csv
+import sys
+
+
+
+def main(files):
+	header = []
+	items = []
+	for fi in files:
+		with open(fi, 'rU') as table:
+			rows = csv.DictReader(table, delimiter='\t', dialect='excel-tab')
+			for field in rows.fieldnames:
+				if field not in set(header):
+					header.append(field)
+			items.extend(rows)
+	wr = csv.DictWriter(sys.stdout, delimiter='\t', dialect='excel-tab', fieldnames=header)
+	wr.writeheader()
+	wr.writerows(items)
+
+
+if __name__ == '__main__':
+	main(sys.argv[1:])
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/table-union.xml	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,35 @@
+<tool id="table-union" name="Merge tabular datasets" version="0.1.0">
+    <requirements>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        $__tool_directory__/table-union.py 
+        #for $table in $tables
+            $table
+        #end for
+        > $combined_table
+    ]]></command>
+    <inputs>
+        <param name="tables" type="data_collection" format="tsv" collection_type="list" />
+    </inputs>
+    <outputs>
+        <data name="combined_table" format="tsv" label="Combined table" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="tables">
+                <collection type="list">
+                    <element name="1" value="dingbat.tsv" />
+                    <element name="2" value="loki.tsv" />
+                </collection>
+            </param>
+            <output name="combined_table" value="combined.tsv" />
+        </test>
+    </tests>
+    <help><![CDATA[
+        Concatenate a collection of TSV files while preserving their headers; the output header is the union of all input headers.
+    ]]></help>
+
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/combined.tsv	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,3 @@
+name	flavor	color	size
+Dingbat	strawberry	red	
+Loki	chocolate		massive
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dingbat.tsv	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,2 @@
+name	flavor	color
+Dingbat	strawberry	red
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/loki.tsv	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,2 @@
+name	flavor	size
+Loki	chocolate	massive
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort_test_multiple.txt	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,1 @@
+A	B	C	D
1	1	2	3
1	1	2	2
1	1	1	1
2	2	2	2
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort_test_multiple_sorted.txt	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,1 @@
+A	B	C	D
1	1	1	1
1	1	2	2
1	1	2	3
2	2	2	2
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sorted.tsv	Mon Jan 08 11:19:54 2018 -0500
@@ -0,0 +1,3 @@
+name	flavor	color	size
+Loki	chocolate		massive
+Dingbat	strawberry	red