annotate table-summarize.py @ 0:402b58f45844 draft default tip

planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
author jpayne
date Mon, 08 Dec 2025 15:03:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
1 #! /usr/bin/env python
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
2
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
3 from __future__ import print_function
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
4
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
5 import csv
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
6 import sys
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
7 from collections import Counter, OrderedDict
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
8
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
9
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
def main(table):
    """Print a per-column tally of the values found in a tab-separated table.

    The first line of the file is the header. The first column is skipped
    (presumably a row label -- confirm against the tool's inputs); for every
    other column, each distinct non-empty value is printed together with how
    many data rows carry it and the total number of data rows.

    Args:
        table: Path to a UTF-8 encoded, tab-delimited text file.

    Returns:
        None. The report is written to standard output. When the header is
        missing or names at most one column, a short notice is printed
        instead of a summary.
    """
    # newline="" lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(table, "r", newline="", encoding="utf-8") as table_f:
        rdr = csv.DictReader(table_f, delimiter="\t", dialect="excel")

        fieldnames = rdr.fieldnames
        if not fieldnames or len(fieldnames) <= 1:
            print("No data columns found in the table.")
            return

        data_columns = fieldnames[1:]
        summary = OrderedDict()
        # Count records as they are yielded. The reader's line_num attribute
        # counts physical lines read, which over-counts whenever the file
        # has blank lines (skipped by DictReader) or multi-line quoted fields.
        total = 0
        for row in rdr:
            total += 1
            for name in data_columns:
                # Short rows yield None for missing columns; those are
                # tallied here and filtered out when printing.
                summary.setdefault(name, Counter())[row[name]] += 1

    print("Summary:")
    for name, results in summary.items():
        print(f"{name}:")
        for result, num in results.items():
            # Empty strings and None (missing cells) are not reported.
            if result:
                print(f"\t - {result}: {num} of {total}")
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
36
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
37
402b58f45844 planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff changeset
if __name__ == "__main__":
    # The table path is the only required argument. Exit with a usage
    # message (written to stderr, non-zero status) instead of letting a
    # missing argument surface as an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("usage: {} TABLE".format(sys.argv[0]))
    main(sys.argv[1])