Mercurial > repos > jpayne > tableops
annotate table-summarize.py @ 0:402b58f45844 draft default tip
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
| author | jpayne |
|---|---|
| date | Mon, 08 Dec 2025 15:03:06 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
1 #! /usr/bin/env python |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
2 |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
3 from __future__ import print_function |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
4 |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
5 import csv |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
6 import sys |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
7 from collections import Counter, OrderedDict |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
8 |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
9 |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
def main(table):
    """Print a per-column tally of the values found in a tab-separated table.

    The first column is skipped; for every remaining column each distinct
    non-empty value is printed together with the number of rows it occurs in
    and the total number of data rows.

    Args:
        table: Path to a UTF-8, tab-delimited text file whose first line is
            the header row.
    """
    with open(table, "r", newline="", encoding="utf-8") as table_f:
        rdr = csv.DictReader(table_f, delimiter="\t", dialect="excel")

        # Accessing .fieldnames reads the header line. Stop early when the
        # header is missing or holds only the first (skipped) column.
        if not rdr.fieldnames or len(rdr.fieldnames) <= 1:
            print("No data columns found in the table.")
            return

        columns = rdr.fieldnames[1:]
        summary = OrderedDict()
        # Count records as they are yielded. rdr.line_num counts physical
        # lines, so it over-counts when the file has blank lines (which
        # DictReader skips) or quoted fields that span several lines.
        total = 0
        for row in rdr:
            total += 1
            for name in columns:
                summary.setdefault(name, Counter())[row[name]] += 1

    print("Summary:")
    for name, results in summary.items():
        print(f"{name}:")
        for result, num in results.items():
            # Empty strings and None (short rows) are not reported.
            if result:
                print(f"\t - {result}: {num} of {total}")
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
36 |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
37 |
|
402b58f45844
planemo upload commit 9cc4dc1db55299bf92ec6bd359161ece4592bd16-dirty
jpayne
parents:
diff
changeset
|
if __name__ == "__main__":
    # The table path is the first command-line argument; report a usage
    # error instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} TABLE")
    main(sys.argv[1])
