Mercurial > repos > estrain > microrunqc
annotate mlstAddFields.py @ 0:4e629e82c5b1 draft default tip
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
| author | estrain |
|---|---|
| date | Fri, 13 Mar 2026 12:51:10 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
2 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
3 import sys |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
4 import csv |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
5 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
def find_index(headers, term):
    """Return the position of the first ``term`` in ``headers``, or -1.

    ``headers`` is searched with its own ``index`` method; a ``ValueError``
    from that call (value not present) is translated into the sentinel -1.
    """
    try:
        position = headers.index(term)
    except ValueError:
        position = -1
    return position
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
11 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
def main(mlst_file, db_path=None):
    """Print the first line of ``mlst_file`` with an extra annotation column.

    Only the first tab-separated line of ``mlst_file`` is read.  Its second
    column is used as the scheme name and its third column as the sequence
    type (ST).  The table ``<db>/<scheme>/<scheme>.txt`` is then searched for
    a row whose first column equals that ST, and every non-empty
    ``clonal_complex``, ``CC``, ``Lineage`` and ``species`` value of that row
    is rendered as ``name=value``; the pairs are joined with commas and
    inserted as a new fourth column.  ``-`` is inserted when no row matches.
    When the scheme is ``-`` the line is printed unchanged.

    Args:
        mlst_file: Path to a tab-separated file; only its first line is used.
        db_path: Directory holding one sub-directory per scheme.  When
            ``None`` the location is extracted from the output of
            ``mlst -h``, so the ``mlst`` command must be on the PATH.

    Raises:
        ValueError: If the first line of ``mlst_file`` is missing or has
            fewer than three columns.
        RuntimeError: If ``db_path`` is ``None`` and no database location
            can be extracted from the ``mlst -h`` output.
    """
    with open(mlst_file, 'r') as handle:
        # A default of None avoids leaking StopIteration on an empty file.
        mlstout = next(csv.reader(handle, delimiter='\t'), None)

    if not mlstout or len(mlstout) < 3:
        raise ValueError(
            "Expected at least 3 tab-separated columns on the first line "
            f"of {mlst_file!r}."
        )

    schema, mlstST = mlstout[1], mlstout[2]

    # Return the output without appending if schema equals "-"
    if schema == "-":
        print("\t".join(mlstout))
        return

    if db_path is None:
        # If no database path is provided, find it using an external command
        # This requires the 'mlst' command to be installed and available in the path
        import subprocess
        mlstdesc = subprocess.check_output(['mlst', '-h']).decode()
        db_pubmlst = [line for line in mlstdesc.split('\n') if 'db/pubmlst' in line]
        # The path is taken as the text between the first pair of single
        # quotes on the first line mentioning 'db/pubmlst'.
        quoted = db_pubmlst[0].split("'") if db_pubmlst else []
        if len(quoted) < 2:
            raise RuntimeError("Could not find MLST database location.")
        mlstloc = quoted[1].replace("bin/..", "")
    else:
        mlstloc = db_path

    mlst_file_path = f"{mlstloc}/{schema}/{schema}.txt"

    annotation_fields = ('clonal_complex', 'CC', 'Lineage', 'species')
    annotation = "-"
    with open(mlst_file_path, 'r') as handle:
        reader = csv.reader(handle, delimiter='\t')
        # An empty table yields no header and no rows, leaving "-" in place.
        headers = next(reader, [])
        columns = [(name, find_index(headers, name)) for name in annotation_fields]
        columns = [(name, idx) for name, idx in columns if idx > -1]

        for row in reader:
            # Blank lines come back as [] from csv.reader; skip them.
            if not row or row[0] != mlstST:
                continue
            # No break: if the ST is listed more than once, the last row wins.
            # Rows shorter than the header are treated as having empty values.
            annotation = ','.join(
                f"{name}={row[idx]}"
                for name, idx in columns
                if idx < len(row) and row[idx]
            )

    output = mlstout[:3] + [annotation] + mlstout[3:]
    print("\t".join(output))
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
70 |
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
if __name__ == "__main__":
    # stdout carries the annotated result line, so the usage text must not
    # be written there: sys.exit() with a string prints it to stderr and
    # exits with status 1.
    if len(sys.argv) < 2:
        sys.exit("Usage: python mlstAddFields.py <mlst_file> [db_path]")

    mlst_file = sys.argv[1]
    # The database directory is optional; main() locates it when omitted.
    db_path = sys.argv[2] if len(sys.argv) > 2 else None

    main(mlst_file, db_path)
|
4e629e82c5b1
planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff
changeset
|
80 |
