annotate mlstAddFields.py @ 0:4e629e82c5b1 draft default tip

planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
author estrain
date Fri, 13 Mar 2026 12:51:10 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
1 #!/usr/bin/env
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
2
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
3 import sys
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
4 import csv
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
5
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
def find_index(headers, term):
    """Return the position of ``term`` in ``headers``, or -1 if it is absent.

    Args:
        headers: Sequence of column names (anything with an ``index`` method
            that raises ``ValueError`` on a miss, e.g. a list or tuple).
        term: Column name to look for; the match is exact and case-sensitive.

    Returns:
        The zero-based index of the first occurrence of ``term``, or ``-1``
        when ``term`` does not occur in ``headers``.
    """
    try:
        return headers.index(term)
    except ValueError:
        # Callers test for "> -1", so a miss is reported as a sentinel
        # instead of propagating the exception.
        return -1
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
11
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
def _find_mlst_db():
    """Ask the ``mlst`` executable where its PubMLST database lives.

    Runs ``mlst -h`` and takes the first help line containing ``db/pubmlst``;
    the path is the text between the first pair of single quotes on that
    line, with any ``bin/..`` fragment removed.

    Returns:
        The database directory as a string.

    Raises:
        RuntimeError: If no usable line is found in the help text.
        FileNotFoundError, subprocess.CalledProcessError: Propagated from
            ``subprocess.check_output`` when ``mlst`` cannot be run.
    """
    import subprocess  # only needed when no db_path was supplied

    help_text = subprocess.check_output(['mlst', '-h']).decode()
    for help_line in help_text.split('\n'):
        if 'db/pubmlst' not in help_line:
            continue
        quoted = help_line.split("'")
        if len(quoted) > 1:
            return quoted[1].replace("bin/..", "")
        # Only the first matching line is considered; if it has no quoted
        # path, report the failure below rather than raising IndexError.
        break
    raise RuntimeError("Could not find MLST database location.")


def _lookup_annotation(scheme_file, st):
    """Build the annotation string for sequence type ``st``.

    Args:
        scheme_file: Path to a tab-delimited table whose first row is a
            header and whose first column holds the sequence type.
        st: Sequence type to look up (compared as a string).

    Returns:
        A comma-separated ``name=value`` string built from the non-empty
        ``clonal_complex``, ``CC``, ``Lineage`` and ``species`` columns of
        the matching row (an empty string if none of them has a value), or
        ``None`` when no row matches. If several rows match, the last wins.
    """
    wanted = ('clonal_complex', 'CC', 'Lineage', 'species')
    annotation = None
    # newline='' is what the csv module expects from files it reads.
    with open(scheme_file, 'r', newline='') as handle:
        reader = csv.reader(handle, delimiter='\t')
        headers = next(reader, [])
        columns = [(name, headers.index(name))
                   for name in wanted if name in headers]
        for row in reader:
            # csv yields [] for blank lines; those have no ST to match.
            if not row or row[0] != st:
                continue
            # Rows shorter than the header simply lack the trailing fields.
            annotation = ','.join(
                f"{name}={row[idx]}"
                for name, idx in columns
                if idx < len(row) and row[idx]
            )
    return annotation


def main(mlst_file, db_path=None):
    """Print the first row of ``mlst_file`` with an annotation column added.

    Only the first tab-delimited row of ``mlst_file`` is read. Its second
    column is taken as the scheme name and its third as the sequence type.
    The annotation is inserted as a new fourth column; ``-`` is used when
    the sequence type is not listed in ``<db>/<scheme>/<scheme>.txt``. When
    the scheme is ``-`` the row is printed unchanged.

    Args:
        mlst_file: Path to the tab-delimited input file.
        db_path: Directory containing one sub-directory per scheme. When
            ``None`` the location is derived from the output of ``mlst -h``.

    Raises:
        ValueError: If the input has no first row or that row has fewer
            than three columns.
        RuntimeError: If ``db_path`` is ``None`` and the database location
            cannot be determined.
        OSError: If either file cannot be opened.
    """
    with open(mlst_file, 'r', newline='') as handle:
        mlstout = next(csv.reader(handle, delimiter='\t'), None)

    if mlstout is None or len(mlstout) < 3:
        raise ValueError(
            f"{mlst_file}: expected a first row with at least 3 "
            "tab-separated columns"
        )

    schema = mlstout[1]
    mlst_st = mlstout[2]

    # Return the output without appending if schema equals "-"
    if schema == "-":
        print("\t".join(mlstout))
        return

    mlstloc = _find_mlst_db() if db_path is None else db_path
    annotation = _lookup_annotation(f"{mlstloc}/{schema}/{schema}.txt", mlst_st)

    output = mlstout[:3]
    output.append("-" if annotation is None else annotation)
    output.extend(mlstout[3:])

    print("\t".join(output))
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
70
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: <mlst_file> is required, [db_path] optional.
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # keeping the usage text out of the tab-delimited stdout stream.
        sys.exit("Usage: python mlstAddFields.py <mlst_file> [db_path]")

    mlst_file = sys.argv[1]
    # Without a second argument, main() works out the database location.
    db_path = sys.argv[2] if len(sys.argv) > 2 else None

    main(mlst_file, db_path)
4e629e82c5b1 planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
estrain
parents:
diff changeset
80