comparison mlstAddFields.py @ 0:4e629e82c5b1 draft default tip

planemo upload commit a820b38dea9a409c11e220ba904da232fdbc4c05
author estrain
date Fri, 13 Mar 2026 12:51:10 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4e629e82c5b1
1 #!/usr/bin/env
2
3 import sys
4 import csv
5
def find_index(headers, term):
    """Return the position of the first occurrence of *term* in *headers*.

    Args:
        headers: Sequence of column names to search.
        term: Column name to look for.

    Returns:
        The zero-based index of *term*, or -1 when *term* is not present.
    """
    if term in headers:
        return headers.index(term)
    # Sentinel understood by callers as "column not available".
    return -1
11
# Profile-table columns that are copied into the annotation, in output order.
_ANNOTATION_FIELDS = ('clonal_complex', 'CC', 'Lineage', 'species')


def _locate_pubmlst_db():
    """Return the PubMLST database directory reported by ``mlst -h``.

    Uses the first help line mentioning ``db/pubmlst`` and takes the text
    between its first pair of single quotes, with ``bin/..`` removed.

    Raises:
        RuntimeError: If no such line (or no quoted path on it) is found.
    """
    # Imported lazily: only needed when the caller supplied no database path.
    # This requires the 'mlst' command to be installed and on the PATH.
    import subprocess

    help_text = subprocess.check_output(['mlst', '-h']).decode()
    for help_line in help_text.split('\n'):
        if 'db/pubmlst' not in help_line:
            continue
        quoted = help_line.split("'")
        if len(quoted) > 1:
            return quoted[1].replace("bin/..", "")
        break
    raise RuntimeError("Could not find MLST database location.")


def _lookup_annotation(profile_path, st):
    """Return the ``field=value`` annotation for *st*, or None if unlisted.

    The first column of every row in *profile_path* is compared with *st*.
    When several rows match, the last one wins. An ST that is listed but has
    no non-empty annotation cell yields an empty string.
    """
    # newline='' is what the csv module expects of files it reads.
    with open(profile_path, 'r', newline='') as handle:
        reader = csv.reader(handle, delimiter='\t')
        headers = next(reader, None)
        if headers is None:
            # Empty profile table: nothing can be looked up.
            return None

        columns = [
            (name, headers.index(name))
            for name in _ANNOTATION_FIELDS
            if name in headers
        ]

        annotation = None
        for row in reader:
            # Skip blank lines and every row that is not the requested ST.
            if not row or row[0] != st:
                continue
            # Rows shorter than the header are treated as having empty cells.
            annotation = ','.join(
                f"{name}={row[idx]}"
                for name, idx in columns
                if idx < len(row) and row[idx]
            )
        return annotation


def main(mlst_file, db_path=None):
    """Print the first row of *mlst_file* with an annotation column added.

    The second column of the row is used as the scheme name and the third as
    the sequence type (ST). The ST is looked up in
    ``<db>/<scheme>/<scheme>.txt`` and the row is written to stdout,
    tab-separated, with one extra column inserted after the ST. That column
    holds comma-separated ``field=value`` pairs for the non-empty
    ``clonal_complex``, ``CC``, ``Lineage`` and ``species`` cells, or ``-``
    when the ST is not listed. If the scheme is ``-`` the row is printed
    unchanged.

    Args:
        mlst_file: Path to a tab-separated mlst result file.
        db_path: Directory containing one sub-directory per scheme. When
            None, the location is obtained from ``mlst -h``.

    Raises:
        ValueError: If the file is empty or its first row has fewer than
            three columns.
        RuntimeError: If *db_path* is None and the database location cannot
            be determined from ``mlst -h``.
    """
    with open(mlst_file, 'r', newline='') as handle:
        mlstout = next(csv.reader(handle, delimiter='\t'), None)

    if mlstout is None or len(mlstout) < 3:
        raise ValueError(
            f"{mlst_file}: expected a first row with at least three "
            "tab-separated columns"
        )

    schema, mlst_st = mlstout[1], mlstout[2]

    # Return the output without appending if schema equals "-"
    if schema == "-":
        print("\t".join(mlstout))
        return

    mlstloc = _locate_pubmlst_db() if db_path is None else db_path
    annotation = _lookup_annotation(f"{mlstloc}/{schema}/{schema}.txt", mlst_st)

    output = mlstout[:3]
    output.append("-" if annotation is None else annotation)
    output.extend(mlstout[3:])

    print("\t".join(output))
70
if __name__ == "__main__":
    # Command-line entry point: <mlst_file> is required, [db_path] is optional.
    if len(sys.argv) < 2:
        # Report misuse on stderr so the usage text cannot end up in output
        # captured from stdout; sys.exit() with a string exits with status 1.
        sys.exit("Usage: python mlstAddFields.py <mlst_file> [db_path]")

    mlst_file = sys.argv[1]
    # With no second argument, main() receives None as the database path.
    db_path = sys.argv[2] if len(sys.argv) > 2 else None

    main(mlst_file, db_path)
80