annotate data_manager/data_manager_amrfinderplus.py @ 0:2986b488a62b draft default tip

planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
author estrain
date Thu, 12 Mar 2026 19:41:57 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
1 #!/usr/bin/env python3
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
2 """
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
3 Galaxy Data Manager for NCBI AMRFinderPlus database.
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
4 Downloads and indexes the AMRFinderPlus database using amrfinder_update.
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
5 Writes a Galaxy-compatible data table JSON.
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
6 """
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
7
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
8 import argparse
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
9 import json
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
10 import os
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
11 import shutil
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
12 import subprocess
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
13 from pathlib import Path
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
14
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
15
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
class AmrFinderPlusDataManager:
    """Download the AMRFinderPlus database and register it with Galaxy.

    The workflow (see :meth:`run`) is: read the Galaxy job JSON, run
    ``amrfinder_update`` into a temporary directory, read the version files
    from its ``latest`` sub-directory, copy that directory next to the
    temporary one, and finally overwrite the job JSON with a data table entry.

    Args:
        json_path: Path of the Galaxy data manager JSON file. It is read as
            input and later overwritten with the data table entry.
        db_name: Directory name (below ``extra_files_path``) that receives
            the database copy; also written as the entry's ``path``.
    """

    def __init__(self, json_path: str, db_name: str = "amrfinderplus-db"):
        self.json_path = Path(json_path)
        self.db_name = db_name
        # Both are populated by read_input_json().
        self.output_dir = None
        self.extra_files_path = None
        # Populated by read_versions(); run() sets them to "unknown" on failure.
        self.version = None
        self.dbformat = None

    # --- Galaxy I/O ---------------------------------------------------------

    def read_input_json(self) -> None:
        """Read the Galaxy job JSON and create the working directories.

        Sets ``extra_files_path`` from ``output_data[0].extra_files_path`` and
        ``output_dir`` to a ``tmp_download`` directory inside it.

        Raises:
            OSError: If the JSON file cannot be read or a directory cannot be
                created.
            KeyError, IndexError, ValueError: If the JSON is malformed or
                lacks the expected ``output_data`` structure.
        """
        with open(self.json_path, encoding="utf-8") as fh:
            params = json.load(fh)

        # Galaxy passes where we can write files (extra_files_path)
        self.extra_files_path = Path(params["output_data"][0]["extra_files_path"])
        self.extra_files_path.mkdir(parents=True, exist_ok=True)
        self.output_dir = self.extra_files_path / "tmp_download"
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def write_output_json(self) -> None:
        """Overwrite the job JSON with the data table entry for this database."""
        entry = {
            "data_tables": {
                "amrfinderplus_versioned_database": [
                    {
                        "value": f"amrfinderplus_{self.version}_{self.dbformat}",
                        "name": f"{self.version} ({self.dbformat})",
                        "db_version": self.dbformat,
                        "path": self.db_name,
                    }
                ]
            }
        }

        # The file is rewritten in place (truncate + write), which is NOT
        # atomic; flush and fsync only make sure the bytes reach the disk
        # before this process exits.
        with open(self.json_path, "w", encoding="utf-8") as fh:
            json.dump(entry, fh, indent=2, sort_keys=True)
            fh.flush()
            os.fsync(fh.fileno())

    # --- Database logic -----------------------------------------------------

    def _latest_dir(self) -> Path:
        """Return the ``latest`` directory inside the temporary download dir."""
        return self.output_dir / "latest"

    @staticmethod
    def _read_first_line(path: Path) -> str:
        """Return the stripped first line of *path*, rejecting an empty value."""
        with open(path, encoding="utf-8") as fh:
            value = fh.readline().strip()
        if not value:
            # An empty value would yield a meaningless "amrfinderplus__" entry.
            raise ValueError(f"{path} does not contain a version string")
        return value

    def run_amrfinder_update(self) -> None:
        """Run ``amrfinder_update -d <output_dir>`` to download the database.

        Raises:
            FileNotFoundError: If the ``amrfinder_update`` executable is missing.
            subprocess.CalledProcessError: If it exits with a non-zero status.
        """
        print(f"Running amrfinder_update -d {self.output_dir}")
        subprocess.run(["amrfinder_update", "-d", str(self.output_dir)], check=True)

    def read_versions(self) -> None:
        """Read ``version.txt`` and ``database_format_version.txt``.

        Both files are read from ``<output_dir>/latest``. The attributes are
        only assigned once both values were read successfully.

        Raises:
            OSError: If either file cannot be opened.
            ValueError: If the first line of either file is blank.
        """
        latest_dir = self._latest_dir()
        version = self._read_first_line(latest_dir / "version.txt")
        dbformat = self._read_first_line(latest_dir / "database_format_version.txt")
        self.version = version
        self.dbformat = dbformat

    def copy_database(self) -> None:
        """Copy ``<output_dir>/latest`` to ``<extra_files_path>/<db_name>``."""
        final_dir = self.extra_files_path / self.db_name
        shutil.copytree(self._latest_dir(), final_dir, dirs_exist_ok=True)

    def cleanup(self) -> None:
        """Remove the temporary download folder, ignoring any errors."""
        if self.output_dir is None:
            # read_input_json() never got far enough to create it.
            return
        shutil.rmtree(self.output_dir, ignore_errors=True)

    # --- Main run -----------------------------------------------------------

    def run(self) -> None:
        """Run the full download/register workflow.

        Any ``Exception`` raised by a step is printed and replaced by a
        placeholder entry whose version and format are ``"unknown"``; the
        output JSON is written in either case.
        """
        try:
            self.read_input_json()
            self.run_amrfinder_update()
            self.read_versions()
            self.copy_database()
        except Exception as e:
            print(f"AMRFinderPlus Data Manager failed: {e}")
            # still record placeholder entry
            # NOTE(review): this registers an "unknown" entry and lets the
            # process finish normally after a failed update - confirm that
            # this is preferable to failing the Galaxy job.
            self.version = "unknown"
            self.dbformat = "unknown"
        finally:
            self.cleanup()
        # Written outside ``finally`` so that KeyboardInterrupt/SystemExit do
        # not leave behind an entry built from unset (None) version fields.
        self.write_output_json()
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
100
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
101
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
102 # --- CLI entrypoint --------------------------------------------------------
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
103
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``data_manager_json`` path.
    """
    cli = argparse.ArgumentParser(
        description="Galaxy Data Manager for AMRFinderPlus database"
    )
    cli.add_argument(
        "data_manager_json",
        help="Galaxy data manager input/output JSON file",
    )
    namespace = cli.parse_args()
    return namespace
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
108
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
109
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
def main():
    """Script entry point: build the data manager from the CLI path and run it."""
    json_path = parse_args().data_manager_json
    AmrFinderPlusDataManager(json_path).run()
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
114
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
115
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
# Run the data manager only when this file is executed as a script.
if __name__ == "__main__":
    main()
2986b488a62b planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff changeset
118