Mercurial > repos > estrain > data_manager_amrfinderplus
annotate data_manager/data_manager_amrfinderplus.py @ 0:2986b488a62b draft default tip
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
| author | estrain |
|---|---|
| date | Thu, 12 Mar 2026 19:41:57 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
2 """ |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
3 Galaxy Data Manager for NCBI AMRFinderPlus database. |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
4 Downloads and indexes the AMRFinderPlus database using amrfinder_update. |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
5 Writes a Galaxy-compatible data table JSON. |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
6 """ |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
7 |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
8 import argparse |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
9 import json |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
10 import os |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
11 import shutil |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
12 import subprocess |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
13 from pathlib import Path |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
14 |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
15 |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
class AmrFinderPlusDataManager:
    """Download the AMRFinderPlus database and describe it in a Galaxy data table JSON.

    The workflow, driven by :meth:`run`, is: read the job JSON handed over by
    Galaxy, run ``amrfinder_update`` into a temporary directory, read the
    database version files, copy the database next to the temporary directory
    under ``db_name``, remove the temporary directory and finally overwrite the
    job JSON with a single ``amrfinderplus_versioned_database`` entry.
    """

    def __init__(self, json_path: str, db_name: str = "amrfinderplus-db") -> None:
        """Store the job JSON location and the name of the final database folder.

        Args:
            json_path: Path of the JSON file that is read as input and later
                overwritten with the data table entry.
            db_name: Directory name (inside ``extra_files_path``) that receives
                the database; it is also written as the entry's ``path``.
        """
        self.json_path = Path(json_path)
        self.db_name = db_name
        # Both directories are only known after read_input_json() has run.
        self.output_dir = None
        self.extra_files_path = None
        # Filled by read_versions(), or set to "unknown" by run() on failure.
        self.version = None
        self.dbformat = None

    # --- Galaxy I/O ---------------------------------------------------------

    def read_input_json(self) -> None:
        """Read the input Galaxy data manager JSON and prepare the work directories.

        Sets ``extra_files_path`` from ``output_data[0].extra_files_path`` and
        ``output_dir`` to its ``tmp_download`` subdirectory, creating both.

        Raises:
            OSError: If the JSON file cannot be read or a directory cannot be created.
            ValueError: If the file does not contain valid JSON.
            KeyError, IndexError: If the expected ``output_data`` entry is missing.
        """
        with open(self.json_path, encoding="utf-8") as fh:
            params = json.load(fh)

        # Galaxy passes where we can write files (extra_files_path)
        self.extra_files_path = Path(params["output_data"][0]["extra_files_path"])
        self.extra_files_path.mkdir(parents=True, exist_ok=True)
        self.output_dir = self.extra_files_path / "tmp_download"
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def write_output_json(self) -> None:
        """Write Galaxy data table JSON entry over the job JSON file."""
        entry = {
            "data_tables": {
                "amrfinderplus_versioned_database": [
                    {
                        "value": f"amrfinderplus_{self.version}_{self.dbformat}",
                        "name": f"{self.version} ({self.dbformat})",
                        "db_version": self.dbformat,
                        "path": self.db_name,
                    }
                ]
            }
        }

        # Overwrite Galaxy's job JSON in place (this truncates the file; it is
        # not an atomic replace), then flush and fsync so the content has been
        # handed to the OS before this method returns.
        with open(self.json_path, "w", encoding="utf-8") as fh:
            json.dump(entry, fh, indent=2, sort_keys=True)
            fh.flush()
            os.fsync(fh.fileno())

    # --- Database logic -----------------------------------------------------

    def run_amrfinder_update(self) -> None:
        """Run amrfinder_update to download the database into ``output_dir``.

        Raises:
            subprocess.CalledProcessError: If the command exits with a non-zero status.
            FileNotFoundError: If ``amrfinder_update`` is not on the PATH.
        """
        print(f"Running amrfinder_update -d {self.output_dir}")
        subprocess.run(["amrfinder_update", "-d", str(self.output_dir)], check=True)

    @staticmethod
    def _read_first_line(path: Path) -> str:
        """Return the first line of *path* with surrounding whitespace stripped."""
        with open(path, encoding="utf-8") as fh:
            return fh.readline().strip()

    def read_versions(self) -> None:
        """Read version.txt and database_format_version.txt from ``output_dir/latest``.

        Stores the first line of each file in ``version`` and ``dbformat``.
        """
        latest_dir = self.output_dir / "latest"
        self.version = self._read_first_line(latest_dir / "version.txt")
        self.dbformat = self._read_first_line(latest_dir / "database_format_version.txt")

    def copy_database(self) -> None:
        """Copy the downloaded database to a permanent location.

        The content of ``output_dir/latest`` is copied to
        ``extra_files_path/db_name``; an existing target directory is reused.
        """
        latest_dir = self.output_dir / "latest"
        final_dir = self.extra_files_path / self.db_name
        shutil.copytree(latest_dir, final_dir, dirs_exist_ok=True)

    def cleanup(self) -> None:
        """Remove temporary download folder, if one was ever configured."""
        # output_dir is still None when read_input_json() failed early.
        # shutil.rmtree(None) is not a supported call and, depending on the
        # Python version, can raise TypeError despite ignore_errors=True, which
        # would escape the finally block in run() and skip writing the JSON.
        if self.output_dir is None:
            return
        shutil.rmtree(self.output_dir, ignore_errors=True)

    # --- Main run -----------------------------------------------------------

    def run(self) -> None:
        """Execute the whole download-and-register workflow.

        Any ``Exception`` raised while reading the input, downloading, reading
        the versions or copying is reported on stdout and swallowed; ``version``
        and ``dbformat`` are then set to ``"unknown"``. The temporary directory
        is removed and the output JSON is written in every case.
        """
        try:
            self.read_input_json()
            self.run_amrfinder_update()
            self.read_versions()
            self.copy_database()
        except Exception as e:
            print(f"AMRFinderPlus Data Manager failed: {e}")
            # still record placeholder entry
            # NOTE(review): this writes an "unknown" data table row even though
            # no database may have been installed, and the failure is not
            # re-raised here - confirm that this best-effort behaviour is
            # really what the Galaxy tool wrapper expects.
            self.version = "unknown"
            self.dbformat = "unknown"
        finally:
            self.cleanup()
            self.write_output_json()
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
100 |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
101 |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
102 # --- CLI entrypoint -------------------------------------------------------- |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
103 |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
def parse_args(argv=None):
    """Parse the command-line arguments of the data manager script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the arguments from ``sys.argv``,
            which is what the script entry point relies on.

    Returns:
        argparse.Namespace: Namespace whose ``data_manager_json`` attribute
        holds the path given as the single positional argument.
    """
    parser = argparse.ArgumentParser(description="Galaxy Data Manager for AMRFinderPlus database")
    parser.add_argument("data_manager_json", help="Galaxy data manager input/output JSON file")
    return parser.parse_args(argv)
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
108 |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
109 |
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
def main():
    """Script entry point: parse the command line and run the data manager."""
    cli = parse_args()
    # The single positional argument is the JSON file that is read as input
    # and overwritten with the data table entry.
    AmrFinderPlusDataManager(cli.data_manager_json).run()


# Only start the workflow when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
2986b488a62b
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
estrain
parents:
diff
changeset
|
118 |
