Mercurial > repos > estrain > data_manager_amrfinderplus
comparison data_manager/data_manager_amrfinderplus.py @ 0:2986b488a62b draft default tip
planemo upload commit 6f89f1ec5ad85eb9c08c0d48c77b8f8eadbdcaff
| author | estrain |
|---|---|
| date | Thu, 12 Mar 2026 19:41:57 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:2986b488a62b |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 """ | |
| 3 Galaxy Data Manager for NCBI AMRFinderPlus database. | |
| 4 Downloads and indexes the AMRFinderPlus database using amrfinder_update. | |
| 5 Writes a Galaxy-compatible data table JSON. | |
| 6 """ | |
| 7 | |
| 8 import argparse | |
| 9 import json | |
| 10 import os | |
| 11 import shutil | |
| 12 import subprocess | |
| 13 from pathlib import Path | |
| 14 | |
| 15 | |
class AmrFinderPlusDataManager:
    """Download the AMRFinderPlus database and describe it to Galaxy.

    The manager reads the job JSON that Galaxy hands to the data manager,
    runs ``amrfinder_update`` into a temporary folder below the job's
    ``extra_files_path``, copies the ``latest`` database into
    ``extra_files_path / db_name`` and finally overwrites the job JSON with
    an ``amrfinderplus_versioned_database`` data table entry.
    """

    def __init__(self, json_path: str, db_name="amrfinderplus-db") -> None:
        """Remember the job JSON location and the database folder name.

        Args:
            json_path: Path of the Galaxy data manager JSON; it is read as
                input and later overwritten with the data table entry.
            db_name: Folder name (below ``extra_files_path``) that receives
                the final database copy; also written as the entry's ``path``.
        """
        self.json_path = Path(json_path)
        self.db_name = db_name
        # Populated by read_input_json().
        self.output_dir = None
        self.extra_files_path = None
        # Populated by read_versions(), or set to "unknown" by run() on failure.
        self.version = None
        self.dbformat = None

    # --- Galaxy I/O ---------------------------------------------------------

    def read_input_json(self) -> None:
        """Read the input Galaxy data manager JSON and prepare directories.

        Sets ``extra_files_path`` from the first ``output_data`` element and
        creates it together with the ``tmp_download`` working folder.

        Raises:
            OSError: If the JSON cannot be opened or a folder cannot be made.
            ValueError: If the file is not valid JSON.
            KeyError, IndexError, TypeError: If the expected
                ``output_data[0].extra_files_path`` structure is missing.
        """
        with open(self.json_path, encoding="utf-8") as fh:
            params = json.load(fh)

        # Galaxy passes where we can write files (extra_files_path)
        self.extra_files_path = Path(params["output_data"][0]["extra_files_path"])
        self.extra_files_path.mkdir(parents=True, exist_ok=True)
        self.output_dir = self.extra_files_path / "tmp_download"
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def write_output_json(self) -> None:
        """Write the Galaxy data table JSON entry over the job JSON file."""
        entry = {
            "data_tables": {
                "amrfinderplus_versioned_database": [
                    {
                        "value": f"amrfinderplus_{self.version}_{self.dbformat}",
                        "name": f"{self.version} ({self.dbformat})",
                        "db_version": self.dbformat,
                        "path": self.db_name,
                    }
                ]
            }
        }

        # Overwrite Galaxy's job JSON in place (truncate + rewrite, so this is
        # not an atomic replace) and force the bytes to disk before returning.
        with open(self.json_path, "w", encoding="utf-8") as fh:
            json.dump(entry, fh, indent=2, sort_keys=True)
            fh.flush()
            os.fsync(fh.fileno())

    # --- Database logic -----------------------------------------------------

    def run_amrfinder_update(self) -> None:
        """Run ``amrfinder_update -d <output_dir>`` to download the database.

        Raises:
            subprocess.CalledProcessError: If the command exits non-zero.
            FileNotFoundError: If ``amrfinder_update`` is not on ``PATH``.
        """
        print(f"Running amrfinder_update -d {self.output_dir}")
        subprocess.run(["amrfinder_update", "-d", str(self.output_dir)], check=True)

    def read_versions(self) -> None:
        """Read the first line of version.txt and database_format_version.txt.

        Both files are looked up in ``output_dir / "latest"``; the stripped
        first lines are stored in ``version`` and ``dbformat``.
        """
        latest_dir = self.output_dir / "latest"
        with open(latest_dir / "version.txt", encoding="utf-8") as f:
            self.version = f.readline().strip()
        with open(latest_dir / "database_format_version.txt", encoding="utf-8") as f:
            self.dbformat = f.readline().strip()

    def copy_database(self) -> None:
        """Copy ``output_dir / "latest"`` to ``extra_files_path / db_name``."""
        latest_dir = self.output_dir / "latest"
        final_dir = self.extra_files_path / self.db_name
        # dirs_exist_ok lets a re-run merge into an already existing folder.
        shutil.copytree(latest_dir, final_dir, dirs_exist_ok=True)

    def cleanup(self) -> None:
        """Remove the temporary download folder, if one was ever set up."""
        # output_dir is still None when read_input_json() failed before
        # assigning it; rmtree must not be handed None, otherwise an error
        # here would prevent run() from writing the output JSON.
        if self.output_dir is None:
            return
        shutil.rmtree(self.output_dir, ignore_errors=True)

    # --- Main run -----------------------------------------------------------

    def run(self) -> None:
        """Execute the full download workflow and always write the output JSON.

        Any ``Exception`` raised by a step is reported on stdout and replaced
        by a placeholder entry whose version and format are ``"unknown"``.
        """
        try:
            self.read_input_json()
            self.run_amrfinder_update()
            self.read_versions()
            self.copy_database()
        except Exception as e:
            print(f"AMRFinderPlus Data Manager failed: {e}")
            # still record placeholder entry
            # NOTE(review): this entry's "path" may point at a folder that was
            # never created — confirm that registering it is really intended.
            self.version = "unknown"
            self.dbformat = "unknown"
        finally:
            self.cleanup()
            self.write_output_json()
| 100 | |
| 101 | |
| 102 # --- CLI entrypoint -------------------------------------------------------- | |
| 103 | |
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``data_manager_json``.
    """
    cli = argparse.ArgumentParser(
        description="Galaxy Data Manager for AMRFinderPlus database",
    )
    cli.add_argument(
        "data_manager_json",
        help="Galaxy data manager input/output JSON file",
    )
    namespace = cli.parse_args()
    return namespace
| 108 | |
| 109 | |
def main():
    """Build a data manager for the JSON file named on the command line and run it."""
    json_path = parse_args().data_manager_json
    AmrFinderPlusDataManager(json_path).run()


# Only start the workflow when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| 118 |
