changeset 0:f99c1cc0b190 draft

planemo upload commit bae430588c393222b68c5a7aa47f57bffee99475
author galaxytrakr
date Thu, 26 Mar 2026 20:22:36 +0000
parents
children 97634e034eed
files data_manager/data_manager_fetch_mapseq_db.py data_manager/macros.xml data_manager/mapseq_db_fetcher.xml data_manager_conf.xml
diffstat 3 files changed, 244 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_fetch_mapseq_db.py	Thu Mar 26 20:22:36 2026 +0000
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+# Python 3.6 compatible
+
+import argparse
+import json
+import os
+import shutil
+import tarfile
+import tempfile
+from datetime import datetime
+
+import wget
+
+DB_paths = {
+    "mgnify_v5_lsu": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/silva_lsu-20200130.tar.gz",
+    "mgnify_v5_ssu": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/silva_ssu-20200130.tar.gz",
+    "mgnify_v5_its_unite": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/UNITE-20200214.tar.gz",
+    "mgnify_v5_its_itsonedb": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/ITSoneDB-20200214.tar.gz",
+    "mgnify_v6_lsu": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/tool-dbs/silva-lsu/silva-lsu_138.1.tar.gz",
+    "mgnify_v6_ssu": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/tool-dbs/silva-ssu/silva-ssu_138.1.tar.gz",
+    "mgnify_v6_its_unite": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/tool-dbs/unite/unite_9.0.tar.gz",
+    "mgnify_v6_its_itsonedb": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/tool-dbs/itsonedb/itsonedb_1.141.tar.gz",
+    "mgnify_v6_pr2": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/tool-dbs/pr2/pr2_5.0.0.tar.gz",
+    "test_lsu": "https://zenodo.org/record/8205348/files/test_lsu.tar.gz",
+}
+
+DB_names = {
+    "mgnify_v5_lsu": "MGnify LSU (v5.0.7) - silva_lsu-20200130",
+    "mgnify_v5_ssu": "MGnify SSU (v5.0.7) - silva_ssu-20200130",
+    "mgnify_v5_its_unite": "MGnify ITS UNITE (v5.0.7) - UNITE-20200214",
+    "mgnify_v5_its_itsonedb": "MGnify ITS ITSonedb (v5.0.7) - ITSoneDB-20200214",
+    "mgnify_v6_lsu": "MGnify LSU (v6.0) - silva_lsu-20240702",
+    "mgnify_v6_ssu": "MGnify SSU (v6.0) - silva_ssu-20240701",
+    "mgnify_v6_its_unite": "MGnify ITS UNITE (v6.0) - UNITE-20240702",
+    "mgnify_v6_its_itsonedb": "MGnify ITS ITSonedb (v6.0) - ITSoneDB-20240702",
+    "mgnify_v6_pr2": "MGnify PR2 (v6.0) - PR2-20240702",
+    "test_lsu": "Trimmed LSU Test DB",
+}
+
+def _empty_dir(path):
+    if not os.path.isdir(path):
+        os.makedirs(path)
+        return
+    for name in os.listdir(path):
+        p = os.path.join(path, name)
+        if os.path.isdir(p) and not os.path.islink(p):
+            shutil.rmtree(p)
+        else:
+            try:
+                os.remove(p)
+            except OSError:
+                pass
+
+def _copy_all(src_dir, dest_dir):
+    for name in os.listdir(src_dir):
+        s = os.path.join(src_dir, name)
+        d = os.path.join(dest_dir, name)
+        if os.path.isdir(s) and not os.path.islink(s):
+            if os.path.exists(d):
+                shutil.rmtree(d)
+            shutil.copytree(s, d)
+        else:
+            shutil.copy2(s, dest_dir)
+
+def _safe_members(members):
+    for m in members:
+        mpath = m.name
+        norm = mpath.replace("\\", "/")
+        if os.path.isabs(mpath) or ".." in norm.split("/"):
+            continue
+        yield m
+
+def materialize_db_into(output_dir, url):
+    # Work entirely under /tmp to avoid job-dir races
+    tmp_root = tempfile.mkdtemp(prefix="mapseq_dm_", dir="/tmp")
+    temp_extract = tempfile.mkdtemp(prefix="extract_", dir=tmp_root)
+
+    tar_path = wget.download(url, out=tmp_root)
+    with tarfile.open(tar_path, "r:*") as tar:
+        tar.extractall(temp_extract, members=_safe_members(tar))
+
+    _empty_dir(output_dir)
+
+    entries = os.listdir(temp_extract)
+    if len(entries) == 1 and os.path.isdir(os.path.join(temp_extract, entries[0])):
+        inner = os.path.join(temp_extract, entries[0])
+        vf = os.path.join(inner, "VERSION.txt")
+        if os.path.exists(vf):
+            try:
+                os.remove(vf)
+            except OSError:
+                pass
+        _copy_all(inner, output_dir)
+    else:
+        vf = os.path.join(temp_extract, "VERSION.txt")
+        if os.path.exists(vf):
+            try:
+                os.remove(vf)
+            except OSError:
+                pass
+        _copy_all(temp_extract, output_dir)
+
+    try:
+        shutil.rmtree(tmp_root)
+    except Exception:
+        pass
+
+def main():
+    parser = argparse.ArgumentParser(description="Fetch and register MAPseq DB")
+    parser.add_argument("--out", dest="output", required=True, help="Galaxy params/DM JSON path")
+    parser.add_argument("--version", dest="version", required=False, help="DB version label (e.g., 6.0)")
+    parser.add_argument("--database-type", dest="db_type", required=False, help="DB key (e.g., mgnify_v6_lsu)")
+    parser.add_argument("--test", action="store_true", help="Use small test DB")
+    args = parser.parse_args()
+
+    # Read Galaxy params to get extra_files_path
+    with open(args.output) as fh:
+        params = json.load(fh)
+
+    output_dir = params["output_data"][0]["extra_files_path"]
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
+
+    # Resolve db_type/version: prefer CLI, else try params.select_version
+    db_type = args.db_type
+    version = args.version
+    sel = params.get("select_version") or {}
+    if not db_type:
+        db_type = sel.get("database_type")
+    if not version:
+        version = sel.get("version")
+
+    if not db_type:
+        raise RuntimeError("Missing --database-type and no params['select_version']['database_type'] provided.")
+    if db_type not in DB_paths:
+        raise KeyError("Unknown database type: {}. Valid keys: {}".format(db_type, ", ".join(sorted(DB_paths.keys()))))
+
+    url = DB_paths["test_lsu"] if args.test else DB_paths[db_type]
+    materialize_db_into(output_dir, url)
+
+    # Build DM JSON (value/name pattern you used earlier)
+    date_str = datetime.utcnow().strftime("%Y-%m-%d")
+    db_value = "{}_from_{}".format(db_type, date_str)
+    db_name = DB_names.get(db_type, db_type)
+    entry_name = "{} downloaded at {}".format(db_name, date_str)
+
+    data_manager_entry = {
+        "data_tables": {
+            "mapseq_db": [
+                {
+                    "value": db_value,
+                    "name": entry_name,
+                    "path": output_dir,
+                    "version": version,
+                }
+            ]
+        }
+    }
+
+    with open(args.output, "w") as fh:
+        json.dump(data_manager_entry, fh, indent=2, sort_keys=True)
+
+    try:
+        count = len(os.listdir(output_dir))
+        print("[INFO] Wrote {} items into {}".format(count, output_dir))
+    except Exception:
+        pass
+
+    print("[SUCCESS] Data Manager JSON written to {}".format(args.output))
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/mapseq_db_fetcher.xml	Thu Mar 26 20:22:36 2026 +0000
@@ -0,0 +1,52 @@
+<tool id="mapseq_db_fetcher" name="Mapseq DB fetcher" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Fetches the DB required for mapseq</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version" />
+    <command detect_errors="exit_code">
+    <![CDATA[
+        python '$__tool_directory__/data_manager_fetch_mapseq_db.py' 
+        --out '${out_file}' 
+        --version '${select_version.version}' 
+        --database-type '${database_type}'
+    ]]>
+    </command>
+    <inputs>
+        <conditional name="select_version">
+            <param argument="--version" type="select" label="Select MGnify version">
+                <option value="5.0">v5.0</option>
+                <option value="6.0">v6.0</option>
+            </param>
+            <when value="5.0">
+                <param name="database_type" type="select" multiple="false" label="Database Type">
+                    <option value="mgnify_v5_lsu">MGnify LSU (v5.0.7)</option>
+                    <option value="mgnify_v5_ssu">MGnify SSU (v5.0.7)</option>
+                    <option value="mgnify_v5_its_itsonedb">MGnify ITS ITSonedb (v5.0.7)</option>
+                    <option value="mgnify_v5_its_unite">MGnify ITS UNITE (v5.0.7)</option>
+                </param>
+            </when>
+            <when value="6.0">
+                <param name="database_type" type="select" multiple="false" label="Database Type">
+                    <option value="mgnify_v6_lsu">MGnify LSU (v6.0)</option>
+                    <option value="mgnify_v6_ssu">MGnify SSU (v6.0)</option>
+                    <option value="mgnify_v6_its_itsonedb">MGnify ITS ITSonedb (v6.0)</option>
+                    <option value="mgnify_v6_its_unite">MGnify ITS UNITE (v6.0)</option>
+                    <option value="mgnify_v6_pr2">MGnify PR2 (v6.0)</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="data_manager_json" name="out_file" />
+    </outputs>
+    <tests>
+    </tests>
+    <help>
+    Downloads preformatted DBs form MGnify that can be used for mapseq. 
+    The download paths were taken from: https://github.com/EBI-Metagenomics/pipeline-v5/
+    </help>
+    <expand macro="citations" />
+    <expand macro="creator" />
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Thu Mar 26 20:22:36 2026 +0000
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/mapseq_db_fetcher.xml" id="data_manager_mapseq">
+      <data_table name="mapseq_db">
+        <output>
+          <column name="value" />
+          <column name="name" />
+          <column name="version" />
+          <column name="path" output_ref="out_file" >
+            <move type="directory" relativize_symlinks="True">
+              <src>${path}</src>
+              <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">mapseq_db/${value}</target>
+            </move>
+            <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/mapseq_db/${value}</value_translation>
+            <value_translation type="function">abspath</value_translation>
+          </column>
+        </output>
+      </data_table>
+    </data_manager>
+</data_managers>
\ No newline at end of file