changeset 9:4c4899031795 draft

planemo upload commit fcafae43456eb929e62b5c879ac954f75745bbf8
author galaxytrakr
date Fri, 15 May 2026 11:44:48 +0000
parents e3b5ed54af18
children 6cba046e4aaa
files Dockerfile job_conf.yml misc/bioconda-recipes-seqsero2s-meta.yml patch_stringmlst.sh seqsero2S.xml test-data/.gitmodules tool-data/all_fasta.loc.sample
diffstat 5 files changed, 251 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dockerfile	Fri May 15 11:44:48 2026 +0000
@@ -0,0 +1,137 @@
+# Multi-stage Dockerfile for SeqSero2S
+# Default build: docker build -t seqsero2s:latest .
+# Test build: docker build --target test -t seqsero2s:test .
+
+# ============================================================================
+# Stage 1: Base image with mambaforge for faster dependency resolution
+# ============================================================================
+FROM condaforge/mambaforge:latest AS base
+
+LABEL maintainer="SeqSero2S Maintainers"
+LABEL description="Simplified Salmonella serotype prediction from genome sequencing data"
+
+# Non-interactive defaults for conda and apt; inherited by every stage built
+# FROM base (the dist stage starts from the upstream image and does not get them).
+ENV CONDA_ALWAYS_YES=true \
+    CONDA_AUTO_UPDATE_CONDA=false \
+    DEBIAN_FRONTEND=noninteractive
+
+# VPN workaround: turns off TLS certificate verification for conda downloads.
+RUN conda config --set ssl_verify false
+
+# ============================================================================
+# Stage 2: Builder - Install all dependencies and SeqSero2S
+# ============================================================================
+FROM base AS builder
+
+# Refresh the apt index and install the few OS tools the build needs
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        bash \
+        ca-certificates \
+        wget \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Stage the stringMLST patch script; it is run after SeqSero2S is installed below
+COPY patch_stringmlst.sh /tmp/patch_stringmlst.sh
+RUN chmod +x /tmp/patch_stringmlst.sh
+
+# Create the seqsero2s environment with mamba. Every version spec is quoted:
+# unquoted, the shell parses `>` in e.g. python>=3 as a redirect to a file "=3".
+RUN mamba create -n seqsero2s -c conda-forge -c bioconda \
+    "python>=3" \
+    pip \
+    setuptools \
+    "blast>=2.2" \
+    zstd \
+    samtools \
+    "bedtools>=2.17" \
+    "sra-tools>=2.8" \
+    "spades>=3.9" \
+    salmid \
+    "bwa>=0.7" \
+    "seqtk>=1.3" \
+    "stringmlst>=0.6" \
+    "mlst>=2.32.2" \
+    perl-list-moreutils \
+    && mamba clean -afy
+
+# Install SeqSero2S from the local fork shipped in the build context
+COPY SeqSero2S /tmp/build/SeqSero2S
+
+# Build from the source tree via WORKDIR rather than `RUN cd ...`
+WORKDIR /tmp/build/SeqSero2S
+
+# Same pip flags as the conda recipe: dependencies come from the conda env
+RUN /opt/conda/envs/seqsero2s/bin/python -m pip install . -vv \
+        --no-deps --no-build-isolation --no-cache-dir
+
+# Apply the stringMLST.py patch (the same script the conda recipe runs).
+# PREFIX tells the script which environment's bin/stringMLST.py to rewrite.
+RUN PREFIX=/opt/conda/envs/seqsero2s /tmp/patch_stringmlst.sh
+
+# ============================================================================
+# Stage 3: Test image - runs validation tests
+# ============================================================================
+FROM builder AS test
+
+# Scratch directory for the smoke tests
+WORKDIR /test
+
+# Same smoke tests as the conda recipe; `set -e` aborts the build on the first failure
+RUN set -e; echo "Running SeqSero2S tests..."; \
+    /opt/conda/envs/seqsero2s/bin/SeqSero2S.py -h; \
+    echo "SeqSero2S.py -h: PASSED"; \
+    /opt/conda/envs/seqsero2s/bin/blastn -help; \
+    echo "blastn -help: PASSED"; \
+    /opt/conda/envs/seqsero2s/bin/SalmID.py -h; \
+    echo "SalmID.py -h: PASSED"; \
+    /opt/conda/envs/seqsero2s/bin/mlst -h; \
+    echo "mlst -h: PASSED"; \
+    echo "All tests completed successfully!"
+
+# Default command just reports that the build-time tests passed
+CMD ["echo", "All SeqSero2S tests passed successfully!"]
+
+# ============================================================================
+# Stage 4: Production/Distribution image - minimal runtime (DEFAULT)
+# ============================================================================
+FROM condaforge/mambaforge:latest AS dist
+
+# Bring over only the finished environment; build inputs under /tmp stay in the builder stage
+COPY --from=builder /opt/conda/envs/seqsero2s /opt/conda/envs/seqsero2s
+
+# Put the env's bin first on PATH and set the conda variables directly (no `conda activate` is run)
+ENV PATH=/opt/conda/envs/seqsero2s/bin:$PATH \
+    CONDA_DEFAULT_ENV=seqsero2s \
+    CONDA_PREFIX=/opt/conda/envs/seqsero2s
+
+# Runtime-only OS packages; apt lists and temp dirs are removed in the same layer
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        procps \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+# Working directory for user data
+WORKDIR /data
+
+# Unprivileged account (uid 1000) that owns /data and runs the tool
+RUN useradd --create-home --uid 1000 --shell /bin/bash seqsero2s \
+    && chown -R seqsero2s:seqsero2s /data
+
+USER seqsero2s
+
+# OCI image metadata (org.opencontainers.image.* annotation keys)
+LABEL org.opencontainers.image.version="1.1.4" \
+      org.opencontainers.image.authors="LSTUGA" \
+      org.opencontainers.image.url="https://github.com/LSTUGA/SeqSero2S" \
+      org.opencontainers.image.documentation="https://github.com/LSTUGA/SeqSero2S" \
+      org.opencontainers.image.source="https://github.com/LSTUGA/SeqSero2S" \
+      org.opencontainers.image.licenses="GPL-2.0-or-later" \
+      org.opencontainers.image.title="SeqSero2S" \
+      org.opencontainers.image.description="Simplified Salmonella serotype prediction from genome sequencing data"
+
+# No ENTRYPOINT or CMD is set for the dist target, so the base image's defaults apply
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/bioconda-recipes-seqsero2s-meta.yml	Fri May 15 11:44:48 2026 +0000
@@ -0,0 +1,67 @@
+{% set name = "SeqSero2S" %}
+{% set version = "1.1.4" %}
+{% set release = "89f1f5aca7a8819ee96239592fedd2e737036ada" %}
+{% set sha256 = "995f1815cc6cee7b8e37604b068bbec673e2ee8880e41adf6df0350966fe4c65" %}
+
+package:
+  name: {{ name|lower }}
+  version: {{ version }}
+
+source:
+  url: https://github.com/LSTUGA/SeqSero2S/archive/refs/tags/v{{ version }}.tar.gz
+  sha256: {{ sha256 }}
+
+build:
+  number: 2
+  noarch: python
+  script:
+    - {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation --no-cache-dir
+    # patch stringMLST.py log path: replace the dbPrefix reference with cwd to avoid
+    # path issues; patches ALL occurrences (line 1464 and the predict section at line 1478).
+    - bash $RECIPE_DIR/patch_stringmlst.sh
+  run_exports:
+    - {{ pin_subpackage(name|lower, max_pin="x.x") }}
+
+requirements:
+  host:
+    - python >=3
+    - pip
+    - setuptools
+    - stringmlst >=0.6
+  run:
+    - python >=3
+    - blast >=2.2
+    - zstd         # libzstd.so.1 required by blastn binary
+    - samtools
+    - bedtools >=2.17
+    - sra-tools >=2.8
+    - spades >=3.9
+    - salmid
+    - bwa >=0.7
+    - seqtk >=1.3
+    - stringmlst >=0.6
+    - mlst >=2.32.2
+
+test:
+  commands:
+    - SeqSero2S.py -h
+    - blastn -help
+    - SalmID.py -h
+    - mlst -h
+
+about:
+  home: "https://github.com/LSTUGA/{{ name }}"
+  license: "GPL-2.0-or-later"
+  license_family: GPL
+  license_file: 'LICENSE'
+  summary: "Simplified Salmonella serotype prediction from genome sequencing data"
+  dev_url: "https://github.com/LSTUGA/{{ name }}"
+
+extra:
+  recipe-maintainers:
+    - LSTUGA
+    - crashfrog
+    - biocoder
+  identifiers:
+    - doi:10.1128/aem.02600-24
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/patch_stringmlst.sh	Fri May 15 11:44:48 2026 +0000
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+# Rewrite $PREFIX/bin/stringMLST.py so its log is written to kmer.log in the cwd
+# instead of dbPrefix+'.log' (all occurrences: line 1464 and the predict section at line 1478).
+# PREFIX must be provided by the caller; with `set -u` an unset PREFIX aborts the script.
+set -euo pipefail
+
+STRINGMLST="$PREFIX/bin/stringMLST.py"
+# A missing stringMLST.py is reported and treated as success, not as an error.
+if [ ! -f "$STRINGMLST" ]; then
+    echo "SKIP: $STRINGMLST not found"
+    exit 0
+fi
+
+python3 << 'PATCH_WITH_PY'
+import os
+
+p = os.path.join(os.environ["PREFIX"], "bin", "stringMLST.py")
+with open(p) as f:
+    lines = f.readlines()
+
+# Target statement, compared without the line terminator so that counting and
+# rewriting use one rule (a final line lacking "\n" is handled like any other).
+target = "            log = dbPrefix+'.log'"
+commented = "            # log = dbPrefix+'.log'\n"
+replacement = '            log = os.path.join(os.getcwd(), "kmer.log")\n'
+
+out = []
+occurrences = 0
+for line in lines:
+    if line.rstrip("\n") == target:
+        out.extend((commented, replacement))
+        occurrences += 1
+    else:
+        out.append(line)
+
+if occurrences == 0:
+    print("SKIP: stringMLST.py has 0 occurrences, expected at least 1")
+    raise SystemExit(0)  # the exit() builtin comes from `site` and may be absent
+
+with open(p, "w") as f:
+    f.writelines(out)
+
+print("PATCHED: stringMLST.py log path fixed (%d occurrences)" % occurrences)
+PATCH_WITH_PY
\ No newline at end of file
--- a/seqsero2S.xml	Thu May 07 18:12:13 2026 +0000
+++ b/seqsero2S.xml	Fri May 15 11:44:48 2026 +0000
@@ -1,8 +1,7 @@
-<tool id="seqsero2s" name="SeqSero2S" version="1.1.4+0">
+<tool id="seqsero2s" name="SeqSero2S" version="1.1.4+galaxytrakr">
     <description>Simplified Salmonella serotype prediction</description>
     <requirements>
-        <!-- <requirement type="package" version="@VERSION@">seqsero2s</requirement> -->
-        <container type="docker">quay.io/biocontainers/seqsero2s:1.1.4--pyhdfd78af_2</container>
+        <container type="docker">quay.io/galaxytrakr/seqsero2s:1.1.4</container>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
       mkdir ./output;
--- a/test-data/.gitmodules	Thu May 07 18:12:13 2026 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-[submodule "test/csp2"]
-	path = test/csp2
-	url = https://github.com/CFSAN-Biostatistics/CSP2_TestData.git