Mercurial > repos > jpayne > seqsero2s
changeset 9:4c4899031795 draft
planemo upload commit fcafae43456eb929e62b5c879ac954f75745bbf8
| author | galaxytrakr |
|---|---|
| date | Fri, 15 May 2026 11:44:48 +0000 |
| parents | e3b5ed54af18 |
| children | 6cba046e4aaa |
| files | Dockerfile job_conf.yml misc/bioconda-recipes-seqsero2s-meta.yml patch_stringmlst.sh seqsero2S.xml test-data/.gitmodules tool-data/all_fasta.loc.sample |
| diffstat | 5 files changed, 251 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
# Multi-stage Dockerfile for SeqSero2S
# Default build: docker build -t seqsero2s:latest .
# Test build:    docker build --target test -t seqsero2s:test .
# VPN build:     docker build --build-arg CONDA_SSL_VERIFY=false -t seqsero2s:latest .
#
# Build arguments:
#   BASE_IMAGE        Image used for both the build and the runtime stage.
#                     Override with a version tag or digest for reproducible
#                     builds (the default tracks the moving ":latest" tag).
#   CONDA_SSL_VERIFY  Passed to "conda config --set ssl_verify". Leave at "true"
#                     unless building behind a TLS-intercepting VPN/proxy.

# Declared before the first FROM so that every FROM line below can use it.
ARG BASE_IMAGE=condaforge/mambaforge:latest

# ============================================================================
# Stage 1: Base image with mambaforge for faster dependency resolution
# ============================================================================
FROM ${BASE_IMAGE} AS base

LABEL maintainer="SeqSero2S Maintainers"
LABEL description="Simplified Salmonella serotype prediction from genome sequencing data"

# TLS verification is configurable at build time instead of being switched off
# unconditionally; disabling it exposes package downloads to tampering.
ARG CONDA_SSL_VERIFY=true

# Reduce conda output and keep conda non-interactive.
ENV CONDA_ALWAYS_YES=true \
    CONDA_AUTO_UPDATE_CONDA=false

RUN conda config --set ssl_verify "${CONDA_SSL_VERIFY}"

# ============================================================================
# Stage 2: Builder - Install all dependencies and SeqSero2S
# ============================================================================
FROM base AS builder

# Install the OS packages needed during the build. DEBIAN_FRONTEND is set
# inline so it does not leak into the environment of the resulting image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        wget \
    && apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Copy the patch script
COPY patch_stringmlst.sh /tmp/patch_stringmlst.sh
RUN chmod +x /tmp/patch_stringmlst.sh

# Create conda environment with all dependencies.
# Using mamba for faster dependency resolution.
# Every version spec is quoted: unquoted, the shell treats ">=3" as a
# redirection of stdout to a file named "=3", which silently drops the version
# constraint and hides mamba's output.
RUN mamba create -y -n seqsero2s -c conda-forge -c bioconda \
        "python>=3" \
        pip \
        setuptools \
        "blast>=2.2" \
        zstd \
        samtools \
        "bedtools>=2.17" \
        "sra-tools>=2.8" \
        "spades>=3.9" \
        salmid \
        "bwa>=0.7" \
        "seqtk>=1.3" \
        "stringmlst>=0.6" \
        "mlst>=2.32.2" \
        perl-list-moreutils \
    && mamba clean -afy

# Install SeqSero2S from local fork
COPY SeqSero2S /tmp/build/SeqSero2S
WORKDIR /tmp/build/SeqSero2S

# --no-deps: all runtime dependencies come from the conda environment above.
RUN /opt/conda/envs/seqsero2s/bin/python -m pip install . -vv --no-deps --no-build-isolation --no-cache-dir

# Apply the stringMLST.py patch (from the conda recipe)
# The patch replaces dbPrefix reference with cwd to avoid path issues
RUN PREFIX=/opt/conda/envs/seqsero2s /tmp/patch_stringmlst.sh

# ============================================================================
# Stage 3: Test image - runs validation tests
# ============================================================================
FROM builder AS test

# Create test directory
WORKDIR /test

# Run test commands from the conda recipe; the build of this stage fails as
# soon as one of the commands exits non-zero.
RUN echo "Running SeqSero2S tests..." && \
    /opt/conda/envs/seqsero2s/bin/SeqSero2S.py -h && \
    echo "SeqSero2S.py -h: PASSED" && \
    /opt/conda/envs/seqsero2s/bin/blastn -help && \
    echo "blastn -help: PASSED" && \
    /opt/conda/envs/seqsero2s/bin/SalmID.py -h && \
    echo "SalmID.py -h: PASSED" && \
    /opt/conda/envs/seqsero2s/bin/mlst -h && \
    echo "mlst -h: PASSED" && \
    echo "All tests completed successfully!"

# Default command shows test results
CMD ["echo", "All SeqSero2S tests passed successfully!"]

# ============================================================================
# Stage 4: Production/Distribution image - minimal runtime (DEFAULT)
# ============================================================================
FROM ${BASE_IMAGE} AS dist

# Install minimal runtime dependencies. These rarely-changing layers come
# before the environment copy so they stay cached when the environment changes.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        procps \
    && apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Create working directory
WORKDIR /data

# Create non-root user for running the application
RUN useradd -m -u 1000 -s /bin/bash seqsero2s && \
    chown -R seqsero2s:seqsero2s /data

# Copy conda environment from builder
COPY --from=builder /opt/conda/envs/seqsero2s /opt/conda/envs/seqsero2s

# Update PATH to use the conda environment
ENV PATH=/opt/conda/envs/seqsero2s/bin:$PATH \
    CONDA_DEFAULT_ENV=seqsero2s \
    CONDA_PREFIX=/opt/conda/envs/seqsero2s

USER seqsero2s

# Add metadata labels
LABEL org.opencontainers.image.version="1.1.4" \
      org.opencontainers.image.authors="LSTUGA" \
      org.opencontainers.image.url="https://github.com/LSTUGA/SeqSero2S" \
      org.opencontainers.image.documentation="https://github.com/LSTUGA/SeqSero2S" \
      org.opencontainers.image.source="https://github.com/LSTUGA/SeqSero2S" \
      org.opencontainers.image.licenses="GPL-2.0-or-later" \
      org.opencontainers.image.title="SeqSero2S" \
      org.opencontainers.image.description="Simplified Salmonella serotype prediction from genome sequencing data"

# No entrypoint or command for dist target
# Bioconda recipe for SeqSero2S (misc/bioconda-recipes-seqsero2s-meta.yml).
# Builds the package from the tagged GitHub source archive and then patches the
# stringMLST.py found in the build prefix via patch_stringmlst.sh.
{% set name = "SeqSero2S" %}
{% set version = "1.1.4" %}
# NOTE(review): `release` is set here but not referenced anywhere else in this recipe.
{% set release = "89f1f5aca7a8819ee96239592fedd2e737036ada" %}
{% set sha256 = "995f1815cc6cee7b8e37604b068bbec673e2ee8880e41adf6df0350966fe4c65" %}

package:
  name: {{ name|lower }}
  version: {{ version }}

source:
  url: https://github.com/LSTUGA/SeqSero2S/archive/refs/tags/v{{ version }}.tar.gz
  sha256: {{ sha256 }}

build:
  number: 2
  noarch: python
  script:
    - {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation --no-cache-dir
    # patch stringMLST.py log path: replace dbPrefix reference with cwd to avoid
    # path issues; patches ALL occurrences (at line 1464 and predict section at
    # line 1478).
    - bash $RECIPE_DIR/patch_stringmlst.sh
  run_exports:
    - {{ pin_subpackage(name|lower, max_pin="x.x") }}

requirements:
  host:
    - python >=3
    - pip
    - setuptools
    # stringmlst is listed under host as well as run; presumably so that
    # stringMLST.py exists in the prefix when the patch script runs - confirm.
    - stringmlst >=0.6
  run:
    - python >=3
    - blast >=2.2
    - zstd  # libzstd.so.1 required by blastn binary
    - samtools
    - bedtools >=2.17
    - sra-tools >=2.8
    - spades >=3.9
    - salmid
    - bwa >=0.7
    - seqtk >=1.3
    - stringmlst >=0.6
    - mlst >=2.32.2

# Smoke tests: each command only has to start and print its help text.
test:
  commands:
    - SeqSero2S.py -h
    - blastn -help
    - SalmID.py -h
    - mlst -h

about:
  home: "https://github.com/LSTUGA/{{ name }}"
  license: "GPL-2.0-or-later"
  license_family: GPL
  license_file: 'LICENSE'
  summary: "Simplified Salmonella serotype prediction from genome sequencing data"
  dev_url: "https://github.com/LSTUGA/{{ name }}"

extra:
  recipe-maintainers:
    - LSTUGA
    - crashfrog
    - biocoder
  identifiers:
    - doi:10.1128/aem.02600-24
#!/usr/bin/env bash

# Patch stringMLST.py's log path: replace the dbPrefix-based log file with
# "kmer.log" in the current working directory.
# Patches all occurrences (at line 1464 and predict section at line 1478).
#
# Environment:
#   PREFIX  Environment prefix; the file patched is $PREFIX/bin/stringMLST.py.
#
# Exit status: 0 when the file was patched, and also 0 (with a "SKIP:" message)
# when the file is missing or contains nothing to patch. Non-zero when PREFIX
# is unset or the patch itself fails.

set -euo pipefail

# Fail with an explicit message instead of the bare "unbound variable" error.
: "${PREFIX:?PREFIX must be set to the environment prefix containing bin/stringMLST.py}"

STRINGMLST="$PREFIX/bin/stringMLST.py"

if [ ! -f "$STRINGMLST" ]; then
    echo "SKIP: $STRINGMLST not found"
    exit 0
fi

# PREFIX is passed explicitly so the Python code sees it even when the caller
# set it as a plain (non-exported) shell variable.
PREFIX="$PREFIX" python3 << 'PATCH_WITH_PY'
import os
import sys

p = os.path.join(os.environ["PREFIX"], "bin", "stringMLST.py")

# Statement to replace and its substitute, both without indentation: matching
# is done on the stripped line and the line's own leading whitespace is reused,
# so the patch does not depend on how stringMLST.py happens to be indented.
target = "log = dbPrefix+'.log'"
replacement = 'log = os.path.join(os.getcwd(), "kmer.log")'

with open(p) as f:
    lines = f.readlines()

out = []
occurrences = 0
for line in lines:
    if line.strip() == target:
        indent = line[: len(line) - len(line.lstrip())]
        # Keep the original statement as a comment, then add the new one.
        out.append(indent + "# " + target + "\n")
        out.append(indent + replacement + "\n")
        # Counted here so the reported number is exactly the number patched.
        occurrences += 1
    else:
        out.append(line)

if occurrences == 0:
    # Nothing to do (e.g. already patched); leave the file untouched.
    print("SKIP: stringMLST.py has 0 occurrences, expected at least 1")
    sys.exit(0)

with open(p, "w") as f:
    f.writelines(out)

print("PATCHED: stringMLST.py log path fixed (%d occurrences)" % occurrences)
PATCH_WITH_PY
--- a/seqsero2S.xml Thu May 07 18:12:13 2026 +0000 +++ b/seqsero2S.xml Fri May 15 11:44:48 2026 +0000 @@ -1,8 +1,7 @@ -<tool id="seqsero2s" name="SeqSero2S" version="1.1.4+0"> +<tool id="seqsero2s" name="SeqSero2S" version="1.1.4+galaxytrakr"> <description>Simplified Salmonella serotype prediction</description> <requirements> - <!-- <requirement type="package" version="@VERSION@">seqsero2s</requirement> --> - <container type="docker">quay.io/biocontainers/seqsero2s:1.1.4--pyhdfd78af_2</container> + <container type="docker">quay.io/galaxytrakr/seqsero2s:1.1.4</container> </requirements> <command detect_errors="exit_code"><