# CSP2 Dockerfile
# Based on StaPH-B's Dockerfiles for BEDTools, MUMmer, and SKESA
# Thanks to Erin Young, Curtis Kapsak, John Arnn, and the StaPH-B team
# https://github.com/StaPH-B/docker-builds/blob/master/bedtools/2.31.1/Dockerfile
# https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile
# https://github.com/StaPH-B/docker-builds/blob/master/skesa/2.4.0/Dockerfile
#
# Stages:
#   build   - downloads/compiles the tools and creates the Python venv
#   app     - runtime image: tools copied from build, JRE, Nextflow, CSP2 sources
#   pretest - app plus downloaded test data
#   test    - runs the tool smoke tests and `make test`
#   release - app plus version ENV variables and the ENTRYPOINT

# Global version pins; each stage that needs one re-declares it with a bare ARG.
ARG CSP2_VER="0.9.0"
ARG BEDTOOLS_VER="2.31.1"
ARG MUMMER_VER="4.0.0"
ARG SKESA_VER="2.4.0"
ARG MASH_VER="2.3"
ARG BBMAP_VER="38.90"
ARG PYTHON_VER="3.8"

################################################################################
# build stage
################################################################################
FROM ubuntu:focal AS build

ARG BEDTOOLS_VER
ARG MUMMER_VER
ARG SKESA_VER
ARG MASH_VER
ARG BBMAP_VER
ARG PYTHON_VER

WORKDIR /build

# to prevent tzdata from asking for a region during apt updates; ARG so that variable only
# persists at buildtime
# from https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile
ARG DEBIAN_FRONTEND=noninteractive

# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    tzdata \
    gpg-agent \
    software-properties-common \
    build-essential \
    zlib1g-dev \
    libghc-bzlib-dev \
    liblzma-dev \
    wget \
    ca-certificates

# Install Python from the deadsnakes PPA and create a self-contained venv
# (--copies copies the interpreter into the venv instead of symlinking it)
RUN add-apt-repository 'ppa:deadsnakes/ppa' && apt-get update && apt-get install -y --no-install-recommends \
    python${PYTHON_VER} \
    # python${PYTHON_VER}-pip \
    # python${PYTHON_VER}-full \
    python${PYTHON_VER}-dev \
    python${PYTHON_VER}-venv && \
    python${PYTHON_VER} -m venv --copies /opt/venv

# Put the venv first on PATH so the following `pip` is the venv's pip
ENV PATH="/opt/venv/bin:$PATH"

RUN pip install --no-cache-dir -U pandas~=1.2.0 pybedtools refchooser scikit-learn

# Download tool sources / prebuilt binaries into /build
ADD https://github.com/arq5x/bedtools2/archive/refs/tags/v${BEDTOOLS_VER}.tar.gz .
ADD https://github.com/mummer4/mummer/releases/download/v${MUMMER_VER}rc1/mummer-${MUMMER_VER}rc1.tar.gz .
ADD https://github.com/ncbi/SKESA/releases/download/${SKESA_VER}/skesa.centos.7.7 .
ADD https://github.com/ncbi/SKESA/releases/download/${SKESA_VER}/gfa_connector.centos7.7 .
ADD https://github.com/ncbi/SKESA/releases/download/${SKESA_VER}/kmercounter.centos7.7 .
ADD https://github.com/marbl/Mash/releases/download/v${MASH_VER}/mash-Linux64-v${MASH_VER}.tar .

# Install BEDTools
# per https://github.com/StaPH-B/docker-builds/blob/master/bedtools/2.31.1/Dockerfile
# python3 required when compiling via `make` command for creating old CLI executables
# dependencies listed here (albeit for v2.30.0, still should be identical): https://packages.ubuntu.com/jammy/bedtools
# requires libghc-bzlib-dev, build-essential, zlib1g-dev, and a few others
# 'make install' should place binary executable files in /usr/local/bin
RUN tar -xzf v${BEDTOOLS_VER}.tar.gz && \
    rm v${BEDTOOLS_VER}.tar.gz && \
    cd bedtools2-${BEDTOOLS_VER} && \
    make && \
    make install

# Install MUMmer
# per https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile
RUN tar -xvf mummer-${MUMMER_VER}rc1.tar.gz && \
    rm mummer-${MUMMER_VER}rc1.tar.gz && \
    cd mummer-${MUMMER_VER}rc1 && \
    ./configure --prefix=/usr/local && \
    make && \
    make install && \
    ldconfig

# Install SKESA
# per https://github.com/StaPH-B/docker-builds/blob/master/skesa/2.4.0/Dockerfile
# rename the downloaded skesa, gfa_connector, and kmercounter binaries,
# make them executable, and move them to /usr/local/bin
RUN mkdir skesa && \
    cd skesa && \
    mv /build/skesa.centos.7.7 skesa && \
    mv /build/gfa_connector.centos7.7 gfa_connector && \
    mv /build/kmercounter.centos7.7 kmercounter && \
    chmod +x skesa gfa_connector kmercounter && \
    mv skesa gfa_connector kmercounter /usr/local/bin

# Install Mash
RUN tar -xvf mash-Linux64-v${MASH_VER}.tar && \
    mv mash-Linux64-v${MASH_VER}/mash /usr/local/bin

# Install BBMap
RUN wget -O BBMap_${BBMAP_VER}.tar.gz https://sourceforge.net/projects/bbmap/files/BBMap_${BBMAP_VER}.tar.gz/download && \
    tar -xvf BBMap_${BBMAP_VER}.tar.gz && \
    mv bbmap/* /usr/local/bin

################################################################################
# app stage
################################################################################
FROM ubuntu:focal AS app

ARG CSP2_VER
ARG CSP2_BRANCH="main"
ARG PYTHON_VER

LABEL base.image="ubuntu:focal"
LABEL version=${CSP2_VER}
LABEL software="CSP2"
LABEL software.version=${CSP2_VER}
LABEL description="a Nextflow pipeline for rapid, accurate SNP distance estimation from assembly data"
LABEL website="https://github.com/CFSAN-Biostatistics/CSP2"
LABEL licence="https://github.com/CFSAN-Biostatistics/CSP2/blob/main/LICENSE"
LABEL maintainer="Robert Literman"
LABEL maintainer.email="Robert.Literman@fda.hhs.gov"
LABEL maintainer.organization="FDA/CFSAN/Biostatistics"
LABEL maintainer2="Justin Payne"
LABEL maintainer2.email="Justin.Payne@fda.hhs.gov"
LABEL maintainer2.organization="FDA/CFSAN/Biostatistics"

# First WORKDIR only creates /root/.nextflow; /app is the effective working directory
WORKDIR /root/.nextflow
WORKDIR /app

# copy in all executable files from builder stage to final app stage
COPY --from=build /usr/local/bin /usr/local/bin

# Libraries, Perl modules and shared data copied from the build stage
# (presumably required by MUMmer's Perl scripts), plus the venv and `make`
COPY --from=build /usr/local/lib /usr/local/lib
COPY --from=build /usr/local/libexec/mummer /usr/local/libexec/mummer
COPY --from=build /usr/lib/x86_64-linux-gnu/perl /usr/lib/x86_64-linux-gnu/perl
COPY --from=build /usr/local/share /usr/local/share
COPY --from=build /usr/share /usr/share
COPY --from=build /opt/venv /opt/venv
COPY --from=build /usr/bin/make /usr/local/bin/make

# Python standard library for the interpreter copied into the venv
COPY --from=build /usr/lib/python${PYTHON_VER} /usr/lib/python${PYTHON_VER}

# Keep apt non-interactive in this stage as well; ARG so it only exists at build time
ARG DEBIAN_FRONTEND=noninteractive

# Install JRE
# remove the apt package index afterwards so it is not stored in this image layer
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    openjdk-17-jre-headless \
    curl && \
    rm -rf /var/lib/apt/lists/*

# Install Nextflow
# per https://www.nextflow.io/docs/latest/getstarted.html
# `nextflow run hello` is executed once at build time, before NXF_OFFLINE is set below
RUN export CAPSULE_LOG=debug && curl -s https://get.nextflow.io | bash && \
    chmod +x nextflow && \
    mv nextflow /usr/local/bin && \
    nextflow run hello

# Local files are plain files/directories, so COPY is used rather than ADD
COPY docker/Makefile .

# set PATH, set perl locale settings for singularity compatibility
ENV PATH="/opt/venv/bin:/usr/local/bin:/skesa:$PATH" \
    LC_ALL=C \
    NXF_OFFLINE='true'

COPY bin ./bin
COPY conf ./conf
COPY subworkflows ./subworkflows
COPY CSP2.nf ./CSP2.nf
COPY nextflow.config ./nextflow.config

################################################################################
# pretest stage: download test data
################################################################################
FROM app AS pretest

# PATH, LC_ALL and NXF_OFFLINE are inherited from the app stage

# Alternate test data directory
# NOTE(review): BuildKit appears to treat an http(s) ADD source as a Git repository
# only when the URL path ends in ".git" - confirm this fetches the assets directory
# as intended.
ADD https://github.com/CFSAN-Biostatistics/CSP2_TestData#main:assets assets/

# Test data for the MUMmer installation test
# per https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile
ADD https://mummer4.github.io/tutorial/exampleFiles/2.1/in/H_pylori26695_Eslice.fasta .
ADD https://mummer4.github.io/tutorial/exampleFiles/2.1/in/H_pyloriJ99_Eslice.fasta .
ADD https://mummer4.github.io/tutorial/exampleFiles/2.2/in/B_anthracis_Mslice.fasta .
ADD https://mummer4.github.io/tutorial/exampleFiles/2.2/in/B_anthracis_contigs.fasta .
# NOTE(review): these two downloads have the same destination file names as the first
# two ADDs above and therefore overwrite them - confirm both sources are still needed.
ADD http://mummer.sourceforge.net/examples/data/H_pylori26695_Eslice.fasta .
ADD http://mummer.sourceforge.net/examples/data/H_pyloriJ99_Eslice.fasta .

# Test data for the bedtools installation test (ARTIC SARS-CoV-2 primer schemes)
ADD https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V5.3.2/SARS-CoV-2.primer.bed ./V5.3.2.artic.bed
ADD https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V4.1/SARS-CoV-2.primer.bed ./V4.1.artic.bed

################################################################################
# test stage
################################################################################
FROM pretest AS test

# Test MUMmer installation (nucmer, promer, mummer, show-snps)
RUN nucmer -h && \
    promer -h && \
    mummer -mum -b -c H_pylori26695_Eslice.fasta H_pyloriJ99_Eslice.fasta > mummer.mums && \
    nucmer -c 100 -p nucmer B_anthracis_Mslice.fasta B_anthracis_contigs.fasta && \
    show-snps -C nucmer.delta > nucmer.snps && \
    promer -p promer_100 -c 100 H_pylori26695_Eslice.fasta H_pyloriJ99_Eslice.fasta

# Test bedtools installation
# check help options and version
RUN bedtools --help && \
    bedtools --version

# takes the two ARTIC SARS-CoV-2 primer-scheme bed files downloaded in the pretest stage,
# fixes their formatting, uses bedtools sort, intersect, and merge
# per https://github.com/StaPH-B/docker-builds/blob/master/bedtools/2.31.1/Dockerfile
RUN awk '{print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6}' V5.3.2.artic.bed > V5.3.2.unsorted.bed && \
    bedtools sort -i V5.3.2.unsorted.bed > V5.3.2.bed && \
    awk '{print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6}' V4.1.artic.bed > V4.1.bed && \
    bedtools intersect -a V5.3.2.bed -b V4.1.bed > intersect_test.bed && \
    mergeBed -i V5.3.2.bed > merged_test.bed && \
    head intersect_test.bed merged_test.bed

# Run the `test` target of the Makefile copied into /app
RUN /bin/bash -c 'make test'

################################################################################
# release stage
################################################################################
FROM app AS release

# Re-declare the global version ARGs and expose them as runtime environment variables
ARG CSP2_VER
ARG BEDTOOLS_VER
ARG MUMMER_VER
ARG SKESA_VER
ARG MASH_VER
ARG BBMAP_VER
ARG PYTHON_VER
ENV CSP2_VER=${CSP2_VER}
ENV BEDTOOLS_VER=${BEDTOOLS_VER}
ENV MUMMER_VER=${MUMMER_VER}
ENV SKESA_VER=${SKESA_VER}
ENV MASH_VER=${MASH_VER}
ENV BBMAP_VER=${BBMAP_VER}
ENV PYTHON_VER=${PYTHON_VER}

# PATH, LC_ALL and NXF_OFFLINE are inherited from the app stage

# Container arguments are passed to make as targets/options for /app/Makefile
ENTRYPOINT ["make", "--makefile=/app/Makefile"]