rliterman@0
|
1 # CSP2 Dockerfile
|
rliterman@0
|
2 # Based on StaPH-B's Dockerfiles for BEDTools, MUMmer, and SKESA
|
rliterman@0
|
3 # Thanks to Erin Young, Curtis Kapsak, John Arnn, and the StaPH-B team
|
rliterman@0
|
4 # https://github.com/StaPH-B/docker-builds/blob/master/bedtools/2.31.1/Dockerfile
|
rliterman@0
|
5 # https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile
|
rliterman@0
|
6 # https://github.com/StaPH-B/docker-builds/blob/master/skesa/2.4.0/Dockerfile
|
rliterman@0
|
7
|
rliterman@0
|
# Versions of CSP2 and of every bundled third-party tool.
# ARGs declared before the first FROM are global defaults: a stage only sees
# one of them after re-declaring it (without a value) inside that stage.
ARG CSP2_VER=0.9.0
ARG BEDTOOLS_VER=2.31.1
# The MUMmer download in the build stage appends "rc1" to this version.
ARG MUMMER_VER=4.0.0
ARG SKESA_VER=2.4.0
ARG MASH_VER=2.3
ARG BBMAP_VER=38.90
ARG PYTHON_VER=3.8
|
rliterman@0
|
15
|
rliterman@0
|
# ---- build stage -----------------------------------------------------------
# Compiles or downloads every third-party tool (BEDTools, MUMmer, SKESA, Mash,
# BBMap) and creates the Python virtual environment. Later stages copy
# selected directories out of this stage.
FROM ubuntu:focal AS build

# Re-declare the global version ARGs so they are visible inside this stage.
ARG BEDTOOLS_VER
ARG MUMMER_VER
ARG SKESA_VER
ARG MASH_VER
ARG BBMAP_VER
ARG PYTHON_VER

WORKDIR /build

# to prevent tzdata from asking for a region during apt updates; ARG so that variable only
# persists at buildtime
# from https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile
ARG DEBIAN_FRONTEND=noninteractive

# Install build dependencies (sorted alphabetically for easier diffs).
# The apt lists are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        gpg-agent \
        libghc-bzlib-dev \
        liblzma-dev \
        software-properties-common \
        tzdata \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install the requested Python from the deadsnakes PPA and create a
# self-contained virtual environment (--copies copies the interpreter into the
# venv rather than symlinking it).
RUN add-apt-repository 'ppa:deadsnakes/ppa' && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        python${PYTHON_VER} \
        python${PYTHON_VER}-dev \
        python${PYTHON_VER}-venv && \
    rm -rf /var/lib/apt/lists/* && \
    python${PYTHON_VER} -m venv --copies /opt/venv

# Put the venv first on PATH so `pip` and `python3` below resolve to it.
ENV PATH="/opt/venv/bin:$PATH"

# NOTE(review): only pandas is version-constrained; consider pinning the others
# for reproducible builds.
RUN pip install --no-cache-dir -U pandas~=1.2.0 pybedtools refchooser scikit-learn

# Fetch source archives and pre-built binaries.
# NOTE(review): these remote ADDs are not checksum-verified; consider
# `ADD --checksum=sha256:...` once the expected digests are recorded.
ADD https://github.com/arq5x/bedtools2/archive/refs/tags/v${BEDTOOLS_VER}.tar.gz .
ADD https://github.com/mummer4/mummer/releases/download/v${MUMMER_VER}rc1/mummer-${MUMMER_VER}rc1.tar.gz .
ADD https://github.com/ncbi/SKESA/releases/download/${SKESA_VER}/skesa.centos.7.7 .
ADD https://github.com/ncbi/SKESA/releases/download/${SKESA_VER}/gfa_connector.centos7.7 .
ADD https://github.com/ncbi/SKESA/releases/download/${SKESA_VER}/kmercounter.centos7.7 .
ADD https://github.com/marbl/Mash/releases/download/v${MASH_VER}/mash-Linux64-v${MASH_VER}.tar .

# Install BEDTools
# per https://github.com/StaPH-B/docker-builds/blob/master/bedtools/2.31.1/Dockerfile
# python3 required when compiling via `make` command for creating old CLI executables
# dependencies listed here (albeit for v2.30.0, still should be identical): https://packages.ubuntu.com/jammy/bedtools
# requires libghc-bzlib-dev, build-essential, zlib1g-dev, and a few others
# 'make install' should place binary executable files in /usr/local/bin
RUN tar -xzf v${BEDTOOLS_VER}.tar.gz && \
    rm v${BEDTOOLS_VER}.tar.gz && \
    cd bedtools2-${BEDTOOLS_VER} && \
    make && \
    make install

# Install MUMmer
# per https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile
RUN tar -xzf mummer-${MUMMER_VER}rc1.tar.gz && \
    rm mummer-${MUMMER_VER}rc1.tar.gz && \
    cd mummer-${MUMMER_VER}rc1 && \
    ./configure --prefix=/usr/local && \
    make && \
    make install && \
    ldconfig

# Install SKESA
# per https://github.com/StaPH-B/docker-builds/blob/master/skesa/2.4.0/Dockerfile
# Install the downloaded skesa, gfa_connector, and kmercounter binaries under
# their plain names. Files fetched by ADD <url> get mode 0600, so install them
# with an explicit 0755 to keep them readable and executable for non-root users.
RUN install -m 0755 skesa.centos.7.7 /usr/local/bin/skesa && \
    install -m 0755 gfa_connector.centos7.7 /usr/local/bin/gfa_connector && \
    install -m 0755 kmercounter.centos7.7 /usr/local/bin/kmercounter && \
    rm skesa.centos.7.7 gfa_connector.centos7.7 kmercounter.centos7.7

# Install Mash (pre-built binary); remove the archive and extracted directory afterwards.
RUN tar -xf mash-Linux64-v${MASH_VER}.tar && \
    mv mash-Linux64-v${MASH_VER}/mash /usr/local/bin && \
    rm -r mash-Linux64-v${MASH_VER}.tar mash-Linux64-v${MASH_VER}

# Install BBMap: the whole distribution is moved into /usr/local/bin.
RUN wget --no-verbose -O BBMap_${BBMAP_VER}.tar.gz https://sourceforge.net/projects/bbmap/files/BBMap_${BBMAP_VER}.tar.gz/download && \
    tar -xzf BBMap_${BBMAP_VER}.tar.gz && \
    rm BBMap_${BBMAP_VER}.tar.gz && \
    mv bbmap/* /usr/local/bin
|
rliterman@0
|
105
|
rliterman@0
|
106
|
rliterman@0
|
# ---- app stage -------------------------------------------------------------
# Runtime image: tools copied from the build stage, a Java runtime, Nextflow,
# and the CSP2 pipeline sources. The pretest and release stages derive from it.
FROM ubuntu:focal AS app

ARG CSP2_VER
ARG CSP2_BRANCH="main"
ARG PYTHON_VER

LABEL base.image="ubuntu:focal" \
      version="${CSP2_VER}" \
      software="CSP2" \
      software.version="${CSP2_VER}" \
      description="a Nextflow pipeline for rapid, accurate SNP distance estimation from assembly data" \
      website="https://github.com/CFSAN-Biostatistics/CSP2" \
      licence="https://github.com/CFSAN-Biostatistics/CSP2/blob/main/LICENSE" \
      maintainer="Robert Literman" \
      maintainer.email="Robert.Literman@fda.hhs.gov" \
      maintainer.organization="FDA/CFSAN/Biostatistics" \
      maintainer2="Justin Payne" \
      maintainer2.email="Justin.Payne@fda.hhs.gov" \
      maintainer2.organization="FDA/CFSAN/Biostatistics"

# The first WORKDIR creates /root/.nextflow as a side effect; /app is the
# working directory used from here on.
WORKDIR /root/.nextflow
WORKDIR /app

# copy in all executable files from builder stage to final app stage
COPY --from=build /usr/local/bin /usr/local/bin

# Libraries and shared data needed by the copied tools (MUMmer's perl scripts in particular)
COPY --from=build /usr/local/lib /usr/local/lib
COPY --from=build /usr/local/libexec/mummer /usr/local/libexec/mummer
COPY --from=build /usr/lib/x86_64-linux-gnu/perl /usr/lib/x86_64-linux-gnu/perl
COPY --from=build /usr/local/share /usr/local/share
COPY --from=build /usr/share /usr/share
COPY --from=build /opt/venv /opt/venv
# `make` is the image entrypoint in the release stage, so bring the binary along
COPY --from=build /usr/bin/make /usr/local/bin/make

# Python standard library for the interpreter copied into /opt/venv
COPY --from=build /usr/lib/python${PYTHON_VER} /usr/lib/python${PYTHON_VER}

# Install the JRE and curl (both needed for Nextflow). The frontend is forced
# to noninteractive for this command only, and the apt lists are removed in
# the same layer so they do not end up in the shipped image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        openjdk-17-jre-headless && \
    rm -rf /var/lib/apt/lists/*

# Fail the build if any command in a pipeline fails; without pipefail a failed
# download piped into bash would go unnoticed by the shell.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install Nextflow
# per https://www.nextflow.io/docs/latest/getstarted.html
# `nextflow run hello` is executed here, before NXF_OFFLINE is set below.
RUN export CAPSULE_LOG=debug && \
    curl -fsSL https://get.nextflow.io | bash && \
    chmod +x nextflow && \
    mv nextflow /usr/local/bin && \
    nextflow run hello

COPY docker/Makefile .

# set PATH, set perl locale settings for singularity compatibility
ENV PATH="/opt/venv/bin:/usr/local/bin:/skesa:$PATH" \
    LC_ALL=C \
    NXF_OFFLINE='true'

# Pipeline sources (plain local files, so COPY rather than ADD)
COPY bin ./bin
COPY conf ./conf
COPY subworkflows ./subworkflows
COPY CSP2.nf ./CSP2.nf
COPY nextflow.config ./nextflow.config
|
rliterman@0
|
172
|
rliterman@0
|
173
|
rliterman@0
|
# ---- pretest stage ---------------------------------------------------------
# Adds the test fixtures on top of the app image. PATH, LC_ALL and NXF_OFFLINE
# are inherited from the app stage, so they are not set again here.
FROM app AS pretest

# Alternate test data directory: the `assets` sub-directory of the `main`
# branch of the CSP2_TestData repository.
# NOTE(review): the `.git` suffix is added because BuildKit is understood to
# treat an http(s) source as a Git repository only when the path ends in
# `.git`; otherwise the URL would be fetched as a single file. Confirm against
# the Dockerfile reference for ADD.
ADD https://github.com/CFSAN-Biostatistics/CSP2_TestData.git#main:assets assets/

# Example inputs for the MUMmer smoke tests
# per https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile
ADD https://mummer4.github.io/tutorial/exampleFiles/2.1/in/H_pylori26695_Eslice.fasta .
ADD https://mummer4.github.io/tutorial/exampleFiles/2.1/in/H_pyloriJ99_Eslice.fasta .
ADD https://mummer4.github.io/tutorial/exampleFiles/2.2/in/B_anthracis_Mslice.fasta .
ADD https://mummer4.github.io/tutorial/exampleFiles/2.2/in/B_anthracis_contigs.fasta .

# ARTIC SARS-CoV-2 primer schemes used by the BEDTools smoke tests
ADD https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V5.3.2/SARS-CoV-2.primer.bed ./V5.3.2.artic.bed
ADD https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V4.1/SARS-CoV-2.primer.bed ./V4.1.artic.bed
|
rliterman@0
|
196
|
rliterman@0
|
# ---- test stage ------------------------------------------------------------
# Smoke-tests the installed tools against the fixtures added in the pretest
# stage, then runs the Makefile's `test` target.
FROM pretest AS test

# Test MUMmer installation: help screens, then mummer / nucmer / show-snps /
# promer on the example FASTA files.
RUN nucmer -h \
 && promer -h \
 && mummer -mum -b -c H_pylori26695_Eslice.fasta H_pyloriJ99_Eslice.fasta > mummer.mums \
 && nucmer -c 100 -p nucmer B_anthracis_Mslice.fasta B_anthracis_contigs.fasta \
 && show-snps -C nucmer.delta > nucmer.snps \
 && promer -p promer_100 -c 100 H_pylori26695_Eslice.fasta H_pyloriJ99_Eslice.fasta

# Test bedtools installation
# check help options and version
RUN bedtools --help \
 && bedtools --version

# Takes the two ARTIC SARS-CoV-2 primer bed files, rewrites them as six
# tab-separated columns, then exercises bedtools sort, intersect, and merge.
# per https://github.com/StaPH-B/docker-builds/blob/master/bedtools/2.31.1/Dockerfile
RUN awk '{print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6}' V5.3.2.artic.bed > V5.3.2.unsorted.bed \
 && bedtools sort -i V5.3.2.unsorted.bed > V5.3.2.bed \
 && awk '{print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6}' V4.1.artic.bed > V4.1.bed \
 && bedtools intersect -a V5.3.2.bed -b V4.1.bed > intersect_test.bed \
 && mergeBed -i V5.3.2.bed > merged_test.bed \
 && head intersect_test.bed merged_test.bed

# Run the `test` target of the Makefile in the working directory
RUN /bin/bash -c 'make test'
|
rliterman@0
|
223
|
rliterman@0
|
# ---- release stage ---------------------------------------------------------
# Final image. PATH, LC_ALL and NXF_OFFLINE are inherited from the app stage,
# so they are not set again here (re-setting PATH only duplicated its entries).
FROM app AS release

# Re-declare the global version ARGs so they can be exported below.
ARG CSP2_VER
ARG BEDTOOLS_VER
ARG MUMMER_VER
ARG SKESA_VER
ARG MASH_VER
ARG BBMAP_VER
ARG PYTHON_VER

# Expose the tool versions to the running container.
ENV CSP2_VER=${CSP2_VER} \
    BEDTOOLS_VER=${BEDTOOLS_VER} \
    MUMMER_VER=${MUMMER_VER} \
    SKESA_VER=${SKESA_VER} \
    MASH_VER=${MASH_VER} \
    BBMAP_VER=${BBMAP_VER} \
    PYTHON_VER=${PYTHON_VER}

# The container is driven through the Makefile copied into /app; arguments
# given to `docker run` are passed on to make.
ENTRYPOINT ["make", "--makefile=/app/Makefile"]