Mercurial > repos > rliterman > csp2
changeset 36:f6cd698b6f62
"planemo upload"
author | rliterman |
---|---|
date | Thu, 05 Dec 2024 18:09:26 -0500 |
parents | 106d28c851fa |
children | f62b9bcf7cf3 |
files | CSP2/.github/workflows/build-docker.yml CSP2/.gitignore CSP2/conf/profiles.config CSP2/docker/Dockerfile CSP2/docker/Makefile csp2_screen.xml csp2_snp.xml |
diffstat | 7 files changed, 3 insertions(+), 658 deletions(-) [+] |
line wrap: on
line diff
--- a/CSP2/.github/workflows/build-docker.yml Thu Dec 05 16:30:58 2024 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ -name: docker-build - -on: - release: - types: [published] - -jobs: - docker: - runs-on: ubuntu-latest - steps: - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and push - uses: docker/build-push-action@v6 - with: - file: docker/Dockerfile - platforms: linux/amd64 - target: release - build-args: | - VERSION=${{ github.ref_name }} - CSP2_BRANCH=${{ github.ref_name }} - push: true - tags: cfsanbiostatistics/csp2:latest,cfsanbiostatistics/csp2:${{ github.ref_name }}
--- a/CSP2/.gitignore Thu Dec 05 16:30:58 2024 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,211 +0,0 @@ -# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,virtualenv -# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,virtualenv - -### Python ### -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - -### Python Patch ### -# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration -poetry.toml - -# ruff -.ruff_cache/ - -# LSP config files -pyrightconfig.json - -### VirtualEnv ### -# Virtualenv -# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ -[Bb]in -[Ii]nclude -[Ll]ib -[Ll]ib64 -[Ll]ocal -[Ss]cripts -pyvenv.cfg -pip-selfcheck.json - -### VisualStudioCode ### -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json -!.vscode/*.code-snippets - -# Local History for Visual Studio Code -.history/ - -# Built Visual Studio Code Extensions -*.vsix - -### VisualStudioCode Patch ### -# Ignore all local history of files -.history -.ionide - -# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,virtualenv - -#Nextflow -.nextflow* -work/ \ No newline at end of file
--- a/CSP2/conf/profiles.config Thu Dec 05 16:30:58 2024 -0500 +++ b/CSP2/conf/profiles.config Thu Dec 05 18:09:26 2024 -0500 @@ -79,7 +79,7 @@ params.trim_name = "" } - csp2_galaxy1 { + csp2_galaxy { conda.enabled = true conda.useMicromamba = true conda.cacheDir = "${projectDir}/CSP2_env" @@ -99,25 +99,4 @@ params.mash_module = "" params.trim_name = "" } - - csp2_galaxy2 { - conda.enabled = true - conda.useMicromamba = true - conda.cacheDir = "${projectDir}/CSP2_env" - process.conda = "${projectDir}/conf/CSP2.yaml" - - process.queue = "q8cpu" - - executor.$slurm.exitReadTimeout = 120000 - - process.executor = 'slurm' - params.cores = 8 - params.python_module = "" - params.mummer_module = "" - params.skesa_module = "" - params.bedtools_module = "" - params.bbtools_module = "" - params.mash_module = "" - params.trim_name = "" - } }
--- a/CSP2/docker/Dockerfile Thu Dec 05 16:30:58 2024 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,246 +0,0 @@ -# CSP2 Dockerfile -# Based on StaPH-B's Dockerfile for BEDTools, MUmmer, and Skesa -# Thanks to Erin Young, Curtis Kapsak, John Arnn, and the StaPH-B team -# https://github.com/StaPH-B/docker-builds/blob/master/bedtools/2.31.1/Dockerfile -# https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile -# https://github.com/StaPH-B/docker-builds/blob/master/skesa/2.4.0/Dockerfile - -ARG CSP2_VER="0.9.0" -ARG BEDTOOLS_VER="2.31.1" -ARG MUMMER_VER="4.0.0" -ARG SKESA_VER="2.4.0" -ARG MASH_VER="2.3" -ARG BBMAP_VER="38.90" -ARG PYTHON_VER="3.8" - -FROM ubuntu:focal AS build - -ARG BEDTOOLS_VER -ARG MUMMER_VER -ARG SKESA_VER -ARG MASH_VER -ARG BBMAP_VER -ARG PYTHON_VER - -WORKDIR /build - -# to prevent tzdata from asking for a region during apt updates; ARG so that variable only -# persists at buildtime -# from https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile -ARG DEBIAN_FRONTEND=noninteractive - -# Install build dependencies -RUN apt-get update && apt-get install -y --no-install-recommends \ - tzdata \ - gpg-agent \ - software-properties-common \ - build-essential \ - zlib1g-dev \ - libghc-bzlib-dev \ - liblzma-dev \ - wget \ - ca-certificates - -RUN add-apt-repository 'ppa:deadsnakes/ppa' && apt-get update && apt-get install -y --no-install-recommends \ - python${PYTHON_VER} \ -# python${PYTHON_VER}-pip \ -# python${PYTHON_VER}-full \ - python${PYTHON_VER}-dev \ - python${PYTHON_VER}-venv && \ - python${PYTHON_VER} -m venv --copies /opt/venv - - -ENV PATH="/opt/venv/bin:$PATH" - -RUN pip install --no-cache-dir -U pandas~=1.2.0 pybedtools refchooser scikit-learn - -ADD https://github.com/arq5x/bedtools2/archive/refs/tags/v${BEDTOOLS_VER}.tar.gz . -ADD https://github.com/mummer4/mummer/releases/download/v${MUMMER_VER}rc1/mummer-${MUMMER_VER}rc1.tar.gz . -ADD https://github.com/ncbi/SKESA/releases/download/${SKESA_VER}/skesa.centos.7.7 . -ADD https://github.com/ncbi/SKESA/releases/download/${SKESA_VER}/gfa_connector.centos7.7 . -ADD https://github.com/ncbi/SKESA/releases/download/${SKESA_VER}/kmercounter.centos7.7 . -ADD https://github.com/marbl/Mash/releases/download/v${MASH_VER}/mash-Linux64-v${MASH_VER}.tar . - -# Install BEDTools -# per https://github.com/StaPH-B/docker-builds/blob/master/bedtools/2.31.1/Dockerfile -# python3 required when compiling via `make` command for creating old CLI executables -# dependencies listed here (albeit for v2.30.0, still should be identical): https://packages.ubuntu.com/jammy/bedtools -# requires libghc-bzlib-dev, build-essential, zlib1g-dev, and a few others -# 'make install' should place binary executable files in /usr/local/bin -RUN tar -xzf v${BEDTOOLS_VER}.tar.gz && \ - rm v${BEDTOOLS_VER}.tar.gz && \ - cd bedtools2-${BEDTOOLS_VER} && \ - make && \ - make install - - # Install mummer - # per https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile -RUN tar -xvf mummer-${MUMMER_VER}rc1.tar.gz && \ - rm mummer-${MUMMER_VER}rc1.tar.gz && \ - cd mummer-${MUMMER_VER}rc1 && \ - ./configure --prefix=/usr/local && \ - make && \ - make install && \ - ldconfig - -# # Install Skesa -# # per https://github.com/StaPH-B/docker-builds/blob/master/skesa/2.4.0/Dockerfile -# # get skesa, gfa_connector, and kmercounter binaries, rename them -RUN mkdir skesa && \ - cd skesa && \ - mv /build/skesa.centos.7.7 skesa && \ - mv /build/gfa_connector.centos7.7 gfa_connector && \ - mv /build/kmercounter.centos7.7 kmercounter && \ - chmod +x skesa gfa_connector kmercounter && \ - mv skesa gfa_connector kmercounter /usr/local/bin - -# Install Mash -RUN tar -xvf mash-Linux64-v${MASH_VER}.tar && \ - mv mash-Linux64-v${MASH_VER}/mash /usr/local/bin - -# Install BBMap -RUN wget -O BBMap_${BBMAP_VER}.tar.gz https://sourceforge.net/projects/bbmap/files/BBMap_${BBMAP_VER}.tar.gz/download && \ - tar -xvf BBMap_${BBMAP_VER}.tar.gz && \ - mv bbmap/* /usr/local/bin - - -FROM ubuntu:focal AS app - -ARG CSP2_VER -ARG CSP2_BRANCH="main" -ARG PYTHON_VER - -LABEL base.image="ubuntu:focal" -LABEL version=${CSP2_VER} -LABEL software="CSP2" -LABEL software.version=${CSP2_VER} -LABEL description="a Nextflow pipeline for rapid, accurate SNP distance estimation from assembly data" -LABEL website="https://github.com/CFSAN-Biostatistics/CSP2" -LABEL licence="https://github.com/CFSAN-Biostatistics/CSP2/blob/main/LICENSE" -LABEL maintainer="Robert Literman" -LABEL maintainer.email="Robert.Literman@fda.hhs.gov" -LABEL maintainer.organization="FDA/CFSAN/Biostatistics" -LABEL maintainer2="Justin Payne" -LABEL maintainer2.email="Justin.Payne@fda.hhs.gov" -LABEL maintainer2.organization="FDA/CFSAN/Biostatistics" - -WORKDIR /root/.nextflow -WORKDIR /app - -# copy in all executable files from builder stage to final app stage -COPY --from=build /usr/local/bin /usr/local/bin - -# Lots of perl nonsense -COPY --from=build /usr/local/lib /usr/local/lib -COPY --from=build /usr/local/libexec/mummer /usr/local/libexec/mummer -COPY --from=build /usr/lib/x86_64-linux-gnu/perl /usr/lib/x86_64-linux-gnu/perl -COPY --from=build /usr/local/share /usr/local/share -COPY --from=build /usr/share /usr/share -COPY --from=build /opt/venv /opt/venv -COPY --from=build /usr/bin/make /usr/local/bin/make - - -# Python stuff -COPY --from=build /usr/lib/python${PYTHON_VER} /usr/lib/python${PYTHON_VER} - - -#Install JRE -RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates \ - openjdk-17-jre-headless \ - curl - -# Install Nextflow -# per https://www.nextflow.io/docs/latest/getstarted.html -RUN export CAPSULE_LOG=debug && curl -s https://get.nextflow.io | bash && \ - chmod +x nextflow && \ - mv nextflow /usr/local/bin && \ - nextflow run hello - -ADD docker/Makefile . - -# set PATH, set perl locale settings for singularity compatibility -ENV PATH="/opt/venv/bin:/usr/local/bin:/skesa:$PATH" \ - LC_ALL=C \ - NXF_OFFLINE='true' - -ADD bin ./bin -ADD conf ./conf -ADD subworkflows ./subworkflows -ADD CSP2.nf ./CSP2.nf -ADD nextflow.config ./nextflow.config - - -FROM app AS pretest - -# set PATH, set perl locale settings for singularity compatibility -ENV PATH="/opt/venv/bin:/usr/local/bin:/skesa:$PATH" \ - LC_ALL=C \ - NXF_OFFLINE='true' - -#Alternate test data directory -ADD https://github.com/CFSAN-Biostatistics/CSP2_TestData#main:assets assets/ - - -# Test MUmmer installation -# per https://github.com/StaPH-B/docker-builds/blob/master/mummer/4.0.0/Dockerfile - -ADD https://mummer4.github.io/tutorial/exampleFiles/2.1/in/H_pylori26695_Eslice.fasta . -ADD https://mummer4.github.io/tutorial/exampleFiles/2.1/in/H_pyloriJ99_Eslice.fasta . -ADD https://mummer4.github.io/tutorial/exampleFiles/2.2/in/B_anthracis_Mslice.fasta . -ADD https://mummer4.github.io/tutorial/exampleFiles/2.2/in/B_anthracis_contigs.fasta . -ADD http://mummer.sourceforge.net/examples/data/H_pylori26695_Eslice.fasta . -ADD http://mummer.sourceforge.net/examples/data/H_pyloriJ99_Eslice.fasta . -ADD https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V5.3.2/SARS-CoV-2.primer.bed ./V5.3.2.artic.bed -ADD https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V4.1/SARS-CoV-2.primer.bed ./V4.1.artic.bed - -FROM pretest AS test - -# Test MASH - -RUN nucmer -h && \ - promer -h && \ - mummer -mum -b -c H_pylori26695_Eslice.fasta H_pyloriJ99_Eslice.fasta > mummer.mums && \ - nucmer -c 100 -p nucmer B_anthracis_Mslice.fasta B_anthracis_contigs.fasta && \ - show-snps -C nucmer.delta > nucmer.snps && \ - promer -p promer_100 -c 100 H_pylori26695_Eslice.fasta H_pyloriJ99_Eslice.fasta - -# Test bedtools installation -# check help options and version -RUN bedtools --help && \ - bedtools --version - -# downloads two bedfiles for ARTIC SARS-CoV-2 artic schemes, fixes their formatting, uses bedtools sort, intersect, and merge -# per https://github.com/StaPH-B/docker-builds/blob/master/bedtools/2.31.1/Dockerfile -RUN awk '{print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6}' V5.3.2.artic.bed > V5.3.2.unsorted.bed && \ - bedtools sort -i V5.3.2.unsorted.bed > V5.3.2.bed && \ - awk '{print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6}' V4.1.artic.bed > V4.1.bed && \ - bedtools intersect -a V5.3.2.bed -b V4.1.bed > intersect_test.bed && \ - mergeBed -i V5.3.2.bed > merged_test.bed && \ - head intersect_test.bed merged_test.bed - -RUN /bin/bash -c 'make test' - -FROM app AS release - -ARG CSP2_VER -ARG BEDTOOLS_VER -ARG MUMMER_VER -ARG SKESA_VER -ARG MASH_VER -ARG BBMAP_VER -ARG PYTHON_VER -ENV CSP2_VER=${CSP2_VER} -ENV BEDTOOLS_VER=${BEDTOOLS_VER} -ENV MUMMER_VER=${MUMMER_VER} -ENV SKESA_VER=${SKESA_VER} -ENV MASH_VER=${MASH_VER} -ENV BBMAP_VER=${BBMAP_VER} -ENV PYTHON_VER=${PYTHON_VER} - -# set PATH, set perl locale settings for singularity compatibility -ENV PATH="/opt/venv/bin:/usr/local/bin:/skesa:$PATH" \ - LC_ALL=C \ - NXF_OFFLINE='true' - -ENTRYPOINT ["make", "--makefile=/app/Makefile"] \ No newline at end of file
--- a/CSP2/docker/Makefile Thu Dec 05 16:30:58 2024 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,143 +0,0 @@ -.PHONY: - -.ONESHELL: - - - -usage: ## Show this menu - @grep -E '^[a-zA-Z_-]+:.*?##.*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?##"}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' - -version: ## Show version and branch - @echo "CSP2 v$${CSP2_VER}/$${CSP2_BRANCH}" - -# ENV CSP2_VER=${CSP2_VER} -# ENV BEDTOOLS_VER=${BEDTOOLS_VER} -# ENV MIUMMER_VER=${MUMMER_VER} -# ENV SKESA_VER=${SKESA_VER} -# ENV MASH_VER=${MASH_VER} -# ENV BBMAP_VER=${BBMAP_VER} -# ENV PYTHON_VER=${PYTHON_VER} - -versions: version ## Show versions of key installed depedencies - @echo `nextflow -v` - @echo `python3 --version` " (container says ${PYTHON_VER})" - @echo `bedtools --version` " (container says ${BEDTOOLS_VER})" - @echo "mummer " `mummer --version` " (container says ${MUMMER_VER})" - @echo `skesa --version 2>&1` " (container says ${SKESA_VER})" - @echo "mash " `mash --version` " (container says ${MASH_VER})" - @echo `bbmap.sh --version 2>&1` " (container says ${BBMAP_VER})" - -help: ## Show help - @echo "Citation: CFSAN SNP Pipeline 2, v$${CSP2_VER}, Literman et al. 2024" - @echo - @echo "CSP2 is a Nextflow pipeline for rapid, accurate SNP distance estimation" - @echo "from assembly data." - @echo - @echo "Please see: https://github.com/CFSAN-Biostatistics/CSP2" - @echo - @echo "CSP2 runs are managed via Nextflow, providing the user with an array of" - @echo "customizations while also facilitating module development and additions in" - @echo "future releases." - @echo - @echo "Important Note: The software continues to be focused on the analysis of" - @echo "groups of bacterial genomes with limited evolutionary differences (<1000" - @echo "SNPs). Testing is underway to determine how the underlying cluster" - @echo "diversity impacts distances estimates." - @echo - @echo "CSP2 has two main run modes:" - @echo "1) "Screening Mode" (screen): Used to determine whether query isolates are" - @echo "close to a set of reference isolates (e.g., lab control strains, strains" - @echo "related to an outbreak, etc.) Given one or more user-provided reference" - @echo "isolates (--ref_reads; --ref_fasta), get alignment statistics and SNP" - @echo "distances between all reference and query isolates (--reads; --fasta)" - @echo - @echo "2) "SNP Pipeline Mode" (snp): Used to generate pairwise distances and" - @echo "alignments for a set of query isolates Generate pairwise SNP distances and" - @echo "alignments for 2+ isolates (--reads; --fasta) based on comparisons to:" - @echo - @echo "One or more user-provided references (--ref_reads; --ref_fasta), or One or" - @echo "more reference isolates selected by RefChooser (--n_ref)" - @echo - @echo "Usage: screen [options] {--fasta PATH {--reads=PATH | --forward=STR --reverse=STR} --out=PATH}" - @echo " or snp [options] {--fasta {--reads=PATH | --forward=STR --reverse=STR} --out=PATH}" - @echo - @echo "Options:" - @echo " --outroot=PATH\tBase directory to create output folder [default=$CWD] " - @echo " --out=PATH\t\tName of the output folder to create (must not exist)" - @echo "\t\t\t [default=CSP2_<current_datetime>]" - @echo " --forward=STR\t\tFull file extension for forward/left reads of query" - @echo "\t\t\t [default='_1.fastq.gz']" - @echo " --reverse=STR\t\tFull file extension for reverse/right reads of reference" - @echo "\t\t\t [default='_2.fastq.gz']" - @echo " --ref_forward=STR\tFull file extension for forward/left reads of reference" - @echo "\t\t\t [default='_1.fastq.gz']" - @echo " --ref_reverse=STR\tFull file extension for reverse/right reads of reference" - @echo "\t\t\t [default='_2.fastq.gz']" - @echo " --readext=STR\t\tExtension for single-end reads for query [default='fastq.gz']" - @echo " --ref_readext=STR\tExtension for single-end reads for reference" - @echo "\t\t\t [default='fastq.gz']" - @echo " --min_cov=NUM\t\tDo not analyze queries that cover less than <min_cov>% of the" - @echo "\t\t\treference assembly [default=85]" - @echo " --min_iden=NUM\tOnly consider alignments where the percent identity is at least" - @echo "\t\t\t <min_iden> [default=99]" - @echo " --min_len=NUM\t\tOnly consider alignments that span at least <min_len> in bp" - @echo "\t\t\t [default=500]" - @echo " --dwin=LIST\t\tA comma-separated list of windows to check SNP densities" - @echo "\t\t\t [default=1000,125,15]" - @echo " --wsnps=LIST\t\tThe maximum number of SNPs allowed in the corresponding window from" - @echo "\t\t\t --dwin [default=3,2,1]" - @echo " --query_edge=NUM\tOnly consider SNPs that occur within <query_edge>bp of the end" - @echo "\t\t\t of a query contig [default=250]" - @echo " --ref_edge=NUM\tOnly consider SNPs that occur within <query_edge>bp of the end" - @echo "\t\t\t of a reference contig [default=250]" - @echo " --n_ref=NUM\t\tThe number of RefChooser reference isolates to consider (only" - @echo "\t\t\t applied if using RefChooser) [default=3]" - @echo " --reads=PATH\t\tLocation of query read data (Path to directory, or path to file with" - @echo "\t\t\t multiple directories)" - @echo " --fasta=PATH\t\tLocation of query assembly data (Path to directory containing" - @echo "\t\t\t FASTAs, path to FASTA, path to multiple FASTAs)" - @echo " --ref_reads=PATH\tLocation of reference read data (Path to directory, or path to" - @echo "\t\t\t file with multiple directories)" - @echo " --ref_fasta=PATH\tLocation of reference assembly data (Path to directory" - @echo "\t\t\t containing FASTAs, path to FASTA, path to multiple FASTAs)" - @echo " --trim_name=STR\tA string in assembly file names that you want to remove from" - @echo "\t\t\t sample IDs (e.g., _contigs_skesa)" - -config: - @cat <<- EOF - profiles { - standard { - process.executor = 'local' - params.cores = `nproc --all` - } - } - EOF > ~/.nextflow/config - - -ifeq (screen, $(firstword $(MAKECMDGOALS))) - runargs := $(wordlist 2, $(words $(MAKECMDGOALS)), $(MAKECMDGOALS)) - $(eval $(runargs):;@true) -endif - -ifeq (snp, $(firstword $(MAKECMDGOALS))) - runargs := $(wordlist 2, $(words $(MAKECMDGOALS)), $(MAKECMDGOALS)) - $(eval $(runargs):;@true) -endif - -screen: config ## determine whether query isolates are close to a reference - nextflow run CSP2.nf -profile standard --runmode screen $(runargs) - -snp: config ## generate pairwise distances for a set of query isolates - nextflow run CSP2.nf -profile standard --runmode snp $(runargs) - -snpdiffs: config - -test_screen: - nextflow run CSP2.nf -profile standard --runmode screen --fasta assets/Screen/Assembly/Week_42_Assembly.fasta --reads assets/Screen/Reads/ --ref_fasta assets/Screen/Assembly/Lab_Control.fasta --out ./CSP2_Test_Screen --readext fq.gz --forward _1.fq.gz --reverse _2.fq.gz - -test_snp: - nextflow run CSP2.nf -profile standard --runmode snp --fasta assets/SNP/ --n_ref 3 --out ./CSP2_Test_SNP --max_missing 50 - -test: config test_screen test_snp - ls -lah assets/Screen/Output/Contamination_Screen/ - diff -bur ./CSP2_Test_SNP/snpdiffs assets/SNP/Output/Soil_Analysis/snpdiffs \ No newline at end of file
--- a/csp2_screen.xml Thu Dec 05 16:30:58 2024 -0500 +++ b/csp2_screen.xml Thu Dec 05 18:09:26 2024 -0500 @@ -57,7 +57,7 @@ export REF_ID_ARG=""; fi; -nextflow run ${__tool_directory__}/CSP2/CSP2.nf -profile csp2_galaxy1 --runmode screen \$QUERY_FASTA_ARG \$REF_FASTA_ARG \$QUERY_READS_ARG \$REF_READS_ARG \$REF_ID_ARG \$TRIM_ARG --readext $readext --forward $forward --reverse $reverse --ref_readext $readext --ref_forward $forward --ref_reverse $reverse --min_cov $min_cov --min_iden $min_iden --min_len $min_len --ref_edge $ref_edge --query_edge $query_edge --dwin $dwin --wsnps $wsnps --out \$CSP2_DIR/CSP2_Screen_Output > Nextflow_Log.txt 2>&1; +nextflow run ${__tool_directory__}/CSP2/CSP2.nf -profile csp2_galaxy --runmode screen \$QUERY_FASTA_ARG \$REF_FASTA_ARG \$QUERY_READS_ARG \$REF_READS_ARG \$REF_ID_ARG \$TRIM_ARG --readext $readext --forward $forward --reverse $reverse --ref_readext $readext --ref_forward $forward --ref_reverse $reverse --min_cov $min_cov --min_iden $min_iden --min_len $min_len --ref_edge $ref_edge --query_edge $query_edge --dwin $dwin --wsnps $wsnps --out \$CSP2_DIR/CSP2_Screen_Output > Nextflow_Log.txt 2>&1; zip -r CSP2_Output.zip CSP2_Screen_Output; ]]> </command>
--- a/csp2_snp.xml Thu Dec 05 16:30:58 2024 -0500 +++ b/csp2_snp.xml Thu Dec 05 18:09:26 2024 -0500 @@ -57,7 +57,7 @@ export RESCUE_ARG=""; fi; -nextflow run ${__tool_directory__}/CSP2/CSP2.nf -profile csp2_galaxy1 --runmode snp \$QUERY_FASTA_ARG \$REF_FASTA_ARG \$QUERY_READS_ARG \$REF_READS_ARG \$TRIM_ARG \$RESCUE_ARG --readext $readext --forward $forward --reverse $reverse --ref_readext $readext --ref_forward $forward --ref_reverse $reverse --min_cov $min_cov --min_iden $min_iden --min_len $min_len --ref_edge $ref_edge --query_edge $query_edge --dwin $dwin --wsnps $wsnps --max_missing $max_missing --out \$CSP2_DIR/CSP2_SNP_Output > Nextflow_Log.txt 2>&1; +nextflow run ${__tool_directory__}/CSP2/CSP2.nf -profile csp2_galaxy --runmode snp \$QUERY_FASTA_ARG \$REF_FASTA_ARG \$QUERY_READS_ARG \$REF_READS_ARG \$TRIM_ARG \$RESCUE_ARG --readext $readext --forward $forward --reverse $reverse --ref_readext $readext --ref_forward $forward --ref_reverse $reverse --min_cov $min_cov --min_iden $min_iden --min_len $min_len --ref_edge $ref_edge --query_edge $query_edge --dwin $dwin --wsnps $wsnps --max_missing $max_missing --out \$CSP2_DIR/CSP2_SNP_Output > Nextflow_Log.txt 2>&1; mkdir -p Output; for file in CSP2_SNP_Output/SNP_Analysis/*/*; do ln -sf "\$file" Output/; done; zip -r CSP2_Output.zip CSP2_SNP_Output;