# Command runner for CSP2: thin Make wrappers around `nextflow run CSP2.nf`
# plus informational targets (usage, version, versions, help).
#
# Every target here is a command, not a file, so all of them are phony.
.PHONY: usage version versions help config screen snp snpdiffs \
        test_screen test_snp test

# Run each recipe in a single shell invocation. The `config` recipe relies on
# this for its multi-line here-document.
.ONESHELL:

# Default goal: list every target that carries a `##` description.
usage: ## Show this menu
	@grep -E '^[a-zA-Z_-]+:.*?##.*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?##"}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

# CSP2_VER and CSP2_BRANCH are read from the shell environment at run time.
version: ## Show version and branch
	@echo "CSP2 v$${CSP2_VER}/$${CSP2_BRANCH}"

# The version variables referenced below are expected in the environment
# (presumably exported by the container image - verify against the Dockerfile):
#   CSP2_VER BEDTOOLS_VER MUMMER_VER SKESA_VER MASH_VER BBMAP_VER PYTHON_VER
# Make imports environment variables, so ${NAME} is expanded by Make here.
versions: version ## Show versions of key installed dependencies
	@echo `nextflow -v`
	@echo `python3 --version` " (container says ${PYTHON_VER})"
	@echo `bedtools --version` " (container says ${BEDTOOLS_VER})"
	@echo "mummer " `mummer --version` " (container says ${MUMMER_VER})"
	@echo `skesa --version 2>&1` " (container says ${SKESA_VER})"
	@echo "mash " `mash --version` " (container says ${MASH_VER})"
	@echo `bbmap.sh --version 2>&1` " (container says ${BBMAP_VER})"

# Long-form help text.
# NOTE(review): the option table embeds \t escapes, so its column alignment
# depends on the recipe shell's `echo` interpreting backslash escapes - confirm
# for the /bin/sh used where this runs.
help: ## Show help
	@echo "Citation: CFSAN SNP Pipeline 2, v$${CSP2_VER}, Literman et al. 2024"
	@echo
	@echo "CSP2 is a Nextflow pipeline for rapid, accurate SNP distance estimation"
	@echo "from assembly data."
	@echo
	@echo "Please see: https://github.com/CFSAN-Biostatistics/CSP2"
	@echo
	@echo "CSP2 runs are managed via Nextflow, providing the user with an array of"
	@echo "customizations while also facilitating module development and additions in"
	@echo "future releases."
	@echo
	@echo "Important Note: The software continues to be focused on the analysis of"
	@echo "groups of bacterial genomes with limited evolutionary differences (<1000"
	@echo "SNPs). Testing is underway to determine how the underlying cluster"
	@echo "diversity impacts distance estimates."
	@echo
	@echo "CSP2 has two main run modes:"
	@echo "1) \"Screening Mode\" (screen): Used to determine whether query isolates are"
	@echo "close to a set of reference isolates (e.g., lab control strains, strains"
	@echo "related to an outbreak, etc.) Given one or more user-provided reference"
	@echo "isolates (--ref_reads; --ref_fasta), get alignment statistics and SNP"
	@echo "distances between all reference and query isolates (--reads; --fasta)"
	@echo
	@echo "2) \"SNP Pipeline Mode\" (snp): Used to generate pairwise distances and"
	@echo "alignments for a set of query isolates. Generate pairwise SNP distances and"
	@echo "alignments for 2+ isolates (--reads; --fasta) based on comparisons to:"
	@echo
	@echo "One or more user-provided references (--ref_reads; --ref_fasta), or One or"
	@echo "more reference isolates selected by RefChooser (--n_ref)"
	@echo
	@echo "Usage: screen [options] {--fasta PATH {--reads=PATH | --forward=STR --reverse=STR} --out=PATH}"
	@echo " or snp [options] {--fasta {--reads=PATH | --forward=STR --reverse=STR} --out=PATH}"
	@echo
	@echo "Options:"
	@echo " --outroot=PATH\tBase directory to create output folder [default=\$$CWD] "
	@echo " --out=PATH\t\tName of the output folder to create (must not exist)"
	@echo "\t\t\t [default=CSP2_]"
	@echo " --forward=STR\t\tFull file extension for forward/left reads of query"
	@echo "\t\t\t [default='_1.fastq.gz']"
	@echo " --reverse=STR\t\tFull file extension for reverse/right reads of query"
	@echo "\t\t\t [default='_2.fastq.gz']"
	@echo " --ref_forward=STR\tFull file extension for forward/left reads of reference"
	@echo "\t\t\t [default='_1.fastq.gz']"
	@echo " --ref_reverse=STR\tFull file extension for reverse/right reads of reference"
	@echo "\t\t\t [default='_2.fastq.gz']"
	@echo " --readext=STR\t\tExtension for single-end reads for query [default='fastq.gz']"
	@echo " --ref_readext=STR\tExtension for single-end reads for reference"
	@echo "\t\t\t [default='fastq.gz']"
	@echo " --min_cov=NUM\t\tDo not analyze queries that cover less than NUM% of the"
	@echo "\t\t\treference assembly [default=85]"
	@echo " --min_iden=NUM\tOnly consider alignments where the percent identity is at least NUM"
	@echo "\t\t\t [default=99]"
	@echo " --min_len=NUM\t\tOnly consider alignments that span at least NUM bp"
	@echo "\t\t\t [default=500]"
	@echo " --dwin=LIST\t\tA comma-separated list of windows to check SNP densities"
	@echo "\t\t\t [default=1000,125,15]"
	@echo " --wsnps=LIST\t\tThe maximum number of SNPs allowed in the corresponding window from"
	@echo "\t\t\t --dwin [default=3,2,1]"
	@echo " --query_edge=NUM\tOnly consider SNPs that occur within NUM bp of the end"
	@echo "\t\t\t of a query contig [default=250]"
	@echo " --ref_edge=NUM\tOnly consider SNPs that occur within NUM bp of the end"
	@echo "\t\t\t of a reference contig [default=250]"
	@echo " --n_ref=NUM\t\tThe number of RefChooser reference isolates to consider (only"
	@echo "\t\t\t applied if using RefChooser) [default=3]"
	@echo " --reads=PATH\t\tLocation of query read data (Path to directory, or path to file with"
	@echo "\t\t\t multiple directories)"
	@echo " --fasta=PATH\t\tLocation of query assembly data (Path to directory containing"
	@echo "\t\t\t FASTAs, path to FASTA, path to multiple FASTAs)"
	@echo " --ref_reads=PATH\tLocation of reference read data (Path to directory, or path to"
	@echo "\t\t\t file with multiple directories)"
	@echo " --ref_fasta=PATH\tLocation of reference assembly data (Path to directory"
	@echo "\t\t\t containing FASTAs, path to FASTA, path to multiple FASTAs)"
	@echo " --trim_name=STR\tA string in assembly file names that you want to remove from"
	@echo "\t\t\t sample IDs (e.g., _contigs_skesa)"

# Write the Nextflow `standard` profile to ~/.nextflow/config.
# The redirect must sit on the `cat` line: a here-document terminator has to be
# alone on its line, so `EOF > file` never ends the document. The unquoted
# delimiter lets the shell substitute `nproc --all` while writing the file.
config:
	@mkdir -p ~/.nextflow && cat <<- EOF > ~/.nextflow/config
	profiles {
	    standard {
	        process.executor = 'local'
	        params.cores = `nproc --all`
	    }
	}
	EOF

# When the first goal is `screen` or `snp`, treat every following command-line
# goal as a pipeline argument: collect them in `runargs` and define a no-op
# rule for them so Make does not try to build them as targets.
ifneq (,$(filter screen snp,$(firstword $(MAKECMDGOALS))))
  runargs := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
  $(eval $(runargs):;@true)
endif

screen: config ## determine whether query isolates are close to a reference
	nextflow run CSP2.nf -profile standard --runmode screen $(runargs)

snp: config ## generate pairwise distances for a set of query isolates
	nextflow run CSP2.nf -profile standard --runmode snp $(runargs)

# Placeholder target: only ensures the Nextflow config has been written.
snpdiffs: config

# The test targets use `-profile standard`, so they depend on `config`
# explicitly rather than on prerequisite ordering in `test` (which is not
# guaranteed under `make -j`).
test_screen: config
	nextflow run CSP2.nf -profile standard --runmode screen \
		--fasta assets/Screen/Assembly/Week_42_Assembly.fasta \
		--reads assets/Screen/Reads/ \
		--ref_fasta assets/Screen/Assembly/Lab_Control.fasta \
		--out ./CSP2_Test_Screen \
		--readext fq.gz --forward _1.fq.gz --reverse _2.fq.gz

test_snp: config
	nextflow run CSP2.nf -profile standard --runmode snp --fasta assets/SNP/ --n_ref 3 --out ./CSP2_Test_SNP --max_missing 50

# Run both test pipelines, list the bundled screen output, then diff the
# generated snpdiffs against the bundled SNP output.
test: config test_screen test_snp
	ls -lah assets/Screen/Output/Contamination_Screen/
	diff -bur ./CSP2_Test_SNP/snpdiffs assets/SNP/Output/Soil_Analysis/snpdiffs