Mercurial > repos > rliterman > csp2
diff CSP2/subworkflows/snpdiffs/main.nf @ 0:01431fa12065
"planemo upload"
author | rliterman |
---|---|
date | Mon, 02 Dec 2024 10:40:55 -0500 |
parents | |
children |
line wrap: on
line diff
// Source: CSP2/subworkflows/snpdiffs/main.nf, added in changeset 0:01431fa12065
// (Mon Dec 02 10:40:55 2024 -0500). Reformatted from a collapsed diff view; the
// leading "+" diff markers were removed.

// Screening and SNP Pipeline processing

// Directories supplied through pipeline parameters
output_directory = file(params.output_directory)
log_directory = file(params.log_directory)
screen_log_dir = file(params.screen_log_dir)
snp_log_dir = file(params.snp_log_dir)
snp_directory = file(params.snp_directory)

// Temporary directory: an empty string means "no temp dir requested".
// NOTE(review): the original tested params.tmp_dir but then read params.temp_dir.
// params.temp_dir is still preferred when it is defined and non-empty; otherwise
// we fall back to params.tmp_dir instead of calling file(null). Confirm which
// parameter name the calling pipeline actually declares.
if (params.tmp_dir == "") {
    temp_dir = ""
} else {
    def tmp_dir_source = (params.containsKey('temp_dir') && params.temp_dir) ? params.temp_dir : params.tmp_dir
    temp_dir = tmp_dir_source ? file(tmp_dir_source) : ""
}

ref_id_file = file(params.ref_id_file)

ref_mode = params.ref_mode

// Assess whether to rescue edge-filtered SNPs
// (anything other than the literal "norescue" is treated as "rescue")
edge_rescue = "${params.rescue}" == "norescue" ? "norescue" : "rescue"

// Set paths for output files
all_snpdiffs_list = file("${log_directory}/All_SNPDiffs.txt")
snp_dirs_list = file("${log_directory}/SNP_Dirs.txt")
screening_results_file = file("${output_directory}/Screening_Results.tsv")
isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")

// Get QC thresholds
min_cov = params.min_cov.toFloat()
min_length = params.min_len.toInteger()
min_iden = params.min_iden.toFloat()
reference_edge = params.ref_edge.toInteger()
query_edge = params.query_edge.toInteger()
max_missing = params.max_missing.toFloat()
n_ref = params.n_ref.toInteger()

// Entry workflow, used only when this file is run directly.
// NOTE(review): `all_snpdiffs` is not defined at script scope in the visible code,
// and the call passes Groovy named arguments rather than positional channels.
// Left unchanged because the intended channel source cannot be determined from
// this file; verify before running this script stand-alone.
workflow {
    main:
    // Run SNP pipeline
    runSNPPipeline(query_data: all_snpdiffs, reference_data: ref_id_file)
}

// Screening sub-workflow.
//   take: all_snpdiffs - channel of items whose third element (index 2) is used
//         as the de-duplication key (presumably the snpdiffs file path; confirm
//         against the caller).
// De-duplicates on that key, gathers everything into one emission and hands it
// to screenSNPDiffs.
workflow runScreen {

    take:
    all_snpdiffs

    main:

    all_snpdiffs
        .unique{it -> it[2]}
        .collect()
        | screenSNPDiffs
}

// Runs bin/screenSNPDiffs.py with the QC thresholds defined at the top of this file.
// The `all_snpdiffs` input is not referenced in the command; the script is pointed
// at the All_SNPDiffs.txt path (all_snpdiffs_list) instead.
// NOTE(review): that list file is not written anywhere in this file; presumably it
// is created upstream. Confirm.
process screenSNPDiffs{

    input:
    val(all_snpdiffs)

    script:

    screenDiffs = file("${projectDir}/bin/screenSNPDiffs.py")
    """
    $params.load_python_module
    $params.load_bedtools_module
    python $screenDiffs \
        --snpdiffs_file "${all_snpdiffs_list}" \
        --log_dir "${screen_log_dir}" \
        --min_cov "${min_cov}" \
        --min_len "${min_length}" \
        --min_iden "${min_iden}" \
        --ref_edge "${reference_edge}" \
        --query_edge "${query_edge}" \
        --density_windows "${params.dwin}" \
        --max_snps "${params.wsnps}" \
        --trim_name "${params.trim_name}" \
        --output_file "${screening_results_file}" \
        --ref_id "${ref_id_file}" \
        --tmp_dir "${temp_dir}"
    """
}

// SNP pipeline sub-workflow.
//   take: all_snpdiffs   - channel of items indexed [0], [1], [2]
//                          (presumably query ID, reference ID, snpdiffs path; confirm)
//         reference_data - channel of values compared against element [0] below
//                          (presumably reference isolate IDs; confirm)
// Each item is keyed once by element [0] and once by element [1], so an item is
// listed under both IDs. Only keys that match an entry of reference_data are kept,
// and the files are grouped per key before being passed to runSnpPipeline.
workflow runSNPPipeline{
    take:
    all_snpdiffs
    reference_data

    main:

    query_snpdiffs = all_snpdiffs.map{tuple(it[0],it[2])}
    ref_snpdiffs = all_snpdiffs.map{tuple(it[1],it[2])}

    // Concatenate both keyed views and rebuild them as (id, file) pairs
    stacked_snpdiffs = query_snpdiffs.concat(ref_snpdiffs)
        .collect().flatten().collate(2)

    snp_dirs = stacked_snpdiffs
        .combine(reference_data)
        .filter{it -> it[0].toString() == it[2].toString()}
        .map{it -> tuple(it[0],it[1])}
        .groupTuple(by:0)
        .map { ref, diff_files -> tuple( ref.toString(), diff_files.collect() ) }
        | runSnpPipeline

    //snp_dirs.collect() | compileResults
}

// Writes the collected SNP directories to SNP_Dirs.txt and runs
// bin/compileSNPResults.py over them.
// The directives below use the documented directive form (no '='), replacing the
// original assignment form.
process compileResults{

    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snp_directories)

    script:

    compile_script = file("${projectDir}/bin/compileSNPResults.py")
    // One directory per line, newline-terminated
    snp_dirs_list.write(snp_directories.join("\n")+ "\n")
    """
    $params.load_python_module
    python $compile_script \
        --snp_dirs_file "${snp_dirs_list}" \
        --output_directory "${snp_directory}" \
        --isolate_data_file "${isolate_data_file}" \
        --mummer_data_file "${snpdiffs_summary_file}"
    """
}

// Runs bin/runSNPPipeline.py for one reference.
//   input:  reference_id - used as the name of the per-reference output directory
//           diff_files   - list of entries written one per line to SNPDiffs.txt
//   output: stdout, which is the output directory path echoed without a trailing
//           newline, plus anything else the commands print.
process runSnpPipeline{

    input:
    tuple val(reference_id),val(diff_files)

    output:
    stdout

    script:

    snp_script = file("${projectDir}/bin/runSNPPipeline.py")

    // Set + create output directory
    snp_dir = file("${snp_directory}/${reference_id}")
    snp_dir.mkdirs()

    // Write SNPDiffs list
    out_snpdiffs = file("${snp_dir}/SNPDiffs.txt")
    out_snpdiffs.write(diff_files.join("\n")+ "\n")
    """
    $params.load_python_module
    $params.load_bedtools_module
    python $snp_script \
        --reference_id "${reference_id}" \
        --output_directory "${snp_dir}" \
        --snpdiffs_file "${out_snpdiffs}" \
        --log_directory "${snp_log_dir}" \
        --min_cov "${min_cov}" \
        --min_len "${min_length}" \
        --min_iden "${min_iden}" \
        --ref_edge "${reference_edge}" \
        --query_edge "${query_edge}" \
        --density_windows "${params.dwin}" \
        --max_snps "${params.wsnps}" \
        --trim_name "${params.trim_name}" \
        --max_missing "${max_missing}" \
        --tmp_dir "${temp_dir}" \
        --rescue "${edge_rescue}"
    echo -n $snp_dir
    """
}