// Screening and SNP Pipeline processing
//
// Defines two sub-workflows (runScreen, runSNPPipeline) and the processes they
// drive. Script-level settings below are intentionally assigned WITHOUT `def`
// so that they live in the script binding and are visible inside the
// workflow and process bodies.

// Directories supplied through params
output_directory = file(params.output_directory)
log_directory = file(params.log_directory)
screen_log_dir = file(params.screen_log_dir)
snp_log_dir = file(params.snp_log_dir)
snp_directory = file(params.snp_directory)

// Optional temp directory. An unset/empty value is forwarded to the scripts
// as an empty string; otherwise it is resolved to a path.
// The guard and the resolved value must read the SAME parameter (tmp_dir).
temp_dir = params.tmp_dir ? file(params.tmp_dir) : ""

ref_id_file = file(params.ref_id_file)

ref_mode = params.ref_mode

// Assess whether to rescue edge-filtered SNPs
// (anything other than the literal "norescue" means "rescue")
edge_rescue = "${params.rescue}" == "norescue" ? "norescue" : "rescue"

// Set paths for output files
all_snpdiffs_list = file("${log_directory}/All_SNPDiffs.txt")
snp_dirs_list = file("${log_directory}/SNP_Dirs.txt")
screening_results_file = file("${output_directory}/Screening_Results.tsv")
isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")

// Get QC thresholds
min_cov = params.min_cov.toFloat()
min_length = params.min_len.toInteger()
min_iden = params.min_iden.toFloat()
reference_edge = params.ref_edge.toInteger()
query_edge = params.query_edge.toInteger()
max_missing = params.max_missing.toFloat()
n_ref = params.n_ref.toInteger()

// Entry workflow (only used when this script is run directly).
// NOTE(review): `all_snpdiffs` is not defined anywhere in the visible source,
// and the call passes named arguments while runSNPPipeline declares two
// positional `take:` inputs — confirm the intended channel source and call
// form before relying on this entry point.
workflow {
    main:
    // Run SNP pipeline
    runSNPPipeline(query_data: all_snpdiffs, reference_data: ref_id_file)
}

// Screening sub-workflow.
// take: all_snpdiffs — channel of tuples; presumably
//       (query_id, reference_id, snpdiffs_path) — TODO confirm against caller.
// Items are de-duplicated on their third element, gathered into a single
// list, and handed to screenSNPDiffs.
workflow runScreen {

    take:
    all_snpdiffs

    main:

    all_snpdiffs
        .unique{it -> it[2]}
        .collect()
    | screenSNPDiffs
}

// Runs bin/screenSNPDiffs.py once over the collected snpdiffs.
// The command reads the list at `all_snpdiffs_list` (script-level path), not
// the `all_snpdiffs` input value; the input serves to trigger the task.
process screenSNPDiffs{

    input:
    val(all_snpdiffs)

    script:

    // `def` keeps the helper path local to this task
    def screenDiffs = file("${projectDir}/bin/screenSNPDiffs.py")
    """
    $params.load_python_module
    $params.load_bedtools_module
    python $screenDiffs --snpdiffs_file "${all_snpdiffs_list}" --log_dir "${screen_log_dir}" --min_cov "${min_cov}" --min_len "${min_length}" --min_iden "${min_iden}" --ref_edge "${reference_edge}" --query_edge "${query_edge}" --density_windows "${params.dwin}" --max_snps "${params.wsnps}" --trim_name "${params.trim_name}" --output_file "${screening_results_file}" --ref_id "${ref_id_file}" --tmp_dir "${temp_dir}"
    """
}

// SNP pipeline sub-workflow.
// take: all_snpdiffs   — channel of 3-element tuples (elements 0 and 1 are
//                        paired with element 2 below).
//       reference_data — values compared (as strings) against the isolate IDs
//                        to select which isolates act as references.
workflow runSNPPipeline{
    take:
    all_snpdiffs
    reference_data

    main:

    // Pair each snpdiffs entry (it[2]) with both of its isolate IDs
    query_snpdiffs = all_snpdiffs.map{tuple(it[0],it[2])}
    ref_snpdiffs = all_snpdiffs.map{tuple(it[1],it[2])}

    // Stack both pairings into one channel of (isolate_id, snpdiffs) pairs
    stacked_snpdiffs = query_snpdiffs.concat(ref_snpdiffs)
        .collect().flatten().collate(2)

    // Keep only pairs whose isolate ID matches a reference ID, group the
    // snpdiffs per reference, and run the pipeline once per reference
    snp_dirs = stacked_snpdiffs
        .combine(reference_data)
        .filter{it -> it[0].toString() == it[2].toString()}
        .map{it -> tuple(it[0],it[1])}
        .groupTuple(by:0)
        .map { ref, diff_files -> tuple( ref.toString(), diff_files.collect() ) }
    | runSnpPipeline

    //snp_dirs.collect() | compileResults
}

// Writes the list of SNP output directories to `snp_dirs_list` and runs
// bin/compileSNPResults.py over it.
process compileResults{

    // Directive form (`name value`), not config-style assignment
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snp_directories)

    script:

    def compile_script = file("${projectDir}/bin/compileSNPResults.py")
    // One directory per line, newline-terminated
    snp_dirs_list.write(snp_directories.join("\n")+ "\n")
    """
    $params.load_python_module
    python $compile_script --snp_dirs_file "${snp_dirs_list}" --output_directory "${snp_directory}" --isolate_data_file "${isolate_data_file}" --mummer_data_file "${snpdiffs_summary_file}"
    """
}

// Runs bin/runSNPPipeline.py for one reference.
// input:  (reference_id, diff_files) — reference ID and its list of snpdiffs.
// output: stdout — the task echoes the per-reference output directory.
process runSnpPipeline{

    input:
    tuple val(reference_id),val(diff_files)

    output:
    stdout

    script:

    // All helpers are declared with `def` so that concurrent tasks (one per
    // reference) do not share/overwrite each other's paths.
    def snp_script = file("${projectDir}/bin/runSNPPipeline.py")

    // Set + create output directory
    def snp_dir = file("${snp_directory}/${reference_id}")
    snp_dir.mkdirs()

    // Write SNPDiffs list (one entry per line, newline-terminated)
    def out_snpdiffs = file("${snp_dir}/SNPDiffs.txt")
    out_snpdiffs.write(diff_files.join("\n")+ "\n")
    """
    $params.load_python_module
    $params.load_bedtools_module
    python $snp_script --reference_id "${reference_id}" --output_directory "${snp_dir}" --snpdiffs_file "${out_snpdiffs}" --log_directory "${snp_log_dir}" --min_cov "${min_cov}" --min_len "${min_length}" --min_iden "${min_iden}" --ref_edge "${reference_edge}" --query_edge "${query_edge}" --density_windows "${params.dwin}" --max_snps "${params.wsnps}" --trim_name "${params.trim_name}" --max_missing "${max_missing}" --tmp_dir "${temp_dir}" --rescue "${edge_rescue}"
    echo -n $snp_dir
    """
}