// Subworkflow to run MUMmer for query/reference comparisons
//
// Defines:
//   - alignGenomes  : workflow that runs MUMmer on query/reference pairs and returns snpdiffs records
//   - runMUMmer     : process that runs dnadiff + compileMUMmer.py for a single query/reference pair
//   - saveMUMmerLog : process that writes the list of snpdiffs paths and runs saveSNPDiffs.py

// Set path variables
output_directory = file(params.output_directory)
mummer_directory = file(params.mummer_directory)
mummer_log_directory = file(params.mummer_log_directory)
snpdiffs_directory = file(params.snpdiffs_directory)
log_directory = file(params.log_directory)

// NOTE(review): the guard reads params.tmp_dir while the value is taken from
// params.temp_dir -- confirm that both parameters are defined by the caller/config.
if(params.tmp_dir == ""){
    temp_dir = ""
} else{
    temp_dir = file(params.temp_dir)
}

ref_mode = params.ref_mode
ref_id_file = file(params.ref_id_file)

// Set path to accessory scripts/files
all_snpdiffs_list = file("${log_directory}/All_SNPDiffs.txt")
isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")
mummerScript = file("$projectDir/bin/compileMUMmer.py")

// Entry workflow.
// NOTE(review): read_data and snpdiffs_data are not defined in this file -- confirm
// where they are expected to come from.
workflow {
    main:
    // Align genomes
    snpdiffs = alignGenomes(to_align: read_data, snpdiffs_data: snpdiffs_data)
    publish:
    // Publish snpdiffs
    snpdiffs >> 'snpdiffs.tsv'
}

// Run MUMmer on every pair that needs aligning and merge the results with
// pre-existing snpdiffs records.
//
// take:
//   to_align      - channel of tuples passed to runMUMmer
//                   (query_name, query_fasta, ref_name, ref_fasta)
//   snpdiffs_data - channel of records concatenated with the new MUMmer results;
//                   presumably (query, reference, snpdiffs path) -- TODO confirm
// emit:
//   return_snpdiffs - channel of (it[0], it[1], snpdiffs path) tuples, one per
//                     unordered pair of the first two fields
workflow alignGenomes{
    take:
    to_align
    snpdiffs_data

    emit:
    return_snpdiffs

    main:

    // Align anything that needs aligning
    sample_pairwise = to_align
    .filter{"${it[0]}" != "${it[2]}"} // Don't map things to themselves
    | runMUMmer
    | splitCsv

    // Gather the unique third field of every new and pre-existing record into a
    // single list so saveMUMmerLog runs once over all of them
    log_hold = sample_pairwise
    .concat(snpdiffs_data)
    .unique{it -> it[2]}
    .collect{it -> it[2]}

    // Re-emit the logged paths as single-element lists so they can be joined on below
    snpdiff_files = saveMUMmerLog(log_hold)
    .collect().flatten().collate(1)

    // Key each record by its sorted (order-independent) pair of the first two fields
    // so A/B and B/A are kept only once, then keep only records whose path was
    // returned by saveMUMmerLog
    return_snpdiffs = sample_pairwise
    .concat(snpdiffs_data)
    .map { it -> tuple([it[0], it[1]].sort().join(',').toString(),it[0], it[1], it[2]) }
    .unique{it -> it[0]}
    .map{it->tuple(it[3],it[1],it[2])}
    .join(snpdiff_files,by:0)
    .map{it->tuple(it[1],it[2],it[0])}
}

// Run dnadiff for one query/reference pair, remove the listed intermediate MUMmer
// files, and run compileMUMmer.py. The process emits its stdout, which the
// caller parses with splitCsv.
process runMUMmer{

    label 'mummerMem'

    input:
    tuple val(query_name),val(query_fasta),val(ref_name),val(ref_fasta)

    output:
    stdout

    script:

    report_id = "${query_name}__vs__${ref_name}"
    mummer_log = file("${mummer_log_directory}/${report_id}.log")

    // Ensure MUMmer directories exist
    if(!mummer_directory.isDirectory()){
        error "$mummer_directory does not exist..."
    } else{
        // Paths are quoted in the shell commands so that directories or names
        // containing whitespace are handled the same way as in the python call
        """
        $params.load_mummer_module
        $params.load_python_module
        $params.load_bedtools_module
        $params.load_bbtools_module

        cd "${mummer_directory}"
        dnadiff -p "${report_id}" "${ref_fasta}" "${query_fasta}"

        rm -rf "${mummer_directory}/${report_id}.mdelta"
        rm -rf "${mummer_directory}/${report_id}.mcoords"
        rm -rf "${mummer_directory}/${report_id}.1delta"
        rm -rf "${mummer_directory}/${report_id}.delta"
        rm -rf "${mummer_directory}/${report_id}.qdiff"
        rm -rf "${mummer_directory}/${report_id}.rdiff"
        rm -rf "${mummer_directory}/${report_id}.unref"
        rm -rf "${mummer_directory}/${report_id}.unqry"

        python ${mummerScript} --query "${query_name}" --query_fasta "${query_fasta}" --reference "${ref_name}" --reference_fasta "${ref_fasta}" --mummer_dir "${mummer_directory}" --snpdiffs_dir "${snpdiffs_directory}" --temp_dir "${temp_dir}" --log_file "${mummer_log}"
        """
    }
}

// Write the list of snpdiffs paths to All_SNPDiffs.txt (one per line) and run
// saveSNPDiffs.py over it. The input list is passed through unchanged as the output.
process saveMUMmerLog{

    // Directives use the documented `name value` form
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snpdiffs_paths)

    output:
    val(snpdiffs_paths)

    script:
    saveSNPDiffs = file("$projectDir/bin/saveSNPDiffs.py")
    // Side effect at script-evaluation time: (over)writes the snpdiffs list file
    all_snpdiffs_list.write(snpdiffs_paths.join('\n') + '\n')
    """
    $params.load_python_module
    python $saveSNPDiffs --snpdiffs_file "${all_snpdiffs_list}" --summary_file "${snpdiffs_summary_file}" --isolate_file "${isolate_data_file}" --trim_name "${params.trim_name}" --ref_id_file "${ref_id_file}"
    """
}