Mercurial > repos > rliterman > csp2
diff CSP2/subworkflows/alignData/main.nf @ 0:01431fa12065
"planemo upload"
author | rliterman |
---|---|
date | Mon, 02 Dec 2024 10:40:55 -0500 |
parents | |
children | 0d775868ee62 |
line wrap: on
line diff
// Subworkflow to run MUMmer for query/reference comparisons
//
// Defines:
//   - alignGenomes  : workflow that aligns query/reference pairs and returns snpdiffs records
//   - runMUMmer     : process that runs dnadiff and compiles the result via compileMUMmer.py
//   - saveMUMmerLog : process that records all snpdiffs paths and runs saveSNPDiffs.py

// Set path variables
output_directory = file(params.output_directory)
mummer_directory = file(params.mummer_directory)
mummer_log_directory = file(params.mummer_log_directory)
snpdiffs_directory = file(params.snpdiffs_directory)
log_directory = file(params.log_directory)

// Resolve the optional temp directory.
// The guard and the value must come from the same parameter: the original code tested
// `params.tmp_dir` but then read `params.temp_dir`. Prefer `tmp_dir`; fall back to the
// `temp_dir` spelling only when `tmp_dir` is not set at all, so existing configs keep working.
if (params.tmp_dir == "") {
    temp_dir = ""
} else {
    temp_dir = file(params.tmp_dir ?: params.temp_dir)
}

ref_mode = params.ref_mode
ref_id_file = file(params.ref_id_file)

// Set path to accessory scripts/files
all_snpdiffs_list = file("${log_directory}/All_SNPDiffs.txt")
isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")
mummerScript = file("$projectDir/bin/compileMUMmer.py")

// Entry workflow.
// NOTE(review): `read_data` and `snpdiffs_data` are not defined anywhere in this file;
// confirm how they are expected to be provided before relying on this entry point.
workflow {
    main:
    // Align genomes
    snpdiffs = alignGenomes(to_align: read_data, snpdiffs_data: snpdiffs_data)
    publish:
    // Publish snpdiffs
    snpdiffs >> 'snpdiffs.tsv'
}

// Align every query/reference pair that needs aligning and merge the results with
// any pre-existing snpdiffs records.
//
// take:
//   to_align      - channel of tuples fed to runMUMmer:
//                   (query_name, query_fasta, ref_name, ref_fasta)
//   snpdiffs_data - channel of existing records; indexed as [0], [1], [2] below, where
//                   [2] is treated as the snpdiffs path
//                   (presumably (query, reference, snpdiffs_path) -- TODO confirm with caller)
// emit:
//   return_snpdiffs - channel of 3-element tuples, one per unordered pair of the first two fields
workflow alignGenomes{
    take:
    to_align
    snpdiffs_data

    main:

    // Align anything that needs aligning
    sample_pairwise = to_align
        .filter{"${it[0]}" != "${it[2]}"} // Don't map things to themselves
        | runMUMmer
        | splitCsv

    // Gather every distinct snpdiffs path (new and pre-existing) into a single list
    log_hold = sample_pairwise
        .concat(snpdiffs_data)
        .unique{it -> it[2]}
        .collect{it -> it[2]}

    // Run the summary step once on the full list, then re-emit each path as a
    // one-element list so it can be used as a join key below
    snpdiff_files = saveMUMmerLog(log_hold)
        .collect().flatten().collate(1)

    return_snpdiffs = sample_pairwise
        .concat(snpdiffs_data)
        // Build an order-independent key from the first two fields so A-vs-B and B-vs-A collapse
        .map { it -> tuple([it[0], it[1]].sort().join(',').toString(),it[0], it[1], it[2]) }
        .unique{it -> it[0]}
        // Put the snpdiffs path first so it can be joined on index 0
        .map{it->tuple(it[3],it[1],it[2])}
        // Keep only records whose path went through saveMUMmerLog
        .join(snpdiff_files,by:0)
        // Restore the original field order: (first, second, snpdiffs path)
        .map{it->tuple(it[1],it[2],it[0])}

    emit:
    return_snpdiffs
}

// Run dnadiff for one query/reference pair, remove intermediate MUMmer files, and
// compile the result with compileMUMmer.py. Whatever the script prints is the process
// output (stdout), which alignGenomes parses with splitCsv.
process runMUMmer{

    cpus = 1
    memory '4 GB'

    input:
    tuple val(query_name),val(query_fasta),val(ref_name),val(ref_fasta)

    output:
    stdout

    script:

    report_id = "${query_name}__vs__${ref_name}"
    mummer_log = file("${mummer_log_directory}/${report_id}.log")

    // Ensure MUMmer directories exist
    if(!mummer_directory.isDirectory()){
        error "$mummer_directory does not exist..."
    } else{
        // Paths are quoted so that directories or FASTA paths containing spaces do not
        // split into multiple shell words (the python call below already quoted them).
        """
        $params.load_mummer_module
        $params.load_python_module
        $params.load_bedtools_module
        $params.load_bbtools_module

        cd "${mummer_directory}"
        dnadiff -p "${report_id}" "${ref_fasta}" "${query_fasta}"

        rm -rf "${mummer_directory}/${report_id}.mdelta"
        rm -rf "${mummer_directory}/${report_id}.mcoords"
        rm -rf "${mummer_directory}/${report_id}.1delta"
        rm -rf "${mummer_directory}/${report_id}.delta"
        rm -rf "${mummer_directory}/${report_id}.qdiff"
        rm -rf "${mummer_directory}/${report_id}.rdiff"
        rm -rf "${mummer_directory}/${report_id}.unref"
        rm -rf "${mummer_directory}/${report_id}.unqry"

        python ${mummerScript} --query "${query_name}" --query_fasta "${query_fasta}" --reference "${ref_name}" --reference_fasta "${ref_fasta}" --mummer_dir "${mummer_directory}" --snpdiffs_dir "${snpdiffs_directory}" --temp_dir "${temp_dir}" --log_file "${mummer_log}"
        """
    }
}

// Write the full list of snpdiffs paths to All_SNPDiffs.txt and run saveSNPDiffs.py on it.
// The input list is passed through unchanged as the output.
process saveMUMmerLog{

    executor = 'local'
    cpus = 1
    maxForks = 1

    input:
    val(snpdiffs_paths)

    output:
    val(snpdiffs_paths)

    script:
    saveSNPDiffs = file("$projectDir/bin/saveSNPDiffs.py")
    // The list file is written while the script block is evaluated, i.e. before the
    // shell command below is launched
    all_snpdiffs_list.write(snpdiffs_paths.join('\n') + '\n')
    """
    $params.load_python_module
    python $saveSNPDiffs --snpdiffs_file "${all_snpdiffs_list}" --summary_file "${snpdiffs_summary_file}" --isolate_file "${isolate_data_file}" --trim_name "${params.trim_name}" --ref_id_file "${ref_id_file}"
    """
}