annotate CSP2/subworkflows/alignData/main.nf @ 15:0d775868ee62

"planemo upload"
author rliterman
date Tue, 03 Dec 2024 12:23:38 -0500
parents 01431fa12065
children
rev   line source
rliterman@0 1 // Subworkflow to run MUMmer for query/reference comparisons
rliterman@0 2
// Resolve the directories supplied through params into file objects
output_directory     = file(params.output_directory)
mummer_directory     = file(params.mummer_directory)
mummer_log_directory = file(params.mummer_log_directory)
snpdiffs_directory   = file(params.snpdiffs_directory)
log_directory        = file(params.log_directory)

// An empty params.tmp_dir leaves temp_dir as an empty string; otherwise the
// path is taken from params.temp_dir.
// NOTE(review): the check reads `tmp_dir` but the value comes from `temp_dir`
// -- confirm both params are defined upstream and that this is intentional.
temp_dir = (params.tmp_dir == "") ? "" : file(params.temp_dir)

// Reference selection settings
ref_mode    = params.ref_mode
ref_id_file = file(params.ref_id_file)

// Locations of the accessory script and of the summary/log files
all_snpdiffs_list     = file("${log_directory}/All_SNPDiffs.txt")
isolate_data_file     = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")
mummerScript          = file("$projectDir/bin/compileMUMmer.py")
rliterman@0 24
// Entry workflow: runs alignGenomes and publishes the result as 'snpdiffs.tsv'.
// NOTE(review): `read_data` and `snpdiffs_data` are not defined in the visible
// part of this file -- confirm where they come from before running this
// script as an entry point.
workflow {
    main:
    // Run the alignment subworkflow
    aligned_snpdiffs = alignGenomes(to_align: read_data, snpdiffs_data: snpdiffs_data)

    publish:
    // Publish the resulting snpdiffs channel
    aligned_snpdiffs >> 'snpdiffs.tsv'
}
rliterman@0 33
// Align every query/reference pair that still needs aligning with MUMmer,
// merge the results with already-available snpdiffs records, and log them.
//
// take:
//   to_align      - channel of tuples laid out as the runMUMmer input:
//                   (query_name, query_fasta, ref_name, ref_fasta)
//   snpdiffs_data - channel of existing records; element [2] is used as the
//                   snpdiffs path (presumably [0]/[1] are the two isolate
//                   names -- TODO confirm against the caller)
// emit:
//   return_snpdiffs - tuples of (element [0], element [1], snpdiffs path),
//                     one per unordered pair of [0]/[1]
workflow alignGenomes{
    take:
    to_align
    snpdiffs_data

    main:

    // Align anything that needs aligning; each stdout line from runMUMmer is
    // parsed as one CSV row
    sample_pairwise = to_align
    .filter{"${it[0]}" != "${it[2]}"} // Don't map things to themselves
    | runMUMmer
    | splitCsv

    // Gather every distinct snpdiffs path (new and pre-existing) into one list
    log_hold = sample_pairwise
    .concat(snpdiffs_data)
    .unique{it -> it[2]}
    .collect{it -> it[2]}

    // Write the log/summary once, then re-emit each path as a one-element
    // list so it can be used as a join key below
    snpdiff_files = saveMUMmerLog(log_hold)
    .collect().flatten().collate(1)

    return_snpdiffs = sample_pairwise
    .concat(snpdiffs_data)
    // Key each record by its sorted name pair so A/B and B/A collapse together
    .map { it -> tuple([it[0], it[1]].sort().join(',').toString(),it[0], it[1], it[2]) }
    .unique{it -> it[0]}
    // Put the snpdiffs path first so it can be joined against snpdiff_files
    .map{it->tuple(it[3],it[1],it[2])}
    .join(snpdiff_files,by:0)
    // Restore the (name, name, snpdiffs path) ordering for downstream users
    .map{it->tuple(it[1],it[2],it[0])}

    emit:
    return_snpdiffs
}
rliterman@0 66
// Run MUMmer's dnadiff for one query/reference pair, remove the intermediate
// dnadiff outputs that are not needed afterwards, and hand the remaining
// results to compileMUMmer.py. The process emits whatever the script prints
// to stdout.
//
// input: (query_name, query_fasta, ref_name, ref_fasta), all passed as values
process runMUMmer{

    label 'mummerMem'

    input:
    tuple val(query_name),val(query_fasta),val(ref_name),val(ref_fasta)

    output:
    stdout

    script:

    // Prefix shared by every dnadiff output file and by the log file name
    report_id = "${query_name}__vs__${ref_name}"
    mummer_log = file("${mummer_log_directory}/${report_id}.log")

    // Ensure MUMmer directories exist
    if(!mummer_directory.isDirectory()){
        error "$mummer_directory does not exist..."
    } else{
        // Paths are quoted so directories or FASTA names containing spaces
        // do not split into several shell words
        """
        $params.load_mummer_module
        $params.load_python_module
        $params.load_bedtools_module
        $params.load_bbtools_module

        cd "${mummer_directory}"
        dnadiff -p "${report_id}" "${ref_fasta}" "${query_fasta}"

        rm -rf "${mummer_directory}/${report_id}.mdelta"
        rm -rf "${mummer_directory}/${report_id}.mcoords"
        rm -rf "${mummer_directory}/${report_id}.1delta"
        rm -rf "${mummer_directory}/${report_id}.delta"
        rm -rf "${mummer_directory}/${report_id}.qdiff"
        rm -rf "${mummer_directory}/${report_id}.rdiff"
        rm -rf "${mummer_directory}/${report_id}.unref"
        rm -rf "${mummer_directory}/${report_id}.unqry"

        python "${mummerScript}" --query "${query_name}" --query_fasta "${query_fasta}" --reference "${ref_name}" --reference_fasta "${ref_fasta}" --mummer_dir "${mummer_directory}" --snpdiffs_dir "${snpdiffs_directory}" --temp_dir "${temp_dir}" --log_file "${mummer_log}"
        """
    }
}
rliterman@0 108
// Write the list of snpdiffs paths to All_SNPDiffs.txt and run
// saveSNPDiffs.py on it together with the summary, isolate-data and
// reference-ID file paths. The input list is passed through unchanged as
// the process output.
process saveMUMmerLog{

    // Directives use the standard `name value` form, matching the
    // `label` directive in runMUMmer
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snpdiffs_paths)

    output:
    val(snpdiffs_paths)

    script:
    saveSNPDiffs = file("$projectDir/bin/saveSNPDiffs.py")
    // Overwrite the list file with one snpdiffs path per line before the
    // shell script below reads it
    all_snpdiffs_list.write(snpdiffs_paths.join('\n') + '\n')
    """
    $params.load_python_module
    python "$saveSNPDiffs" --snpdiffs_file "${all_snpdiffs_list}" --summary_file "${snpdiffs_summary_file}" --isolate_file "${isolate_data_file}" --trim_name "${params.trim_name}" --ref_id_file "${ref_id_file}"
    """
}