comparison CSP2/subworkflows/alignData/main.nf @ 0:01431fa12065

"planemo upload"
author rliterman
date Mon, 02 Dec 2024 10:40:55 -0500
parents
children 0d775868ee62
comparison
equal deleted inserted replaced
-1:000000000000 0:01431fa12065
// Subworkflow to run MUMmer for query/reference comparisons

// Set path variables
output_directory = file(params.output_directory)
mummer_directory = file(params.mummer_directory)
mummer_log_directory = file(params.mummer_log_directory)
snpdiffs_directory = file(params.snpdiffs_directory)
log_directory = file(params.log_directory)

// Optional temp directory. An empty --tmp_dir means "not set"; in that case the
// empty string is kept and handed to compileMUMmer.py through --temp_dir.
// The emptiness check and the path conversion must read the SAME parameter
// (params.tmp_dir); reading a differently named parameter in the else-branch
// would pass an unrelated/undefined value to file().
temp_dir = params.tmp_dir == "" ? "" : file(params.tmp_dir)

ref_mode = params.ref_mode
ref_id_file = file(params.ref_id_file)

// Set path to accessory scripts/files
all_snpdiffs_list = file("${log_directory}/All_SNPDiffs.txt")               // list of snpdiffs paths written by saveMUMmerLog
isolate_data_file = file("${output_directory}/Isolate_Data.tsv")           // passed to saveSNPDiffs.py as --isolate_file
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv") // passed to saveSNPDiffs.py as --summary_file
mummerScript = file("$projectDir/bin/compileMUMmer.py")                    // run by runMUMmer after dnadiff
24
// Entry workflow: runs the alignGenomes subworkflow and publishes its result.
// NOTE(review): `read_data` and `snpdiffs_data` are not defined in the visible
// part of this file -- confirm where these channels are expected to come from
// before running this module as a standalone script.
workflow {
    main:
    // Align genomes.
    // Workflow inputs are positional (they bind to the `take:` entries in order:
    // to_align, snpdiffs_data). Groovy named arguments would be collapsed into a
    // single Map and would not match the two declared inputs.
    snpdiffs = alignGenomes(read_data, snpdiffs_data)

    publish:
    // Publish snpdiffs
    snpdiffs >> 'snpdiffs.tsv'
}
33
// Subworkflow: align every query/reference pair that still needs a MUMmer run,
// merge the results with already-available snpdiffs records, log them, and
// emit one record per unordered sample pair.
//
// take:
//   to_align      - items fed straight into runMUMmer, i.e. tuples of
//                   (query_name, query_fasta, ref_name, ref_fasta)
//   snpdiffs_data - existing records, indexed here as it[0], it[1], it[2];
//                   presumably (query, reference, snpdiffs path) to match the
//                   rows parsed from runMUMmer's stdout -- TODO confirm with caller
// emit:
//   return_snpdiffs - tuples rebuilt as (it[1], it[2], it[0]) after the join,
//                     i.e. the two sample fields followed by the snpdiffs path
workflow alignGenomes{
    take:
    to_align
    snpdiffs_data

    main:

    // Align anything that needs aligning
    sample_pairwise = to_align
        .filter{"${it[0]}" != "${it[2]}"} // Don't map things to themselves
        | runMUMmer
        | splitCsv

    // Gather the distinct third field (snpdiffs path) of every new and
    // pre-existing record into a single list for the logging process
    log_hold = sample_pairwise
        .concat(snpdiffs_data)
        .unique{it -> it[2]}
        .collect{it -> it[2]}

    // saveMUMmerLog echoes the list back; re-emit it as one single-element
    // list per path so it can be used as the right-hand side of the join below
    snpdiff_files = saveMUMmerLog(log_hold)
        .collect().flatten().collate(1)

    return_snpdiffs = sample_pairwise
        .concat(snpdiffs_data)
        // Build an order-independent pair key ("A,B" for both A-vs-B and B-vs-A)
        .map { it -> tuple([it[0], it[1]].sort().join(',').toString(),it[0], it[1], it[2]) }
        // Keep only the first record seen for each unordered pair
        .unique{it -> it[0]}
        // Move the snpdiffs path to the front so it becomes the join key
        .map{it->tuple(it[3],it[1],it[2])}
        // Only keep records whose path went through saveMUMmerLog
        .join(snpdiff_files,by:0)
        // Restore the (sample, sample, path) ordering for downstream consumers
        .map{it->tuple(it[1],it[2],it[0])}

    emit:
    return_snpdiffs
}
66
// Process: run MUMmer's dnadiff for one query/reference pair, remove the
// intermediate dnadiff outputs that are not needed afterwards, and compile the
// remaining results with compileMUMmer.py.
//
// input:  tuple of (query_name, query_fasta, ref_name, ref_fasta), all as values
// output: the task's stdout (parsed with splitCsv in alignGenomes)
process runMUMmer{

    // Directives are written without `=`, consistently with `memory` below
    cpus 1
    memory '4 GB'

    input:
    tuple val(query_name),val(query_fasta),val(ref_name),val(ref_fasta)

    output:
    stdout

    script:

    // Prefix shared by every dnadiff output file and by the log for this pair
    def report_id = "${query_name}__vs__${ref_name}"
    def mummer_log = file("${mummer_log_directory}/${report_id}.log")

    // Ensure MUMmer directory exists
    if(!mummer_directory.isDirectory()){
        error "$mummer_directory does not exist..."
    } else{
        // Paths are double-quoted in the shell so that directories or sample
        // names containing spaces do not split into multiple arguments
        """
        $params.load_mummer_module
        $params.load_python_module
        $params.load_bedtools_module
        $params.load_bbtools_module

        cd "${mummer_directory}"
        dnadiff -p "${report_id}" "${ref_fasta}" "${query_fasta}"

        rm -rf "${mummer_directory}/${report_id}.mdelta"
        rm -rf "${mummer_directory}/${report_id}.mcoords"
        rm -rf "${mummer_directory}/${report_id}.1delta"
        rm -rf "${mummer_directory}/${report_id}.delta"
        rm -rf "${mummer_directory}/${report_id}.qdiff"
        rm -rf "${mummer_directory}/${report_id}.rdiff"
        rm -rf "${mummer_directory}/${report_id}.unref"
        rm -rf "${mummer_directory}/${report_id}.unqry"

        python "${mummerScript}" --query "${query_name}" --query_fasta "${query_fasta}" --reference "${ref_name}" --reference_fasta "${ref_fasta}" --mummer_dir "${mummer_directory}" --snpdiffs_dir "${snpdiffs_directory}" --temp_dir "${temp_dir}" --log_file "${mummer_log}"
        """
    }
}
109
// Process: write the list of snpdiffs paths to All_SNPDiffs.txt and run
// saveSNPDiffs.py on that list.
//
// input:  snpdiffs_paths - a list of snpdiffs file paths (joined with newlines below)
// output: the same snpdiffs_paths value, passed through unchanged
process saveMUMmerLog{

    // Directives are written without `=` (assignment syntax is for config files)
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snpdiffs_paths)

    output:
    val(snpdiffs_paths)

    script:
    def saveSNPDiffs = file("$projectDir/bin/saveSNPDiffs.py")

    // Side effect at script-evaluation time: (over)write the path list, one
    // path per line, before the shell command that reads it is run
    all_snpdiffs_list.write(snpdiffs_paths.join('\n') + '\n')
    """
    $params.load_python_module
    python "$saveSNPDiffs" --snpdiffs_file "${all_snpdiffs_list}" --summary_file "${snpdiffs_summary_file}" --isolate_file "${isolate_data_file}" --trim_name "${params.trim_name}" --ref_id_file "${ref_id_file}"
    """
}