// Subworkflow to run MUMmer for query/reference comparisons

// Resolve the pipeline directories supplied via params into path objects
output_directory = file(params.output_directory)
mummer_directory = file(params.mummer_directory)
mummer_log_directory = file(params.mummer_log_directory)
snpdiffs_directory = file(params.snpdiffs_directory)
log_directory = file(params.log_directory)

// Optional temp directory: an unset/empty params.tmp_dir is passed on as an empty string.
// The same parameter is read in both the guard and the value (the guard previously tested
// params.tmp_dir while the value was read from params.temp_dir).
temp_dir = params.tmp_dir ? file(params.tmp_dir) : ""

ref_mode = params.ref_mode
ref_id_file = file(params.ref_id_file)

// Set path to accessory scripts/files
all_snpdiffs_list = file("${log_directory}/All_SNPDiffs.txt")
isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")
mummerScript = file("$projectDir/bin/compileMUMmer.py")
// Entry workflow: runs alignGenomes and publishes the resulting snpdiffs channel.
workflow {
    main:
    // Workflow inputs are positional and must follow the order of alignGenomes' take: block
    // (to_align, snpdiffs_data); a named-argument call would be passed as a single Map.
    // NOTE(review): read_data and snpdiffs_data are not defined in the visible code -
    // confirm where these channels are created.
    snpdiffs = alignGenomes(read_data, snpdiffs_data)

    publish:
    // Publish snpdiffs
    // NOTE(review): the publish: section presumably needs the workflow-output preview
    // feature enabled - confirm against the Nextflow version in use.
    snpdiffs >> 'snpdiffs.tsv'
}
// Align every query/reference pair that still needs a MUMmer run, register all snpdiffs
// files (new and pre-existing) via saveMUMmerLog, and emit one record per unordered pair.
//
// take:
//   to_align      - tuples matching the runMUMmer input:
//                   (query_name, query_fasta, ref_name, ref_fasta)
//   snpdiffs_data - existing records whose third element is used as the join key below
//                   (presumably (query, reference, snpdiffs_path) - TODO confirm with caller)
// emit:
//   return_snpdiffs - 3-tuples in the same element order as the incoming records, limited
//                     to those whose third element was returned by saveMUMmerLog
workflow alignGenomes {
    take:
    to_align
    snpdiffs_data

    main:

    // Align anything that needs aligning
    sample_pairwise = to_align
        .filter { pair -> "${pair[0]}" != "${pair[2]}" } // Don't map things to themselves
        | runMUMmer
        | splitCsv

    // Gather the distinct third column of new + existing records into a single list
    log_hold = sample_pairwise
        .concat(snpdiffs_data)
        .unique { rec -> rec[2] }
        .collect { rec -> rec[2] }

    // Re-emit the logged values as single-element lists so they can be joined on index 0
    snpdiff_files = saveMUMmerLog(log_hold)
        .collect()
        .flatten()
        .collate(1)

    return_snpdiffs = sample_pairwise
        .concat(snpdiffs_data)
        // Order-independent key so A-vs-B and B-vs-A collapse to one record
        .map { rec -> tuple([rec[0], rec[1]].sort().join(',').toString(), rec[0], rec[1], rec[2]) }
        .unique { rec -> rec[0] }
        // Drop the key and move the third column to the front for the join
        .map { rec -> tuple(rec[3], rec[1], rec[2]) }
        .join(snpdiff_files, by: 0)
        // Restore the original element order
        .map { rec -> tuple(rec[1], rec[2], rec[0]) }

    emit:
    return_snpdiffs
}
// Run dnadiff for one query/reference pair inside the shared MUMmer directory, remove the
// intermediate dnadiff outputs that are not needed afterwards, then call compileMUMmer.py.
//
// input:  (query_name, query_fasta, ref_name, ref_fasta), all passed as plain values
// output: the task's stdout (split as CSV by the calling workflow)
process runMUMmer {

    label 'mummerMem'

    input:
    tuple val(query_name), val(query_fasta), val(ref_name), val(ref_fasta)

    output:
    stdout

    script:

    // Declared with def so they stay local to this task's script block
    def report_id = "${query_name}__vs__${ref_name}"
    def mummer_log = file("${mummer_log_directory}/${report_id}.log")

    // Ensure MUMmer directories exist
    if (!mummer_directory.isDirectory()) {
        error "$mummer_directory does not exist..."
    } else {
        // Paths are quoted so directories or names containing spaces do not split into
        // separate shell words (the python call below already quoted its arguments).
        """
        $params.load_mummer_module
        $params.load_python_module
        $params.load_bedtools_module
        $params.load_bbtools_module

        cd "${mummer_directory}"
        dnadiff -p "${report_id}" "${ref_fasta}" "${query_fasta}"

        rm -rf "${mummer_directory}/${report_id}.mdelta" \\
               "${mummer_directory}/${report_id}.mcoords" \\
               "${mummer_directory}/${report_id}.1delta" \\
               "${mummer_directory}/${report_id}.delta" \\
               "${mummer_directory}/${report_id}.qdiff" \\
               "${mummer_directory}/${report_id}.rdiff" \\
               "${mummer_directory}/${report_id}.unref" \\
               "${mummer_directory}/${report_id}.unqry"

        python "${mummerScript}" --query "${query_name}" --query_fasta "${query_fasta}" --reference "${ref_name}" --reference_fasta "${ref_fasta}" --mummer_dir "${mummer_directory}" --snpdiffs_dir "${snpdiffs_directory}" --temp_dir "${temp_dir}" --log_file "${mummer_log}"
        """
    }
}
// Write the list of snpdiffs paths to All_SNPDiffs.txt (one per line) and run
// saveSNPDiffs.py on that list; the input list is passed through unchanged as output.
//
// input:  snpdiffs_paths - list of values that are joined with newlines
// output: the same snpdiffs_paths value
process saveMUMmerLog {

    // Process directives take no '=' (that form is config-file syntax)
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snpdiffs_paths)

    output:
    val(snpdiffs_paths)

    script:
    def saveSNPDiffs = file("$projectDir/bin/saveSNPDiffs.py")
    // The list file is written while the script is being built, before the command runs
    all_snpdiffs_list.write(snpdiffs_paths.join('\n') + '\n')
    """
    $params.load_python_module
    python "${saveSNPDiffs}" --snpdiffs_file "${all_snpdiffs_list}" --summary_file "${snpdiffs_summary_file}" --isolate_file "${isolate_data_file}" --trim_name "${params.trim_name}" --ref_id_file "${ref_id_file}"
    """
}