Mercurial > repos > rliterman > csp2
comparison CSP2/subworkflows/alignData/main.nf @ 0:01431fa12065
"planemo upload"
author | rliterman |
---|---|
date | Mon, 02 Dec 2024 10:40:55 -0500 |
parents | |
children | 0d775868ee62 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:01431fa12065 |
---|---|
1 // Subworkflow to run MUMmer for query/reference comparisons | |
2 | |
// Resolve the directories supplied through params into path objects.
// These are deliberately assigned without `def` so they are script-level
// variables that the workflow and processes below can reference.
output_directory     = file(params.output_directory)
mummer_directory     = file(params.mummer_directory)
mummer_log_directory = file(params.mummer_log_directory)
snpdiffs_directory   = file(params.snpdiffs_directory)
log_directory        = file(params.log_directory)

// An empty params.tmp_dir means "no temp directory"; otherwise use params.temp_dir.
// NOTE(review): the test reads params.tmp_dir but the value comes from
// params.temp_dir — confirm both params are set upstream and this is intentional.
temp_dir = (params.tmp_dir == "") ? "" : file(params.temp_dir)

// Reference-selection settings forwarded to the helper scripts
ref_mode    = params.ref_mode
ref_id_file = file(params.ref_id_file)

// Files written/read by the processes below, plus the MUMmer compilation script
mummerScript          = file("$projectDir/bin/compileMUMmer.py")
all_snpdiffs_list     = file("${log_directory}/All_SNPDiffs.txt")
isolate_data_file     = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")
24 | |
// Entry workflow: runs alignGenomes and publishes the resulting snpdiffs records.
// NOTE(review): read_data and snpdiffs_data are not defined anywhere in this file —
// confirm where these channels are meant to come from before relying on this entry point.
workflow {
    main:
    // Align genomes.
    // Workflow inputs are matched by position, in the order of the callee's `take:`
    // section. A named-argument call would be collapsed by Groovy into a single Map
    // argument and fail the input-count check for alignGenomes.
    snpdiffs = alignGenomes(read_data, snpdiffs_data)

    publish:
    // Publish snpdiffs
    snpdiffs >> 'snpdiffs.tsv'
}
33 | |
// Runs MUMmer for every query/reference pair that still needs aligning, records all
// snpdiffs paths through saveMUMmerLog, and emits one record per unique isolate pair.
//
// take:
//   to_align      - tuples passed straight to runMUMmer, i.e.
//                   (query_name, query_fasta, ref_name, ref_fasta)
//   snpdiffs_data - existing records indexed like the rows parsed from runMUMmer's
//                   stdout: elements 0 and 1 identify the pair, element 2 is the value
//                   handed to saveMUMmerLog (presumably the snpdiffs path — confirm
//                   against compileMUMmer.py)
// emit:
//   return_snpdiffs - tuples of (element 0, element 1, element 2) for each unique,
//                     order-independent pair whose element 2 came back from saveMUMmerLog
workflow alignGenomes {
    take:
    to_align
    snpdiffs_data

    main:

    // Align anything that needs aligning
    sample_pairwise = to_align
        .filter { row -> "${row[0]}" != "${row[2]}" } // Don't map things to themselves
        | runMUMmer
        | splitCsv

    // Gather every distinct element-2 value (new and pre-existing) into a single list
    // so saveMUMmerLog is invoked once with all of them.
    log_hold = sample_pairwise
        .concat(snpdiffs_data)
        .unique { row -> row[2] }
        .collect { row -> row[2] }

    // Re-emit the logged values one at a time, each wrapped in a one-element list,
    // so they can serve as the key in the join below.
    snpdiff_files = saveMUMmerLog(log_hold)
        .collect().flatten().collate(1)

    return_snpdiffs = sample_pairwise
        .concat(snpdiffs_data)
        // Prefix each row with an order-independent pair key ("A,B" for both A/B and B/A)
        .map { row -> tuple([row[0], row[1]].sort().join(',').toString(), row[0], row[1], row[2]) }
        .unique { keyed -> keyed[0] }
        // Move element 2 to the front so it becomes the join key
        .map { keyed -> tuple(keyed[3], keyed[1], keyed[2]) }
        .join(snpdiff_files, by: 0)
        // Restore the original (element 0, element 1, element 2) layout
        .map { joined -> tuple(joined[1], joined[2], joined[0]) }

    emit:
    return_snpdiffs
}
66 | |
// Runs MUMmer's dnadiff for one query/reference pair, removes the intermediate
// dnadiff outputs that are not needed afterwards, and then calls compileMUMmer.py.
//
// input:  (query_name, query_fasta, ref_name, ref_fasta), all passed as plain values
// output: the task's stdout (the calling workflow parses it with splitCsv)
process runMUMmer {

    // Directives use the `name value` form inside a process body
    cpus 1
    memory '4 GB'

    input:
    tuple val(query_name), val(query_fasta), val(ref_name), val(ref_fasta)

    output:
    stdout

    script:

    def report_id = "${query_name}__vs__${ref_name}"
    def mummer_log = file("${mummer_log_directory}/${report_id}.log")

    // Ensure MUMmer directory exists; error() aborts before any command is built
    if (!mummer_directory.isDirectory()) {
        error "$mummer_directory does not exist..."
    }

    // Paths are quoted so directories or file names containing spaces survive the shell
    """
    $params.load_mummer_module
    $params.load_python_module
    $params.load_bedtools_module
    $params.load_bbtools_module

    cd "${mummer_directory}"
    dnadiff -p "${report_id}" "${ref_fasta}" "${query_fasta}"

    rm -rf "${mummer_directory}/${report_id}.mdelta"
    rm -rf "${mummer_directory}/${report_id}.mcoords"
    rm -rf "${mummer_directory}/${report_id}.1delta"
    rm -rf "${mummer_directory}/${report_id}.delta"
    rm -rf "${mummer_directory}/${report_id}.qdiff"
    rm -rf "${mummer_directory}/${report_id}.rdiff"
    rm -rf "${mummer_directory}/${report_id}.unref"
    rm -rf "${mummer_directory}/${report_id}.unqry"

    python ${mummerScript} --query "${query_name}" --query_fasta "${query_fasta}" --reference "${ref_name}" --reference_fasta "${ref_fasta}" --mummer_dir "${mummer_directory}" --snpdiffs_dir "${snpdiffs_directory}" --temp_dir "${temp_dir}" --log_file "${mummer_log}"
    """
}
109 | |
// Writes the list of snpdiffs paths to All_SNPDiffs.txt and runs saveSNPDiffs.py on it,
// passing the summary file, isolate file, trim name and reference ID file.
//
// input:  snpdiffs_paths - list of values, written one per line to all_snpdiffs_list
// output: the same snpdiffs_paths value, passed through unchanged
process saveMUMmerLog {

    // Directives use the `name value` form inside a process body
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snpdiffs_paths)

    output:
    val(snpdiffs_paths)

    script:
    def saveSNPDiffs = file("$projectDir/bin/saveSNPDiffs.py")

    // The list file is written from Groovy while the task script is being built,
    // not by the shell command below; each path goes on its own line.
    all_snpdiffs_list.write(snpdiffs_paths.join('\n') + '\n')
    """
    $params.load_python_module
    python $saveSNPDiffs --snpdiffs_file "${all_snpdiffs_list}" --summary_file "${snpdiffs_summary_file}" --isolate_file "${isolate_data_file}" --trim_name "${params.trim_name}" --ref_id_file "${ref_id_file}"
    """
}