Mercurial > repos > rliterman > csp2
comparison CSP2/subworkflows/snpdiffs/main.nf @ 0:01431fa12065
"planemo upload"
author | rliterman |
---|---|
date | Mon, 02 Dec 2024 10:40:55 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:01431fa12065 |
---|---|
// Screening and SNP Pipeline processing
// Each directory below is taken from the pipeline parameters and converted
// to a path object with file().

// Output locations
output_directory = file(params.output_directory)
snp_directory = file(params.snp_directory)

// Log locations
log_directory = file(params.log_directory)
screen_log_dir = file(params.screen_log_dir)
snp_log_dir = file(params.snp_log_dir)
7 | |
// Optional temporary directory.
// An empty params.tmp_dir means no temporary directory was supplied; in that case an
// empty string is kept so that the commands below receive --tmp_dir "".
// Otherwise the supplied value is converted to a path. The same parameter
// (params.tmp_dir) is used for both the check and the conversion.
if(params.tmp_dir == ""){
    temp_dir = ""
} else{
    temp_dir = file(params.tmp_dir)
}
// Path given by params.ref_id_file (passed to the screening script as --ref_id)
ref_id_file = file(params.ref_id_file)

// Reference mode, taken verbatim from the parameters
ref_mode = params.ref_mode

// Assess whether to rescue edge-filtered SNPs:
// anything other than the literal "norescue" selects "rescue"
edge_rescue = "${params.rescue}" != "norescue" ? "rescue" : "norescue"
19 | |
// Set paths for output files

// List files kept in the log directory
all_snpdiffs_list = file("${log_directory}/All_SNPDiffs.txt")
snp_dirs_list = file("${log_directory}/SNP_Dirs.txt")

// Result tables kept in the output directory
screening_results_file = file("${output_directory}/Screening_Results.tsv")
isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")
26 | |
// Get QC thresholds
// Parameter values are converted to numeric types once here.

// Floating-point thresholds
min_cov = params.min_cov.toFloat()
min_iden = params.min_iden.toFloat()
max_missing = params.max_missing.toFloat()

// Integer thresholds
min_length = params.min_len.toInteger()
reference_edge = params.ref_edge.toInteger()
query_edge = params.query_edge.toInteger()
n_ref = params.n_ref.toInteger()
35 | |
// Entry workflow: runs the SNP pipeline sub-workflow.
// NOTE(review): `all_snpdiffs` is not defined anywhere in the visible part of this file.
// It has to be created before this entry point can run on its own -- confirm where
// it is expected to come from.
workflow {
    main:
    // Run SNP pipeline
    // Inputs are passed positionally, in the order of the `take:` block of
    // runSNPPipeline (all_snpdiffs, reference_data). Map-style named arguments
    // would be handed over as a single Map value instead of two inputs.
    runSNPPipeline(all_snpdiffs, ref_id_file)
}
41 | |
// Screening sub-workflow.
// take: all_snpdiffs -- channel of records; the element at index 2 is used as the
//       de-duplication key.
workflow runScreen {

    take:
    all_snpdiffs

    main:

    // Keep one record per distinct value at index 2
    distinct_snpdiffs = all_snpdiffs.unique{ record -> record[2] }

    // Gather everything into a single emission and hand it to the screening process
    screenSNPDiffs(distinct_snpdiffs.collect())
}
54 | |
// Runs bin/screenSNPDiffs.py with the QC thresholds and paths defined at the top of
// this file.
// input: all_snpdiffs -- collected value from the upstream channel; the command itself
//        reads the list from all_snpdiffs_list rather than from this value.
process screenSNPDiffs{

    input:
    val(all_snpdiffs)

    script:

    // Helper script shipped in the project's bin directory
    screen_script = file("${projectDir}/bin/screenSNPDiffs.py")
    """
    ${params.load_python_module}
    ${params.load_bedtools_module}
    python ${screen_script} --snpdiffs_file "${all_snpdiffs_list}" --log_dir "${screen_log_dir}" --min_cov "${min_cov}" --min_len "${min_length}" --min_iden "${min_iden}" --ref_edge "${reference_edge}" --query_edge "${query_edge}" --density_windows "${params.dwin}" --max_snps "${params.wsnps}" --trim_name "${params.trim_name}" --output_file "${screening_results_file}" --ref_id "${ref_id_file}" --tmp_dir "${temp_dir}"
    """
}
69 | |
// SNP pipeline sub-workflow.
// take: all_snpdiffs   -- channel of records; indices 0 and 1 are each paired with
//                         index 2 below.
//       reference_data -- combined with every pair; only pairs whose first element
//                         matches it (compared as strings) are kept.
workflow runSNPPipeline{
    take:
    all_snpdiffs
    reference_data

    main:

    // Pair the element at index 2 with index 0 and, separately, with index 1
    pairs_from_first = all_snpdiffs.map{ record -> tuple(record[0], record[2]) }
    pairs_from_second = all_snpdiffs.map{ record -> tuple(record[1], record[2]) }

    // Stack both sets of pairs, gather them, then re-emit them two elements at a time
    stacked_pairs = pairs_from_first
        .concat(pairs_from_second)
        .collect()
        .flatten()
        .collate(2)

    // Keep pairs whose key equals the reference, then group the values per key
    grouped_by_reference = stacked_pairs
        .combine(reference_data)
        .filter{ row -> row[0].toString() == row[2].toString() }
        .map{ row -> tuple(row[0], row[1]) }
        .groupTuple(by:0)
        .map{ ref, diff_files -> tuple(ref.toString(), diff_files.collect()) }

    // One runSnpPipeline task per grouped key
    snp_dirs = runSnpPipeline(grouped_by_reference)

    //snp_dirs.collect() | compileResults
}
93 | |
// Runs bin/compileSNPResults.py over a list of SNP directories.
// input: snp_directories -- list of values; written one per line to snp_dirs_list
//        before the command runs.
process compileResults{

    // Directives use the directive form (name followed by value), not assignment
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snp_directories)

    script:

    // Helper script shipped in the project's bin directory
    compile_script = file("${projectDir}/bin/compileSNPResults.py")

    // Write the directory list that the script reads through --snp_dirs_file
    snp_dirs_list.write(snp_directories.join("\n")+ "\n")
    """
    $params.load_python_module
    python $compile_script --snp_dirs_file "${snp_dirs_list}" --output_directory "${snp_directory}" --isolate_data_file "${isolate_data_file}" --mummer_data_file "${snpdiffs_summary_file}"
    """
}
112 | |
// Runs bin/runSNPPipeline.py for one reference.
// input:  reference_id -- used as the name of the per-reference output directory
//         diff_files   -- list of values; written one per line to SNPDiffs.txt
// output: stdout -- the per-reference output directory path (printed without a
//         trailing newline)
process runSnpPipeline{

    input:
    tuple val(reference_id),val(diff_files)

    output:
    stdout

    script:

    // Helper script shipped in the project's bin directory
    snp_script = file("${projectDir}/bin/runSNPPipeline.py")

    // Set + create output directory
    snp_dir = file("${snp_directory}/${reference_id}")
    snp_dir.mkdirs()

    // Write SNPDiffs list
    out_snpdiffs = file("${snp_dir}/SNPDiffs.txt")
    out_snpdiffs.write(diff_files.join("\n")+ "\n")
    """
    $params.load_python_module
    $params.load_bedtools_module
    python $snp_script --reference_id "${reference_id}" --output_directory "${snp_dir}" --snpdiffs_file "${out_snpdiffs}" --log_directory "${snp_log_dir}" --min_cov "${min_cov}" --min_len "${min_length}" --min_iden "${min_iden}" --ref_edge "${reference_edge}" --query_edge "${query_edge}" --density_windows "${params.dwin}" --max_snps "${params.wsnps}" --trim_name "${params.trim_name}" --max_missing "${max_missing}" --tmp_dir "${temp_dir}" --rescue "${edge_rescue}"
    # Quoted so a path containing whitespace or glob characters is emitted unchanged
    echo -n "${snp_dir}"
    """
}