rliterman@0
|
// Screening and SNP Pipeline processing
//
// Script-level settings read by the workflows and processes in this file.
// They are assigned without `def`, as in the original, so they stay
// script-level variables that the blocks below reference by name.

// Output and log locations, taken from the pipeline parameters
output_directory = file(params.output_directory)
log_directory = file(params.log_directory)
screen_log_dir = file(params.screen_log_dir)
snp_log_dir = file(params.snp_log_dir)
snp_directory = file(params.snp_directory)

// Optional temporary directory. An empty (or unset) tmp_dir parameter means
// "none" and is forwarded to the Python scripts as an empty string.
// The same parameter name (tmp_dir) is used for both the check and the path;
// previously the path was built from a differently named parameter.
temp_dir = params.tmp_dir ? file(params.tmp_dir) : ""

ref_id_file = file(params.ref_id_file)

ref_mode = params.ref_mode

// Assess whether to rescue edge-filtered SNPs
// (anything other than the literal "norescue" selects "rescue")
edge_rescue = "${params.rescue}" == "norescue" ? "norescue" : "rescue"

// Set paths for output files
all_snpdiffs_list = file("${log_directory}/All_SNPDiffs.txt")
snp_dirs_list = file("${log_directory}/SNP_Dirs.txt")
screening_results_file = file("${output_directory}/Screening_Results.tsv")
isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")

// Get QC thresholds (converted to numeric types)
min_cov = params.min_cov.toFloat()
min_length = params.min_len.toInteger()
min_iden = params.min_iden.toFloat()
reference_edge = params.ref_edge.toInteger()
query_edge = params.query_edge.toInteger()
max_missing = params.max_missing.toFloat()
n_ref = params.n_ref.toInteger()
|
rliterman@0
|
35
|
rliterman@0
|
// Entry workflow: passes the SNPDiffs data and the reference data to
// runSNPPipeline.
// NOTE(review): `all_snpdiffs` is not defined in the visible part of this
// file, and runSNPPipeline combines `reference_data` with the SNPDiffs pairs
// and compares it against their IDs, so it presumably expects reference IDs
// rather than the ref_id_file path -- confirm what this entry point should pass.
workflow {
    main:
    // Run SNP pipeline. Workflow inputs are positional, in `take:` order;
    // a named-argument call would arrive as a single Map argument.
    runSNPPipeline(all_snpdiffs, ref_id_file)
}
|
rliterman@0
|
41
|
rliterman@0
|
// Screening sub-workflow.
//
// take:
//   all_snpdiffs - channel of tuples; the element at index 2 is used as the
//                  de-duplication key (presumably the SNPDiffs file -- confirm
//                  against the caller)
workflow runScreen {

    take:
    all_snpdiffs

    main:

    // Keep one tuple per distinct value at index 2, then gather everything
    // into a single emission so screenSNPDiffs receives it all at once.
    def screening_input = all_snpdiffs
        .unique { snpdiff_row -> snpdiff_row[2] }
        .collect()

    screenSNPDiffs(screening_input)
}
|
rliterman@0
|
54
|
rliterman@0
|
// Runs bin/screenSNPDiffs.py using the script-level paths and QC thresholds.
//
// input:
//   all_snpdiffs - collected SNPDiffs data. It is not referenced by the
//                  command; the script is pointed at `all_snpdiffs_list`.
// NOTE(review): nothing in the visible source writes `all_snpdiffs_list`
// before this command runs -- confirm it is created elsewhere.
process screenSNPDiffs {

    input:
    val(all_snpdiffs)

    script:

    def screenDiffs = file("${projectDir}/bin/screenSNPDiffs.py")

    // Option name -> value, in the order they appear on the command line
    def screen_opts = [
        snpdiffs_file  : all_snpdiffs_list,
        log_dir        : screen_log_dir,
        min_cov        : min_cov,
        min_len        : min_length,
        min_iden       : min_iden,
        ref_edge       : reference_edge,
        query_edge     : query_edge,
        density_windows: params.dwin,
        max_snps       : params.wsnps,
        trim_name      : params.trim_name,
        output_file    : screening_results_file,
        ref_id         : ref_id_file,
        tmp_dir        : temp_dir
    ]

    // Render each entry as: --name "value", separated by single spaces
    def screen_args = screen_opts
        .collect { flag, value -> "--${flag} \"${value}\"" }
        .join(' ')
    """
    $params.load_python_module
    $params.load_bedtools_module
    python $screenDiffs ${screen_args}
    """
}
|
rliterman@0
|
69
|
rliterman@0
|
// SNP pipeline sub-workflow: groups SNPDiffs entries by reference ID and
// launches runSnpPipeline once per reference.
//
// take:
//   all_snpdiffs   - channel of tuples; indices 0 and 1 are treated as IDs and
//                    index 2 as the associated SNPDiffs entry
//   reference_data - reference IDs to keep (compared by string value)
workflow runSNPPipeline {
    take:
    all_snpdiffs
    reference_data

    main:

    // Pair the entry at index 2 with the ID at index 0 and, separately, with
    // the ID at index 1
    query_snpdiffs = all_snpdiffs.map { row -> tuple(row[0], row[2]) }
    ref_snpdiffs = all_snpdiffs.map { row -> tuple(row[1], row[2]) }

    // All (ID, SNPDiffs) pairs: gathered completely, then re-split into pairs
    stacked_snpdiffs = query_snpdiffs
        .concat(ref_snpdiffs)
        .collect()
        .flatten()
        .collate(2)

    // Keep only pairs whose ID equals a reference ID, then build one
    // (reference ID, list of SNPDiffs) tuple per reference
    def diffs_by_reference = stacked_snpdiffs
        .combine(reference_data)
        .filter { combo -> combo[0].toString() == combo[2].toString() }
        .map { combo -> tuple(combo[0], combo[1]) }
        .groupTuple(by: 0)
        .map { ref, diff_files -> tuple(ref.toString(), diff_files.collect()) }

    snp_dirs = runSnpPipeline(diffs_by_reference)

    //snp_dirs.collect() | compileResults
}
|
rliterman@0
|
93
|
rliterman@0
|
// Runs bin/compileSNPResults.py over the per-reference SNP output directories.
//
// input:
//   snp_directories - list of SNP output directories; written one per line to
//                     `snp_dirs_list` for the Python script to read
process compileResults {

    // Directives use the `name value` form (no `=`)
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snp_directories)

    script:

    def compile_script = file("${projectDir}/bin/compileSNPResults.py")

    // Persist the directory list before the command that consumes it is built
    snp_dirs_list.write(snp_directories.join("\n") + "\n")
    """
    $params.load_python_module
    python $compile_script --snp_dirs_file "${snp_dirs_list}" --output_directory "${snp_directory}" --isolate_data_file "${isolate_data_file}" --mummer_data_file "${snpdiffs_summary_file}"
    """
}
|
rliterman@0
|
112
|
rliterman@0
|
// Runs bin/runSNPPipeline.py for one reference.
//
// input:
//   reference_id - reference ID; also names the output sub-directory
//   diff_files   - SNPDiffs entries for this reference, written one per line
//                  to <snp_directory>/<reference_id>/SNPDiffs.txt
// output:
//   stdout - the command ends by printing the output directory path
// NOTE(review): `stdout` captures everything the task prints, so the value is
// only the bare path if the module-load commands and the Python script print
// nothing to stdout -- confirm.
process runSnpPipeline {

    input:
    tuple val(reference_id), val(diff_files)

    output:
    stdout

    script:

    def snp_script = file("${projectDir}/bin/runSNPPipeline.py")

    // Set + create output directory
    def snp_dir = file("${snp_directory}/${reference_id}")
    snp_dir.mkdirs()

    // Write SNPDiffs list
    def out_snpdiffs = file("${snp_dir}/SNPDiffs.txt")
    out_snpdiffs.write(diff_files.join("\n") + "\n")
    """
    $params.load_python_module
    $params.load_bedtools_module
    python $snp_script --reference_id "${reference_id}" --output_directory "${snp_dir}" --snpdiffs_file "${out_snpdiffs}" --log_directory "${snp_log_dir}" --min_cov "${min_cov}" --min_len "${min_length}" --min_iden "${min_iden}" --ref_edge "${reference_edge}" --query_edge "${query_edge}" --density_windows "${params.dwin}" --max_snps "${params.wsnps}" --trim_name "${params.trim_name}" --max_missing "${max_missing}" --tmp_dir "${temp_dir}" --rescue "${edge_rescue}"
    echo -n "${snp_dir}"
    """
}