rliterman@0
|
1 // Subworkflow to run RefChooser for list of queries
|
rliterman@0
|
2
|
rliterman@0
|
// Set directory structure
// Each value is built with file() from the matching pipeline parameter.
// The names are assigned without `def`; `mash_directory` is read directly
// inside the mashSketch, mashTriangle and chooseRefs process scripts.
// NOTE(review): `output_directory` and `log_directory` are not referenced in
// the visible part of this file -- confirm they are used elsewhere.
output_directory = file(params.output_directory)
log_directory = file(params.log_directory)
mash_directory = file(params.mash_directory)
|
rliterman@0
|
7
|
rliterman@0
|
// Entry workflow: calls the runRefChooser subworkflow and publishes the
// result to the 'reference.fa' target.
workflow {
    main:
    // Run RefChooser
    // NOTE(review): `query_data` is not defined anywhere in the visible part of
    // this file, and the call uses a named-argument form (`query_data: ...`).
    // Confirm where the query channel is supposed to come from and whether the
    // subworkflow should instead be called positionally.
    reference_data = runRefChooser(query_data: query_data)
    publish:
    // Publish reference data
    reference_data >> 'reference.fa'
}
|
rliterman@0
|
16
|
rliterman@0
|
// Subworkflow that selects reference assemblies from the supplied queries.
//
// take:
//   query_data     - channel whose items expose the query name at index 0 and
//                    the query FASTA at index 1 (these two elements are what is
//                    handed to mashSketch).
// emit:
//   reference_data - channel of 2-element lists [query name, FASTA path string],
//                    one per query whose FASTA path string matches a value
//                    returned by chooseRefs.
//
// Sections are declared in the documented order take -> main -> emit.
workflow runRefChooser {
    take:
    query_data

    main:

    // Make MASH sketches (1 CPU per query) and generate triangle (all CPUs)
    // The stdout of chooseRefs is parsed with splitCsv and reshaped into
    // single-element lists so each value can act as a join key below.
    // NOTE(review): presumably chooseRefs prints the chosen FASTA paths, since
    // they are joined against it[1].toString() -- confirm against chooseRefs.py.
    mash_refs = query_data
        .unique{it -> it[1]}
        .map { [ it[0], it[1] ] }
        | mashSketch
        | collect
        | mashTriangle
        | chooseRefs
        | splitCsv | collect | flatten | collate(1)

    // Key every query by its FASTA path string, keep only those present in
    // mash_refs, then restore the (name, path) order and de-duplicate by name.
    reference_data = query_data
        .map{it -> tuple(it[1].toString(),it[0])}
        .join(mash_refs, by:0)
        .map{tuple(it[1],it[0])}
        .unique{it -> it[0]}.collect().flatten().collate(2)

    // Save reference data to file
    reference_data
        .collect{it -> it[0]}
        | saveRefIDs

    emit:
    reference_data
}
|
rliterman@0
|
47
|
rliterman@0
|
// Runs bin/chooseRefs.py on a MASH triangle file and emits the script's stdout.
//
// input:
//   mash_triangle - value interpolated into --mash_triangle_file
// output:
//   stdout        - whatever chooseRefs.py prints
process chooseRefs {

    // Directives use the documented `name value` form rather than assignment.
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(mash_triangle)

    output:
    stdout

    script:

    // `def` keeps these variables local to the task instead of leaking them
    // into the script-wide scope.
    def ref_count = params.n_ref.toInteger()
    def ref_script = file("${projectDir}/bin/chooseRefs.py")
    """
    $params.load_python_module
    cd $mash_directory

    python $ref_script --ref_count $ref_count --mash_triangle_file $mash_triangle --trim_name "${params.trim_name}"
    """
}
|
rliterman@0
|
71
|
rliterman@0
|
// Builds a MASH triangle over every .msh file found in mash_directory and
// emits the path of the resulting triangle file on stdout.
//
// input:
//   mash_sketches - NOTE(review): not referenced in the script; the sketch list
//                   is built from a directory glob instead. Presumably this
//                   input only makes the process wait for all sketches --
//                   confirm, since stale .msh files in the directory would
//                   also be included.
// output:
//   stdout        - path of the triangle file (printed without a newline)
process mashTriangle {

    input:
    val(mash_sketches)

    output:
    stdout

    script:

    // `def` keeps these variables local to the task instead of leaking them
    // into the script-wide scope.
    def sketch_file = file("${mash_directory}/Mash_Sketches.txt")
    def mash_triangle_file = file("${mash_directory}/Mash_Triangle")

    """
    $params.load_mash_module
    ls ${mash_directory}/*.msh > $sketch_file
    mash triangle -p ${params.cores} -l $sketch_file > $mash_triangle_file
    echo -n $mash_triangle_file
    """
}
|
rliterman@0
|
92
|
rliterman@0
|
// Creates a MASH sketch (sketch size 10000, one thread) for a single query and
// emits the sketch path on stdout.
//
// input:
//   query_name  - used to name the sketch file: <mash_directory>/<query_name>.msh
//   query_fasta - FASTA passed to `mash sketch`
// output:
//   stdout      - path of the sketch file (printed without a newline)
process mashSketch {
    // Directive uses the documented `name value` form rather than assignment.
    cpus 1

    input:
    tuple val(query_name),val(query_fasta)

    output:
    stdout

    script:

    // `def` keeps the path local to each task; this process runs once per
    // query, so a shared variable could be overwritten by a concurrent task.
    def mash_path = "${mash_directory}/${query_name}.msh"
    """
    $params.load_mash_module
    mash sketch -s 10000 -p 1 -o $mash_path $query_fasta
    echo -n "${mash_path}"
    """
}
|
rliterman@0
|
111
|
rliterman@0
|
// Appends the selected reference IDs, one per line, to params.ref_id_file.
//
// input:
//   ref_ids - list of IDs; joined with newlines before being written
//
// The append happens in Groovy while the script block is evaluated; the shell
// command itself is intentionally empty.
process saveRefIDs {
    // Directives use the documented `name value` form rather than assignment.
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(ref_ids)

    script:
    // `def` keeps the file handle local to the task instead of leaking it into
    // the script-wide scope.
    def ref_id_file = file(params.ref_id_file)
    ref_id_file.append(ref_ids.join('\n') + '\n')
    """
    """
}
|