comparison CSP2/subworkflows/refchooser/main.nf @ 0:01431fa12065

"planemo upload"
author rliterman
date Mon, 02 Dec 2024 10:40:55 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:01431fa12065
// Subworkflow that runs RefChooser over a list of queries

// Directory layout: each configured location is resolved once with file()
// so the definitions below can refer to it by name.
mash_directory = file(params.mash_directory)
log_directory = file(params.log_directory)
output_directory = file(params.output_directory)
7
/*
 * Entry workflow: runs RefChooser and publishes the selected references
 * to 'reference.fa'.
 *
 * NOTE(review): `query_data` is not defined in the visible part of this
 * file. It has to be provided before this entry point can run on its own;
 * confirm where it is meant to come from.
 */
workflow {
    main:
    // Run RefChooser. The argument is passed positionally: a `name: value`
    // argument would be handed over by Groovy as a single Map instead of the
    // channel that runRefChooser's `take:` expects.
    reference_data = runRefChooser(query_data)

    publish:
    // Publish reference data
    reference_data >> 'reference.fa'
}
16
/*
 * Selects references from the queries and records their IDs.
 *
 * take:
 *   query_data      channel of list-like items; element 0 is fed to
 *                   mashSketch as the query name and element 1 as the
 *                   query FASTA.
 * emit:
 *   reference_data  channel of two-element lists [element 0, element 1 as
 *                   a string] for the queries whose element 1 matched a
 *                   value printed by chooseRefs.
 */
workflow runRefChooser {
    take:
    query_data

    main:
    // Sketch each distinct FASTA once (1 CPU per query)
    sketch_inputs = query_data
        .unique { row -> row[1] }
        .map { row -> [ row[0], row[1] ] }
    sketch_paths = mashSketch(sketch_inputs)

    // Build the triangle only after every sketch has finished (all CPUs)
    triangle_path = mashTriangle(sketch_paths.collect())

    // Parse the chooseRefs output into one single-element list per value
    chosen = chooseRefs(triangle_path)
    mash_refs = chosen
        .splitCsv()
        .collect()
        .flatten()
        .collate(1)

    // Key the queries by the string form of element 1 and keep only those
    // that chooseRefs returned, then restore the [element 0, element 1] order.
    keyed_queries = query_data.map { row -> tuple(row[1].toString(), row[0]) }
    matched = keyed_queries
        .join(mash_refs, by: 0)
        .map { pair -> tuple(pair[1], pair[0]) }
    reference_data = matched
        .unique { pair -> pair[0] }
        .collect()
        .flatten()
        .collate(2)

    // Save reference data to file
    saveRefIDs(reference_data.collect { pair -> pair[0] })

    emit:
    reference_data
}
47
/*
 * Runs bin/chooseRefs.py against a MASH triangle file.
 *
 * input:  mash_triangle  value passed to --mash_triangle_file
 * output: stdout of the script
 *
 * Reads params.n_ref, params.trim_name and params.load_python_module, and
 * changes into mash_directory before invoking Python.
 */
process chooseRefs {

    // Directives use the documented `name value` form; `name = value` is
    // configuration-file syntax, not process-definition syntax.
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(mash_triangle)

    output:
    stdout

    script:

    ref_count = params.n_ref.toInteger()
    ref_script = file("${projectDir}/bin/chooseRefs.py")
    """
    $params.load_python_module
    cd $mash_directory

    python $ref_script --ref_count $ref_count --mash_triangle_file $mash_triangle --trim_name "${params.trim_name}"
    """
}
71
/*
 * Builds a MASH triangle from the sketches found in mash_directory.
 *
 * input:  mash_sketches  value received from the caller
 * output: stdout, which is the path of the triangle file (printed without
 *         a trailing newline)
 *
 * NOTE(review): the script never references mash_sketches; it lists every
 * *.msh file in mash_directory instead. Presumably the input only serves to
 * delay this step until sketching is done; confirm that is intended.
 */
process mashTriangle {

    input:
    val(mash_sketches)

    output:
    stdout

    script:

    // Both files live inside mash_directory
    sketch_list = file("${mash_directory}/Mash_Sketches.txt")
    triangle_out = file("${mash_directory}/Mash_Triangle")

    """
    $params.load_mash_module
    ls ${mash_directory}/*.msh > $sketch_list
    mash triangle -p ${params.cores} -l $sketch_list > $triangle_out
    echo -n $triangle_out
    """
}
92
/*
 * Creates a MASH sketch for one query.
 *
 * input:  tuple of query_name (used for the sketch file name) and
 *         query_fasta (the file that is sketched)
 * output: stdout, which is the sketch path
 *         "${mash_directory}/${query_name}.msh" printed without a trailing
 *         newline
 */
process mashSketch {

    // Directive in the documented `name value` form; matches `-p 1` below.
    cpus 1

    input:
    tuple val(query_name), val(query_fasta)

    output:
    stdout

    script:

    mash_path = "${mash_directory}/${query_name}.msh"
    """
    $params.load_mash_module
    mash sketch -s 10000 -p 1 -o $mash_path $query_fasta
    echo -n "${mash_path}"
    """
}
111
/*
 * Appends the given reference IDs, one per line, to params.ref_id_file.
 *
 * input: ref_ids  collection of IDs; joined with newlines before writing
 *
 * The write happens in the Groovy code that precedes the script string,
 * and the script string itself is empty.
 */
process saveRefIDs {

    // Directives use the documented `name value` form; `name = value` is
    // configuration-file syntax, not process-definition syntax.
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(ref_ids)

    script:
    ref_id_file = file(params.ref_id_file)
    ref_id_file.append(ref_ids.join('\n') + '\n')
    """
    """
}