Mercurial > repos > rliterman > csp2
comparison CSP2/subworkflows/refchooser/main.nf @ 0:01431fa12065
"planemo upload"
author | rliterman |
---|---|
date | Mon, 02 Dec 2024 10:40:55 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:01431fa12065 |
---|---|
1 // Subworkflow to run RefChooser for a list of queries | |
2 | |
// Resolve the output, log, and MASH working directories from the run parameters
output_directory = file(params.output_directory)
log_directory    = file(params.log_directory)
mash_directory   = file(params.mash_directory)
7 | |
// Entry workflow: runs the runRefChooser subworkflow and publishes its result.
// NOTE(review): `query_data` is not defined anywhere in this file; it has to be
// provided by the surrounding script before this entry workflow can run — confirm.
workflow {
    main:
    // Run RefChooser. Workflows are invoked with positional arguments; a
    // `name: value` pair would be handed over as a single Map instead of the
    // channel itself.
    reference_data = runRefChooser(query_data)

    publish:
    // Publish reference data
    reference_data >> 'reference.fa'
}
16 | |
// Subworkflow: sketch every query with MASH, build the distance triangle, let
// chooseRefs pick references, then map the picked values back onto the queries.
//
// take:
//   query_data     - channel of items read as it[0] and it[1]; it[1] is the value
//                    handed to `mash sketch` as the input FASTA
//                    (presumably [query ID, FASTA path] — confirm with the caller)
// emit:
//   reference_data - channel of 2-element lists [it[0], it[1]] for the queries
//                    whose it[1] (as a string) matched a value printed by chooseRefs
//
// Sections are declared in the documented order: take, main, emit.
workflow runRefChooser {
    take:
    query_data

    main:

    // Make MASH sketches (1 CPU per query) and generate triangle (all CPUs).
    // The stdout of chooseRefs is parsed as CSV and re-emitted as 1-element
    // lists so each value can serve as a join key below.
    mash_refs = query_data
        .unique { it -> it[1] }
        .map { [ it[0], it[1] ] }
        | mashSketch
        | collect
        | mashTriangle
        | chooseRefs
        | splitCsv | collect | flatten | collate(1)

    // Key every query by the string form of it[1], keep only those present in
    // mash_refs, then restore the original (it[0], it[1]) order and de-duplicate
    // on it[0].
    reference_data = query_data
        .map { it -> tuple(it[1].toString(), it[0]) }
        .join(mash_refs, by: 0)
        .map { tuple(it[1], it[0]) }
        .unique { it -> it[0] }.collect().flatten().collate(2)

    // Save reference data to file (only the first element of each pair)
    reference_data
        .collect { it -> it[0] }
        | saveRefIDs

    emit:
    reference_data
}
47 | |
// Runs bin/chooseRefs.py on a MASH triangle file and emits whatever the script
// prints to stdout.
//
// input:
//   mash_triangle - value passed to the script as --mash_triangle_file
// output:
//   stdout of the command block
process chooseRefs {

    // Process directives use the `name value` form (no `=`).
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(mash_triangle)

    output:
    stdout

    script:

    def ref_count = params.n_ref.toInteger()
    def ref_script = file("${projectDir}/bin/chooseRefs.py")
    // Paths are quoted so directories containing whitespace do not split into
    // several shell words.
    """
    $params.load_python_module
    cd "$mash_directory"

    python "$ref_script" --ref_count $ref_count --mash_triangle_file "$mash_triangle" --trim_name "${params.trim_name}"
    """
}
71 | |
// Builds a MASH distance triangle from every *.msh file found in mash_directory
// and prints the path of the resulting triangle file.
//
// input:
//   mash_sketches - not referenced by the command; receiving it makes this
//                   process run only after the upstream sketches were collected
// output:
//   stdout - the triangle file path, printed without a trailing newline
process mashTriangle {

    input:
    val(mash_sketches)

    output:
    stdout

    script:

    def sketch_file = file("${mash_directory}/Mash_Sketches.txt")
    def mash_triangle_file = file("${mash_directory}/Mash_Triangle")

    // The directory part is quoted while the glob is left outside the quotes, so
    // paths with whitespace survive and *.msh still expands.
    """
    $params.load_mash_module
    ls "${mash_directory}"/*.msh > "$sketch_file"
    mash triangle -p ${params.cores} -l "$sketch_file" > "$mash_triangle_file"
    echo -n "$mash_triangle_file"
    """
}
92 | |
// Sketches one query FASTA with `mash sketch` (sketch size 10000, one thread)
// and prints the sketch path that was passed to `-o`.
//
// input:
//   query_name  - used to build the output name <mash_directory>/<query_name>.msh
//   query_fasta - value passed to `mash sketch` as the sequence file
// output:
//   stdout - the sketch path, printed without a trailing newline
process mashSketch {

    // Process directives use the `name value` form (no `=`).
    cpus 1

    input:
    tuple val(query_name), val(query_fasta)

    output:
    stdout

    script:

    def mash_path = "${mash_directory}/${query_name}.msh"
    // Both paths are quoted so whitespace in a directory or file name does not
    // split the argument.
    """
    $params.load_mash_module
    mash sketch -s 10000 -p 1 -o "$mash_path" "$query_fasta"
    echo -n "${mash_path}"
    """
}
111 | |
// Appends the selected reference IDs, one per line, to the file named by
// params.ref_id_file. The shell command itself is empty; the write happens in
// the Groovy code that runs before the command string is produced.
//
// input:
//   ref_ids - list of values joined with newlines and appended to the file
process saveRefIDs {

    // Process directives use the `name value` form (no `=`).
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(ref_ids)

    script:
    def ref_id_file = file(params.ref_id_file)
    ref_id_file.append(ref_ids.join('\n') + '\n')
    """
    """
}