annotate CSP2/subworkflows/refchooser/main.nf @ 51:2a36d0ebb408

"planemo upload"
author rliterman
date Fri, 13 Dec 2024 08:49:33 -0500
parents 01431fa12065
children
rev   line source
// Subworkflow to run RefChooser for a list of queries

// Resolve the configured directory parameters into file objects.
// These are script-level variables (no `def`) so the processes defined
// further down can reference them, e.g. `mash_directory`.
output_directory = file(params.output_directory)
log_directory    = file(params.log_directory)
mash_directory   = file(params.mash_directory)
rliterman@0 7
// Entry workflow: runs RefChooser and publishes the selected references.
//
// NOTE(review): `query_data` is not defined anywhere in the visible part of
// this file, and the call below passes it as a named-argument map rather than
// as a positional channel. Confirm how this entry point is meant to receive
// its input before relying on it.
// NOTE(review): the `publish:` section presumably relies on the workflow
// output feature being enabled elsewhere; verify against the pipeline config.
workflow {
    main:
    // Run RefChooser
    reference_data = runRefChooser(query_data: query_data)

    publish:
    // Publish reference data
    reference_data >> 'reference.fa'
}
rliterman@0 16
// Choose reference isolates for a set of queries from their MASH distances.
//
// take:
//   query_data     - channel of tuples; element 0 is passed to mashSketch as
//                    the query name and element 1 as the query FASTA
// emit:
//   reference_data - channel of two-element lists built from the queries that
//                    join against the values printed by chooseRefs
workflow runRefChooser {
    take:
    query_data

    main:
    // Keep one entry per distinct element 1 and reduce each to [element 0, element 1]
    sketch_inputs = query_data
        .unique { row -> row[1] }
        .map { row -> [ row[0], row[1] ] }

    // Make MASH sketches (1 CPU per query), wait for all of them, generate the
    // triangle (all CPUs), choose references, then reshape the CSV output into
    // one single-element list per value so it can be used as a join key
    mash_refs = sketch_inputs
        | mashSketch
        | collect
        | mashTriangle
        | chooseRefs
        | splitCsv | collect | flatten | collate(1)

    // Key each query by the string form of element 1, keep the queries whose
    // key appears in mash_refs, then restore the (element 0, key) order.
    // NOTE(review): this assumes chooseRefs prints values that match the
    // query FASTA path strings exactly - confirm against bin/chooseRefs.py.
    reference_data = query_data
        .map { row -> tuple(row[1].toString(), row[0]) }
        .join(mash_refs, by: 0)
        .map { hit -> tuple(hit[1], hit[0]) }
        .unique { hit -> hit[0] }
        .collect()
        .flatten()
        .collate(2)

    // Save reference data to file (first element of every selected pair)
    reference_data
        .collect { ref -> ref[0] }
        | saveRefIDs

    emit:
    reference_data
}
rliterman@0 47
// Run bin/chooseRefs.py on the MASH triangle and emit whatever it prints.
//
// input:  mash_triangle - value handed over by mashTriangle (the triangle
//                         file path that process echoes)
// output: stdout of the script
process chooseRefs {

    // Directives use the documented `name value` form rather than assignment
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(mash_triangle)

    output:
    stdout

    script:
    // Declared with `def` so they stay local to this task
    def ref_count = params.n_ref.toInteger()
    def ref_script = file("${projectDir}/bin/chooseRefs.py")
    // Paths are quoted so directories containing whitespace do not split
    // into several shell words
    """
    $params.load_python_module
    cd "${mash_directory}"

    python "${ref_script}" --ref_count ${ref_count} --mash_triangle_file "${mash_triangle}" --trim_name "${params.trim_name}"
    """
}
rliterman@0 71
// Build the MASH distance triangle over every sketch in mash_directory and
// print the path of the triangle file.
//
// input:  mash_sketches - collected mashSketch output; it is not referenced in
//                         the script, which lists *.msh files in
//                         mash_directory instead
// output: stdout (the triangle file path, without a trailing newline)
process mashTriangle {

    input:
    val(mash_sketches)

    output:
    stdout

    script:
    def sketch_list = file("${mash_directory}/Mash_Sketches.txt")
    def triangle_out = file("${mash_directory}/Mash_Triangle")
    """
    $params.load_mash_module
    ls ${mash_directory}/*.msh > ${sketch_list}
    mash triangle -p ${params.cores} -l ${sketch_list} > ${triangle_out}
    echo -n ${triangle_out}
    """
}
rliterman@0 92
// Sketch one query FASTA with MASH and print the path of the sketch file.
//
// input:  query_name  - used as the base name of the .msh file
//         query_fasta - FASTA handed to `mash sketch`
// output: stdout (the sketch path, without a trailing newline)
process mashSketch {

    // Directive uses the documented `name value` form rather than assignment
    cpus 1

    input:
    tuple val(query_name), val(query_fasta)

    output:
    stdout

    script:
    // Declared with `def` so it stays local to this task
    def mash_path = "${mash_directory}/${query_name}.msh"
    // Output and input paths are quoted so names containing whitespace
    // are passed to mash as single arguments
    """
    $params.load_mash_module
    mash sketch -s 10000 -p 1 -o "${mash_path}" "${query_fasta}"
    echo -n "${mash_path}"
    """
}
rliterman@0 111
// Append the selected reference IDs, one per line, to params.ref_id_file.
//
// input: ref_ids - list of reference IDs (joined with newlines below)
//
// The append happens in Groovy while the script section is evaluated; the
// shell script itself is intentionally empty.
process saveRefIDs {

    // Directives use the documented `name value` form rather than assignment
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(ref_ids)

    script:
    // Declared with `def` so it stays local to this task
    def ref_id_file = file(params.ref_id_file)
    ref_id_file.append(ref_ids.join('\n') + '\n')
    """
    """
}