// Subworkflow to run RefChooser for a list of queries

// Set directory structure
output_directory = file(params.output_directory)
log_directory = file(params.log_directory)
mash_directory = file(params.mash_directory)

/*
 * Entry workflow: runs runRefChooser and publishes the resulting channel
 * to the 'reference.fa' target.
 *
 * NOTE(review): `query_data` is not defined anywhere in the visible part of
 * this file, and the call uses a named argument; confirm where the query
 * channel is expected to come from before relying on this entry point.
 */
workflow {
    main:
    // Run RefChooser
    reference_data = runRefChooser(query_data: query_data)

    publish:
    // Publish reference data
    reference_data >> 'reference.fa'
}

/*
 * Select reference genomes from a set of queries using MASH distances.
 *
 * take:
 *   query_data      channel of list-like items; element [0] is passed to
 *                   mashSketch as the query name and element [1] as the
 *                   query FASTA.
 * emit:
 *   reference_data  channel of two-element lists [ item[0], item[1].toString() ]
 *                   for every query whose stringified element [1] appears in
 *                   the output of chooseRefs.
 *
 * Side effect: the first element of every emitted pair is handed to
 * saveRefIDs, which appends it to params.ref_id_file.
 */
workflow runRefChooser {
    take:
    query_data

    main:

    // Make MASH sketches (1 CPU per query) and generate triangle (all CPUs).
    // The stdout of chooseRefs is parsed as CSV and reshaped into
    // one-element lists so that it can serve as the join key below.
    // presumably each value is the FASTA path of a chosen reference -- verify
    // against bin/chooseRefs.py
    mash_refs = query_data
        .unique{it -> it[1]}
        .map { [ it[0], it[1] ] }
        | mashSketch
        | collect
        | mashTriangle
        | chooseRefs
        | splitCsv | collect | flatten | collate(1)

    // Key the queries by stringified element [1], keep only those present in
    // mash_refs, restore the (element [0], key) order and de-duplicate on
    // element [0]. collect/flatten/collate(2) rebuilds the pairs as plain lists.
    reference_data = query_data
        .map{it -> tuple(it[1].toString(),it[0])}
        .join(mash_refs, by:0)
        .map{tuple(it[1],it[0])}
        .unique{it -> it[0]}.collect().flatten().collate(2)

    // Save reference data to file
    reference_data
        .collect{it -> it[0]}
        | saveRefIDs

    emit:
    reference_data
}

/*
 * Run bin/chooseRefs.py on a MASH triangle file.
 *
 * input:  mash_triangle  value interpolated into --mash_triangle_file
 * output: stdout of the Python script
 *
 * The command runs from inside mash_directory.
 */
process chooseRefs {

    // Directives use the documented `name value` form (no `=`).
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(mash_triangle)

    output:
    stdout

    script:

    def ref_count = params.n_ref.toInteger()
    def ref_script = file("${projectDir}/bin/chooseRefs.py")
    """
    $params.load_python_module
    cd $mash_directory

    python $ref_script --ref_count $ref_count --mash_triangle_file $mash_triangle --trim_name "${params.trim_name}"
    """
}

/*
 * Build a MASH triangle from the sketches stored in mash_directory.
 *
 * input:  mash_sketches  collected output of mashSketch
 * output: stdout, which is the path of the written triangle file
 *         (${mash_directory}/Mash_Triangle)
 *
 * NOTE(review): the command lists every *.msh file found in mash_directory
 * and never reads `mash_sketches`; the input appears to act only as a
 * "sketching finished" signal -- confirm that sketches left over from other
 * runs cannot be present in that directory.
 */
process mashTriangle {

    input:
    val(mash_sketches)

    output:
    stdout

    script:

    def sketch_file = file("${mash_directory}/Mash_Sketches.txt")
    def mash_triangle_file = file("${mash_directory}/Mash_Triangle")

    """
    $params.load_mash_module
    ls ${mash_directory}/*.msh > $sketch_file
    mash triangle -p ${params.cores} -l $sketch_file > $mash_triangle_file
    echo -n $mash_triangle_file
    """
}

/*
 * Sketch one query FASTA with MASH (sketch size 10000, 1 thread).
 *
 * input:  tuple (query_name, query_fasta)
 * output: stdout, which is the sketch path
 *         ${mash_directory}/${query_name}.msh
 */
process mashSketch {

    // Directive uses the documented `name value` form (no `=`).
    cpus 1

    input:
    tuple val(query_name),val(query_fasta)

    output:
    stdout

    script:

    def mash_path = "${mash_directory}/${query_name}.msh"
    """
    $params.load_mash_module
    mash sketch -s 10000 -p 1 -o $mash_path $query_fasta
    echo -n "${mash_path}"
    """
}

/*
 * Append the given reference IDs, one per line, to params.ref_id_file.
 *
 * input:  ref_ids  list of IDs (joined with '\n')
 *
 * The append is performed in Groovy while the script block is evaluated;
 * the shell script that is then executed is intentionally empty.
 */
process saveRefIDs {

    // Directives use the documented `name value` form (no `=`).
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(ref_ids)

    script:
    def ref_id_file = file(params.ref_id_file)
    ref_id_file.append(ref_ids.join('\n') + '\n')
    """
    """
}