diff CSP2/subworkflows/refchooser/main.nf @ 0:01431fa12065

"planemo upload"
author rliterman
date Mon, 02 Dec 2024 10:40:55 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/subworkflows/refchooser/main.nf	Mon Dec 02 10:40:55 2024 -0500
@@ -0,0 +1,125 @@
+// Subworkflow to run RefChooser for list of queries
+
+// Set directory structure
+output_directory = file(params.output_directory)
+log_directory = file(params.log_directory)
+mash_directory = file(params.mash_directory)
+
+workflow {
+    main:
+    // Run RefChooser
+    reference_data = runRefChooser(query_data: query_data)
+    publish:
+    // Publish reference data
+    reference_data >> 'reference.fa'
+}
+
+workflow runRefChooser{
+    take:
+    query_data
+
+    emit:
+    reference_data
+
+    main:
+
+    // Make MASH sketches (1 CPU per query) and generate triangle (all CPUs)
+    mash_refs = query_data
+    .unique{it -> it[1]}
+    .map { [ it[0], it[1] ] }
+    | mashSketch 
+    | collect
+    | mashTriangle
+    | chooseRefs
+    | splitCsv | collect | flatten | collate(1) 
+
+    reference_data = query_data
+    .map{it -> tuple(it[1].toString(),it[0])}
+    .join(mash_refs, by:0)
+    .map{tuple(it[1],it[0])}
+    .unique{it -> it[0]}.collect().flatten().collate(2)
+
+    // Save reference data to file
+    reference_data
+    .collect{it -> it[0]}
+    | saveRefIDs
+}
+
+process chooseRefs{
+    
+    executor = 'local'
+    cpus = 1
+    maxForks = 1
+
+    input:
+    val(mash_triangle)
+
+    output:
+    stdout
+
+    script:
+
+    ref_count = params.n_ref.toInteger()
+    ref_script = file("${projectDir}/bin/chooseRefs.py")
+    """
+    $params.load_python_module  
+    cd $mash_directory
+
+    python $ref_script --ref_count $ref_count --mash_triangle_file $mash_triangle --trim_name "${params.trim_name}"
+    """
+}
+
+process mashTriangle{
+
+    input:
+    val(mash_sketches)
+
+    output:
+    stdout
+
+    script:
+
+    sketch_file = file("${mash_directory}/Mash_Sketches.txt")
+    mash_triangle_file = file("${mash_directory}/Mash_Triangle")
+
+    """
+    $params.load_mash_module
+    ls ${mash_directory}/*.msh > $sketch_file
+    mash triangle -p ${params.cores} -l $sketch_file > $mash_triangle_file
+    echo -n $mash_triangle_file
+    """
+}
+
+process mashSketch{
+    cpus = 1
+
+    input:
+    tuple val(query_name),val(query_fasta)
+    
+    output:
+    stdout
+
+    script:
+
+    mash_path = "${mash_directory}/${query_name}.msh"
+    """
+    $params.load_mash_module
+    mash sketch -s 10000 -p 1 -o $mash_path $query_fasta
+    echo -n "${mash_path}"
+    """
+}
+
+process saveRefIDs{
+    executor = 'local'
+    cpus = 1
+    maxForks = 1
+    
+    input:
+    val(ref_ids)
+
+    script:
+    ref_id_file = file(params.ref_id_file)
+    ref_id_file.append(ref_ids.join('\n') + '\n')        
+    """
+    """
+}