annotate CSP2/subworkflows/snpdiffs/main.nf @ 0:01431fa12065

"planemo upload"
author rliterman
date Mon, 02 Dec 2024 10:40:55 -0500
parents
children
rev   line source
// Screening and SNP Pipeline processing

// Resolve the output and log locations supplied through params.
output_directory = file(params.output_directory)
log_directory = file(params.log_directory)
screen_log_dir = file(params.screen_log_dir)
snp_log_dir = file(params.snp_log_dir)
snp_directory = file(params.snp_directory)

// Optional temp directory: an empty tmp_dir is passed through as "".
// The guard and the path must read the SAME parameter (params.tmp_dir);
// the else-branch previously read params.temp_dir, which is a different name.
if(params.tmp_dir == ""){
    temp_dir = ""
} else{
    temp_dir = file(params.tmp_dir)
}
ref_id_file = file(params.ref_id_file)

ref_mode = params.ref_mode

// Assess whether to rescue edge-filtered SNPs
// (any value other than the literal "norescue" selects "rescue")
edge_rescue = "${params.rescue}" == "norescue" ? "norescue" : "rescue"

// Set paths for output files
all_snpdiffs_list = file("${log_directory}/All_SNPDiffs.txt")
snp_dirs_list = file("${log_directory}/SNP_Dirs.txt")
screening_results_file = file("${output_directory}/Screening_Results.tsv")
isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
snpdiffs_summary_file = file("${output_directory}/Raw_MUMmer_Summary.tsv")

// Get QC thresholds (converted to numeric types once, up front)
min_cov = params.min_cov.toFloat()
min_length = params.min_len.toInteger()
min_iden = params.min_iden.toFloat()
reference_edge = params.ref_edge.toInteger()
query_edge = params.query_edge.toInteger()
max_missing = params.max_missing.toFloat()
n_ref = params.n_ref.toInteger()
rliterman@0 35
// Entry workflow, used only when this file is executed directly.
// NOTE(review): `all_snpdiffs` is not defined anywhere in the visible part of
// this file, and runSNPPipeline declares two positional `take:` inputs while
// this call passes named arguments -- confirm whether this entry point is
// ever run on its own before relying on it.
workflow {
    main:
    // Run SNP pipeline
    runSNPPipeline(
        query_data: all_snpdiffs,
        reference_data: ref_id_file
    )
}
rliterman@0 41
// Screening sub-workflow.
// take: all_snpdiffs -- channel of items whose element [2] is used as the
//       de-duplication key (presumably the snpdiffs file path; verify against caller).
workflow runScreen {

    take:
    all_snpdiffs

    main:

    // Drop items that share the same element [2], then gather everything
    // into a single list so the screening process runs once.
    distinct_snpdiffs = all_snpdiffs.unique { entry -> entry[2] }
    screenSNPDiffs(distinct_snpdiffs.collect())
}
rliterman@0 54
// Runs bin/screenSNPDiffs.py once over the collected snpdiffs.
// input: all_snpdiffs -- collected list; it is not referenced in the command
//        itself, which reads the list file at ${all_snpdiffs_list} instead.
process screenSNPDiffs{

    input:
    val(all_snpdiffs)

    script:

    // Helper script shipped in the project's bin directory
    def screen_script = file("${projectDir}/bin/screenSNPDiffs.py")
    """
    ${params.load_python_module}
    ${params.load_bedtools_module}
    python ${screen_script} --snpdiffs_file "${all_snpdiffs_list}" --log_dir "${screen_log_dir}" --min_cov "${min_cov}" --min_len "${min_length}" --min_iden "${min_iden}" --ref_edge "${reference_edge}" --query_edge "${query_edge}" --density_windows "${params.dwin}" --max_snps "${params.wsnps}" --trim_name "${params.trim_name}" --output_file "${screening_results_file}" --ref_id "${ref_id_file}" --tmp_dir "${temp_dir}"
    """
}
rliterman@0 69
// SNP pipeline sub-workflow.
// take: all_snpdiffs   -- channel of items indexed as [0], [1], [2]; elements
//                         [0] and [1] are each paired with element [2] below
//                         (presumably query ID, reference ID, snpdiffs file -- verify against caller).
//       reference_data -- values compared (as strings) against the IDs to pick references.
workflow runSNPPipeline{
    take:
    all_snpdiffs
    reference_data

    main:

    // Pair element [2] with each of the two IDs it belongs to
    query_snpdiffs = all_snpdiffs.map { row -> tuple(row[0], row[2]) }
    ref_snpdiffs = all_snpdiffs.map { row -> tuple(row[1], row[2]) }

    // Stack both pairings and re-chunk them into (id, file) pairs
    stacked_snpdiffs = query_snpdiffs
        .concat(ref_snpdiffs)
        .collect()
        .flatten()
        .collate(2)

    // Keep only pairs whose ID matches a reference, then group files per reference
    per_reference = stacked_snpdiffs
        .combine(reference_data)
        .filter { row -> row[0].toString() == row[2].toString() }
        .map { row -> tuple(row[0], row[1]) }
        .groupTuple(by:0)
        .map { ref_id, files -> tuple( ref_id.toString(), files.collect() ) }

    snp_dirs = runSnpPipeline(per_reference)

    //snp_dirs.collect() | compileResults
}
rliterman@0 93
// Compiles per-reference SNP results by running bin/compileSNPResults.py.
// input: snp_directories -- list of SNP output directories; written one per
//        line to ${snp_dirs_list} before the script is launched.
process compileResults{

    // Directives use the standard `name value` form rather than assignment,
    // which is the documented process-directive syntax.
    executor 'local'
    cpus 1
    maxForks 1

    input:
    val(snp_directories)

    script:

    def compile_script = file("${projectDir}/bin/compileSNPResults.py")

    // Persist the directory list so the Python script can read it from disk
    snp_dirs_list.write(snp_directories.join("\n")+ "\n")
    """
    $params.load_python_module
    python $compile_script --snp_dirs_file "${snp_dirs_list}" --output_directory "${snp_directory}" --isolate_data_file "${isolate_data_file}" --mummer_data_file "${snpdiffs_summary_file}"
    """
}
rliterman@0 112
// Runs bin/runSNPPipeline.py for one reference.
// input:  reference_id -- ID used to name the per-reference output directory
//         diff_files   -- list written one per line to <snp_dir>/SNPDiffs.txt
// output: stdout -- the per-reference output directory path (echoed without newline)
process runSnpPipeline{

    input:
    tuple val(reference_id),val(diff_files)

    output:
    stdout

    script:

    def pipeline_script = file("${projectDir}/bin/runSNPPipeline.py")

    // Set + create output directory
    def reference_out_dir = file("${snp_directory}/${reference_id}")
    reference_out_dir.mkdirs()

    // Write SNPDiffs list
    def snpdiffs_list_file = file("${reference_out_dir}/SNPDiffs.txt")
    snpdiffs_list_file.write(diff_files.join("\n")+ "\n")
    """
    ${params.load_python_module}
    ${params.load_bedtools_module}
    python ${pipeline_script} --reference_id "${reference_id}" --output_directory "${reference_out_dir}" --snpdiffs_file "${snpdiffs_list_file}" --log_directory "${snp_log_dir}" --min_cov "${min_cov}" --min_len "${min_length}" --min_iden "${min_iden}" --ref_edge "${reference_edge}" --query_edge "${query_edge}" --density_windows "${params.dwin}" --max_snps "${params.wsnps}" --trim_name "${params.trim_name}" --max_missing "${max_missing}" --tmp_dir "${temp_dir}" --rescue "${edge_rescue}"
    echo -n ${reference_out_dir}
    """
}