changeset 27:792274118b2e

"planemo upload"
author rliterman
date Wed, 04 Dec 2024 12:50:59 -0500
parents 6f85641ecd48
children 893a6993efe3
files CSP2/bin/chooseRefs.py CSP2/bin/fetchReads.py CSP2/bin/runSNPPipeline.py CSP2/bin/saveSNPDiffs.py CSP2/bin/screenSNPDiffs.py CSP2/bin/userSNPDiffs.py CSP2/conf/profiles.config CSP2/nextflow.config CSP2/subworkflows/fetchData/main.nf csp_screen.xml
diffstat 10 files changed, 46 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/CSP2/bin/chooseRefs.py	Tue Dec 03 16:35:35 2024 -0500
+++ b/CSP2/bin/chooseRefs.py	Wed Dec 04 12:50:59 2024 -0500
@@ -83,9 +83,9 @@
     return [file_path,contig_count,assembly_bases,n50,n90,l50,l90]
 
 parser = argparse.ArgumentParser(description='Choose reference isolates based on FASTA metrics and mean distances.')
-parser.add_argument('--ref_count', type=int, help='Number of reference isolates to select')
+parser.add_argument('--ref_count', type=int, default=1, help='Number of reference isolates to select')
 parser.add_argument('--mash_triangle_file', type=str, help='Path to the mash triangle file')
-parser.add_argument('--trim_name', type=str, help='Trim name')
+parser.add_argument('--trim_name', type=str, default="", help='trim name')
 args = parser.parse_args()
 
 ref_count = args.ref_count
--- a/CSP2/bin/fetchReads.py	Tue Dec 03 16:35:35 2024 -0500
+++ b/CSP2/bin/fetchReads.py	Wed Dec 04 12:50:59 2024 -0500
@@ -8,10 +8,10 @@
 # Parse args
 parser = argparse.ArgumentParser(description='Fetch Reads')
 parser.add_argument('--read_dir', type=str, help='path to directory containing read files')
-parser.add_argument('--read_filetype', type=str, help='read filetype information')
-parser.add_argument('--forward_suffix', type=str, help='forward suffix')
-parser.add_argument('--reverse_suffix', type=str, help='reverse suffix')
-parser.add_argument('--trim_name', type=str, help='trim name')
+parser.add_argument('--read_filetype',default='fastq.gz', type=str, help='read filetype information')
+parser.add_argument('--forward_suffix',default='_1.fastq.gz', type=str, help='forward suffix')
+parser.add_argument('--reverse_suffix',default = '_2.fastq.gz', type=str, help='reverse suffix')
+parser.add_argument('--trim_name', type=str, default="", help='trim name')
 args = parser.parse_args()
 
 # Get path to directory containing read files
--- a/CSP2/bin/runSNPPipeline.py	Tue Dec 03 16:35:35 2024 -0500
+++ b/CSP2/bin/runSNPPipeline.py	Wed Dec 04 12:50:59 2024 -0500
@@ -636,17 +636,17 @@
 parser.add_argument('--output_directory', type=str, help='Output Directory')
 parser.add_argument('--log_directory', type=str, help='Log Directory')
 parser.add_argument('--snpdiffs_file', type=str, help='Path to SNPdiffs file')
-parser.add_argument('--min_cov', type=float, help='Minimum coverage')
-parser.add_argument('--min_len', type=int, help='Minimum length')
-parser.add_argument('--min_iden', type=float, help='Minimum identity')
-parser.add_argument('--ref_edge', type=int, help='Reference edge')
-parser.add_argument('--query_edge', type=int, help='Query edge')
-parser.add_argument('--density_windows', type=str, help='Density windows')
-parser.add_argument('--max_snps', type=str, help='Maximum SNPs')
-parser.add_argument('--trim_name', type=str, help='Trim name')
-parser.add_argument('--max_missing', type=float, help='Maximum missing')
-parser.add_argument('--tmp_dir', type=str, help='Temporary directory')
-parser.add_argument('--rescue', type=str, help='Rescue edge SNPs (rescue/norescue)')
+parser.add_argument('--min_cov', default=85,type=float, help='Minimum coverage')
+parser.add_argument('--min_len', default=500,type=int, help='Minimum length')
+parser.add_argument('--min_iden', default=99,type=float, help='Minimum identity')
+parser.add_argument('--ref_edge', default=150,type=int, help='Reference edge')
+parser.add_argument('--query_edge', default=150,type=int, help='Query edge')
+parser.add_argument('--density_windows', default="1000,125,15",type=str, help='Density windows')
+parser.add_argument('--max_snps', default="3,2,1", type=str, help='Maximum SNPs')
+parser.add_argument('--trim_name', type=str, default="", help='trim name')
+parser.add_argument('--max_missing',default=50, type=float, help='Maximum missing')
+parser.add_argument('--tmp_dir',default="", type=str, help='Temporary directory')
+parser.add_argument('--rescue', default="norescue",type=str, help='Rescue edge SNPs (rescue/norescue)')
 args = parser.parse_args()
 
 reference_id = args.reference_id
--- a/CSP2/bin/saveSNPDiffs.py	Tue Dec 03 16:35:35 2024 -0500
+++ b/CSP2/bin/saveSNPDiffs.py	Wed Dec 04 12:50:59 2024 -0500
@@ -30,7 +30,7 @@
 parser.add_argument("--snpdiffs_file", help="Path to the SNP diffs list file")
 parser.add_argument("--summary_file", help="Path to the summary file")
 parser.add_argument("--isolate_file", help="Path to the isolate data file")
-parser.add_argument("--trim_name", help="Trim name")
+parser.add_argument('--trim_name', type=str, default="", help='trim name')
 parser.add_argument("--ref_id_file", help="Path to the reference IDs file")
 args = parser.parse_args()
 
--- a/CSP2/bin/screenSNPDiffs.py	Tue Dec 03 16:35:35 2024 -0500
+++ b/CSP2/bin/screenSNPDiffs.py	Wed Dec 04 12:50:59 2024 -0500
@@ -548,17 +548,17 @@
 parser = argparse.ArgumentParser()
 parser.add_argument("--snpdiffs_file", help="Path to the file containing SNP diffs")
 parser.add_argument("--log_dir", help="Path to the log directory")
-parser.add_argument("--min_cov", type=float, help="Minimum coverage")
-parser.add_argument("--min_len", type=int, help="Minimum length")
-parser.add_argument("--min_iden", type=float, help="Minimum identity")
-parser.add_argument("--ref_edge", type=int, help="Reference edge")
-parser.add_argument("--query_edge", type=int, help="Query edge")
-parser.add_argument("--density_windows", help="Density windows (comma-separated)")
-parser.add_argument("--max_snps", help="Maximum SNPs (comma-separated)")
-parser.add_argument("--trim_name", help="Trim name")
+parser.add_argument("--min_cov", default=85, type=float, help="Minimum coverage")
+parser.add_argument("--min_len", default=500,type=int, help="Minimum length")
+parser.add_argument("--min_iden", default=99,type=float, help="Minimum identity")
+parser.add_argument("--ref_edge", default=150,type=int, help="Reference edge")
+parser.add_argument("--query_edge", default=150,type=int, help="Query edge")
+parser.add_argument("--density_windows",default="1000,125,15", help="Density windows (comma-separated)")
+parser.add_argument("--max_snps", default="3,2,1",help="Maximum SNPs (comma-separated)")
+parser.add_argument('--trim_name', type=str, default="", help='trim name')
 parser.add_argument("--output_file", help="Output file")
 parser.add_argument("--ref_id", help="Reference IDs file")
-parser.add_argument("--tmp_dir", help="TMP dir")
+parser.add_argument("--tmp_dir",default="", help="TMP dir")
 
 args = parser.parse_args()
 
--- a/CSP2/bin/userSNPDiffs.py	Tue Dec 03 16:35:35 2024 -0500
+++ b/CSP2/bin/userSNPDiffs.py	Wed Dec 04 12:50:59 2024 -0500
@@ -31,7 +31,7 @@
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--snpdiffs_file", help="Path to the SNP diffs list file")
-parser.add_argument("--trim_name", help="Trim name")
+parser.add_argument('--trim_name', type=str, default="", help='trim name')
 args = parser.parse_args()
 
 snpdiffs_list_file = args.snpdiffs_file
--- a/CSP2/conf/profiles.config	Tue Dec 03 16:35:35 2024 -0500
+++ b/CSP2/conf/profiles.config	Wed Dec 04 12:50:59 2024 -0500
@@ -90,7 +90,7 @@
         executor.$slurm.exitReadTimeout = 120000
 
         process.executor = 'slurm'
-        params.cores = 2
+        params.cores = 8
         params.python_module = ""
         params.mummer_module = ""
         params.skesa_module = ""
--- a/CSP2/nextflow.config	Tue Dec 03 16:35:35 2024 -0500
+++ b/CSP2/nextflow.config	Wed Dec 04 12:50:59 2024 -0500
@@ -16,11 +16,11 @@
 process {
     withLabel: 'mummerMem' {
         cpus = 1
-        // memory = '4 GB'
+       // memory = '4 GB'
     }
-    // withLabel: 'skesaMem' {
-    //     memory = '12 GB'
-    // }
+    //withLabel: 'skesaMem' {
+    //    memory = '12 GB'
+    //}
 }
 
 // Global default params
--- a/CSP2/subworkflows/fetchData/main.nf	Tue Dec 03 16:35:35 2024 -0500
+++ b/CSP2/subworkflows/fetchData/main.nf	Wed Dec 04 12:50:59 2024 -0500
@@ -21,8 +21,6 @@
 
 // Set SKESA cores to 4 or fewer
 skesa_cpus = (params.cores as Integer) >= 4 ? 4 : params.cores as Integer
-println "params.cores: ${params.cores}"
-println "skesa_cpus: ${skesa_cpus}"
 
 workflow {
     main:
@@ -274,12 +272,16 @@
         } else{
             error "$fasta_dir is not a valid directory or file..."
         }
+
         fasta_data = ch_fasta
-        .filter { file(it).exists() }
+        .map { filePath ->
+            if (!file(filePath).exists()) { error "$filePath is not a valid directory or file..." }
+            return filePath }
         .map { filePath ->
             def fileName = file(filePath).getBaseName()
             def sampleName = fileName.replaceAll(trim_this, "")
-            tuple(sampleName, filePath)}
+            tuple(sampleName, filePath)
+        }
     }
 }
 workflow processSNPDiffs{
@@ -315,8 +317,10 @@
         }
 
         snpdiffs_data = ch_snpdiffs
-            .filter { file(it).exists() }
-            .collect() | getSNPDiffsData | splitCsv | collect | flatten | collate(19)
+        .map { filePath ->
+            if (!file(filePath).exists()) { error "$filePath is not a valid directory or file..." }
+            return filePath }
+        .collect() | getSNPDiffsData | splitCsv | collect | flatten | collate(19)
 
         // (1) SNPDiffs_File, (2) Query_ID, (3) Query_Assembly, (4) Query_Contig_Count, (5) Query_Assembly_Bases, 
         // (6) Query_N50, (7) Query_N90, (8) Query_L50, (9) Query_L90, (10) Query_SHA256,
@@ -495,10 +499,8 @@
     assembled_data = assembly_output.map{it->tuple(it[0],it[3])}
 }
 process skesaAssemble{
-    // label 'skesaMem'
-
-    cpus = skesa_cpus
-    
+    //label 'skesaMem'
+  
     input:
     tuple val(sample_name),val(read_type),val(read_location)
 
--- a/csp_screen.xml	Tue Dec 03 16:35:35 2024 -0500
+++ b/csp_screen.xml	Wed Dec 04 12:50:59 2024 -0500
@@ -58,7 +58,7 @@
 fi;
 
 		nextflow run ${__tool_directory__}/CSP2/CSP2.nf -profile csp2_galaxy --cores $cores --runmode screen \$QUERY_FASTA_ARG \$REF_FASTA_ARG \$QUERY_READS_ARG \$REF_READS_ARG \$REF_ID_ARG \$TRIM_ARG --readext $readext --forward $forward --reverse $reverse --ref_readext $readext --ref_forward $forward --ref_reverse $reverse --min_cov $min_cov --min_iden $min_iden --min_len $min_len --ref_edge $ref_edge --query_edge $query_edge --dwin $dwin --wsnps $wsnps --cores 8 --out \$CSP2_DIR/CSP2_Screen_Output > Nextflow_Log.txt 2>&1;
-
+		sleep 5;
 		zip -r work.zip work;
 		zip -r csp2.zip CSP2_Screen_Output;
 	]]>