# HG changeset patch # User rliterman # Date 1733334659 18000 # Node ID 792274118b2ed28655a6c0f900fb774153aaceab # Parent 6f85641ecd4807129ace746a7091603d22d8c59f "planemo upload" diff -r 6f85641ecd48 -r 792274118b2e CSP2/bin/chooseRefs.py --- a/CSP2/bin/chooseRefs.py Tue Dec 03 16:35:35 2024 -0500 +++ b/CSP2/bin/chooseRefs.py Wed Dec 04 12:50:59 2024 -0500 @@ -83,9 +83,9 @@ return [file_path,contig_count,assembly_bases,n50,n90,l50,l90] parser = argparse.ArgumentParser(description='Choose reference isolates based on FASTA metrics and mean distances.') -parser.add_argument('--ref_count', type=int, help='Number of reference isolates to select') +parser.add_argument('--ref_count', type=int, default=1, help='Number of reference isolates to select') parser.add_argument('--mash_triangle_file', type=str, help='Path to the mash triangle file') -parser.add_argument('--trim_name', type=str, help='Trim name') +parser.add_argument('--trim_name', type=str, default="", help='trim name') args = parser.parse_args() ref_count = args.ref_count diff -r 6f85641ecd48 -r 792274118b2e CSP2/bin/fetchReads.py --- a/CSP2/bin/fetchReads.py Tue Dec 03 16:35:35 2024 -0500 +++ b/CSP2/bin/fetchReads.py Wed Dec 04 12:50:59 2024 -0500 @@ -8,10 +8,10 @@ # Parse args parser = argparse.ArgumentParser(description='Fetch Reads') parser.add_argument('--read_dir', type=str, help='path to directory containing read files') -parser.add_argument('--read_filetype', type=str, help='read filetype information') -parser.add_argument('--forward_suffix', type=str, help='forward suffix') -parser.add_argument('--reverse_suffix', type=str, help='reverse suffix') -parser.add_argument('--trim_name', type=str, help='trim name') +parser.add_argument('--read_filetype',default='fastq.gz', type=str, help='read filetype information') +parser.add_argument('--forward_suffix',default='_1.fastq.gz', type=str, help='forward suffix') +parser.add_argument('--reverse_suffix',default = '_2.fastq.gz', type=str, help='reverse suffix') +parser.add_argument('--trim_name', type=str, default="", help='trim name') args = parser.parse_args() # Get path to directory containing read files diff -r 6f85641ecd48 -r 792274118b2e CSP2/bin/runSNPPipeline.py --- a/CSP2/bin/runSNPPipeline.py Tue Dec 03 16:35:35 2024 -0500 +++ b/CSP2/bin/runSNPPipeline.py Wed Dec 04 12:50:59 2024 -0500 @@ -636,17 +636,17 @@ parser.add_argument('--output_directory', type=str, help='Output Directory') parser.add_argument('--log_directory', type=str, help='Log Directory') parser.add_argument('--snpdiffs_file', type=str, help='Path to SNPdiffs file') -parser.add_argument('--min_cov', type=float, help='Minimum coverage') -parser.add_argument('--min_len', type=int, help='Minimum length') -parser.add_argument('--min_iden', type=float, help='Minimum identity') -parser.add_argument('--ref_edge', type=int, help='Reference edge') -parser.add_argument('--query_edge', type=int, help='Query edge') -parser.add_argument('--density_windows', type=str, help='Density windows') -parser.add_argument('--max_snps', type=str, help='Maximum SNPs') -parser.add_argument('--trim_name', type=str, help='Trim name') -parser.add_argument('--max_missing', type=float, help='Maximum missing') -parser.add_argument('--tmp_dir', type=str, help='Temporary directory') -parser.add_argument('--rescue', type=str, help='Rescue edge SNPs (rescue/norescue)') +parser.add_argument('--min_cov', default=85,type=float, help='Minimum coverage') +parser.add_argument('--min_len', default=500,type=int, help='Minimum length') +parser.add_argument('--min_iden', default=99,type=float, help='Minimum identity') +parser.add_argument('--ref_edge', default=150,type=int, help='Reference edge') +parser.add_argument('--query_edge', default=150,type=int, help='Query edge') +parser.add_argument('--density_windows', default="1000,125,15",type=str, help='Density windows') +parser.add_argument('--max_snps', default="3,2,1", type=str, help='Maximum SNPs') +parser.add_argument('--trim_name', type=str, default="", help='trim name') +parser.add_argument('--max_missing',default=50, type=float, help='Maximum missing') +parser.add_argument('--tmp_dir',default="", type=str, help='Temporary directory') +parser.add_argument('--rescue', default="norescue",type=str, help='Rescue edge SNPs (rescue/norescue)') args = parser.parse_args() reference_id = args.reference_id diff -r 6f85641ecd48 -r 792274118b2e CSP2/bin/saveSNPDiffs.py --- a/CSP2/bin/saveSNPDiffs.py Tue Dec 03 16:35:35 2024 -0500 +++ b/CSP2/bin/saveSNPDiffs.py Wed Dec 04 12:50:59 2024 -0500 @@ -30,7 +30,7 @@ parser.add_argument("--snpdiffs_file", help="Path to the SNP diffs list file") parser.add_argument("--summary_file", help="Path to the summary file") parser.add_argument("--isolate_file", help="Path to the isolate data file") -parser.add_argument("--trim_name", help="Trim name") +parser.add_argument('--trim_name', type=str, default="", help='trim name') parser.add_argument("--ref_id_file", help="Path to the reference IDs file") args = parser.parse_args() diff -r 6f85641ecd48 -r 792274118b2e CSP2/bin/screenSNPDiffs.py --- a/CSP2/bin/screenSNPDiffs.py Tue Dec 03 16:35:35 2024 -0500 +++ b/CSP2/bin/screenSNPDiffs.py Wed Dec 04 12:50:59 2024 -0500 @@ -548,17 +548,17 @@ parser = argparse.ArgumentParser() parser.add_argument("--snpdiffs_file", help="Path to the file containing SNP diffs") parser.add_argument("--log_dir", help="Path to the log directory") -parser.add_argument("--min_cov", type=float, help="Minimum coverage") -parser.add_argument("--min_len", type=int, help="Minimum length") -parser.add_argument("--min_iden", type=float, help="Minimum identity") -parser.add_argument("--ref_edge", type=int, help="Reference edge") -parser.add_argument("--query_edge", type=int, help="Query edge") -parser.add_argument("--density_windows", help="Density windows (comma-separated)") -parser.add_argument("--max_snps", help="Maximum SNPs (comma-separated)") -parser.add_argument("--trim_name", help="Trim name") +parser.add_argument("--min_cov", default=85, type=float, help="Minimum coverage") +parser.add_argument("--min_len", default=500,type=int, help="Minimum length") +parser.add_argument("--min_iden", default=99,type=float, help="Minimum identity") +parser.add_argument("--ref_edge", default=150,type=int, help="Reference edge") +parser.add_argument("--query_edge", default=150,type=int, help="Query edge") +parser.add_argument("--density_windows",default="1000,125,15", help="Density windows (comma-separated)") +parser.add_argument("--max_snps", default="3,2,1",help="Maximum SNPs (comma-separated)") +parser.add_argument('--trim_name', type=str, default="", help='trim name') parser.add_argument("--output_file", help="Output file") parser.add_argument("--ref_id", help="Reference IDs file") -parser.add_argument("--tmp_dir", help="TMP dir") +parser.add_argument("--tmp_dir",default="", help="TMP dir") args = parser.parse_args() diff -r 6f85641ecd48 -r 792274118b2e CSP2/bin/userSNPDiffs.py --- a/CSP2/bin/userSNPDiffs.py Tue Dec 03 16:35:35 2024 -0500 +++ b/CSP2/bin/userSNPDiffs.py Wed Dec 04 12:50:59 2024 -0500 @@ -31,7 +31,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--snpdiffs_file", help="Path to the SNP diffs list file") -parser.add_argument("--trim_name", help="Trim name") +parser.add_argument('--trim_name', type=str, default="", help='trim name') args = parser.parse_args() snpdiffs_list_file = args.snpdiffs_file diff -r 6f85641ecd48 -r 792274118b2e CSP2/conf/profiles.config --- a/CSP2/conf/profiles.config Tue Dec 03 16:35:35 2024 -0500 +++ b/CSP2/conf/profiles.config Wed Dec 04 12:50:59 2024 -0500 @@ -90,7 +90,7 @@ executor.$slurm.exitReadTimeout = 120000 process.executor = 'slurm' - params.cores = 2 + params.cores = 8 params.python_module = "" params.mummer_module = "" params.skesa_module = "" diff -r 6f85641ecd48 -r 792274118b2e CSP2/nextflow.config --- a/CSP2/nextflow.config Tue Dec 03 16:35:35 2024 -0500 +++ b/CSP2/nextflow.config Wed Dec 04 12:50:59 2024 -0500 @@ -16,11 +16,11 @@ process { withLabel: 'mummerMem' { cpus = 1 - // memory = '4 GB' + // memory = '4 GB' } - // withLabel: 'skesaMem' { - // memory = '12 GB' - // } + //withLabel: 'skesaMem' { + // memory = '12 GB' + //} } // Global default params diff -r 6f85641ecd48 -r 792274118b2e CSP2/subworkflows/fetchData/main.nf --- a/CSP2/subworkflows/fetchData/main.nf Tue Dec 03 16:35:35 2024 -0500 +++ b/CSP2/subworkflows/fetchData/main.nf Wed Dec 04 12:50:59 2024 -0500 @@ -21,8 +21,6 @@ // Set SKESA cores to 4 or fewer skesa_cpus = (params.cores as Integer) >= 4 ? 4 : params.cores as Integer -println "params.cores: ${params.cores}" -println "skesa_cpus: ${skesa_cpus}" workflow { main: @@ -274,12 +272,16 @@ } else{ error "$fasta_dir is not a valid directory or file..." } + fasta_data = ch_fasta - .filter { file(it).exists() } + .map { filePath -> + if (!file(filePath).exists()) { error "$filePath is not a valid directory or file..." } + return filePath } .map { filePath -> def fileName = file(filePath).getBaseName() def sampleName = fileName.replaceAll(trim_this, "") - tuple(sampleName, filePath)} + tuple(sampleName, filePath) + } } } workflow processSNPDiffs{ @@ -315,8 +317,10 @@ } snpdiffs_data = ch_snpdiffs - .filter { file(it).exists() } - .collect() | getSNPDiffsData | splitCsv | collect | flatten | collate(19) + .map { filePath -> + if (!file(filePath).exists()) { error "$filePath is not a valid directory or file..." } + return filePath } + .collect() | getSNPDiffsData | splitCsv | collect | flatten | collate(19) // (1) SNPDiffs_File, (2) Query_ID, (3) Query_Assembly, (4) Query_Contig_Count, (5) Query_Assembly_Bases, // (6) Query_N50, (7) Query_N90, (8) Query_L50, (9) Query_L90, (10) Query_SHA256, @@ -495,10 +499,8 @@ assembled_data = assembly_output.map{it->tuple(it[0],it[3])} } process skesaAssemble{ - // label 'skesaMem' - - cpus = skesa_cpus - + //label 'skesaMem' + input: tuple val(sample_name),val(read_type),val(read_location) diff -r 6f85641ecd48 -r 792274118b2e csp_screen.xml --- a/csp_screen.xml Tue Dec 03 16:35:35 2024 -0500 +++ b/csp_screen.xml Wed Dec 04 12:50:59 2024 -0500 @@ -58,7 +58,7 @@ fi; nextflow run ${__tool_directory__}/CSP2/CSP2.nf -profile csp2_galaxy --cores $cores --runmode screen \$QUERY_FASTA_ARG \$REF_FASTA_ARG \$QUERY_READS_ARG \$REF_READS_ARG \$REF_ID_ARG \$TRIM_ARG --readext $readext --forward $forward --reverse $reverse --ref_readext $readext --ref_forward $forward --ref_reverse $reverse --min_cov $min_cov --min_iden $min_iden --min_len $min_len --ref_edge $ref_edge --query_edge $query_edge --dwin $dwin --wsnps $wsnps --cores 8 --out \$CSP2_DIR/CSP2_Screen_Output > Nextflow_Log.txt 2>&1; - + sleep 5; zip -r work.zip work; zip -r csp2.zip CSP2_Screen_Output; ]]>