Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/nucmer @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env perl

#-------------------------------------------------------------------------------
#   Programmer: Adam M Phillippy, The Institute for Genomic Research
#         File: nucmer
#         Date: 04 / 09 / 03
#
#        Usage:
#    nucmer [options] <Reference> <Query>
#
#                Try 'nucmer -h' for more information.
#
#      Purpose: To create alignments between two multi-FASTA inputs by using
#               the MUMmer matching and clustering algorithms.
#
#     Pipeline: 1) prenuc       reference            -> <prefix>.ntref
#               2) mummer       <prefix>.ntref+query -> (pipe)
#               3) mgaps        (pipe)               -> <prefix>.mgaps
#               4) postnuc      <prefix>.mgaps       -> <prefix>.delta
#               5) show-coords  <prefix>.delta       -> <prefix>.coords (-o only)
#
#-------------------------------------------------------------------------------

use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
use Foundation;
use File::Spec::Functions;
use strict;
use warnings;

#-- Absolute install locations of the helper programs and scripts.
#   NOTE(review): these are machine-specific absolute paths; confirm they are
#   rewritten for the target environment when the package is installed.
my $AUX_BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/aux_bin";
my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23";
my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";


my $VERSION_INFO = q~
NUCmer (NUCleotide MUMmer) version 3.1
    ~;


my $HELP_INFO = q~
  USAGE: nucmer [options] <Reference> <Query>

  DESCRIPTION:
    nucmer generates nucleotide alignments between two mutli-FASTA input
    files. The out.delta output file lists the distance between insertions
    and deletions that produce maximal scoring alignments between each
    sequence. The show-* utilities know how to read this format.

  MANDATORY:
    Reference       Set the input reference multi-FASTA filename
    Query           Set the input query multi-FASTA filename

  OPTIONS:
    --mum           Use anchor matches that are unique in both the reference
                    and query
    --mumcand       Same as --mumreference
    --mumreference  Use anchor matches that are unique in in the reference
                    but not necessarily unique in the query (default behavior)
    --maxmatch      Use all anchor matches regardless of their uniqueness

    -b|breaklen     Set the distance an alignment extension will attempt to
                    extend poor scoring regions before giving up (default 200)
    --[no]banded    Enforce absolute banding of dynamic programming matrix
                    based on diagdiff parameter EXPERIMENTAL (default no)
    -c|mincluster   Sets the minimum length of a cluster of matches (default 65)
    --[no]delta     Toggle the creation of the delta file (default --delta)
    --depend        Print the dependency information and exit
    -D|diagdiff     Set the maximum diagonal difference between two adjacent
                    anchors in a cluster (default 5)
    -d|diagfactor   Set the maximum diagonal difference between two adjacent
                    anchors in a cluster as a differential fraction of the gap
                    length (default 0.12)
    --[no]extend    Toggle the cluster extension step (default --extend)
    -f
    --forward       Use only the forward strand of the Query sequences
    -g|maxgap       Set the maximum gap between two adjacent matches in a
                    cluster (default 90)
    -h
    --help          Display help information and exit
    -l|minmatch     Set the minimum length of a single match (default 20)
    -o
    --coords        Automatically generate the original NUCmer1.1 coords
                    output file using the 'show-coords' program
    --[no]optimize  Toggle alignment score optimization, i.e. if an alignment
                    extension reaches the end of a sequence, it will backtrack
                    to optimize the alignment score instead of terminating the
                    alignment at the end of the sequence (default --optimize)
    -p|prefix       Set the prefix of the output files (default "out")
    -r
    --reverse       Use only the reverse complement of the Query sequences
    --[no]simplify  Simplify alignments by removing shadowed clusters. Turn
                    this option off if aligning a sequence to itself to look
                    for repeats (default --simplify)
    -V
    --version       Display the version information and exit
    ~;


my $USAGE_INFO = q~
  USAGE: nucmer [options] <Reference> <Query>
    ~;


#-- Files reported by the --depend option
my @DEPEND_INFO =
    (
     "$BIN_DIR/mummer",
     "$BIN_DIR/mgaps",
     "$BIN_DIR/show-coords",
     "$AUX_BIN_DIR/postnuc",
     "$AUX_BIN_DIR/prenuc",
     "$SCRIPT_DIR/Foundation.pm"
     );


my %DEFAULT_PARAMETERS =
    (
     "OUTPUT_PREFIX"   => "out",            # prefix for all output files
     "MATCH_ALGORITHM" => "-mumreference",  # match finding algo switch
     "MATCH_DIRECTION" => "-b",             # match direction switch
     "MIN_MATCH"       => "20",             # minimum match size
     "MAX_GAP"         => "90",             # maximum gap between matches
     "MIN_CLUSTER"     => "65",             # minimum cluster size
     "DIAG_DIFF"       => "5",              # diagonal difference absolute
     "DIAG_FACTOR"     => ".12",            # diagonal difference fraction
     "BREAK_LEN"       => "200",            # extension break length
     "POST_SWITCHES"   => ""                # switches for the post processing
     );


#-------------------------------------------------------------------------------
# shell_quote
#
#   Wrap a single argument in single quotes so that /bin/sh passes it to the
#   child program verbatim (spaces, globs, '$', ';' etc. lose their meaning).
#   An embedded single quote is emitted as '\'' (close, escaped quote, reopen).
#
#   Param:   $arg   the raw argument text
#   Returns: the quoted text, safe to interpolate into a shell command string
#-------------------------------------------------------------------------------
sub shell_quote
{
    my ($arg) = @_;

    $arg =~ s/'/'\\''/g;

    return "'$arg'";
}


#-------------------------------------------------------------------------------
# main
#
#   Parses the command line, validates every program and file the run needs,
#   then drives prenuc, mummer | mgaps, postnuc and (optionally) show-coords.
#   Exits with status 1 on a usage error; any failing stage is reported
#   through $tigr->bail.
#
#   Returns: 0 on success (used as the process exit status)
#-------------------------------------------------------------------------------
sub main
{
    #-- The command line options for the various programs
    my $pfx   = $DEFAULT_PARAMETERS{"OUTPUT_PREFIX"};
    my $algo  = $DEFAULT_PARAMETERS{"MATCH_ALGORITHM"};
    my $mdir  = $DEFAULT_PARAMETERS{"MATCH_DIRECTION"};
    my $size  = $DEFAULT_PARAMETERS{"MIN_MATCH"};
    my $gap   = $DEFAULT_PARAMETERS{"MAX_GAP"};
    my $clus  = $DEFAULT_PARAMETERS{"MIN_CLUSTER"};
    my $ddiff = $DEFAULT_PARAMETERS{"DIAG_DIFF"};
    my $dfrac = $DEFAULT_PARAMETERS{"DIAG_FACTOR"};
    my $blen  = $DEFAULT_PARAMETERS{"BREAK_LEN"};
    my $psw   = $DEFAULT_PARAMETERS{"POST_SWITCHES"};

    my $fwd;                   # if true, use forward strand
    my $rev;                   # if true, use reverse strand
    my $maxmatch;              # matching algorithm switches
    my $mumreference;
    my $mum;
    my $banded = 0;            # if true, enforce absolute dp banding
    my $extend = 1;            # if true, extend clusters
    my $delta = 1;             # if true, create the delta file
    my $optimize = 1;          # if true, optimize alignment scores
    my $simplify = 1;          # if true, simplify shadowed alignments

    my $generate_coords;       # if true, run show-coords at the end

    #-- Initialize TIGR::Foundation
    my $tigr = TIGR::Foundation->new;
    if ( !defined ($tigr) ) {
        print (STDERR "ERROR: TIGR::Foundation could not be initialized");
        exit (1);
    }

    #-- Set help and usage information
    $tigr->setHelpInfo ($HELP_INFO);
    $tigr->setUsageInfo ($USAGE_INFO);
    $tigr->setVersionInfo ($VERSION_INFO);
    $tigr->addDependInfo (@DEPEND_INFO);

    #-- Get command line parameters
    my $parse_ok = $tigr->TIGR_GetOptions
        (
         "maxmatch"       => \$maxmatch,
         "mumcand"        => \$mumreference,
         "mumreference"   => \$mumreference,
         "mum"            => \$mum,
         "b|breaklen=i"   => \$blen,
         "banded!"        => \$banded,
         "c|mincluster=i" => \$clus,
         "delta!"         => \$delta,
         "D|diagdiff=i"   => \$ddiff,
         "d|diagfactor=f" => \$dfrac,
         "extend!"        => \$extend,
         "f|forward"      => \$fwd,
         "g|maxgap=i"     => \$gap,
         "l|minmatch=i"   => \$size,
         "o|coords"       => \$generate_coords,
         "optimize!"      => \$optimize,
         "p|prefix=s"     => \$pfx,
         "r|reverse"      => \$rev,
         "simplify!"      => \$simplify
         );

    #-- Check if the parsing was successful; exactly two positional
    #   arguments (reference and query) must remain
    if ( !$parse_ok || scalar (@ARGV) != 2 ) {
        $tigr->printUsageInfo( );
        print (STDERR "Try '$0 -h' for more information.\n");
        exit (1);
    }

    my $ref_file = File::Spec->rel2abs ($ARGV[0]);
    my $qry_file = File::Spec->rel2abs ($ARGV[1]);

    #-- Set up the program parameters
    if ( $fwd && $rev ) {
        $mdir = "-b";
    } elsif ( $fwd ) {
        $mdir = "";
    } elsif ( $rev ) {
        $mdir = "-r";
    }
    if ( ! $extend ) {
        $psw .= "-e ";
    }
    if ( ! $delta ) {
        $psw .= "-d ";
    }
    if ( ! $optimize ) {
        $psw .= "-t ";
    }
    if ( ! $simplify ) {
        $psw .= "-s ";
    }

    #-- At most one matching algorithm may be requested
    my $algo_count = 0;
    if ( $mum ) {
        $algo_count ++;
        $algo = "-mum";
    }
    if ( $mumreference ) {
        $algo_count ++;
        $algo = "-mumreference";
    }
    if ( $maxmatch ) {
        $algo_count ++;
        $algo = "-maxmatch";
    }
    if ( $algo_count > 1 ) {
        $tigr->printUsageInfo( );
        print (STDERR "ERROR: Multiple matching algorithms selected\n");
        print (STDERR "Try '$0 -h' for more information.\n");
        exit (1);
    }

    #-- Set up the program path names
    my $algo_path = "$BIN_DIR/mummer";
    my $mgaps_path = "$BIN_DIR/mgaps";
    my $prenuc_path = "$AUX_BIN_DIR/prenuc";
    my $postnuc_path = "$AUX_BIN_DIR/postnuc";
    my $showcoords_path = "$BIN_DIR/show-coords";

    #-- Check that the files needed are all there and readable/writable
    {
        my @unusable;

        foreach my $program ( $algo_path, $mgaps_path,
                              $prenuc_path, $postnuc_path ) {
            if ( !$tigr->isExecutableFile ($program) ) {
                push (@unusable, $program);
            }
        }

        foreach my $input ( $ref_file, $qry_file ) {
            if ( !$tigr->isReadableFile ($input) ) {
                push (@unusable, $input);
            }
        }

        #-- An output file is usable if it can be created or, when it
        #   already exists, overwritten
        foreach my $output ( "$pfx.ntref", "$pfx.mgaps", "$pfx.delta" ) {
            if ( !$tigr->isCreatableFile ($output)  &&
                 !$tigr->isWritableFile ($output) ) {
                push (@unusable, $output);
            }
        }

        if ( $generate_coords ) {
            if ( !$tigr->isExecutableFile ($showcoords_path) ) {
                push (@unusable, $showcoords_path);
            }
            if ( !$tigr->isCreatableFile ("$pfx.coords")  &&
                 !$tigr->isWritableFile ("$pfx.coords") ) {
                push (@unusable, "$pfx.coords");
            }
        }

        #-- If 1 or more files could not be processed, terminate script
        if ( @unusable ) {
            $tigr->logError
                ("ERROR: The following critical files could not be used", 1);
            #-- Reported last-found first
            foreach my $file ( reverse (@unusable) ) {
                $tigr->logError ($file, 1);
            }
            $tigr->logError
                ("Check your paths and file permissions and try again", 1);
            $tigr->bail( );
        }
    }

    #-- Every command below is handed to the shell as one string (the stages
    #   rely on shell redirection), so each path is quoted once here to keep
    #   spaces and shell metacharacters in file names from being interpreted.
    #   The numeric options were already type-checked by the option parser.
    my $q_algo       = shell_quote ($algo_path);
    my $q_mgaps      = shell_quote ($mgaps_path);
    my $q_prenuc     = shell_quote ($prenuc_path);
    my $q_postnuc    = shell_quote ($postnuc_path);
    my $q_showcoords = shell_quote ($showcoords_path);
    my $q_ref        = shell_quote ($ref_file);
    my $q_qry        = shell_quote ($qry_file);
    my $q_pfx        = shell_quote ($pfx);
    my $q_ntref      = shell_quote ("$pfx.ntref");
    my $q_mgaps_out  = shell_quote ("$pfx.mgaps");
    my $q_delta      = shell_quote ("$pfx.delta");
    my $q_coords     = shell_quote ("$pfx.coords");

    my $status;                # return value of the most recent stage


    #-- Run prenuc and assert return value is zero
    print (STDERR "1: PREPARING DATA\n");
    $status = $tigr->runCommand
        ("$q_prenuc $q_ref > $q_ntref");

    if ( $status != 0 ) {
        $tigr->bail
            ("ERROR: prenuc returned non-zero\n");
    }


    #-- Run mummer | mgaps and assert return value is zero
    print (STDERR "2,3: RUNNING mummer AND CREATING CLUSTERS\n");
    open (my $algo_pipe, '-|',
          "$q_algo $algo $mdir -l $size -n $q_ntref $q_qry")
        or $tigr->bail ("ERROR: could not open $algo_path output pipe $!");
    open (my $clus_pipe, '|-',
          "$q_mgaps -l $clus -s $gap -d $ddiff -f $dfrac > $q_mgaps_out")
        or $tigr->bail ("ERROR: could not open $mgaps_path input pipe $!");
    while ( my $line = <$algo_pipe> ) {
        print {$clus_pipe} $line
            or $tigr->bail ("ERROR: could not write to $mgaps_path pipe $!");
    }
    #-- close on a pipe is false when the child exited non-zero
    my $algo_ok = close ($algo_pipe);
    my $clus_ok = close ($clus_pipe);

    if ( !$algo_ok || !$clus_ok ) {
        $tigr->bail ("ERROR: mummer and/or mgaps returned non-zero\n");
    }


    #-- Run postnuc and assert return value is zero
    print (STDERR "4: FINISHING DATA\n");
    my $band_switch = $banded ? " -B $ddiff" : "";
    $status = $tigr->runCommand
        ("$q_postnuc $psw -b $blen$band_switch $q_ref $q_qry $q_pfx < $q_mgaps_out");

    if ( $status != 0 ) {
        $tigr->bail ("ERROR: postnuc returned non-zero\n");
    }

    #-- If the -o flag was set, run show-coords using NUCmer1.1 settings
    if ( $generate_coords ) {
        print (STDERR "5: GENERATING COORDS FILE\n");
        $status = $tigr->runCommand
            ("$q_showcoords -r $q_delta > $q_coords");

        if ( $status != 0 ) {
            $tigr->bail ("ERROR: show-coords returned non-zero\n");
        }
    }

    #-- Remove the temporary output; unlink returns the number of files
    #   deleted, so anything other than 2 means a file was left behind
    my $removed = unlink ("$pfx.ntref", "$pfx.mgaps");

    if ( $removed != 2 ) {
        $tigr->logError ("WARNING: there was a problem deleting".
                         " the temporary output files", 1);
    }

    #-- Return success
    return (0);
}

exit ( main ( ) );

#-- END OF SCRIPT