jpayne@69: #!/usr/bin/env perl jpayne@69: jpayne@69: #------------------------------------------------------------------------------- jpayne@69: # Programmer: Adam M Phillippy, The Institute for Genomic Research jpayne@69: # File: nucmer jpayne@69: # Date: 04 / 09 / 03 jpayne@69: # jpayne@69: # Usage: jpayne@69: # nucmer [options] jpayne@69: # jpayne@69: # Try 'nucmer -h' for more information. jpayne@69: # jpayne@69: # Purpose: To create alignments between two multi-FASTA inputs by using jpayne@69: # the MUMmer matching and clustering algorithms. jpayne@69: # jpayne@69: #------------------------------------------------------------------------------- jpayne@69: jpayne@69: use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; jpayne@69: use Foundation; jpayne@69: use File::Spec::Functions; jpayne@69: use strict; jpayne@69: jpayne@69: my $AUX_BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/aux_bin"; jpayne@69: my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23"; jpayne@69: my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; jpayne@69: jpayne@69: jpayne@69: my $VERSION_INFO = q~ jpayne@69: NUCmer (NUCleotide MUMmer) version 3.1 jpayne@69: ~; jpayne@69: jpayne@69: jpayne@69: my $HELP_INFO = q~ jpayne@69: USAGE: nucmer [options] jpayne@69: jpayne@69: DESCRIPTION: jpayne@69: nucmer generates nucleotide alignments between two mutli-FASTA input jpayne@69: files. The out.delta output file lists the distance between insertions jpayne@69: and deletions that produce maximal scoring alignments between each jpayne@69: sequence. The show-* utilities know how to read this format. jpayne@69: jpayne@69: MANDATORY: jpayne@69: Reference Set the input reference multi-FASTA filename jpayne@69: Query Set the input query multi-FASTA filename jpayne@69: jpayne@69: OPTIONS: jpayne@69: --mum Use anchor matches that are unique in both the reference jpayne@69: and query jpayne@69: --mumcand Same as --mumreference jpayne@69: --mumreference Use anchor matches that are unique in in the reference jpayne@69: but not necessarily unique in the query (default behavior) jpayne@69: --maxmatch Use all anchor matches regardless of their uniqueness jpayne@69: jpayne@69: -b|breaklen Set the distance an alignment extension will attempt to jpayne@69: extend poor scoring regions before giving up (default 200) jpayne@69: --[no]banded Enforce absolute banding of dynamic programming matrix jpayne@69: based on diagdiff parameter EXPERIMENTAL (default no) jpayne@69: -c|mincluster Sets the minimum length of a cluster of matches (default 65) jpayne@69: --[no]delta Toggle the creation of the delta file (default --delta) jpayne@69: --depend Print the dependency information and exit jpayne@69: -D|diagdiff Set the maximum diagonal difference between two adjacent jpayne@69: anchors in a cluster (default 5) jpayne@69: -d|diagfactor Set the maximum diagonal difference between two adjacent jpayne@69: anchors in a cluster as a differential fraction of the gap jpayne@69: length (default 0.12) jpayne@69: --[no]extend Toggle the cluster extension step (default --extend) jpayne@69: -f jpayne@69: --forward Use only the forward strand of the Query sequences jpayne@69: -g|maxgap Set the maximum gap between two adjacent matches in a jpayne@69: cluster (default 90) jpayne@69: -h jpayne@69: --help Display help information and exit jpayne@69: -l|minmatch Set the minimum length of a single match (default 20) jpayne@69: -o jpayne@69: --coords Automatically generate the original NUCmer1.1 coords jpayne@69: output file using the 'show-coords' program jpayne@69: --[no]optimize Toggle alignment score optimization, i.e. if an alignment jpayne@69: extension reaches the end of a sequence, it will backtrack jpayne@69: to optimize the alignment score instead of terminating the jpayne@69: alignment at the end of the sequence (default --optimize) jpayne@69: -p|prefix Set the prefix of the output files (default "out") jpayne@69: -r jpayne@69: --reverse Use only the reverse complement of the Query sequences jpayne@69: --[no]simplify Simplify alignments by removing shadowed clusters. Turn jpayne@69: this option off if aligning a sequence to itself to look jpayne@69: for repeats (default --simplify) jpayne@69: -V jpayne@69: --version Display the version information and exit jpayne@69: ~; jpayne@69: jpayne@69: jpayne@69: my $USAGE_INFO = q~ jpayne@69: USAGE: nucmer [options] jpayne@69: ~; jpayne@69: jpayne@69: jpayne@69: my @DEPEND_INFO = jpayne@69: ( jpayne@69: "$BIN_DIR/mummer", jpayne@69: "$BIN_DIR/mgaps", jpayne@69: "$BIN_DIR/show-coords", jpayne@69: "$AUX_BIN_DIR/postnuc", jpayne@69: "$AUX_BIN_DIR/prenuc", jpayne@69: "$SCRIPT_DIR/Foundation.pm" jpayne@69: ); jpayne@69: jpayne@69: jpayne@69: my %DEFAULT_PARAMETERS = jpayne@69: ( jpayne@69: "OUTPUT_PREFIX" => "out", # prefix for all output files jpayne@69: "MATCH_ALGORITHM" => "-mumreference", # match finding algo switch jpayne@69: "MATCH_DIRECTION" => "-b", # match direction switch jpayne@69: "MIN_MATCH" => "20", # minimum match size jpayne@69: "MAX_GAP" => "90", # maximum gap between matches jpayne@69: "MIN_CLUSTER" => "65", # minimum cluster size jpayne@69: "DIAG_DIFF" => "5", # diagonal difference absolute jpayne@69: "DIAG_FACTOR" => ".12", # diagonal difference fraction jpayne@69: "BREAK_LEN" => "200", # extension break length jpayne@69: "POST_SWITCHES" => "" # switches for the post processing jpayne@69: ); jpayne@69: jpayne@69: jpayne@69: sub main ( ) jpayne@69: { jpayne@69: my $tigr; # TIGR::Foundation object jpayne@69: my @err; # Error variable jpayne@69: jpayne@69: my $ref_file; # path of the reference input file jpayne@69: my $qry_file; # path of the query input file jpayne@69: jpayne@69: #-- The command line options for the various programs jpayne@69: my $pfx = $DEFAULT_PARAMETERS { "OUTPUT_PREFIX" }; jpayne@69: my $algo = $DEFAULT_PARAMETERS { "MATCH_ALGORITHM" }; jpayne@69: my $mdir = $DEFAULT_PARAMETERS { "MATCH_DIRECTION" }; jpayne@69: my $size = $DEFAULT_PARAMETERS { "MIN_MATCH" }; jpayne@69: my $gap = $DEFAULT_PARAMETERS { "MAX_GAP" }; jpayne@69: my $clus = $DEFAULT_PARAMETERS { "MIN_CLUSTER" }; jpayne@69: my $ddiff = $DEFAULT_PARAMETERS { "DIAG_DIFF" }; jpayne@69: my $dfrac = $DEFAULT_PARAMETERS { "DIAG_FACTOR" }; jpayne@69: my $blen = $DEFAULT_PARAMETERS { "BREAK_LEN" }; jpayne@69: my $psw = $DEFAULT_PARAMETERS { "POST_SWITCHES" }; jpayne@69: jpayne@69: my $fwd; # if true, use forward strand jpayne@69: my $rev; # if true, use reverse strand jpayne@69: my $maxmatch; # matching algorithm switches jpayne@69: my $mumreference; jpayne@69: my $mum; jpayne@69: my $banded = 0; # if true, enforce absolute dp banding jpayne@69: my $extend = 1; # if true, extend clusters jpayne@69: my $delta = 1; # if true, create the delta file jpayne@69: my $optimize = 1; # if true, optimize alignment scores jpayne@69: my $simplify = 1; # if true, simplify shadowed alignments jpayne@69: jpayne@69: my $generate_coords; jpayne@69: jpayne@69: #-- Initialize TIGR::Foundation jpayne@69: $tigr = new TIGR::Foundation; jpayne@69: if ( !defined ($tigr) ) { jpayne@69: print (STDERR "ERROR: TIGR::Foundation could not be initialized"); jpayne@69: exit (1); jpayne@69: } jpayne@69: jpayne@69: #-- Set help and usage information jpayne@69: $tigr->setHelpInfo ($HELP_INFO); jpayne@69: $tigr->setUsageInfo ($USAGE_INFO); jpayne@69: $tigr->setVersionInfo ($VERSION_INFO); jpayne@69: $tigr->addDependInfo (@DEPEND_INFO); jpayne@69: jpayne@69: #-- Get command line parameters jpayne@69: $err[0] = $tigr->TIGR_GetOptions jpayne@69: ( jpayne@69: "maxmatch" => \$maxmatch, jpayne@69: "mumcand" => \$mumreference, jpayne@69: "mumreference" => \$mumreference, jpayne@69: "mum" => \$mum, jpayne@69: "b|breaklen=i" => \$blen, jpayne@69: "banded!" => \$banded, jpayne@69: "c|mincluster=i" => \$clus, jpayne@69: "delta!" => \$delta, jpayne@69: "D|diagdiff=i" => \$ddiff, jpayne@69: "d|diagfactor=f" => \$dfrac, jpayne@69: "extend!" => \$extend, jpayne@69: "f|forward" => \$fwd, jpayne@69: "g|maxgap=i" => \$gap, jpayne@69: "l|minmatch=i" => \$size, jpayne@69: "o|coords" => \$generate_coords, jpayne@69: "optimize!" => \$optimize, jpayne@69: "p|prefix=s" => \$pfx, jpayne@69: "r|reverse" => \$rev, jpayne@69: "simplify!" => \$simplify jpayne@69: ); jpayne@69: jpayne@69: jpayne@69: #-- Check if the parsing was successful jpayne@69: if ( $err[0] == 0 || $#ARGV != 1 ) { jpayne@69: $tigr->printUsageInfo( ); jpayne@69: print (STDERR "Try '$0 -h' for more information.\n"); jpayne@69: exit (1); jpayne@69: } jpayne@69: jpayne@69: $ref_file = File::Spec->rel2abs ($ARGV[0]); jpayne@69: $qry_file = File::Spec->rel2abs ($ARGV[1]); jpayne@69: jpayne@69: #-- Set up the program parameters jpayne@69: if ( $fwd && $rev ) { jpayne@69: $mdir = "-b"; jpayne@69: } elsif ( $fwd ) { jpayne@69: $mdir = ""; jpayne@69: } elsif ( $rev ) { jpayne@69: $mdir = "-r"; jpayne@69: } jpayne@69: if ( ! $extend ) { jpayne@69: $psw .= "-e "; jpayne@69: } jpayne@69: if ( ! $delta ) { jpayne@69: $psw .= "-d "; jpayne@69: } jpayne@69: if ( ! $optimize ) { jpayne@69: $psw .= "-t "; jpayne@69: } jpayne@69: if ( ! $simplify ) { jpayne@69: $psw .= "-s "; jpayne@69: } jpayne@69: jpayne@69: undef (@err); jpayne@69: $err[0] = 0; jpayne@69: if ( $mum ) { jpayne@69: $err[0] ++; jpayne@69: $algo = "-mum"; jpayne@69: } jpayne@69: if ( $mumreference ) { jpayne@69: $err[0] ++; jpayne@69: $algo = "-mumreference"; jpayne@69: } jpayne@69: if ( $maxmatch ) { jpayne@69: $err[0] ++; jpayne@69: $algo = "-maxmatch"; jpayne@69: } jpayne@69: if ( $err[0] > 1 ) { jpayne@69: $tigr->printUsageInfo( ); jpayne@69: print (STDERR "ERROR: Multiple matching algorithms selected\n"); jpayne@69: print (STDERR "Try '$0 -h' for more information.\n"); jpayne@69: exit (1); jpayne@69: } jpayne@69: jpayne@69: #-- Set up the program path names jpayne@69: my $algo_path = "$BIN_DIR/mummer"; jpayne@69: my $mgaps_path = "$BIN_DIR/mgaps"; jpayne@69: my $prenuc_path = "$AUX_BIN_DIR/prenuc"; jpayne@69: my $postnuc_path = "$AUX_BIN_DIR/postnuc"; jpayne@69: my $showcoords_path = "$BIN_DIR/show-coords"; jpayne@69: jpayne@69: #-- Check that the files needed are all there and readable/writable jpayne@69: { jpayne@69: undef (@err); jpayne@69: if ( !$tigr->isExecutableFile ($algo_path) ) { jpayne@69: push (@err, $algo_path); jpayne@69: } jpayne@69: jpayne@69: if ( !$tigr->isExecutableFile ($mgaps_path) ) { jpayne@69: push (@err, $mgaps_path); jpayne@69: } jpayne@69: jpayne@69: if ( !$tigr->isExecutableFile ($prenuc_path) ) { jpayne@69: push (@err, $prenuc_path); jpayne@69: } jpayne@69: jpayne@69: if ( !$tigr->isExecutableFile ($postnuc_path) ) { jpayne@69: push (@err, $postnuc_path); jpayne@69: } jpayne@69: jpayne@69: if ( !$tigr->isReadableFile ($ref_file) ) { jpayne@69: push (@err, $ref_file); jpayne@69: } jpayne@69: jpayne@69: if ( !$tigr->isReadableFile ($qry_file) ) { jpayne@69: push (@err, $qry_file); jpayne@69: } jpayne@69: jpayne@69: if ( !$tigr->isCreatableFile ("$pfx.ntref") ) { jpayne@69: if ( !$tigr->isWritableFile ("$pfx.ntref") ) { jpayne@69: push (@err, "$pfx.ntref"); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: if ( !$tigr->isCreatableFile ("$pfx.mgaps") ) { jpayne@69: if ( !$tigr->isWritableFile ("$pfx.mgaps") ) { jpayne@69: push (@err, "$pfx.mgaps"); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: if ( !$tigr->isCreatableFile ("$pfx.delta") ) { jpayne@69: if ( !$tigr->isWritableFile ("$pfx.delta") ) { jpayne@69: push (@err, "$pfx.delta"); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: if ( $generate_coords ) { jpayne@69: if ( !$tigr->isExecutableFile ($showcoords_path) ) { jpayne@69: push (@err, $showcoords_path); jpayne@69: } jpayne@69: if ( !$tigr->isCreatableFile ("$pfx.coords") ) { jpayne@69: if ( !$tigr->isWritableFile ("$pfx.coords") ) { jpayne@69: push (@err, "$pfx.coords"); jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: #-- If 1 or more files could not be processed, terminate script jpayne@69: if ( $#err >= 0 ) { jpayne@69: $tigr->logError jpayne@69: ("ERROR: The following critical files could not be used", 1); jpayne@69: while ( $#err >= 0 ) { jpayne@69: $tigr->logError (pop(@err), 1); jpayne@69: } jpayne@69: $tigr->logError jpayne@69: ("Check your paths and file permissions and try again", 1); jpayne@69: $tigr->bail( ); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #-- Run prenuc and assert return value is zero jpayne@69: print (STDERR "1: PREPARING DATA\n"); jpayne@69: $err[0] = $tigr->runCommand jpayne@69: ("$prenuc_path $ref_file > $pfx.ntref"); jpayne@69: jpayne@69: if ( $err[0] != 0 ) { jpayne@69: $tigr->bail jpayne@69: ("ERROR: prenuc returned non-zero\n"); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #-- Run mummer | mgaps and assert return value is zero jpayne@69: print (STDERR "2,3: RUNNING mummer AND CREATING CLUSTERS\n"); jpayne@69: open(ALGO_PIPE, "$algo_path $algo $mdir -l $size -n $pfx.ntref $qry_file |") jpayne@69: or $tigr->bail ("ERROR: could not open $algo_path output pipe $!"); jpayne@69: open(CLUS_PIPE, "| $mgaps_path -l $clus -s $gap -d $ddiff -f $dfrac > $pfx.mgaps") jpayne@69: or $tigr->bail ("ERROR: could not open $mgaps_path input pipe $!"); jpayne@69: while ( ) { jpayne@69: print CLUS_PIPE jpayne@69: or $tigr->bail ("ERROR: could not write to $mgaps_path pipe $!"); jpayne@69: } jpayne@69: $err[0] = close(ALGO_PIPE); jpayne@69: $err[1] = close(CLUS_PIPE); jpayne@69: jpayne@69: if ( $err[0] == 0 || $err[1] == 0 ) { jpayne@69: $tigr->bail ("ERROR: mummer and/or mgaps returned non-zero\n"); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #-- Run postnuc and assert return value is zero jpayne@69: print (STDERR "4: FINISHING DATA\n"); jpayne@69: if ( $banded ) jpayne@69: { jpayne@69: $err[0] = $tigr->runCommand jpayne@69: ("$postnuc_path $psw -b $blen -B $ddiff $ref_file $qry_file $pfx < $pfx.mgaps"); jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: $err[0] = $tigr->runCommand jpayne@69: ("$postnuc_path $psw -b $blen $ref_file $qry_file $pfx < $pfx.mgaps"); jpayne@69: } jpayne@69: jpayne@69: if ( $err[0] != 0 ) { jpayne@69: $tigr->bail ("ERROR: postnuc returned non-zero\n"); jpayne@69: } jpayne@69: jpayne@69: #-- If the -o flag was set, run show-coords using NUCmer1.1 settings jpayne@69: if ( $generate_coords ) { jpayne@69: print (STDERR "5: GENERATING COORDS FILE\n"); jpayne@69: $err[0] = $tigr->runCommand jpayne@69: ("$showcoords_path -r $pfx.delta > $pfx.coords"); jpayne@69: jpayne@69: if ( $err[0] != 0 ) { jpayne@69: $tigr->bail ("ERROR: show-coords returned non-zero\n"); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: #-- Remove the temporary output jpayne@69: $err[0] = unlink ("$pfx.ntref", "$pfx.mgaps"); jpayne@69: jpayne@69: if ( $err[0] != 2 ) { jpayne@69: $tigr->logError ("WARNING: there was a problem deleting". jpayne@69: " the temporary output files", 1); jpayne@69: } jpayne@69: jpayne@69: #-- Return success jpayne@69: return (0); jpayne@69: } jpayne@69: jpayne@69: exit ( main ( ) ); jpayne@69: jpayne@69: #-- END OF SCRIPT