annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/nucmer @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 #!/usr/bin/env perl
jpayne@69 2
jpayne@69 3 #-------------------------------------------------------------------------------
jpayne@69 4 # Programmer: Adam M Phillippy, The Institute for Genomic Research
jpayne@69 5 # File: nucmer
jpayne@69 6 # Date: 04 / 09 / 03
jpayne@69 7 #
jpayne@69 8 # Usage:
jpayne@69 9 # nucmer [options] <Reference> <Query>
jpayne@69 10 #
jpayne@69 11 # Try 'nucmer -h' for more information.
jpayne@69 12 #
jpayne@69 13 # Purpose: To create alignments between two multi-FASTA inputs by using
jpayne@69 14 # the MUMmer matching and clustering algorithms.
jpayne@69 15 #
jpayne@69 16 #-------------------------------------------------------------------------------
jpayne@69 17
jpayne@69 18 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
jpayne@69 19 use Foundation;
jpayne@69 20 use File::Spec::Functions;
jpayne@69 21 use strict;
jpayne@69 22
jpayne@69 23 my $AUX_BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/aux_bin";
jpayne@69 24 my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23";
jpayne@69 25 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
jpayne@69 26
jpayne@69 27
jpayne@69 28 my $VERSION_INFO = q~
jpayne@69 29 NUCmer (NUCleotide MUMmer) version 3.1
jpayne@69 30 ~;
jpayne@69 31
jpayne@69 32
jpayne@69 33 my $HELP_INFO = q~
jpayne@69 34 USAGE: nucmer [options] <Reference> <Query>
jpayne@69 35
jpayne@69 36 DESCRIPTION:
jpayne@69 37 nucmer generates nucleotide alignments between two mutli-FASTA input
jpayne@69 38 files. The out.delta output file lists the distance between insertions
jpayne@69 39 and deletions that produce maximal scoring alignments between each
jpayne@69 40 sequence. The show-* utilities know how to read this format.
jpayne@69 41
jpayne@69 42 MANDATORY:
jpayne@69 43 Reference Set the input reference multi-FASTA filename
jpayne@69 44 Query Set the input query multi-FASTA filename
jpayne@69 45
jpayne@69 46 OPTIONS:
jpayne@69 47 --mum Use anchor matches that are unique in both the reference
jpayne@69 48 and query
jpayne@69 49 --mumcand Same as --mumreference
jpayne@69 50 --mumreference Use anchor matches that are unique in in the reference
jpayne@69 51 but not necessarily unique in the query (default behavior)
jpayne@69 52 --maxmatch Use all anchor matches regardless of their uniqueness
jpayne@69 53
jpayne@69 54 -b|breaklen Set the distance an alignment extension will attempt to
jpayne@69 55 extend poor scoring regions before giving up (default 200)
jpayne@69 56 --[no]banded Enforce absolute banding of dynamic programming matrix
jpayne@69 57 based on diagdiff parameter EXPERIMENTAL (default no)
jpayne@69 58 -c|mincluster Sets the minimum length of a cluster of matches (default 65)
jpayne@69 59 --[no]delta Toggle the creation of the delta file (default --delta)
jpayne@69 60 --depend Print the dependency information and exit
jpayne@69 61 -D|diagdiff Set the maximum diagonal difference between two adjacent
jpayne@69 62 anchors in a cluster (default 5)
jpayne@69 63 -d|diagfactor Set the maximum diagonal difference between two adjacent
jpayne@69 64 anchors in a cluster as a differential fraction of the gap
jpayne@69 65 length (default 0.12)
jpayne@69 66 --[no]extend Toggle the cluster extension step (default --extend)
jpayne@69 67 -f
jpayne@69 68 --forward Use only the forward strand of the Query sequences
jpayne@69 69 -g|maxgap Set the maximum gap between two adjacent matches in a
jpayne@69 70 cluster (default 90)
jpayne@69 71 -h
jpayne@69 72 --help Display help information and exit
jpayne@69 73 -l|minmatch Set the minimum length of a single match (default 20)
jpayne@69 74 -o
jpayne@69 75 --coords Automatically generate the original NUCmer1.1 coords
jpayne@69 76 output file using the 'show-coords' program
jpayne@69 77 --[no]optimize Toggle alignment score optimization, i.e. if an alignment
jpayne@69 78 extension reaches the end of a sequence, it will backtrack
jpayne@69 79 to optimize the alignment score instead of terminating the
jpayne@69 80 alignment at the end of the sequence (default --optimize)
jpayne@69 81 -p|prefix Set the prefix of the output files (default "out")
jpayne@69 82 -r
jpayne@69 83 --reverse Use only the reverse complement of the Query sequences
jpayne@69 84 --[no]simplify Simplify alignments by removing shadowed clusters. Turn
jpayne@69 85 this option off if aligning a sequence to itself to look
jpayne@69 86 for repeats (default --simplify)
jpayne@69 87 -V
jpayne@69 88 --version Display the version information and exit
jpayne@69 89 ~;
jpayne@69 90
jpayne@69 91
jpayne@69 92 my $USAGE_INFO = q~
jpayne@69 93 USAGE: nucmer [options] <Reference> <Query>
jpayne@69 94 ~;
jpayne@69 95
jpayne@69 96
jpayne@69 97 my @DEPEND_INFO =
jpayne@69 98 (
jpayne@69 99 "$BIN_DIR/mummer",
jpayne@69 100 "$BIN_DIR/mgaps",
jpayne@69 101 "$BIN_DIR/show-coords",
jpayne@69 102 "$AUX_BIN_DIR/postnuc",
jpayne@69 103 "$AUX_BIN_DIR/prenuc",
jpayne@69 104 "$SCRIPT_DIR/Foundation.pm"
jpayne@69 105 );
jpayne@69 106
jpayne@69 107
jpayne@69 108 my %DEFAULT_PARAMETERS =
jpayne@69 109 (
jpayne@69 110 "OUTPUT_PREFIX" => "out", # prefix for all output files
jpayne@69 111 "MATCH_ALGORITHM" => "-mumreference", # match finding algo switch
jpayne@69 112 "MATCH_DIRECTION" => "-b", # match direction switch
jpayne@69 113 "MIN_MATCH" => "20", # minimum match size
jpayne@69 114 "MAX_GAP" => "90", # maximum gap between matches
jpayne@69 115 "MIN_CLUSTER" => "65", # minimum cluster size
jpayne@69 116 "DIAG_DIFF" => "5", # diagonal difference absolute
jpayne@69 117 "DIAG_FACTOR" => ".12", # diagonal difference fraction
jpayne@69 118 "BREAK_LEN" => "200", # extension break length
jpayne@69 119 "POST_SWITCHES" => "" # switches for the post processing
jpayne@69 120 );
jpayne@69 121
jpayne@69 122
jpayne@69 123 sub main ( )
jpayne@69 124 {
jpayne@69 125 my $tigr; # TIGR::Foundation object
jpayne@69 126 my @err; # Error variable
jpayne@69 127
jpayne@69 128 my $ref_file; # path of the reference input file
jpayne@69 129 my $qry_file; # path of the query input file
jpayne@69 130
jpayne@69 131 #-- The command line options for the various programs
jpayne@69 132 my $pfx = $DEFAULT_PARAMETERS { "OUTPUT_PREFIX" };
jpayne@69 133 my $algo = $DEFAULT_PARAMETERS { "MATCH_ALGORITHM" };
jpayne@69 134 my $mdir = $DEFAULT_PARAMETERS { "MATCH_DIRECTION" };
jpayne@69 135 my $size = $DEFAULT_PARAMETERS { "MIN_MATCH" };
jpayne@69 136 my $gap = $DEFAULT_PARAMETERS { "MAX_GAP" };
jpayne@69 137 my $clus = $DEFAULT_PARAMETERS { "MIN_CLUSTER" };
jpayne@69 138 my $ddiff = $DEFAULT_PARAMETERS { "DIAG_DIFF" };
jpayne@69 139 my $dfrac = $DEFAULT_PARAMETERS { "DIAG_FACTOR" };
jpayne@69 140 my $blen = $DEFAULT_PARAMETERS { "BREAK_LEN" };
jpayne@69 141 my $psw = $DEFAULT_PARAMETERS { "POST_SWITCHES" };
jpayne@69 142
jpayne@69 143 my $fwd; # if true, use forward strand
jpayne@69 144 my $rev; # if true, use reverse strand
jpayne@69 145 my $maxmatch; # matching algorithm switches
jpayne@69 146 my $mumreference;
jpayne@69 147 my $mum;
jpayne@69 148 my $banded = 0; # if true, enforce absolute dp banding
jpayne@69 149 my $extend = 1; # if true, extend clusters
jpayne@69 150 my $delta = 1; # if true, create the delta file
jpayne@69 151 my $optimize = 1; # if true, optimize alignment scores
jpayne@69 152 my $simplify = 1; # if true, simplify shadowed alignments
jpayne@69 153
jpayne@69 154 my $generate_coords;
jpayne@69 155
jpayne@69 156 #-- Initialize TIGR::Foundation
jpayne@69 157 $tigr = new TIGR::Foundation;
jpayne@69 158 if ( !defined ($tigr) ) {
jpayne@69 159 print (STDERR "ERROR: TIGR::Foundation could not be initialized");
jpayne@69 160 exit (1);
jpayne@69 161 }
jpayne@69 162
jpayne@69 163 #-- Set help and usage information
jpayne@69 164 $tigr->setHelpInfo ($HELP_INFO);
jpayne@69 165 $tigr->setUsageInfo ($USAGE_INFO);
jpayne@69 166 $tigr->setVersionInfo ($VERSION_INFO);
jpayne@69 167 $tigr->addDependInfo (@DEPEND_INFO);
jpayne@69 168
jpayne@69 169 #-- Get command line parameters
jpayne@69 170 $err[0] = $tigr->TIGR_GetOptions
jpayne@69 171 (
jpayne@69 172 "maxmatch" => \$maxmatch,
jpayne@69 173 "mumcand" => \$mumreference,
jpayne@69 174 "mumreference" => \$mumreference,
jpayne@69 175 "mum" => \$mum,
jpayne@69 176 "b|breaklen=i" => \$blen,
jpayne@69 177 "banded!" => \$banded,
jpayne@69 178 "c|mincluster=i" => \$clus,
jpayne@69 179 "delta!" => \$delta,
jpayne@69 180 "D|diagdiff=i" => \$ddiff,
jpayne@69 181 "d|diagfactor=f" => \$dfrac,
jpayne@69 182 "extend!" => \$extend,
jpayne@69 183 "f|forward" => \$fwd,
jpayne@69 184 "g|maxgap=i" => \$gap,
jpayne@69 185 "l|minmatch=i" => \$size,
jpayne@69 186 "o|coords" => \$generate_coords,
jpayne@69 187 "optimize!" => \$optimize,
jpayne@69 188 "p|prefix=s" => \$pfx,
jpayne@69 189 "r|reverse" => \$rev,
jpayne@69 190 "simplify!" => \$simplify
jpayne@69 191 );
jpayne@69 192
jpayne@69 193
jpayne@69 194 #-- Check if the parsing was successful
jpayne@69 195 if ( $err[0] == 0 || $#ARGV != 1 ) {
jpayne@69 196 $tigr->printUsageInfo( );
jpayne@69 197 print (STDERR "Try '$0 -h' for more information.\n");
jpayne@69 198 exit (1);
jpayne@69 199 }
jpayne@69 200
jpayne@69 201 $ref_file = File::Spec->rel2abs ($ARGV[0]);
jpayne@69 202 $qry_file = File::Spec->rel2abs ($ARGV[1]);
jpayne@69 203
jpayne@69 204 #-- Set up the program parameters
jpayne@69 205 if ( $fwd && $rev ) {
jpayne@69 206 $mdir = "-b";
jpayne@69 207 } elsif ( $fwd ) {
jpayne@69 208 $mdir = "";
jpayne@69 209 } elsif ( $rev ) {
jpayne@69 210 $mdir = "-r";
jpayne@69 211 }
jpayne@69 212 if ( ! $extend ) {
jpayne@69 213 $psw .= "-e ";
jpayne@69 214 }
jpayne@69 215 if ( ! $delta ) {
jpayne@69 216 $psw .= "-d ";
jpayne@69 217 }
jpayne@69 218 if ( ! $optimize ) {
jpayne@69 219 $psw .= "-t ";
jpayne@69 220 }
jpayne@69 221 if ( ! $simplify ) {
jpayne@69 222 $psw .= "-s ";
jpayne@69 223 }
jpayne@69 224
jpayne@69 225 undef (@err);
jpayne@69 226 $err[0] = 0;
jpayne@69 227 if ( $mum ) {
jpayne@69 228 $err[0] ++;
jpayne@69 229 $algo = "-mum";
jpayne@69 230 }
jpayne@69 231 if ( $mumreference ) {
jpayne@69 232 $err[0] ++;
jpayne@69 233 $algo = "-mumreference";
jpayne@69 234 }
jpayne@69 235 if ( $maxmatch ) {
jpayne@69 236 $err[0] ++;
jpayne@69 237 $algo = "-maxmatch";
jpayne@69 238 }
jpayne@69 239 if ( $err[0] > 1 ) {
jpayne@69 240 $tigr->printUsageInfo( );
jpayne@69 241 print (STDERR "ERROR: Multiple matching algorithms selected\n");
jpayne@69 242 print (STDERR "Try '$0 -h' for more information.\n");
jpayne@69 243 exit (1);
jpayne@69 244 }
jpayne@69 245
jpayne@69 246 #-- Set up the program path names
jpayne@69 247 my $algo_path = "$BIN_DIR/mummer";
jpayne@69 248 my $mgaps_path = "$BIN_DIR/mgaps";
jpayne@69 249 my $prenuc_path = "$AUX_BIN_DIR/prenuc";
jpayne@69 250 my $postnuc_path = "$AUX_BIN_DIR/postnuc";
jpayne@69 251 my $showcoords_path = "$BIN_DIR/show-coords";
jpayne@69 252
jpayne@69 253 #-- Check that the files needed are all there and readable/writable
jpayne@69 254 {
jpayne@69 255 undef (@err);
jpayne@69 256 if ( !$tigr->isExecutableFile ($algo_path) ) {
jpayne@69 257 push (@err, $algo_path);
jpayne@69 258 }
jpayne@69 259
jpayne@69 260 if ( !$tigr->isExecutableFile ($mgaps_path) ) {
jpayne@69 261 push (@err, $mgaps_path);
jpayne@69 262 }
jpayne@69 263
jpayne@69 264 if ( !$tigr->isExecutableFile ($prenuc_path) ) {
jpayne@69 265 push (@err, $prenuc_path);
jpayne@69 266 }
jpayne@69 267
jpayne@69 268 if ( !$tigr->isExecutableFile ($postnuc_path) ) {
jpayne@69 269 push (@err, $postnuc_path);
jpayne@69 270 }
jpayne@69 271
jpayne@69 272 if ( !$tigr->isReadableFile ($ref_file) ) {
jpayne@69 273 push (@err, $ref_file);
jpayne@69 274 }
jpayne@69 275
jpayne@69 276 if ( !$tigr->isReadableFile ($qry_file) ) {
jpayne@69 277 push (@err, $qry_file);
jpayne@69 278 }
jpayne@69 279
jpayne@69 280 if ( !$tigr->isCreatableFile ("$pfx.ntref") ) {
jpayne@69 281 if ( !$tigr->isWritableFile ("$pfx.ntref") ) {
jpayne@69 282 push (@err, "$pfx.ntref");
jpayne@69 283 }
jpayne@69 284 }
jpayne@69 285
jpayne@69 286 if ( !$tigr->isCreatableFile ("$pfx.mgaps") ) {
jpayne@69 287 if ( !$tigr->isWritableFile ("$pfx.mgaps") ) {
jpayne@69 288 push (@err, "$pfx.mgaps");
jpayne@69 289 }
jpayne@69 290 }
jpayne@69 291
jpayne@69 292 if ( !$tigr->isCreatableFile ("$pfx.delta") ) {
jpayne@69 293 if ( !$tigr->isWritableFile ("$pfx.delta") ) {
jpayne@69 294 push (@err, "$pfx.delta");
jpayne@69 295 }
jpayne@69 296 }
jpayne@69 297
jpayne@69 298 if ( $generate_coords ) {
jpayne@69 299 if ( !$tigr->isExecutableFile ($showcoords_path) ) {
jpayne@69 300 push (@err, $showcoords_path);
jpayne@69 301 }
jpayne@69 302 if ( !$tigr->isCreatableFile ("$pfx.coords") ) {
jpayne@69 303 if ( !$tigr->isWritableFile ("$pfx.coords") ) {
jpayne@69 304 push (@err, "$pfx.coords");
jpayne@69 305 }
jpayne@69 306 }
jpayne@69 307 }
jpayne@69 308
jpayne@69 309 #-- If 1 or more files could not be processed, terminate script
jpayne@69 310 if ( $#err >= 0 ) {
jpayne@69 311 $tigr->logError
jpayne@69 312 ("ERROR: The following critical files could not be used", 1);
jpayne@69 313 while ( $#err >= 0 ) {
jpayne@69 314 $tigr->logError (pop(@err), 1);
jpayne@69 315 }
jpayne@69 316 $tigr->logError
jpayne@69 317 ("Check your paths and file permissions and try again", 1);
jpayne@69 318 $tigr->bail( );
jpayne@69 319 }
jpayne@69 320 }
jpayne@69 321
jpayne@69 322
jpayne@69 323 #-- Run prenuc and assert return value is zero
jpayne@69 324 print (STDERR "1: PREPARING DATA\n");
jpayne@69 325 $err[0] = $tigr->runCommand
jpayne@69 326 ("$prenuc_path $ref_file > $pfx.ntref");
jpayne@69 327
jpayne@69 328 if ( $err[0] != 0 ) {
jpayne@69 329 $tigr->bail
jpayne@69 330 ("ERROR: prenuc returned non-zero\n");
jpayne@69 331 }
jpayne@69 332
jpayne@69 333
jpayne@69 334 #-- Run mummer | mgaps and assert return value is zero
jpayne@69 335 print (STDERR "2,3: RUNNING mummer AND CREATING CLUSTERS\n");
jpayne@69 336 open(ALGO_PIPE, "$algo_path $algo $mdir -l $size -n $pfx.ntref $qry_file |")
jpayne@69 337 or $tigr->bail ("ERROR: could not open $algo_path output pipe $!");
jpayne@69 338 open(CLUS_PIPE, "| $mgaps_path -l $clus -s $gap -d $ddiff -f $dfrac > $pfx.mgaps")
jpayne@69 339 or $tigr->bail ("ERROR: could not open $mgaps_path input pipe $!");
jpayne@69 340 while ( <ALGO_PIPE> ) {
jpayne@69 341 print CLUS_PIPE
jpayne@69 342 or $tigr->bail ("ERROR: could not write to $mgaps_path pipe $!");
jpayne@69 343 }
jpayne@69 344 $err[0] = close(ALGO_PIPE);
jpayne@69 345 $err[1] = close(CLUS_PIPE);
jpayne@69 346
jpayne@69 347 if ( $err[0] == 0 || $err[1] == 0 ) {
jpayne@69 348 $tigr->bail ("ERROR: mummer and/or mgaps returned non-zero\n");
jpayne@69 349 }
jpayne@69 350
jpayne@69 351
jpayne@69 352 #-- Run postnuc and assert return value is zero
jpayne@69 353 print (STDERR "4: FINISHING DATA\n");
jpayne@69 354 if ( $banded )
jpayne@69 355 {
jpayne@69 356 $err[0] = $tigr->runCommand
jpayne@69 357 ("$postnuc_path $psw -b $blen -B $ddiff $ref_file $qry_file $pfx < $pfx.mgaps");
jpayne@69 358 }
jpayne@69 359 else
jpayne@69 360 {
jpayne@69 361 $err[0] = $tigr->runCommand
jpayne@69 362 ("$postnuc_path $psw -b $blen $ref_file $qry_file $pfx < $pfx.mgaps");
jpayne@69 363 }
jpayne@69 364
jpayne@69 365 if ( $err[0] != 0 ) {
jpayne@69 366 $tigr->bail ("ERROR: postnuc returned non-zero\n");
jpayne@69 367 }
jpayne@69 368
jpayne@69 369 #-- If the -o flag was set, run show-coords using NUCmer1.1 settings
jpayne@69 370 if ( $generate_coords ) {
jpayne@69 371 print (STDERR "5: GENERATING COORDS FILE\n");
jpayne@69 372 $err[0] = $tigr->runCommand
jpayne@69 373 ("$showcoords_path -r $pfx.delta > $pfx.coords");
jpayne@69 374
jpayne@69 375 if ( $err[0] != 0 ) {
jpayne@69 376 $tigr->bail ("ERROR: show-coords returned non-zero\n");
jpayne@69 377 }
jpayne@69 378 }
jpayne@69 379
jpayne@69 380 #-- Remove the temporary output
jpayne@69 381 $err[0] = unlink ("$pfx.ntref", "$pfx.mgaps");
jpayne@69 382
jpayne@69 383 if ( $err[0] != 2 ) {
jpayne@69 384 $tigr->logError ("WARNING: there was a problem deleting".
jpayne@69 385 " the temporary output files", 1);
jpayne@69 386 }
jpayne@69 387
jpayne@69 388 #-- Return success
jpayne@69 389 return (0);
jpayne@69 390 }
jpayne@69 391
jpayne@69 392 exit ( main ( ) );
jpayne@69 393
jpayne@69 394 #-- END OF SCRIPT