annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/promer @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 #!/usr/bin/env perl
jpayne@69 2
jpayne@69 3 #-------------------------------------------------------------------------------
jpayne@69 4 # Programmer: Adam M Phillippy, The Institute for Genomic Research
jpayne@69 5 # File: promer
jpayne@69 6 # Date: 04 / 09 / 03
jpayne@69 7 #
jpayne@69 8 # Usage:
jpayne@69 9 # promer [options] <Reference> <Query>
jpayne@69 10 #
jpayne@69 11 # Try 'promer -h' for more information.
jpayne@69 12 #
jpayne@69 13 # Purpose: To create alignments between two multi-FASTA inputs by using
jpayne@69 14 # the MUMmer matching and clustering algorithms.
jpayne@69 15 #
jpayne@69 16 #-------------------------------------------------------------------------------
jpayne@69 17
jpayne@69 18 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
jpayne@69 19 use Foundation;
jpayne@69 20 use File::Spec::Functions;
jpayne@69 21 use strict;
jpayne@69 22
jpayne@69 23 my $AUX_BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/aux_bin";
jpayne@69 24 my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23";
jpayne@69 25 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
jpayne@69 26
jpayne@69 27
jpayne@69 28
jpayne@69 29 my $VERSION_INFO = q~
jpayne@69 30 PROmer (PROtein MUMmer) version 3.07
jpayne@69 31 ~;
jpayne@69 32
jpayne@69 33
jpayne@69 34
jpayne@69 35 my $HELP_INFO = q~
jpayne@69 36 USAGE: promer [options] <Reference> <Query>
jpayne@69 37
jpayne@69 38 DESCRIPTION:
jpayne@69 39 promer generates amino acid alignments between two mutli-FASTA DNA input
jpayne@69 40 files. The out.delta output file lists the distance between insertions
jpayne@69 41 and deletions that produce maximal scoring alignments between each
jpayne@69 42 sequence. The show-* utilities know how to read this format. The DNA
jpayne@69 43 input is translated into all 6 reading frames in order to generate the
jpayne@69 44 output, but the output coordinates reference the original DNA input.
jpayne@69 45
jpayne@69 46 MANDATORY:
jpayne@69 47 Reference Set the input reference multi-FASTA DNA file
jpayne@69 48 Query Set the input query multi-FASTA DNA file
jpayne@69 49
jpayne@69 50 OPTIONS:
jpayne@69 51 --mum Use anchor matches that are unique in both the reference
jpayne@69 52 and query
jpayne@69 53 --mumcand Same as --mumreference
jpayne@69 54 --mumreference Use anchor matches that are unique in in the reference
jpayne@69 55 but not necessarily unique in the query (default behavior)
jpayne@69 56 --maxmatch Use all anchor matches regardless of their uniqueness
jpayne@69 57
jpayne@69 58 -b|breaklen Set the distance an alignment extension will attempt to
jpayne@69 59 extend poor scoring regions before giving up, measured in
jpayne@69 60 amino acids (default 60)
jpayne@69 61 -c|mincluster Sets the minimum length of a cluster of matches, measured in
jpayne@69 62 amino acids (default 20)
jpayne@69 63 --[no]delta Toggle the creation of the delta file (default --delta)
jpayne@69 64 --depend Print the dependency information and exit
jpayne@69 65 -d|diagfactor Set the clustering diagonal difference separation factor
jpayne@69 66 (default .11)
jpayne@69 67 --[no]extend Toggle the cluster extension step (default --extend)
jpayne@69 68 -g|maxgap Set the maximum gap between two adjacent matches in a
jpayne@69 69 cluster, measured in amino acids (default 30)
jpayne@69 70 -h
jpayne@69 71 --help Display help information and exit.
jpayne@69 72 -l|minmatch Set the minimum length of a single match, measured in amino
jpayne@69 73 acids (default 6)
jpayne@69 74 -m|masklen Set the maximum bookend masking lenth, measured in amino
jpayne@69 75 acids (default 8)
jpayne@69 76 -o
jpayne@69 77 --coords Automatically generate the original PROmer1.1 ".coords"
jpayne@69 78 output file using the "show-coords" program
jpayne@69 79 --[no]optimize Toggle alignment score optimization, i.e. if an alignment
jpayne@69 80 extension reaches the end of a sequence, it will backtrack
jpayne@69 81 to optimize the alignment score instead of terminating the
jpayne@69 82 alignment at the end of the sequence (default --optimize)
jpayne@69 83
jpayne@69 84 -p|prefix Set the prefix of the output files (default "out")
jpayne@69 85 -V
jpayne@69 86 --version Display the version information and exit
jpayne@69 87 -x|matrix Set the alignment matrix number to 1 [BLOSUM 45], 2 [BLOSUM
jpayne@69 88 62] or 3 [BLOSUM 80] (default 2)
jpayne@69 89 ~;
jpayne@69 90
jpayne@69 91
jpayne@69 92 my $USAGE_INFO = q~
jpayne@69 93 USAGE: promer [options] <Reference> <Query>
jpayne@69 94 ~;
jpayne@69 95
jpayne@69 96
jpayne@69 97 my @DEPEND_INFO =
jpayne@69 98 (
jpayne@69 99 "$BIN_DIR/mummer",
jpayne@69 100 "$BIN_DIR/mgaps",
jpayne@69 101 "$BIN_DIR/show-coords",
jpayne@69 102 "$AUX_BIN_DIR/postpro",
jpayne@69 103 "$AUX_BIN_DIR/prepro",
jpayne@69 104 "$SCRIPT_DIR/Foundation.pm"
jpayne@69 105 );
jpayne@69 106
jpayne@69 107
jpayne@69 108 my %DEFAULT_PARAMETERS =
jpayne@69 109 (
jpayne@69 110 "OUTPUT_PREFIX" => "out", # prefix for all output files
jpayne@69 111 "MATCH_ALGORITHM" => "-mumreference", # match finding algo switch
jpayne@69 112 "MIN_MATCH" => "6", # minimum match size (aminos)
jpayne@69 113 "MAX_GAP" => "30", # maximum gap between matches (aminos)
jpayne@69 114 "MIN_CLUSTER" => "20", # minimum cluster size (aminos)
jpayne@69 115 "DIAG_FACTOR" => ".11", # diagonal difference fraction
jpayne@69 116 "BREAK_LEN" => "60", # extension break length
jpayne@69 117 "BLOSUM_NUMBER" => "2", # options are 1,2,3 (BLOSUM 45,62,80)
jpayne@69 118 "MASKING_LENGTH" => "8", # set bookend masking length
jpayne@69 119 "POST_SWITCHES" => "" # switches for the post processing
jpayne@69 120 );
jpayne@69 121
jpayne@69 122
jpayne@69 123 sub main ( )
jpayne@69 124 {
jpayne@69 125 my $tigr; # TIGR::Foundation object
jpayne@69 126 my @err; # Error variable
jpayne@69 127
jpayne@69 128 my $ref_file; # path of the reference input file
jpayne@69 129 my $qry_file; # path of the query input file
jpayne@69 130
jpayne@69 131 #-- The command line options for the various programs
jpayne@69 132 my $pfx = $DEFAULT_PARAMETERS { "OUTPUT_PREFIX" };
jpayne@69 133 my $algo = $DEFAULT_PARAMETERS { "MATCH_ALGORITHM" };
jpayne@69 134 my $size = $DEFAULT_PARAMETERS { "MIN_MATCH" };
jpayne@69 135 my $gap = $DEFAULT_PARAMETERS { "MAX_GAP" };
jpayne@69 136 my $clus = $DEFAULT_PARAMETERS { "MIN_CLUSTER" };
jpayne@69 137 my $diff = $DEFAULT_PARAMETERS { "DIAG_FACTOR" };
jpayne@69 138 my $blen = $DEFAULT_PARAMETERS { "BREAK_LEN" };
jpayne@69 139 my $blsm = $DEFAULT_PARAMETERS { "BLOSUM_NUMBER" };
jpayne@69 140 my $mask = $DEFAULT_PARAMETERS { "MASKING_LENGTH" };
jpayne@69 141 my $psw = $DEFAULT_PARAMETERS { "POST_SWITCHES" };
jpayne@69 142
jpayne@69 143 my $maxmatch; # matching algorithm switches
jpayne@69 144 my $mumreference;
jpayne@69 145 my $mum;
jpayne@69 146 my $extend = 1; # if true, extend clusters
jpayne@69 147 my $delta = 1; # if true, create the delta file
jpayne@69 148 my $optimize = 1; # if true, optimize alignment scores
jpayne@69 149
jpayne@69 150 my $generate_coords;
jpayne@69 151
jpayne@69 152 #-- Initialize TIGR::Foundation
jpayne@69 153 $tigr = new TIGR::Foundation;
jpayne@69 154 if ( !defined ($tigr) ) {
jpayne@69 155 print (STDERR "ERROR: TIGR::Foundation could not be initialized");
jpayne@69 156 exit (1);
jpayne@69 157 }
jpayne@69 158
jpayne@69 159 #-- Set help and usage information
jpayne@69 160 $tigr->setHelpInfo ($HELP_INFO);
jpayne@69 161 $tigr->setUsageInfo ($USAGE_INFO);
jpayne@69 162 $tigr->setVersionInfo ($VERSION_INFO);
jpayne@69 163 $tigr->addDependInfo (@DEPEND_INFO);
jpayne@69 164
jpayne@69 165 #-- Get command line parameters
jpayne@69 166 $err[0] = $tigr->TIGR_GetOptions
jpayne@69 167 (
jpayne@69 168 "maxmatch" => \$maxmatch,
jpayne@69 169 "mumcand" => \$mumreference,
jpayne@69 170 "mumreference" => \$mumreference,
jpayne@69 171 "mum" => \$mum,
jpayne@69 172 "b|breaklen=i" => \$blen,
jpayne@69 173 "c|mincluster=i" => \$clus,
jpayne@69 174 "delta!" => \$delta,
jpayne@69 175 "d|diagfactor=f" => \$diff,
jpayne@69 176 "extend!" => \$extend,
jpayne@69 177 "g|maxgap=i" => \$gap,
jpayne@69 178 "l|minmatch=i" => \$size,
jpayne@69 179 "m|masklen=i" => \$mask,
jpayne@69 180 "o|coords" => \$generate_coords,
jpayne@69 181 "optimize!" => \$optimize,
jpayne@69 182 "p|prefix=s" => \$pfx,
jpayne@69 183 "x|matrix=i" => \$blsm
jpayne@69 184 );
jpayne@69 185
jpayne@69 186 #-- Check if the parsing was successful
jpayne@69 187 if ( $err[0] == 0 || $#ARGV != 1 ) {
jpayne@69 188 $tigr->printUsageInfo( );
jpayne@69 189 print (STDERR "Try '$0 -h' for more information.\n");
jpayne@69 190 exit (1);
jpayne@69 191 }
jpayne@69 192
jpayne@69 193 $ref_file = File::Spec->rel2abs ($ARGV[0]);
jpayne@69 194 $qry_file = File::Spec->rel2abs ($ARGV[1]);
jpayne@69 195
jpayne@69 196 #-- Set up the program parameters
jpayne@69 197 if ( ! $extend ) {
jpayne@69 198 $psw .= "-e ";
jpayne@69 199 }
jpayne@69 200 if ( ! $delta ) {
jpayne@69 201 $psw .= "-d ";
jpayne@69 202 }
jpayne@69 203 if ( ! $optimize ) {
jpayne@69 204 $psw .= "-t ";
jpayne@69 205 }
jpayne@69 206
jpayne@69 207 undef (@err);
jpayne@69 208 $err[0] = 0;
jpayne@69 209 if ( $mum ) {
jpayne@69 210 $err[0] ++;
jpayne@69 211 $algo = "-mum";
jpayne@69 212 }
jpayne@69 213 if ( $mumreference ) {
jpayne@69 214 $err[0] ++;
jpayne@69 215 $algo = "-mumreference";
jpayne@69 216 }
jpayne@69 217 if ( $maxmatch ) {
jpayne@69 218 $err[0] ++;
jpayne@69 219 $algo = "-maxmatch";
jpayne@69 220 }
jpayne@69 221 if ( $err[0] > 1 ) {
jpayne@69 222 $tigr->printUsageInfo( );
jpayne@69 223 print (STDERR "ERROR: Multiple matching algorithms selected\n");
jpayne@69 224 print (STDERR "Try '$0 -h' for more information.\n");
jpayne@69 225 exit (1);
jpayne@69 226 }
jpayne@69 227
jpayne@69 228 #-- Set up the program path names
jpayne@69 229 my $algo_path = "$BIN_DIR/mummer";
jpayne@69 230 my $mgaps_path = "$BIN_DIR/mgaps";
jpayne@69 231 my $prepro_path = "$AUX_BIN_DIR/prepro";
jpayne@69 232 my $postpro_path = "$AUX_BIN_DIR/postpro";
jpayne@69 233 my $showcoords_path = "$BIN_DIR/show-coords";
jpayne@69 234
jpayne@69 235 #-- Check that the files needed are all there and readable/writable
jpayne@69 236 {
jpayne@69 237 undef (@err);
jpayne@69 238 if ( !$tigr->isExecutableFile ($algo_path) ) {
jpayne@69 239 push (@err, $algo_path);
jpayne@69 240 }
jpayne@69 241
jpayne@69 242 if ( !$tigr->isExecutableFile ($mgaps_path) ) {
jpayne@69 243 push (@err, $mgaps_path);
jpayne@69 244 }
jpayne@69 245
jpayne@69 246 if ( !$tigr->isExecutableFile ($prepro_path) ) {
jpayne@69 247 push (@err, $prepro_path);
jpayne@69 248 }
jpayne@69 249
jpayne@69 250 if ( !$tigr->isExecutableFile ($postpro_path) ) {
jpayne@69 251 push (@err, $postpro_path);
jpayne@69 252 }
jpayne@69 253
jpayne@69 254 if ( !$tigr->isReadableFile ($ref_file) ) {
jpayne@69 255 push (@err, $ref_file);
jpayne@69 256 }
jpayne@69 257
jpayne@69 258 if ( !$tigr->isReadableFile ($qry_file) ) {
jpayne@69 259 push (@err, $qry_file);
jpayne@69 260 }
jpayne@69 261
jpayne@69 262 if ( !$tigr->isCreatableFile ("$pfx.aaref") ) {
jpayne@69 263 if ( !$tigr->isWritableFile ("$pfx.aaref") ) {
jpayne@69 264 push (@err, "$pfx.aaref");
jpayne@69 265 }
jpayne@69 266 }
jpayne@69 267
jpayne@69 268 if ( !$tigr->isCreatableFile ("$pfx.aaqry") ) {
jpayne@69 269 if ( !$tigr->isWritableFile ("$pfx.aaqry") ) {
jpayne@69 270 push (@err, "$pfx.aaqry");
jpayne@69 271 }
jpayne@69 272 }
jpayne@69 273
jpayne@69 274 if ( !$tigr->isCreatableFile ("$pfx.mgaps") ) {
jpayne@69 275 if ( !$tigr->isWritableFile ("$pfx.mgaps") ) {
jpayne@69 276 push (@err, "$pfx.mgaps");
jpayne@69 277 }
jpayne@69 278 }
jpayne@69 279
jpayne@69 280 if ( !$tigr->isCreatableFile ("$pfx.delta") ) {
jpayne@69 281 if ( !$tigr->isWritableFile ("$pfx.delta") ) {
jpayne@69 282 push (@err, "$pfx.delta");
jpayne@69 283 }
jpayne@69 284 }
jpayne@69 285
jpayne@69 286 if ( $generate_coords ) {
jpayne@69 287 if ( !$tigr->isExecutableFile ($showcoords_path) ) {
jpayne@69 288 push (@err, $showcoords_path);
jpayne@69 289 }
jpayne@69 290 if ( !$tigr->isCreatableFile ("$pfx.coords") ) {
jpayne@69 291 if ( !$tigr->isWritableFile ("$pfx.coords") ) {
jpayne@69 292 push (@err, "$pfx.coords");
jpayne@69 293 }
jpayne@69 294 }
jpayne@69 295 }
jpayne@69 296
jpayne@69 297 #-- If 1 or more files could not be processed, terminate script
jpayne@69 298 if ( $#err >= 0 ) {
jpayne@69 299 $tigr->logError
jpayne@69 300 ("ERROR: The following critical files could not be used", 1);
jpayne@69 301 while ( $#err >= 0 ) {
jpayne@69 302 $tigr->logError (pop(@err), 1);
jpayne@69 303 }
jpayne@69 304 $tigr->logError
jpayne@69 305 ("Check your paths and file permissions and try again", 1);
jpayne@69 306 $tigr->bail( );
jpayne@69 307 }
jpayne@69 308 }
jpayne@69 309
jpayne@69 310
jpayne@69 311 #-- Run prepro -r and -q and assert return value is zero
jpayne@69 312 print (STDERR "1: PREPARING DATA\n");
jpayne@69 313 $err[0] = $tigr->runCommand
jpayne@69 314 ("$prepro_path -m $mask -r $ref_file > $pfx.aaref");
jpayne@69 315
jpayne@69 316 if ( $err[0] != 0 ) {
jpayne@69 317 $tigr->bail
jpayne@69 318 ("ERROR: prepro -r returned non-zero\n");
jpayne@69 319 }
jpayne@69 320
jpayne@69 321 $err[0] = $tigr->runCommand
jpayne@69 322 ("$prepro_path -m $mask -q $qry_file > $pfx.aaqry");
jpayne@69 323
jpayne@69 324 if ( $err[0] != 0 ) {
jpayne@69 325 $tigr->bail ("ERROR: prepro -q returned non-zero\n");
jpayne@69 326 }
jpayne@69 327
jpayne@69 328
jpayne@69 329 #-- Run mummer | mgaps and assert return value is zero
jpayne@69 330 print (STDERR "2,3: RUNNING mummer AND CREATING CLUSTERS\n");
jpayne@69 331 open(ALGO_PIPE, "$algo_path $algo -l $size $pfx.aaref $pfx.aaqry |")
jpayne@69 332 or $tigr->bail ("ERROR: could not open $algo_path output pipe $!");
jpayne@69 333 open(CLUS_PIPE, "| $mgaps_path -l $clus -s $gap -f $diff > $pfx.mgaps")
jpayne@69 334 or $tigr->bail ("ERROR: could not open $mgaps_path input pipe $!");
jpayne@69 335 while ( <ALGO_PIPE> ) {
jpayne@69 336 print CLUS_PIPE
jpayne@69 337 or $tigr->bail ("ERROR: could not write to $mgaps_path pipe $!");
jpayne@69 338 }
jpayne@69 339 $err[0] = close(ALGO_PIPE);
jpayne@69 340 $err[1] = close(CLUS_PIPE);
jpayne@69 341
jpayne@69 342 if ( $err[0] == 0 || $err[1] == 0 ) {
jpayne@69 343 $tigr->bail ("ERROR: mummer and/or mgaps returned non-zero\n");
jpayne@69 344 }
jpayne@69 345
jpayne@69 346
jpayne@69 347 #-- Run postpro and assert return value is zero
jpayne@69 348 print (STDERR "4: FINISHING DATA\n");
jpayne@69 349 $err[0] = $tigr->runCommand
jpayne@69 350 ("$postpro_path $psw -x $blsm -b $blen ".
jpayne@69 351 "$ref_file $qry_file $pfx < $pfx.mgaps");
jpayne@69 352
jpayne@69 353 if ( $err[0] != 0 ) {
jpayne@69 354 $tigr->bail ("ERROR: postpro returned non-zero\n");
jpayne@69 355 }
jpayne@69 356
jpayne@69 357 #-- If the -o flag was set, run show-coords using PROmer1.1 settings
jpayne@69 358 if ( $generate_coords ) {
jpayne@69 359 print (STDERR "5: GENERATING COORDS FILE\n");
jpayne@69 360 $err[0] = $tigr->runCommand
jpayne@69 361 ("$showcoords_path -r $pfx.delta > $pfx.coords");
jpayne@69 362
jpayne@69 363 if ( $err[0] != 0 ) {
jpayne@69 364 $tigr->bail ("ERROR: show-coords returned non-zero\n");
jpayne@69 365 }
jpayne@69 366 }
jpayne@69 367
jpayne@69 368 #-- Remove the temporary output
jpayne@69 369 $err[0] = unlink ("$pfx.aaref", "$pfx.aaqry", "$pfx.mgaps");
jpayne@69 370
jpayne@69 371 if ( $err[0] != 3 ) {
jpayne@69 372 $tigr->logError ("WARNING: there was a problem deleting".
jpayne@69 373 " the temporary output files", 1);
jpayne@69 374 }
jpayne@69 375
jpayne@69 376 #-- Return success
jpayne@69 377 return (0);
jpayne@69 378 }
jpayne@69 379
jpayne@69 380 exit ( main ( ) );
jpayne@69 381
jpayne@69 382 #-- END OF SCRIPT