comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/nucmer @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 #!/usr/bin/env perl
2
3 #-------------------------------------------------------------------------------
4 # Programmer: Adam M Phillippy, The Institute for Genomic Research
5 # File: nucmer
6 # Date: 04 / 09 / 03
7 #
8 # Usage:
9 # nucmer [options] <Reference> <Query>
10 #
11 # Try 'nucmer -h' for more information.
12 #
13 # Purpose: To create alignments between two multi-FASTA inputs by using
14 # the MUMmer matching and clustering algorithms.
15 #
16 #-------------------------------------------------------------------------------
17
18 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
19 use Foundation;
20 use File::Spec::Functions;
21 use strict;
22
23 my $AUX_BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/aux_bin";
24 my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23";
25 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
26
27
28 my $VERSION_INFO = q~
29 NUCmer (NUCleotide MUMmer) version 3.1
30 ~;
31
32
33 my $HELP_INFO = q~
34 USAGE: nucmer [options] <Reference> <Query>
35
36 DESCRIPTION:
37 nucmer generates nucleotide alignments between two mutli-FASTA input
38 files. The out.delta output file lists the distance between insertions
39 and deletions that produce maximal scoring alignments between each
40 sequence. The show-* utilities know how to read this format.
41
42 MANDATORY:
43 Reference Set the input reference multi-FASTA filename
44 Query Set the input query multi-FASTA filename
45
46 OPTIONS:
47 --mum Use anchor matches that are unique in both the reference
48 and query
49 --mumcand Same as --mumreference
50 --mumreference Use anchor matches that are unique in in the reference
51 but not necessarily unique in the query (default behavior)
52 --maxmatch Use all anchor matches regardless of their uniqueness
53
54 -b|breaklen Set the distance an alignment extension will attempt to
55 extend poor scoring regions before giving up (default 200)
56 --[no]banded Enforce absolute banding of dynamic programming matrix
57 based on diagdiff parameter EXPERIMENTAL (default no)
58 -c|mincluster Sets the minimum length of a cluster of matches (default 65)
59 --[no]delta Toggle the creation of the delta file (default --delta)
60 --depend Print the dependency information and exit
61 -D|diagdiff Set the maximum diagonal difference between two adjacent
62 anchors in a cluster (default 5)
63 -d|diagfactor Set the maximum diagonal difference between two adjacent
64 anchors in a cluster as a differential fraction of the gap
65 length (default 0.12)
66 --[no]extend Toggle the cluster extension step (default --extend)
67 -f
68 --forward Use only the forward strand of the Query sequences
69 -g|maxgap Set the maximum gap between two adjacent matches in a
70 cluster (default 90)
71 -h
72 --help Display help information and exit
73 -l|minmatch Set the minimum length of a single match (default 20)
74 -o
75 --coords Automatically generate the original NUCmer1.1 coords
76 output file using the 'show-coords' program
77 --[no]optimize Toggle alignment score optimization, i.e. if an alignment
78 extension reaches the end of a sequence, it will backtrack
79 to optimize the alignment score instead of terminating the
80 alignment at the end of the sequence (default --optimize)
81 -p|prefix Set the prefix of the output files (default "out")
82 -r
83 --reverse Use only the reverse complement of the Query sequences
84 --[no]simplify Simplify alignments by removing shadowed clusters. Turn
85 this option off if aligning a sequence to itself to look
86 for repeats (default --simplify)
87 -V
88 --version Display the version information and exit
89 ~;
90
91
92 my $USAGE_INFO = q~
93 USAGE: nucmer [options] <Reference> <Query>
94 ~;
95
96
97 my @DEPEND_INFO =
98 (
99 "$BIN_DIR/mummer",
100 "$BIN_DIR/mgaps",
101 "$BIN_DIR/show-coords",
102 "$AUX_BIN_DIR/postnuc",
103 "$AUX_BIN_DIR/prenuc",
104 "$SCRIPT_DIR/Foundation.pm"
105 );
106
107
108 my %DEFAULT_PARAMETERS =
109 (
110 "OUTPUT_PREFIX" => "out", # prefix for all output files
111 "MATCH_ALGORITHM" => "-mumreference", # match finding algo switch
112 "MATCH_DIRECTION" => "-b", # match direction switch
113 "MIN_MATCH" => "20", # minimum match size
114 "MAX_GAP" => "90", # maximum gap between matches
115 "MIN_CLUSTER" => "65", # minimum cluster size
116 "DIAG_DIFF" => "5", # diagonal difference absolute
117 "DIAG_FACTOR" => ".12", # diagonal difference fraction
118 "BREAK_LEN" => "200", # extension break length
119 "POST_SWITCHES" => "" # switches for the post processing
120 );
121
122
123 sub main ( )
124 {
125 my $tigr; # TIGR::Foundation object
126 my @err; # Error variable
127
128 my $ref_file; # path of the reference input file
129 my $qry_file; # path of the query input file
130
131 #-- The command line options for the various programs
132 my $pfx = $DEFAULT_PARAMETERS { "OUTPUT_PREFIX" };
133 my $algo = $DEFAULT_PARAMETERS { "MATCH_ALGORITHM" };
134 my $mdir = $DEFAULT_PARAMETERS { "MATCH_DIRECTION" };
135 my $size = $DEFAULT_PARAMETERS { "MIN_MATCH" };
136 my $gap = $DEFAULT_PARAMETERS { "MAX_GAP" };
137 my $clus = $DEFAULT_PARAMETERS { "MIN_CLUSTER" };
138 my $ddiff = $DEFAULT_PARAMETERS { "DIAG_DIFF" };
139 my $dfrac = $DEFAULT_PARAMETERS { "DIAG_FACTOR" };
140 my $blen = $DEFAULT_PARAMETERS { "BREAK_LEN" };
141 my $psw = $DEFAULT_PARAMETERS { "POST_SWITCHES" };
142
143 my $fwd; # if true, use forward strand
144 my $rev; # if true, use reverse strand
145 my $maxmatch; # matching algorithm switches
146 my $mumreference;
147 my $mum;
148 my $banded = 0; # if true, enforce absolute dp banding
149 my $extend = 1; # if true, extend clusters
150 my $delta = 1; # if true, create the delta file
151 my $optimize = 1; # if true, optimize alignment scores
152 my $simplify = 1; # if true, simplify shadowed alignments
153
154 my $generate_coords;
155
156 #-- Initialize TIGR::Foundation
157 $tigr = new TIGR::Foundation;
158 if ( !defined ($tigr) ) {
159 print (STDERR "ERROR: TIGR::Foundation could not be initialized");
160 exit (1);
161 }
162
163 #-- Set help and usage information
164 $tigr->setHelpInfo ($HELP_INFO);
165 $tigr->setUsageInfo ($USAGE_INFO);
166 $tigr->setVersionInfo ($VERSION_INFO);
167 $tigr->addDependInfo (@DEPEND_INFO);
168
169 #-- Get command line parameters
170 $err[0] = $tigr->TIGR_GetOptions
171 (
172 "maxmatch" => \$maxmatch,
173 "mumcand" => \$mumreference,
174 "mumreference" => \$mumreference,
175 "mum" => \$mum,
176 "b|breaklen=i" => \$blen,
177 "banded!" => \$banded,
178 "c|mincluster=i" => \$clus,
179 "delta!" => \$delta,
180 "D|diagdiff=i" => \$ddiff,
181 "d|diagfactor=f" => \$dfrac,
182 "extend!" => \$extend,
183 "f|forward" => \$fwd,
184 "g|maxgap=i" => \$gap,
185 "l|minmatch=i" => \$size,
186 "o|coords" => \$generate_coords,
187 "optimize!" => \$optimize,
188 "p|prefix=s" => \$pfx,
189 "r|reverse" => \$rev,
190 "simplify!" => \$simplify
191 );
192
193
194 #-- Check if the parsing was successful
195 if ( $err[0] == 0 || $#ARGV != 1 ) {
196 $tigr->printUsageInfo( );
197 print (STDERR "Try '$0 -h' for more information.\n");
198 exit (1);
199 }
200
201 $ref_file = File::Spec->rel2abs ($ARGV[0]);
202 $qry_file = File::Spec->rel2abs ($ARGV[1]);
203
204 #-- Set up the program parameters
205 if ( $fwd && $rev ) {
206 $mdir = "-b";
207 } elsif ( $fwd ) {
208 $mdir = "";
209 } elsif ( $rev ) {
210 $mdir = "-r";
211 }
212 if ( ! $extend ) {
213 $psw .= "-e ";
214 }
215 if ( ! $delta ) {
216 $psw .= "-d ";
217 }
218 if ( ! $optimize ) {
219 $psw .= "-t ";
220 }
221 if ( ! $simplify ) {
222 $psw .= "-s ";
223 }
224
225 undef (@err);
226 $err[0] = 0;
227 if ( $mum ) {
228 $err[0] ++;
229 $algo = "-mum";
230 }
231 if ( $mumreference ) {
232 $err[0] ++;
233 $algo = "-mumreference";
234 }
235 if ( $maxmatch ) {
236 $err[0] ++;
237 $algo = "-maxmatch";
238 }
239 if ( $err[0] > 1 ) {
240 $tigr->printUsageInfo( );
241 print (STDERR "ERROR: Multiple matching algorithms selected\n");
242 print (STDERR "Try '$0 -h' for more information.\n");
243 exit (1);
244 }
245
246 #-- Set up the program path names
247 my $algo_path = "$BIN_DIR/mummer";
248 my $mgaps_path = "$BIN_DIR/mgaps";
249 my $prenuc_path = "$AUX_BIN_DIR/prenuc";
250 my $postnuc_path = "$AUX_BIN_DIR/postnuc";
251 my $showcoords_path = "$BIN_DIR/show-coords";
252
253 #-- Check that the files needed are all there and readable/writable
254 {
255 undef (@err);
256 if ( !$tigr->isExecutableFile ($algo_path) ) {
257 push (@err, $algo_path);
258 }
259
260 if ( !$tigr->isExecutableFile ($mgaps_path) ) {
261 push (@err, $mgaps_path);
262 }
263
264 if ( !$tigr->isExecutableFile ($prenuc_path) ) {
265 push (@err, $prenuc_path);
266 }
267
268 if ( !$tigr->isExecutableFile ($postnuc_path) ) {
269 push (@err, $postnuc_path);
270 }
271
272 if ( !$tigr->isReadableFile ($ref_file) ) {
273 push (@err, $ref_file);
274 }
275
276 if ( !$tigr->isReadableFile ($qry_file) ) {
277 push (@err, $qry_file);
278 }
279
280 if ( !$tigr->isCreatableFile ("$pfx.ntref") ) {
281 if ( !$tigr->isWritableFile ("$pfx.ntref") ) {
282 push (@err, "$pfx.ntref");
283 }
284 }
285
286 if ( !$tigr->isCreatableFile ("$pfx.mgaps") ) {
287 if ( !$tigr->isWritableFile ("$pfx.mgaps") ) {
288 push (@err, "$pfx.mgaps");
289 }
290 }
291
292 if ( !$tigr->isCreatableFile ("$pfx.delta") ) {
293 if ( !$tigr->isWritableFile ("$pfx.delta") ) {
294 push (@err, "$pfx.delta");
295 }
296 }
297
298 if ( $generate_coords ) {
299 if ( !$tigr->isExecutableFile ($showcoords_path) ) {
300 push (@err, $showcoords_path);
301 }
302 if ( !$tigr->isCreatableFile ("$pfx.coords") ) {
303 if ( !$tigr->isWritableFile ("$pfx.coords") ) {
304 push (@err, "$pfx.coords");
305 }
306 }
307 }
308
309 #-- If 1 or more files could not be processed, terminate script
310 if ( $#err >= 0 ) {
311 $tigr->logError
312 ("ERROR: The following critical files could not be used", 1);
313 while ( $#err >= 0 ) {
314 $tigr->logError (pop(@err), 1);
315 }
316 $tigr->logError
317 ("Check your paths and file permissions and try again", 1);
318 $tigr->bail( );
319 }
320 }
321
322
323 #-- Run prenuc and assert return value is zero
324 print (STDERR "1: PREPARING DATA\n");
325 $err[0] = $tigr->runCommand
326 ("$prenuc_path $ref_file > $pfx.ntref");
327
328 if ( $err[0] != 0 ) {
329 $tigr->bail
330 ("ERROR: prenuc returned non-zero\n");
331 }
332
333
334 #-- Run mummer | mgaps and assert return value is zero
335 print (STDERR "2,3: RUNNING mummer AND CREATING CLUSTERS\n");
336 open(ALGO_PIPE, "$algo_path $algo $mdir -l $size -n $pfx.ntref $qry_file |")
337 or $tigr->bail ("ERROR: could not open $algo_path output pipe $!");
338 open(CLUS_PIPE, "| $mgaps_path -l $clus -s $gap -d $ddiff -f $dfrac > $pfx.mgaps")
339 or $tigr->bail ("ERROR: could not open $mgaps_path input pipe $!");
340 while ( <ALGO_PIPE> ) {
341 print CLUS_PIPE
342 or $tigr->bail ("ERROR: could not write to $mgaps_path pipe $!");
343 }
344 $err[0] = close(ALGO_PIPE);
345 $err[1] = close(CLUS_PIPE);
346
347 if ( $err[0] == 0 || $err[1] == 0 ) {
348 $tigr->bail ("ERROR: mummer and/or mgaps returned non-zero\n");
349 }
350
351
352 #-- Run postnuc and assert return value is zero
353 print (STDERR "4: FINISHING DATA\n");
354 if ( $banded )
355 {
356 $err[0] = $tigr->runCommand
357 ("$postnuc_path $psw -b $blen -B $ddiff $ref_file $qry_file $pfx < $pfx.mgaps");
358 }
359 else
360 {
361 $err[0] = $tigr->runCommand
362 ("$postnuc_path $psw -b $blen $ref_file $qry_file $pfx < $pfx.mgaps");
363 }
364
365 if ( $err[0] != 0 ) {
366 $tigr->bail ("ERROR: postnuc returned non-zero\n");
367 }
368
369 #-- If the -o flag was set, run show-coords using NUCmer1.1 settings
370 if ( $generate_coords ) {
371 print (STDERR "5: GENERATING COORDS FILE\n");
372 $err[0] = $tigr->runCommand
373 ("$showcoords_path -r $pfx.delta > $pfx.coords");
374
375 if ( $err[0] != 0 ) {
376 $tigr->bail ("ERROR: show-coords returned non-zero\n");
377 }
378 }
379
380 #-- Remove the temporary output
381 $err[0] = unlink ("$pfx.ntref", "$pfx.mgaps");
382
383 if ( $err[0] != 2 ) {
384 $tigr->logError ("WARNING: there was a problem deleting".
385 " the temporary output files", 1);
386 }
387
388 #-- Return success
389 return (0);
390 }
391
392 exit ( main ( ) );
393
394 #-- END OF SCRIPT