Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/nucmer @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 #------------------------------------------------------------------------------- | |
4 # Programmer: Adam M Phillippy, The Institute for Genomic Research | |
5 # File: nucmer | |
6 # Date: 04 / 09 / 03 | |
7 # | |
8 # Usage: | |
9 # nucmer [options] <Reference> <Query> | |
10 # | |
11 # Try 'nucmer -h' for more information. | |
12 # | |
13 # Purpose: To create alignments between two multi-FASTA inputs by using | |
14 # the MUMmer matching and clustering algorithms. | |
15 # | |
16 #------------------------------------------------------------------------------- | |
17 | |
18 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; | |
19 use Foundation; | |
20 use File::Spec::Functions; | |
21 use strict; | |
22 | |
23 my $AUX_BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/aux_bin"; | |
24 my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23"; | |
25 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; | |
26 | |
27 | |
28 my $VERSION_INFO = q~ | |
29 NUCmer (NUCleotide MUMmer) version 3.1 | |
30 ~; | |
31 | |
32 | |
33 my $HELP_INFO = q~ | |
34 USAGE: nucmer [options] <Reference> <Query> | |
35 | |
36 DESCRIPTION: | |
37 nucmer generates nucleotide alignments between two mutli-FASTA input | |
38 files. The out.delta output file lists the distance between insertions | |
39 and deletions that produce maximal scoring alignments between each | |
40 sequence. The show-* utilities know how to read this format. | |
41 | |
42 MANDATORY: | |
43 Reference Set the input reference multi-FASTA filename | |
44 Query Set the input query multi-FASTA filename | |
45 | |
46 OPTIONS: | |
47 --mum Use anchor matches that are unique in both the reference | |
48 and query | |
49 --mumcand Same as --mumreference | |
50 --mumreference Use anchor matches that are unique in in the reference | |
51 but not necessarily unique in the query (default behavior) | |
52 --maxmatch Use all anchor matches regardless of their uniqueness | |
53 | |
54 -b|breaklen Set the distance an alignment extension will attempt to | |
55 extend poor scoring regions before giving up (default 200) | |
56 --[no]banded Enforce absolute banding of dynamic programming matrix | |
57 based on diagdiff parameter EXPERIMENTAL (default no) | |
58 -c|mincluster Sets the minimum length of a cluster of matches (default 65) | |
59 --[no]delta Toggle the creation of the delta file (default --delta) | |
60 --depend Print the dependency information and exit | |
61 -D|diagdiff Set the maximum diagonal difference between two adjacent | |
62 anchors in a cluster (default 5) | |
63 -d|diagfactor Set the maximum diagonal difference between two adjacent | |
64 anchors in a cluster as a differential fraction of the gap | |
65 length (default 0.12) | |
66 --[no]extend Toggle the cluster extension step (default --extend) | |
67 -f | |
68 --forward Use only the forward strand of the Query sequences | |
69 -g|maxgap Set the maximum gap between two adjacent matches in a | |
70 cluster (default 90) | |
71 -h | |
72 --help Display help information and exit | |
73 -l|minmatch Set the minimum length of a single match (default 20) | |
74 -o | |
75 --coords Automatically generate the original NUCmer1.1 coords | |
76 output file using the 'show-coords' program | |
77 --[no]optimize Toggle alignment score optimization, i.e. if an alignment | |
78 extension reaches the end of a sequence, it will backtrack | |
79 to optimize the alignment score instead of terminating the | |
80 alignment at the end of the sequence (default --optimize) | |
81 -p|prefix Set the prefix of the output files (default "out") | |
82 -r | |
83 --reverse Use only the reverse complement of the Query sequences | |
84 --[no]simplify Simplify alignments by removing shadowed clusters. Turn | |
85 this option off if aligning a sequence to itself to look | |
86 for repeats (default --simplify) | |
87 -V | |
88 --version Display the version information and exit | |
89 ~; | |
90 | |
91 | |
92 my $USAGE_INFO = q~ | |
93 USAGE: nucmer [options] <Reference> <Query> | |
94 ~; | |
95 | |
96 | |
97 my @DEPEND_INFO = | |
98 ( | |
99 "$BIN_DIR/mummer", | |
100 "$BIN_DIR/mgaps", | |
101 "$BIN_DIR/show-coords", | |
102 "$AUX_BIN_DIR/postnuc", | |
103 "$AUX_BIN_DIR/prenuc", | |
104 "$SCRIPT_DIR/Foundation.pm" | |
105 ); | |
106 | |
107 | |
108 my %DEFAULT_PARAMETERS = | |
109 ( | |
110 "OUTPUT_PREFIX" => "out", # prefix for all output files | |
111 "MATCH_ALGORITHM" => "-mumreference", # match finding algo switch | |
112 "MATCH_DIRECTION" => "-b", # match direction switch | |
113 "MIN_MATCH" => "20", # minimum match size | |
114 "MAX_GAP" => "90", # maximum gap between matches | |
115 "MIN_CLUSTER" => "65", # minimum cluster size | |
116 "DIAG_DIFF" => "5", # diagonal difference absolute | |
117 "DIAG_FACTOR" => ".12", # diagonal difference fraction | |
118 "BREAK_LEN" => "200", # extension break length | |
119 "POST_SWITCHES" => "" # switches for the post processing | |
120 ); | |
121 | |
122 | |
123 sub main ( ) | |
124 { | |
125 my $tigr; # TIGR::Foundation object | |
126 my @err; # Error variable | |
127 | |
128 my $ref_file; # path of the reference input file | |
129 my $qry_file; # path of the query input file | |
130 | |
131 #-- The command line options for the various programs | |
132 my $pfx = $DEFAULT_PARAMETERS { "OUTPUT_PREFIX" }; | |
133 my $algo = $DEFAULT_PARAMETERS { "MATCH_ALGORITHM" }; | |
134 my $mdir = $DEFAULT_PARAMETERS { "MATCH_DIRECTION" }; | |
135 my $size = $DEFAULT_PARAMETERS { "MIN_MATCH" }; | |
136 my $gap = $DEFAULT_PARAMETERS { "MAX_GAP" }; | |
137 my $clus = $DEFAULT_PARAMETERS { "MIN_CLUSTER" }; | |
138 my $ddiff = $DEFAULT_PARAMETERS { "DIAG_DIFF" }; | |
139 my $dfrac = $DEFAULT_PARAMETERS { "DIAG_FACTOR" }; | |
140 my $blen = $DEFAULT_PARAMETERS { "BREAK_LEN" }; | |
141 my $psw = $DEFAULT_PARAMETERS { "POST_SWITCHES" }; | |
142 | |
143 my $fwd; # if true, use forward strand | |
144 my $rev; # if true, use reverse strand | |
145 my $maxmatch; # matching algorithm switches | |
146 my $mumreference; | |
147 my $mum; | |
148 my $banded = 0; # if true, enforce absolute dp banding | |
149 my $extend = 1; # if true, extend clusters | |
150 my $delta = 1; # if true, create the delta file | |
151 my $optimize = 1; # if true, optimize alignment scores | |
152 my $simplify = 1; # if true, simplify shadowed alignments | |
153 | |
154 my $generate_coords; | |
155 | |
156 #-- Initialize TIGR::Foundation | |
157 $tigr = new TIGR::Foundation; | |
158 if ( !defined ($tigr) ) { | |
159 print (STDERR "ERROR: TIGR::Foundation could not be initialized"); | |
160 exit (1); | |
161 } | |
162 | |
163 #-- Set help and usage information | |
164 $tigr->setHelpInfo ($HELP_INFO); | |
165 $tigr->setUsageInfo ($USAGE_INFO); | |
166 $tigr->setVersionInfo ($VERSION_INFO); | |
167 $tigr->addDependInfo (@DEPEND_INFO); | |
168 | |
169 #-- Get command line parameters | |
170 $err[0] = $tigr->TIGR_GetOptions | |
171 ( | |
172 "maxmatch" => \$maxmatch, | |
173 "mumcand" => \$mumreference, | |
174 "mumreference" => \$mumreference, | |
175 "mum" => \$mum, | |
176 "b|breaklen=i" => \$blen, | |
177 "banded!" => \$banded, | |
178 "c|mincluster=i" => \$clus, | |
179 "delta!" => \$delta, | |
180 "D|diagdiff=i" => \$ddiff, | |
181 "d|diagfactor=f" => \$dfrac, | |
182 "extend!" => \$extend, | |
183 "f|forward" => \$fwd, | |
184 "g|maxgap=i" => \$gap, | |
185 "l|minmatch=i" => \$size, | |
186 "o|coords" => \$generate_coords, | |
187 "optimize!" => \$optimize, | |
188 "p|prefix=s" => \$pfx, | |
189 "r|reverse" => \$rev, | |
190 "simplify!" => \$simplify | |
191 ); | |
192 | |
193 | |
194 #-- Check if the parsing was successful | |
195 if ( $err[0] == 0 || $#ARGV != 1 ) { | |
196 $tigr->printUsageInfo( ); | |
197 print (STDERR "Try '$0 -h' for more information.\n"); | |
198 exit (1); | |
199 } | |
200 | |
201 $ref_file = File::Spec->rel2abs ($ARGV[0]); | |
202 $qry_file = File::Spec->rel2abs ($ARGV[1]); | |
203 | |
204 #-- Set up the program parameters | |
205 if ( $fwd && $rev ) { | |
206 $mdir = "-b"; | |
207 } elsif ( $fwd ) { | |
208 $mdir = ""; | |
209 } elsif ( $rev ) { | |
210 $mdir = "-r"; | |
211 } | |
212 if ( ! $extend ) { | |
213 $psw .= "-e "; | |
214 } | |
215 if ( ! $delta ) { | |
216 $psw .= "-d "; | |
217 } | |
218 if ( ! $optimize ) { | |
219 $psw .= "-t "; | |
220 } | |
221 if ( ! $simplify ) { | |
222 $psw .= "-s "; | |
223 } | |
224 | |
225 undef (@err); | |
226 $err[0] = 0; | |
227 if ( $mum ) { | |
228 $err[0] ++; | |
229 $algo = "-mum"; | |
230 } | |
231 if ( $mumreference ) { | |
232 $err[0] ++; | |
233 $algo = "-mumreference"; | |
234 } | |
235 if ( $maxmatch ) { | |
236 $err[0] ++; | |
237 $algo = "-maxmatch"; | |
238 } | |
239 if ( $err[0] > 1 ) { | |
240 $tigr->printUsageInfo( ); | |
241 print (STDERR "ERROR: Multiple matching algorithms selected\n"); | |
242 print (STDERR "Try '$0 -h' for more information.\n"); | |
243 exit (1); | |
244 } | |
245 | |
246 #-- Set up the program path names | |
247 my $algo_path = "$BIN_DIR/mummer"; | |
248 my $mgaps_path = "$BIN_DIR/mgaps"; | |
249 my $prenuc_path = "$AUX_BIN_DIR/prenuc"; | |
250 my $postnuc_path = "$AUX_BIN_DIR/postnuc"; | |
251 my $showcoords_path = "$BIN_DIR/show-coords"; | |
252 | |
253 #-- Check that the files needed are all there and readable/writable | |
254 { | |
255 undef (@err); | |
256 if ( !$tigr->isExecutableFile ($algo_path) ) { | |
257 push (@err, $algo_path); | |
258 } | |
259 | |
260 if ( !$tigr->isExecutableFile ($mgaps_path) ) { | |
261 push (@err, $mgaps_path); | |
262 } | |
263 | |
264 if ( !$tigr->isExecutableFile ($prenuc_path) ) { | |
265 push (@err, $prenuc_path); | |
266 } | |
267 | |
268 if ( !$tigr->isExecutableFile ($postnuc_path) ) { | |
269 push (@err, $postnuc_path); | |
270 } | |
271 | |
272 if ( !$tigr->isReadableFile ($ref_file) ) { | |
273 push (@err, $ref_file); | |
274 } | |
275 | |
276 if ( !$tigr->isReadableFile ($qry_file) ) { | |
277 push (@err, $qry_file); | |
278 } | |
279 | |
280 if ( !$tigr->isCreatableFile ("$pfx.ntref") ) { | |
281 if ( !$tigr->isWritableFile ("$pfx.ntref") ) { | |
282 push (@err, "$pfx.ntref"); | |
283 } | |
284 } | |
285 | |
286 if ( !$tigr->isCreatableFile ("$pfx.mgaps") ) { | |
287 if ( !$tigr->isWritableFile ("$pfx.mgaps") ) { | |
288 push (@err, "$pfx.mgaps"); | |
289 } | |
290 } | |
291 | |
292 if ( !$tigr->isCreatableFile ("$pfx.delta") ) { | |
293 if ( !$tigr->isWritableFile ("$pfx.delta") ) { | |
294 push (@err, "$pfx.delta"); | |
295 } | |
296 } | |
297 | |
298 if ( $generate_coords ) { | |
299 if ( !$tigr->isExecutableFile ($showcoords_path) ) { | |
300 push (@err, $showcoords_path); | |
301 } | |
302 if ( !$tigr->isCreatableFile ("$pfx.coords") ) { | |
303 if ( !$tigr->isWritableFile ("$pfx.coords") ) { | |
304 push (@err, "$pfx.coords"); | |
305 } | |
306 } | |
307 } | |
308 | |
309 #-- If 1 or more files could not be processed, terminate script | |
310 if ( $#err >= 0 ) { | |
311 $tigr->logError | |
312 ("ERROR: The following critical files could not be used", 1); | |
313 while ( $#err >= 0 ) { | |
314 $tigr->logError (pop(@err), 1); | |
315 } | |
316 $tigr->logError | |
317 ("Check your paths and file permissions and try again", 1); | |
318 $tigr->bail( ); | |
319 } | |
320 } | |
321 | |
322 | |
323 #-- Run prenuc and assert return value is zero | |
324 print (STDERR "1: PREPARING DATA\n"); | |
325 $err[0] = $tigr->runCommand | |
326 ("$prenuc_path $ref_file > $pfx.ntref"); | |
327 | |
328 if ( $err[0] != 0 ) { | |
329 $tigr->bail | |
330 ("ERROR: prenuc returned non-zero\n"); | |
331 } | |
332 | |
333 | |
334 #-- Run mummer | mgaps and assert return value is zero | |
335 print (STDERR "2,3: RUNNING mummer AND CREATING CLUSTERS\n"); | |
336 open(ALGO_PIPE, "$algo_path $algo $mdir -l $size -n $pfx.ntref $qry_file |") | |
337 or $tigr->bail ("ERROR: could not open $algo_path output pipe $!"); | |
338 open(CLUS_PIPE, "| $mgaps_path -l $clus -s $gap -d $ddiff -f $dfrac > $pfx.mgaps") | |
339 or $tigr->bail ("ERROR: could not open $mgaps_path input pipe $!"); | |
340 while ( <ALGO_PIPE> ) { | |
341 print CLUS_PIPE | |
342 or $tigr->bail ("ERROR: could not write to $mgaps_path pipe $!"); | |
343 } | |
344 $err[0] = close(ALGO_PIPE); | |
345 $err[1] = close(CLUS_PIPE); | |
346 | |
347 if ( $err[0] == 0 || $err[1] == 0 ) { | |
348 $tigr->bail ("ERROR: mummer and/or mgaps returned non-zero\n"); | |
349 } | |
350 | |
351 | |
352 #-- Run postnuc and assert return value is zero | |
353 print (STDERR "4: FINISHING DATA\n"); | |
354 if ( $banded ) | |
355 { | |
356 $err[0] = $tigr->runCommand | |
357 ("$postnuc_path $psw -b $blen -B $ddiff $ref_file $qry_file $pfx < $pfx.mgaps"); | |
358 } | |
359 else | |
360 { | |
361 $err[0] = $tigr->runCommand | |
362 ("$postnuc_path $psw -b $blen $ref_file $qry_file $pfx < $pfx.mgaps"); | |
363 } | |
364 | |
365 if ( $err[0] != 0 ) { | |
366 $tigr->bail ("ERROR: postnuc returned non-zero\n"); | |
367 } | |
368 | |
369 #-- If the -o flag was set, run show-coords using NUCmer1.1 settings | |
370 if ( $generate_coords ) { | |
371 print (STDERR "5: GENERATING COORDS FILE\n"); | |
372 $err[0] = $tigr->runCommand | |
373 ("$showcoords_path -r $pfx.delta > $pfx.coords"); | |
374 | |
375 if ( $err[0] != 0 ) { | |
376 $tigr->bail ("ERROR: show-coords returned non-zero\n"); | |
377 } | |
378 } | |
379 | |
380 #-- Remove the temporary output | |
381 $err[0] = unlink ("$pfx.ntref", "$pfx.mgaps"); | |
382 | |
383 if ( $err[0] != 2 ) { | |
384 $tigr->logError ("WARNING: there was a problem deleting". | |
385 " the temporary output files", 1); | |
386 } | |
387 | |
388 #-- Return success | |
389 return (0); | |
390 } | |
391 | |
392 exit ( main ( ) ); | |
393 | |
394 #-- END OF SCRIPT |