comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/promer @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 #!/usr/bin/env perl
2
3 #-------------------------------------------------------------------------------
4 # Programmer: Adam M Phillippy, The Institute for Genomic Research
5 # File: promer
6 # Date: 04 / 09 / 03
7 #
8 # Usage:
9 # promer [options] <Reference> <Query>
10 #
11 # Try 'promer -h' for more information.
12 #
13 # Purpose: To create alignments between two multi-FASTA inputs by using
14 # the MUMmer matching and clustering algorithms.
15 #
16 #-------------------------------------------------------------------------------
17
18 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
19 use Foundation;
20 use File::Spec::Functions;
21 use strict;
22
23 my $AUX_BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/aux_bin";
24 my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23";
25 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
26
27
28
29 my $VERSION_INFO = q~
30 PROmer (PROtein MUMmer) version 3.07
31 ~;
32
33
34
35 my $HELP_INFO = q~
36 USAGE: promer [options] <Reference> <Query>
37
38 DESCRIPTION:
39 promer generates amino acid alignments between two mutli-FASTA DNA input
40 files. The out.delta output file lists the distance between insertions
41 and deletions that produce maximal scoring alignments between each
42 sequence. The show-* utilities know how to read this format. The DNA
43 input is translated into all 6 reading frames in order to generate the
44 output, but the output coordinates reference the original DNA input.
45
46 MANDATORY:
47 Reference Set the input reference multi-FASTA DNA file
48 Query Set the input query multi-FASTA DNA file
49
50 OPTIONS:
51 --mum Use anchor matches that are unique in both the reference
52 and query
53 --mumcand Same as --mumreference
54 --mumreference Use anchor matches that are unique in in the reference
55 but not necessarily unique in the query (default behavior)
56 --maxmatch Use all anchor matches regardless of their uniqueness
57
58 -b|breaklen Set the distance an alignment extension will attempt to
59 extend poor scoring regions before giving up, measured in
60 amino acids (default 60)
61 -c|mincluster Sets the minimum length of a cluster of matches, measured in
62 amino acids (default 20)
63 --[no]delta Toggle the creation of the delta file (default --delta)
64 --depend Print the dependency information and exit
65 -d|diagfactor Set the clustering diagonal difference separation factor
66 (default .11)
67 --[no]extend Toggle the cluster extension step (default --extend)
68 -g|maxgap Set the maximum gap between two adjacent matches in a
69 cluster, measured in amino acids (default 30)
70 -h
71 --help Display help information and exit.
72 -l|minmatch Set the minimum length of a single match, measured in amino
73 acids (default 6)
74 -m|masklen Set the maximum bookend masking lenth, measured in amino
75 acids (default 8)
76 -o
77 --coords Automatically generate the original PROmer1.1 ".coords"
78 output file using the "show-coords" program
79 --[no]optimize Toggle alignment score optimization, i.e. if an alignment
80 extension reaches the end of a sequence, it will backtrack
81 to optimize the alignment score instead of terminating the
82 alignment at the end of the sequence (default --optimize)
83
84 -p|prefix Set the prefix of the output files (default "out")
85 -V
86 --version Display the version information and exit
87 -x|matrix Set the alignment matrix number to 1 [BLOSUM 45], 2 [BLOSUM
88 62] or 3 [BLOSUM 80] (default 2)
89 ~;
90
91
92 my $USAGE_INFO = q~
93 USAGE: promer [options] <Reference> <Query>
94 ~;
95
96
97 my @DEPEND_INFO =
98 (
99 "$BIN_DIR/mummer",
100 "$BIN_DIR/mgaps",
101 "$BIN_DIR/show-coords",
102 "$AUX_BIN_DIR/postpro",
103 "$AUX_BIN_DIR/prepro",
104 "$SCRIPT_DIR/Foundation.pm"
105 );
106
107
108 my %DEFAULT_PARAMETERS =
109 (
110 "OUTPUT_PREFIX" => "out", # prefix for all output files
111 "MATCH_ALGORITHM" => "-mumreference", # match finding algo switch
112 "MIN_MATCH" => "6", # minimum match size (aminos)
113 "MAX_GAP" => "30", # maximum gap between matches (aminos)
114 "MIN_CLUSTER" => "20", # minimum cluster size (aminos)
115 "DIAG_FACTOR" => ".11", # diagonal difference fraction
116 "BREAK_LEN" => "60", # extension break length
117 "BLOSUM_NUMBER" => "2", # options are 1,2,3 (BLOSUM 45,62,80)
118 "MASKING_LENGTH" => "8", # set bookend masking length
119 "POST_SWITCHES" => "" # switches for the post processing
120 );
121
122
123 sub main ( )
124 {
125 my $tigr; # TIGR::Foundation object
126 my @err; # Error variable
127
128 my $ref_file; # path of the reference input file
129 my $qry_file; # path of the query input file
130
131 #-- The command line options for the various programs
132 my $pfx = $DEFAULT_PARAMETERS { "OUTPUT_PREFIX" };
133 my $algo = $DEFAULT_PARAMETERS { "MATCH_ALGORITHM" };
134 my $size = $DEFAULT_PARAMETERS { "MIN_MATCH" };
135 my $gap = $DEFAULT_PARAMETERS { "MAX_GAP" };
136 my $clus = $DEFAULT_PARAMETERS { "MIN_CLUSTER" };
137 my $diff = $DEFAULT_PARAMETERS { "DIAG_FACTOR" };
138 my $blen = $DEFAULT_PARAMETERS { "BREAK_LEN" };
139 my $blsm = $DEFAULT_PARAMETERS { "BLOSUM_NUMBER" };
140 my $mask = $DEFAULT_PARAMETERS { "MASKING_LENGTH" };
141 my $psw = $DEFAULT_PARAMETERS { "POST_SWITCHES" };
142
143 my $maxmatch; # matching algorithm switches
144 my $mumreference;
145 my $mum;
146 my $extend = 1; # if true, extend clusters
147 my $delta = 1; # if true, create the delta file
148 my $optimize = 1; # if true, optimize alignment scores
149
150 my $generate_coords;
151
152 #-- Initialize TIGR::Foundation
153 $tigr = new TIGR::Foundation;
154 if ( !defined ($tigr) ) {
155 print (STDERR "ERROR: TIGR::Foundation could not be initialized");
156 exit (1);
157 }
158
159 #-- Set help and usage information
160 $tigr->setHelpInfo ($HELP_INFO);
161 $tigr->setUsageInfo ($USAGE_INFO);
162 $tigr->setVersionInfo ($VERSION_INFO);
163 $tigr->addDependInfo (@DEPEND_INFO);
164
165 #-- Get command line parameters
166 $err[0] = $tigr->TIGR_GetOptions
167 (
168 "maxmatch" => \$maxmatch,
169 "mumcand" => \$mumreference,
170 "mumreference" => \$mumreference,
171 "mum" => \$mum,
172 "b|breaklen=i" => \$blen,
173 "c|mincluster=i" => \$clus,
174 "delta!" => \$delta,
175 "d|diagfactor=f" => \$diff,
176 "extend!" => \$extend,
177 "g|maxgap=i" => \$gap,
178 "l|minmatch=i" => \$size,
179 "m|masklen=i" => \$mask,
180 "o|coords" => \$generate_coords,
181 "optimize!" => \$optimize,
182 "p|prefix=s" => \$pfx,
183 "x|matrix=i" => \$blsm
184 );
185
186 #-- Check if the parsing was successful
187 if ( $err[0] == 0 || $#ARGV != 1 ) {
188 $tigr->printUsageInfo( );
189 print (STDERR "Try '$0 -h' for more information.\n");
190 exit (1);
191 }
192
193 $ref_file = File::Spec->rel2abs ($ARGV[0]);
194 $qry_file = File::Spec->rel2abs ($ARGV[1]);
195
196 #-- Set up the program parameters
197 if ( ! $extend ) {
198 $psw .= "-e ";
199 }
200 if ( ! $delta ) {
201 $psw .= "-d ";
202 }
203 if ( ! $optimize ) {
204 $psw .= "-t ";
205 }
206
207 undef (@err);
208 $err[0] = 0;
209 if ( $mum ) {
210 $err[0] ++;
211 $algo = "-mum";
212 }
213 if ( $mumreference ) {
214 $err[0] ++;
215 $algo = "-mumreference";
216 }
217 if ( $maxmatch ) {
218 $err[0] ++;
219 $algo = "-maxmatch";
220 }
221 if ( $err[0] > 1 ) {
222 $tigr->printUsageInfo( );
223 print (STDERR "ERROR: Multiple matching algorithms selected\n");
224 print (STDERR "Try '$0 -h' for more information.\n");
225 exit (1);
226 }
227
228 #-- Set up the program path names
229 my $algo_path = "$BIN_DIR/mummer";
230 my $mgaps_path = "$BIN_DIR/mgaps";
231 my $prepro_path = "$AUX_BIN_DIR/prepro";
232 my $postpro_path = "$AUX_BIN_DIR/postpro";
233 my $showcoords_path = "$BIN_DIR/show-coords";
234
235 #-- Check that the files needed are all there and readable/writable
236 {
237 undef (@err);
238 if ( !$tigr->isExecutableFile ($algo_path) ) {
239 push (@err, $algo_path);
240 }
241
242 if ( !$tigr->isExecutableFile ($mgaps_path) ) {
243 push (@err, $mgaps_path);
244 }
245
246 if ( !$tigr->isExecutableFile ($prepro_path) ) {
247 push (@err, $prepro_path);
248 }
249
250 if ( !$tigr->isExecutableFile ($postpro_path) ) {
251 push (@err, $postpro_path);
252 }
253
254 if ( !$tigr->isReadableFile ($ref_file) ) {
255 push (@err, $ref_file);
256 }
257
258 if ( !$tigr->isReadableFile ($qry_file) ) {
259 push (@err, $qry_file);
260 }
261
262 if ( !$tigr->isCreatableFile ("$pfx.aaref") ) {
263 if ( !$tigr->isWritableFile ("$pfx.aaref") ) {
264 push (@err, "$pfx.aaref");
265 }
266 }
267
268 if ( !$tigr->isCreatableFile ("$pfx.aaqry") ) {
269 if ( !$tigr->isWritableFile ("$pfx.aaqry") ) {
270 push (@err, "$pfx.aaqry");
271 }
272 }
273
274 if ( !$tigr->isCreatableFile ("$pfx.mgaps") ) {
275 if ( !$tigr->isWritableFile ("$pfx.mgaps") ) {
276 push (@err, "$pfx.mgaps");
277 }
278 }
279
280 if ( !$tigr->isCreatableFile ("$pfx.delta") ) {
281 if ( !$tigr->isWritableFile ("$pfx.delta") ) {
282 push (@err, "$pfx.delta");
283 }
284 }
285
286 if ( $generate_coords ) {
287 if ( !$tigr->isExecutableFile ($showcoords_path) ) {
288 push (@err, $showcoords_path);
289 }
290 if ( !$tigr->isCreatableFile ("$pfx.coords") ) {
291 if ( !$tigr->isWritableFile ("$pfx.coords") ) {
292 push (@err, "$pfx.coords");
293 }
294 }
295 }
296
297 #-- If 1 or more files could not be processed, terminate script
298 if ( $#err >= 0 ) {
299 $tigr->logError
300 ("ERROR: The following critical files could not be used", 1);
301 while ( $#err >= 0 ) {
302 $tigr->logError (pop(@err), 1);
303 }
304 $tigr->logError
305 ("Check your paths and file permissions and try again", 1);
306 $tigr->bail( );
307 }
308 }
309
310
311 #-- Run prepro -r and -q and assert return value is zero
312 print (STDERR "1: PREPARING DATA\n");
313 $err[0] = $tigr->runCommand
314 ("$prepro_path -m $mask -r $ref_file > $pfx.aaref");
315
316 if ( $err[0] != 0 ) {
317 $tigr->bail
318 ("ERROR: prepro -r returned non-zero\n");
319 }
320
321 $err[0] = $tigr->runCommand
322 ("$prepro_path -m $mask -q $qry_file > $pfx.aaqry");
323
324 if ( $err[0] != 0 ) {
325 $tigr->bail ("ERROR: prepro -q returned non-zero\n");
326 }
327
328
329 #-- Run mummer | mgaps and assert return value is zero
330 print (STDERR "2,3: RUNNING mummer AND CREATING CLUSTERS\n");
331 open(ALGO_PIPE, "$algo_path $algo -l $size $pfx.aaref $pfx.aaqry |")
332 or $tigr->bail ("ERROR: could not open $algo_path output pipe $!");
333 open(CLUS_PIPE, "| $mgaps_path -l $clus -s $gap -f $diff > $pfx.mgaps")
334 or $tigr->bail ("ERROR: could not open $mgaps_path input pipe $!");
335 while ( <ALGO_PIPE> ) {
336 print CLUS_PIPE
337 or $tigr->bail ("ERROR: could not write to $mgaps_path pipe $!");
338 }
339 $err[0] = close(ALGO_PIPE);
340 $err[1] = close(CLUS_PIPE);
341
342 if ( $err[0] == 0 || $err[1] == 0 ) {
343 $tigr->bail ("ERROR: mummer and/or mgaps returned non-zero\n");
344 }
345
346
347 #-- Run postpro and assert return value is zero
348 print (STDERR "4: FINISHING DATA\n");
349 $err[0] = $tigr->runCommand
350 ("$postpro_path $psw -x $blsm -b $blen ".
351 "$ref_file $qry_file $pfx < $pfx.mgaps");
352
353 if ( $err[0] != 0 ) {
354 $tigr->bail ("ERROR: postpro returned non-zero\n");
355 }
356
357 #-- If the -o flag was set, run show-coords using PROmer1.1 settings
358 if ( $generate_coords ) {
359 print (STDERR "5: GENERATING COORDS FILE\n");
360 $err[0] = $tigr->runCommand
361 ("$showcoords_path -r $pfx.delta > $pfx.coords");
362
363 if ( $err[0] != 0 ) {
364 $tigr->bail ("ERROR: show-coords returned non-zero\n");
365 }
366 }
367
368 #-- Remove the temporary output
369 $err[0] = unlink ("$pfx.aaref", "$pfx.aaqry", "$pfx.mgaps");
370
371 if ( $err[0] != 3 ) {
372 $tigr->logError ("WARNING: there was a problem deleting".
373 " the temporary output files", 1);
374 }
375
376 #-- Return success
377 return (0);
378 }
379
380 exit ( main ( ) );
381
382 #-- END OF SCRIPT