Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/mummerplot @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 ################################################################################ | |
4 # Programmer: Adam M Phillippy, The Institute for Genomic Research | |
5 # File: mummerplot | |
6 # Date: 01 / 08 / 03 | |
7 # 01 / 06 / 05 rewritten (v3.0) | |
8 # | |
9 # Usage: | |
10 # mummerplot [options] <match file> | |
11 # | |
12 # Try 'mummerplot -h' for more information. | |
13 # | |
14 # Purpose: To generate a gnuplot plot for the display of mummer, nucmer, | |
15 # promer, and show-tiling alignments. | |
16 # | |
17 ################################################################################ | |
18 | |
19 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; | |
20 use Foundation; | |
21 use strict; | |
22 use IO::Socket; | |
23 | |
24 my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23"; | |
25 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; | |
26 | |
27 | |
28 #================================================================= Globals ====# | |
29 #-- terminal types | |
30 my $X11 = "x11"; | |
31 my $PS = "postscript"; | |
32 my $PNG = "png"; | |
33 | |
34 #-- terminal sizes | |
35 my $SMALL = "small"; | |
36 my $MEDIUM = "medium"; | |
37 my $LARGE = "large"; | |
38 | |
39 my %TERMSIZE = | |
40 ( | |
41 $X11 => { $SMALL => 500, $MEDIUM => 700, $LARGE => 900 }, # screen pix | |
42 $PS => { $SMALL => 1, $MEDIUM => 2, $LARGE => 3 }, # pages | |
43 $PNG => { $SMALL => 800, $MEDIUM => 1024, $LARGE => 1400 } # image pix | |
44 ); | |
45 | |
46 #-- terminal format | |
47 my $FFACE = "Courier"; | |
48 my $FSIZE = "8"; | |
49 my $TFORMAT = "%.0f"; | |
50 my $MFORMAT = "[%.0f, %.0f]"; | |
51 | |
52 #-- output suffixes | |
53 my $FILTER = "filter"; | |
54 my $FWDPLOT = "fplot"; | |
55 my $REVPLOT = "rplot"; | |
56 my $HLTPLOT = "hplot"; | |
57 my $GNUPLOT = "gnuplot"; | |
58 | |
59 my %SUFFIX = | |
60 ( | |
61 $FILTER => ".filter", | |
62 $FWDPLOT => ".fplot", | |
63 $REVPLOT => ".rplot", | |
64 $HLTPLOT => ".hplot", | |
65 $GNUPLOT => ".gp", | |
66 $PS => ".ps", | |
67 $PNG => ".png" | |
68 ); | |
69 | |
70 | |
71 #================================================================= Options ====# | |
72 my $OPT_breaklen; # -b option | |
73 my $OPT_color; # --[no]color option | |
74 my $OPT_coverage; # --[no]coverage option | |
75 my $OPT_filter; # -f option | |
76 my $OPT_layout; # -l option | |
77 my $OPT_prefix = "out"; # -p option | |
78 my $OPT_rv; # --rv option | |
79 my $OPT_terminal = $X11; # -t option | |
80 my $OPT_IdR; # -r option | |
81 my $OPT_IdQ; # -q option | |
82 my $OPT_IDRfile; # -R option | |
83 my $OPT_IDQfile; # -Q option | |
84 my $OPT_rport; # -rport option | |
85 my $OPT_qport; # -qport option | |
86 my $OPT_size = $SMALL; # -small, -medium, -large | |
87 my $OPT_SNP; # -S option | |
88 my $OPT_xrange; # -x option | |
89 my $OPT_yrange; # -y option | |
90 my $OPT_title; # -title option | |
91 | |
92 my $OPT_Mfile; # match file | |
93 my $OPT_Dfile; # delta filter file | |
94 my $OPT_Ffile; # .fplot output | |
95 my $OPT_Rfile; # .rplot output | |
96 my $OPT_Hfile; # .hplot output | |
97 my $OPT_Gfile; # .gp output | |
98 my $OPT_Pfile; # .ps .png output | |
99 | |
100 my $OPT_gpstatus; # gnuplot status | |
101 | |
102 my $OPT_ONLY_USE_FATTEST; # Only use fattest alignment for layout | |
103 | |
104 | |
105 #============================================================== Foundation ====# | |
106 my $VERSION = '3.5'; | |
107 | |
108 my $USAGE = qq~ | |
109 USAGE: mummerplot [options] <match file> | |
110 ~; | |
111 | |
112 my $HELP = qq~ | |
113 USAGE: mummerplot [options] <match file> | |
114 | |
115 DESCRIPTION: | |
116 mummerplot generates plots of alignment data produced by mummer, nucmer, | |
117 promer or show-tiling by using the GNU gnuplot utility. After generating | |
118 the appropriate scripts and datafiles, mummerplot will attempt to run | |
119 gnuplot to generate the plot. If this attempt fails, a warning will be | |
120 output and the resulting .gp and .[frh]plot files will remain so that the | |
121 user may run gnuplot independently. If the attempt succeeds, either an x11 | |
122 window will be spawned or an additional output file will be generated | |
123 (.ps or .png depending on the selected terminal). Feel free to edit the | |
124 resulting gnuplot script (.gp) and rerun gnuplot to change line thinkness, | |
125 labels, colors, plot size etc. | |
126 | |
127 MANDATORY: | |
128 match file Set the alignment input to 'match file' | |
129 Valid inputs are from mummer, nucmer, promer and | |
130 show-tiling (.out, .cluster, .delta and .tiling) | |
131 | |
132 OPTIONS: | |
133 -b|breaklen Highlight alignments with breakpoints further than | |
134 breaklen nucleotides from the nearest sequence end | |
135 --[no]color Color plot lines with a percent similarity gradient or | |
136 turn off all plot color (default color by match dir) | |
137 If the plot is very sparse, edit the .gp script to plot | |
138 with 'linespoints' instead of 'lines' | |
139 -c | |
140 --[no]coverage Generate a reference coverage plot (default for .tiling) | |
141 --depend Print the dependency information and exit | |
142 -f | |
143 --filter Only display .delta alignments which represent the "best" | |
144 hit to any particular spot on either sequence, i.e. a | |
145 one-to-one mapping of reference and query subsequences | |
146 -h | |
147 --help Display help information and exit | |
148 -l | |
149 --layout Layout a .delta multiplot in an intelligible fashion, | |
150 this option requires the -R -Q options | |
151 --fat Layout sequences using fattest alignment only | |
152 -p|prefix Set the prefix of the output files (default '$OPT_prefix') | |
153 -rv Reverse video for x11 plots | |
154 -r|IdR Plot a particular reference sequence ID on the X-axis | |
155 -q|IdQ Plot a particular query sequence ID on the Y-axis | |
156 -R|Rfile Plot an ordered set of reference sequences from Rfile | |
157 -Q|Qfile Plot an ordered set of query sequences from Qfile | |
158 Rfile/Qfile Can either be the original DNA multi-FastA | |
159 files or lists of sequence IDs, lens and dirs [ /+/-] | |
160 -r|rport Specify the port to send reference ID and position on | |
161 mouse double click in X11 plot window | |
162 -q|qport Specify the port to send query IDs and position on mouse | |
163 double click in X11 plot window | |
164 -s|size Set the output size to small, medium or large | |
165 --small --medium --large (default '$OPT_size') | |
166 -S | |
167 --SNP Highlight SNP locations in each alignment | |
168 -t|terminal Set the output terminal to x11, postscript or png | |
169 --x11 --postscript --png (default '$OPT_terminal') | |
170 -t|title Specify the gnuplot plot title (default none) | |
171 -x|xrange Set the xrange for the plot '[min:max]' | |
172 -y|yrange Set the yrange for the plot '[min:max]' | |
173 -V | |
174 --version Display the version information and exit | |
175 ~; | |
176 | |
177 my @DEPEND = | |
178 ( | |
179 "$SCRIPT_DIR/Foundation.pm", | |
180 "$BIN_DIR/delta-filter", | |
181 "$BIN_DIR/show-coords", | |
182 "$BIN_DIR/show-snps", | |
183 "gnuplot" | |
184 ); | |
185 | |
186 my $tigr = new TIGR::Foundation | |
187 or die "ERROR: TIGR::Foundation could not be initialized\n"; | |
188 | |
189 $tigr -> setVersionInfo ($VERSION); | |
190 $tigr -> setUsageInfo ($USAGE); | |
191 $tigr -> setHelpInfo ($HELP); | |
192 $tigr -> addDependInfo (@DEPEND); | |
193 | |
194 | |
195 #=========================================================== Function Decs ====# | |
196 sub GetParseFunc( ); | |
197 | |
198 sub ParseIDs($$); | |
199 | |
200 sub ParseDelta($); | |
201 sub ParseCluster($); | |
202 sub ParseMummer($); | |
203 sub ParseTiling($); | |
204 | |
205 sub LayoutIDs($$); | |
206 sub SpanXwY ($$$$$); | |
207 | |
208 sub PlotData($$$); | |
209 sub WriteGP($$); | |
210 sub RunGP( ); | |
211 sub ListenGP($$); | |
212 | |
213 sub ParseOptions( ); | |
214 | |
215 | |
216 #=========================================================== Function Defs ====# | |
217 MAIN: | |
218 { | |
219 my @aligns; # (sR eR sQ eQ sim lenR lenQ idR idQ) | |
220 my %refs; # (id => (off, len, [1/-1])) | |
221 my %qrys; # (id => (off, len, [1/-1])) | |
222 | |
223 #-- Get the command line options (sets OPT_ global vars) | |
224 ParseOptions( ); | |
225 | |
226 | |
227 #-- Get the alignment type | |
228 my $parsefunc = GetParseFunc( ); | |
229 | |
230 if ( $parsefunc != \&ParseDelta && | |
231 ($OPT_filter || $OPT_layout || $OPT_SNP) ) { | |
232 print STDERR "WARNING: -f -l -S only work with delta input\n"; | |
233 undef $OPT_filter; | |
234 undef $OPT_layout; | |
235 undef $OPT_SNP; | |
236 } | |
237 | |
238 #-- Parse the reference and query IDs | |
239 if ( defined $OPT_IdR ) { $refs{$OPT_IdR} = [ 0, 0, 1 ]; } | |
240 elsif ( defined $OPT_IDRfile ) { | |
241 ParseIDs ($OPT_IDRfile, \%refs); | |
242 } | |
243 | |
244 if ( defined $OPT_IdQ ) { $qrys{$OPT_IdQ} = [ 0, 0, 1 ]; } | |
245 elsif ( defined $OPT_IDQfile ) { | |
246 ParseIDs ($OPT_IDQfile, \%qrys); | |
247 } | |
248 | |
249 | |
250 #-- Filter the alignments | |
251 if ( $OPT_filter || $OPT_layout ) { | |
252 print STDERR "Writing filtered delta file $OPT_Dfile\n"; | |
253 system ("$BIN_DIR/delta-filter -r -q $OPT_Mfile > $OPT_Dfile") | |
254 and die "ERROR: Could not run delta-filter, $!\n"; | |
255 if ( $OPT_filter ) { $OPT_Mfile = $OPT_Dfile; } | |
256 } | |
257 | |
258 | |
259 #-- Parse the alignment data | |
260 $parsefunc->(\@aligns); | |
261 | |
262 | |
263 #-- Layout the alignment data if requested | |
264 if ( $OPT_layout ) { | |
265 if ( scalar (keys %refs) || scalar (keys %qrys) ) { | |
266 LayoutIDs (\%refs, \%qrys); | |
267 } | |
268 else { | |
269 print STDERR "WARNING: --layout option only works with -R or -Q\n"; | |
270 undef $OPT_layout; | |
271 } | |
272 } | |
273 | |
274 | |
275 #-- Plot the alignment data | |
276 PlotData (\@aligns, \%refs, \%qrys); | |
277 | |
278 | |
279 #-- Write the gnuplot script | |
280 WriteGP (\%refs, \%qrys); | |
281 | |
282 | |
283 #-- Run gnuplot script and fork a clipboard listener | |
284 unless ( $OPT_gpstatus == -1 ) { | |
285 | |
286 my $child = 1; | |
287 if ( $OPT_gpstatus == 0 && $OPT_terminal eq $X11 ) { | |
288 print STDERR "Forking mouse listener\n"; | |
289 $child = fork; | |
290 } | |
291 | |
292 #-- parent runs gnuplot | |
293 if ( $child ) { | |
294 RunGP( ); | |
295 kill 1, $child; | |
296 } | |
297 #-- child listens to clipboard | |
298 elsif ( defined $child ) { | |
299 ListenGP(\%refs, \%qrys); | |
300 } | |
301 else { | |
302 print STDERR "WARNING: Could not fork mouse listener\n"; | |
303 } | |
304 } | |
305 | |
306 exit (0); | |
307 } | |
308 | |
309 | |
310 #------------------------------------------------------------ GetParseFunc ----# | |
311 sub GetParseFunc ( ) | |
312 { | |
313 my $fref; | |
314 | |
315 open (MFILE, "<$OPT_Mfile") | |
316 or die "ERROR: Could not open $OPT_Mfile, $!\n"; | |
317 | |
318 $_ = <MFILE>; | |
319 if ( !defined ) { die "ERROR: Could not read $OPT_Mfile, File is empty\n" } | |
320 | |
321 SWITCH: { | |
322 #-- tiling | |
323 if ( /^>\S+ \d+ bases/ ) { | |
324 $fref = \&ParseTiling; | |
325 last SWITCH; | |
326 } | |
327 | |
328 #-- mummer | |
329 if ( /^> \S+/ ) { | |
330 $fref = \&ParseMummer; | |
331 last SWITCH; | |
332 } | |
333 | |
334 #-- nucmer/promer | |
335 if ( /^(\S+) (\S+)/ ) { | |
336 if ( ! defined $OPT_IDRfile ) { | |
337 $OPT_IDRfile = $1; | |
338 } | |
339 if ( ! defined $OPT_IDQfile ) { | |
340 $OPT_IDQfile = $2; | |
341 } | |
342 | |
343 $_ = <MFILE>; | |
344 if ( (defined) && (/^NUCMER$/ || /^PROMER$/) ) { | |
345 $_ = <MFILE>; # sequence header | |
346 $_ = <MFILE>; # alignment header | |
347 if ( !defined ) { | |
348 $fref = \&ParseDelta; | |
349 last SWITCH; | |
350 } | |
351 elsif ( /^\d+ \d+ \d+ \d+ \d+ \d+ \d+$/ ) { | |
352 $fref = \&ParseDelta; | |
353 last SWITCH; | |
354 } | |
355 elsif ( /^[ \-][1-3] [ \-][1-3]$/ ) { | |
356 $fref = \&ParseCluster; | |
357 last SWITCH; | |
358 } | |
359 } | |
360 } | |
361 | |
362 #-- default | |
363 die "ERROR: Could not read $OPT_Mfile, Unrecognized file type\n"; | |
364 } | |
365 | |
366 close (MFILE) | |
367 or print STDERR "WARNING: Trouble closing $OPT_Mfile, $!\n"; | |
368 | |
369 return $fref; | |
370 } | |
371 | |
372 | |
373 #---------------------------------------------------------------- ParseIDs ----# | |
374 sub ParseIDs ($$) | |
375 { | |
376 my $file = shift; | |
377 my $href = shift; | |
378 | |
379 open (IDFILE, "<$file") | |
380 or print STDERR "WARNING: Could not open $file, $!\n"; | |
381 | |
382 my $dir; | |
383 my $aref; | |
384 my $isfasta; | |
385 my $offset = 0; | |
386 while ( <IDFILE> ) { | |
387 #-- Ignore blank lines | |
388 if ( /^\s*$/ ) { next; } | |
389 | |
390 #-- FastA header | |
391 if ( /^>(\S+)/ ) { | |
392 if ( exists $href->{$1} ) { | |
393 print STDERR "WARNING: Duplicate sequence '$1' ignored\n"; | |
394 undef $aref; | |
395 next; | |
396 } | |
397 | |
398 if ( !$isfasta ) { $isfasta = 1; } | |
399 if ( defined $aref ) { $offset += $aref->[1] - 1; } | |
400 | |
401 $aref = [ $offset, 0, 1 ]; | |
402 $href->{$1} = $aref; | |
403 next; | |
404 } | |
405 | |
406 #-- FastA sequence | |
407 if ( $isfasta && /^\S+$/ ) { | |
408 if ( defined $aref ) { $aref->[1] += (length) - 1; } | |
409 next; | |
410 } | |
411 | |
412 #-- ID len dir | |
413 if ( !$isfasta && /^(\S+)\s+(\d+)\s+([+-]?)$/ ) { | |
414 if ( exists $href->{$1} ) { | |
415 print STDERR "WARNING: Duplicate sequence '$1' ignored\n"; | |
416 undef $aref; | |
417 next; | |
418 } | |
419 | |
420 $dir = (defined $3 && $3 eq "-") ? -1 : 1; | |
421 $aref = [ $offset, $2, $dir ]; | |
422 $offset += $2 - 1; | |
423 $href->{$1} = $aref; | |
424 next; | |
425 } | |
426 | |
427 #-- default | |
428 print STDERR "WARNING: Could not parse $file\n$_"; | |
429 undef %$href; | |
430 last; | |
431 } | |
432 | |
433 close (IDFILE) | |
434 or print STDERR "WARNING: Trouble closing $file, $!\n"; | |
435 } | |
436 | |
437 | |
438 #-------------------------------------------------------------- ParseDelta ----# | |
439 sub ParseDelta ($) | |
440 { | |
441 my $aref = shift; | |
442 | |
443 print STDERR "Reading delta file $OPT_Mfile\n"; | |
444 | |
445 open (MFILE, "<$OPT_Mfile") | |
446 or die "ERROR: Could not open $OPT_Mfile, $!\n"; | |
447 | |
448 my @align; | |
449 my $ispromer; | |
450 my ($sim, $tot); | |
451 my ($lenR, $lenQ, $idR, $idQ); | |
452 | |
453 $_ = <MFILE>; | |
454 $_ = <MFILE>; | |
455 $ispromer = /^PROMER/; | |
456 | |
457 while ( <MFILE> ) { | |
458 #-- delta int | |
459 if ( /^([-]?\d+)$/ ) { | |
460 if ( $1 < 0 ) { | |
461 $tot ++; | |
462 } | |
463 elsif ( $1 == 0 ) { | |
464 $align[4] = ($tot - $sim) / $tot * 100.0; | |
465 push @$aref, [ @align ]; | |
466 $tot = 0; | |
467 } | |
468 next; | |
469 } | |
470 | |
471 #-- alignment header | |
472 if ( /^(\d+) (\d+) (\d+) (\d+) \d+ (\d+) \d+$/ ) { | |
473 if ( $tot == 0 ) { | |
474 @align = ($1, $2, $3, $4, 0, $lenR, $lenQ, $idR, $idQ); | |
475 $tot = abs($1 - $2) + 1; | |
476 $sim = $5; | |
477 if ( $ispromer ) { $tot /= 3.0; } | |
478 next; | |
479 } | |
480 #-- drop to default | |
481 } | |
482 | |
483 #-- sequence header | |
484 if ( /^>(\S+) (\S+) (\d+) (\d+)$/ ) { | |
485 ($idR, $idQ, $lenR, $lenQ) = ($1, $2, $3, $4); | |
486 $tot = 0; | |
487 next; | |
488 } | |
489 | |
490 #-- default | |
491 die "ERROR: Could not parse $OPT_Mfile\n$_"; | |
492 } | |
493 | |
494 close (MFILE) | |
495 or print STDERR "WARNING: Trouble closing $OPT_Mfile, $!\n"; | |
496 } | |
497 | |
498 | |
499 #------------------------------------------------------------ ParseCluster ----# | |
500 sub ParseCluster ($) | |
501 { | |
502 my $aref = shift; | |
503 | |
504 print STDERR "Reading cluster file $OPT_Mfile\n"; | |
505 | |
506 open (MFILE, "<$OPT_Mfile") | |
507 or die "ERROR: Could not open $OPT_Mfile, $!\n"; | |
508 | |
509 my @align; | |
510 my ($dR, $dQ, $len); | |
511 my ($lenR, $lenQ, $idR, $idQ); | |
512 | |
513 $_ = <MFILE>; | |
514 $_ = <MFILE>; | |
515 | |
516 while ( <MFILE> ) { | |
517 #-- match | |
518 if ( /^\s+(\d+)\s+(\d+)\s+(\d+)\s+\S+\s+\S+$/ ) { | |
519 @align = ($1, $1, $2, $2, 100, $lenR, $lenQ, $idR, $idQ); | |
520 $len = $3 - 1; | |
521 $align[1] += $dR == 1 ? $len : -$len; | |
522 $align[3] += $dQ == 1 ? $len : -$len; | |
523 push @$aref, [ @align ]; | |
524 next; | |
525 } | |
526 | |
527 #-- cluster header | |
528 if ( /^[ \-][1-3] [ \-][1-3]$/ ) { | |
529 $dR = /^-/ ? -1 : 1; | |
530 $dQ = /-[1-3]$/ ? -1 : 1; | |
531 next; | |
532 } | |
533 | |
534 #-- sequence header | |
535 if ( /^>(\S+) (\S+) (\d+) (\d+)$/ ) { | |
536 ($idR, $idQ, $lenR, $lenQ) = ($1, $2, $3, $4); | |
537 next; | |
538 } | |
539 | |
540 #-- default | |
541 die "ERROR: Could not parse $OPT_Mfile\n$_"; | |
542 } | |
543 | |
544 close (MFILE) | |
545 or print STDERR "WARNING: Trouble closing $OPT_Mfile, $!\n"; | |
546 } | |
547 | |
548 | |
549 #------------------------------------------------------------- ParseMummer ----# | |
550 sub ParseMummer ($) | |
551 { | |
552 my $aref = shift; | |
553 | |
554 print STDERR "Reading mummer file $OPT_Mfile (use mummer -c)\n"; | |
555 | |
556 open (MFILE, "<$OPT_Mfile") | |
557 or die "ERROR: Could not open $OPT_Mfile, $!\n"; | |
558 | |
559 my @align; | |
560 my ($dQ, $len); | |
561 my ($lenQ, $idQ); | |
562 | |
563 while ( <MFILE> ) { | |
564 #-- 3 column match | |
565 if ( /^\s+(\d+)\s+(\d+)\s+(\d+)$/ ) { | |
566 @align = ($1, $1, $2, $2, 100, 0, $lenQ, "REF", $idQ); | |
567 $len = $3 - 1; | |
568 $align[1] += $len; | |
569 $align[3] += $dQ == 1 ? $len : -$len; | |
570 push @$aref, [ @align ]; | |
571 next; | |
572 } | |
573 | |
574 #-- 4 column match | |
575 if ( /^\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)$/ ) { | |
576 @align = ($2, $2, $3, $3, 100, 0, $lenQ, $1, $idQ); | |
577 $len = $4 - 1; | |
578 $align[1] += $len; | |
579 $align[3] += $dQ == 1 ? $len : -$len; | |
580 push @$aref, [ @align ]; | |
581 next; | |
582 } | |
583 | |
584 #-- sequence header | |
585 if ( /^> (\S+)/ ) { | |
586 $idQ = $1; | |
587 $dQ = /^> \S+ Reverse/ ? -1 : 1; | |
588 $lenQ = /Len = (\d+)/ ? $1 : 0; | |
589 next; | |
590 } | |
591 | |
592 #-- default | |
593 die "ERROR: Could not parse $OPT_Mfile\n$_"; | |
594 } | |
595 | |
596 close (MFILE) | |
597 or print STDERR "WARNING: Trouble closing $OPT_Mfile, $!\n"; | |
598 } | |
599 | |
600 | |
601 #------------------------------------------------------------- ParseTiling ----# | |
602 sub ParseTiling ($) | |
603 { | |
604 my $aref = shift; | |
605 | |
606 print STDERR "Reading tiling file $OPT_Mfile\n"; | |
607 | |
608 open (MFILE, "<$OPT_Mfile") | |
609 or die "ERROR: Could not open $OPT_Mfile, $!\n"; | |
610 | |
611 my @align; | |
612 my ($dR, $dQ, $len); | |
613 my ($lenR, $lenQ, $idR, $idQ); | |
614 | |
615 while ( <MFILE> ) { | |
616 #-- tile | |
617 if ( /^(\S+)\s+\S+\s+\S+\s+(\d+)\s+\S+\s+(\S+)\s+([+-])\s+(\S+)$/ ) { | |
618 @align = ($1, $1, 1, 1, $3, $lenR, $2, $idR, $5); | |
619 $len = $2 - 1; | |
620 $align[1] += $len; | |
621 $align[($4 eq "-" ? 2 : 3)] += $len; | |
622 push @$aref, [ @align ]; | |
623 next; | |
624 } | |
625 | |
626 #-- sequence header | |
627 if ( /^>(\S+) (\d+) bases$/ ) { | |
628 ($idR, $lenR) = ($1, $2); | |
629 next; | |
630 } | |
631 | |
632 #-- default | |
633 die "ERROR: Could not parse $OPT_Mfile\n$_"; | |
634 } | |
635 | |
636 close (MFILE) | |
637 or print STDERR "WARNING: Trouble closing $OPT_Mfile, $!\n"; | |
638 | |
639 if ( ! defined $OPT_coverage ) { $OPT_coverage = 1; } | |
640 } | |
641 | |
642 | |
643 #--------------------------------------------------------------- LayoutIDs ----# | |
644 # For each reference and query sequence, find the set of alignments that | |
645 # produce the heaviest (both in non-redundant coverage and percent | |
646 # identity) alignment subset of each sequence using a modified version | |
647 # of the longest increasing subset algorithm. Let R be the union of all | |
648 # reference LIS subsets, and Q be the union of all query LIS | |
649 # subsets. Let S be the intersection of R and Q. Using this LIS subset, | |
650 # recursively span reference and query sequences by their smaller | |
651 # counterparts until all spanning sequences have been placed. The goal | |
652 # is to cluster all the "major" alignment information along the main | |
653 # diagonal for easy viewing and interpretation. | |
654 sub LayoutIDs ($$) | |
655 { | |
656 my $rref = shift; | |
657 my $qref = shift; | |
658 | |
659 my %rc; # chains of qry seqs needed to span each ref | |
660 my %qc; # chains of ref seqs needed to span each qry | |
661 # {idR} -> [ placed, len, {idQ} -> [ \slope, \loR, \hiR, \loQ, \hiQ ] ] | |
662 # {idQ} -> [ placed, len, {idR} -> [ \slope, \loQ, \hiQ, \loR, \hiR ] ] | |
663 | |
664 my @rl; # oo of ref seqs | |
665 my @ql; # oo of qry seqs | |
666 # [ [idR, slope] ] | |
667 # [ [idQ, slope] ] | |
668 | |
669 #-- get the filtered alignments | |
670 open (BTAB, "$BIN_DIR/show-coords -B $OPT_Dfile |") | |
671 or die "ERROR: Could not open show-coords pipe, $!\n"; | |
672 | |
673 my @align; | |
674 my ($sR, $eR, $sQ, $eQ, $lenR, $lenQ, $idR, $idQ); | |
675 my ($loR, $hiR, $loQ, $hiQ); | |
676 my ($dR, $dQ, $slope); | |
677 while ( <BTAB> ) { | |
678 chomp; | |
679 @align = split "\t"; | |
680 if ( scalar @align != 21 ) { | |
681 die "ERROR: Could not read show-coords pipe, invalid btab format\n"; | |
682 } | |
683 | |
684 $sR = $align[8]; $eR = $align[9]; | |
685 $sQ = $align[6]; $eQ = $align[7]; | |
686 $lenR = $align[18]; $lenQ = $align[2]; | |
687 $idR = $align[5]; $idQ = $align[0]; | |
688 | |
689 #-- skip it if not on include list | |
690 if ( !exists $rref->{$idR} || !exists $qref->{$idQ} ) { next; } | |
691 | |
692 #-- get orientation of both alignments and alignment slope | |
693 $dR = $sR < $eR ? 1 : -1; | |
694 $dQ = $sQ < $eQ ? 1 : -1; | |
695 $slope = $dR == $dQ ? 1 : -1; | |
696 | |
697 #-- get lo's and hi's | |
698 $loR = $dR == 1 ? $sR : $eR; | |
699 $hiR = $dR == 1 ? $eR : $sR; | |
700 | |
701 $loQ = $dQ == 1 ? $sQ : $eQ; | |
702 $hiQ = $dQ == 1 ? $eQ : $sQ; | |
703 | |
704 if ($OPT_ONLY_USE_FATTEST) | |
705 { | |
706 #-- Check to see if there is another better alignment | |
707 if (exists $qc{$idQ}) | |
708 { | |
709 my ($oldR) = keys %{$qc{$idQ}[2]}; | |
710 my $val = $qc{$idQ}[2]{$oldR}; | |
711 | |
712 if (${$val->[4]} - ${$val->[3]} > $hiR - $loR) | |
713 { | |
714 #-- Old alignment is better, skip this one | |
715 next; | |
716 } | |
717 else | |
718 { | |
719 #-- This alignment is better, prune old alignment | |
720 delete $rc{$oldR}[2]{$idQ}; | |
721 delete $qc{$idQ}; | |
722 } | |
723 } | |
724 } | |
725 | |
726 #-- initialize | |
727 if ( !exists $rc{$idR} ) { $rc{$idR} = [ 0, $lenR, { } ]; } | |
728 if ( !exists $qc{$idQ} ) { $qc{$idQ} = [ 0, $lenQ, { } ]; } | |
729 | |
730 #-- if no alignments for these two exist OR | |
731 #-- this alignment is bigger than the current | |
732 if ( !exists $rc{$idR}[2]{$idQ} || !exists $qc{$idQ}[2]{$idR} || | |
733 $hiR - $loR > | |
734 ${$rc{$idR}[2]{$idQ}[2]} - ${$rc{$idR}[2]{$idQ}[1]} ) { | |
735 | |
736 #-- rc and qc reference these anonymous values | |
737 my $aref = [ $slope, $loR, $hiR, $loQ, $hiQ ]; | |
738 | |
739 #-- rc is ordered [ slope, loR, hiR, loQ, hiQ ] | |
740 #-- qc is ordered [ slope, loQ, hiQ, loR, hiR ] | |
741 $rc{$idR}[2]{$idQ}[0] = $qc{$idQ}[2]{$idR}[0] = \$aref->[0]; | |
742 $rc{$idR}[2]{$idQ}[1] = $qc{$idQ}[2]{$idR}[3] = \$aref->[1]; | |
743 $rc{$idR}[2]{$idQ}[2] = $qc{$idQ}[2]{$idR}[4] = \$aref->[2]; | |
744 $rc{$idR}[2]{$idQ}[3] = $qc{$idQ}[2]{$idR}[1] = \$aref->[3]; | |
745 $rc{$idR}[2]{$idQ}[4] = $qc{$idQ}[2]{$idR}[2] = \$aref->[4]; | |
746 } | |
747 } | |
748 | |
749 close (BTAB) | |
750 or print STDERR "WARNING: Trouble closing show-coords pipe, $!\n"; | |
751 | |
752 #-- recursively span sequences to generate the layout | |
753 foreach $idR ( sort { $rc{$b}[1] <=> $rc{$a}[1] } keys %rc ) { | |
754 SpanXwY ($idR, \%rc, \@rl, \%qc, \@ql); | |
755 } | |
756 | |
757 #-- undefine the current offsets | |
758 foreach $idR ( keys %{$rref} ) { undef $rref->{$idR}[0]; } | |
759 foreach $idQ ( keys %{$qref} ) { undef $qref->{$idQ}[0]; } | |
760 | |
761 #-- redefine the offsets according to the new layout | |
762 my $roff = 0; | |
763 foreach my $r ( @rl ) { | |
764 $idR = $r->[0]; | |
765 $rref->{$idR}[0] = $roff; | |
766 $rref->{$idR}[2] = $r->[1]; | |
767 $roff += $rref->{$idR}[1] - 1; | |
768 } | |
769 #-- append the guys left out of the layout | |
770 foreach $idR ( keys %{$rref} ) { | |
771 if ( !defined $rref->{$idR}[0] ) { | |
772 $rref->{$idR}[0] = $roff; | |
773 $roff += $rref->{$idR}[1] - 1; | |
774 } | |
775 } | |
776 | |
777 #-- redefine the offsets according to the new layout | |
778 my $qoff = 0; | |
779 foreach my $q ( @ql ) { | |
780 $idQ = $q->[0]; | |
781 $qref->{$idQ}[0] = $qoff; | |
782 $qref->{$idQ}[2] = $q->[1]; | |
783 $qoff += $qref->{$idQ}[1] - 1; | |
784 } | |
785 #-- append the guys left out of the layout | |
786 foreach $idQ ( keys %{$qref} ) { | |
787 if ( !defined $qref->{$idQ}[0] ) { | |
788 $qref->{$idQ}[0] = $qoff; | |
789 $qoff += $qref->{$idQ}[1] - 1; | |
790 } | |
791 } | |
792 } | |
793 | |
794 | |
795 #----------------------------------------------------------------- SpanXwY ----# | |
796 sub SpanXwY ($$$$$) { | |
797 my $x = shift; # idX | |
798 my $xcr = shift; # xc ref | |
799 my $xlr = shift; # xl ref | |
800 my $ycr = shift; # yc ref | |
801 my $ylr = shift; # yl ref | |
802 | |
803 my @post; | |
804 foreach my $y ( sort { ${$xcr->{$x}[2]{$a}[1]} <=> ${$xcr->{$x}[2]{$b}[1]} } | |
805 keys %{$xcr->{$x}[2]} ) { | |
806 | |
807 #-- skip if already placed (RECURSION BASE) | |
808 if ( $ycr->{$y}[0] ) { next; } | |
809 else { $ycr->{$y}[0] = 1; } | |
810 | |
811 #-- get len and slope info for y | |
812 my $len = $ycr->{$y}[1]; | |
813 my $slope = ${$xcr->{$x}[2]{$y}[0]}; | |
814 | |
815 #-- if we need to flip, reverse complement all y records | |
816 if ( $slope == -1 ) { | |
817 foreach my $xx ( keys %{$ycr->{$y}[2]} ) { | |
818 ${$ycr->{$y}[2]{$xx}[0]} *= -1; | |
819 | |
820 my $loy = ${$ycr->{$y}[2]{$xx}[1]}; | |
821 my $hiy = ${$ycr->{$y}[2]{$xx}[2]}; | |
822 ${$ycr->{$y}[2]{$xx}[1]} = $len - $hiy + 1; | |
823 ${$ycr->{$y}[2]{$xx}[2]} = $len - $loy + 1; | |
824 } | |
825 } | |
826 | |
827 #-- place y | |
828 push @{$ylr}, [ $y, $slope ]; | |
829 | |
830 #-- RECURSE if y > x, else save for later | |
831 if ( $len > $xcr->{$x}[1] ) { SpanXwY ($y, $ycr, $ylr, $xcr, $xlr); } | |
832 else { push @post, $y; } | |
833 } | |
834 | |
835 #-- RECURSE for all y < x | |
836 foreach my $y ( @post ) { SpanXwY ($y, $ycr, $ylr, $xcr, $xlr); } | |
837 } | |
838 | |
839 | |
840 #---------------------------------------------------------------- PlotData ----# | |
841 sub PlotData ($$$) | |
842 { | |
843 my $aref = shift; | |
844 my $rref = shift; | |
845 my $qref = shift; | |
846 | |
847 print STDERR "Writing plot files $OPT_Ffile, $OPT_Rfile", | |
848 (defined $OPT_Hfile ? ", $OPT_Hfile\n" : "\n"); | |
849 | |
850 open (FFILE, ">$OPT_Ffile") | |
851 or die "ERROR: Could not open $OPT_Ffile, $!\n"; | |
852 print FFILE "#-- forward hits sorted by %sim\n0 0 0\n0 0 0\n\n\n"; | |
853 | |
854 open (RFILE, ">$OPT_Rfile") | |
855 or die "ERROR: Could not open $OPT_Rfile, $!\n"; | |
856 print RFILE "#-- reverse hits sorted by %sim\n0 0 0\n0 0 0\n\n\n"; | |
857 | |
858 if ( defined $OPT_Hfile ) { | |
859 open (HFILE, ">$OPT_Hfile") | |
860 or die "ERROR: Could not open $OPT_Hfile, $!\n"; | |
861 print HFILE "#-- highlighted hits sorted by %sim\n0 0 0\n0 0 0\n\n\n"; | |
862 } | |
863 | |
864 my $fh; | |
865 my $align; | |
866 my $isplotted; | |
867 my $ismultiref; | |
868 my $ismultiqry; | |
869 my ($plenR, $plenQ, $pidR, $pidQ); | |
870 | |
871 #-- for each alignment sorted by ascending identity | |
872 foreach $align ( sort { $a->[4] <=> $b->[4] } @$aref ) { | |
873 | |
874 my ($sR, $eR, $sQ, $eQ, $sim, $lenR, $lenQ, $idR, $idQ) = @$align; | |
875 | |
876 if ( ! defined $pidR ) { | |
877 ($plenR, $plenQ, $pidR, $pidQ) = ($lenR, $lenQ, $idR, $idQ); | |
878 } | |
879 | |
880 #-- set the sequence offset, length, direction, etc... | |
881 my ($refoff, $reflen, $refdir); | |
882 my ($qryoff, $qrylen, $qrydir); | |
883 | |
884 if ( (%$rref) ) { | |
885 #-- skip reference sequence or set atts from hash | |
886 if ( !exists ($rref->{$idR}) ) { next; } | |
887 else { ($refoff, $reflen, $refdir) = @{$rref->{$idR}}; } | |
888 } | |
889 else { | |
890 #-- no reference hash, so default atts | |
891 ($refoff, $reflen, $refdir) = (0, $lenR, 1); | |
892 } | |
893 | |
894 if ( (%$qref) ) { | |
895 #-- skip query sequence or set atts from hash | |
896 if ( !exists ($qref->{$idQ}) ) { next; } | |
897 else { ($qryoff, $qrylen, $qrydir) = @{$qref->{$idQ}}; } | |
898 } | |
899 else { | |
900 #-- no query hash, so default atts | |
901 ($qryoff, $qrylen, $qrydir) = (0, $lenQ, 1); | |
902 } | |
903 | |
904 #-- get the orientation right | |
905 if ( $refdir == -1 ) { | |
906 $sR = $reflen - $sR + 1; | |
907 $eR = $reflen - $eR + 1; | |
908 } | |
909 if ( $qrydir == -1 ) { | |
910 $sQ = $qrylen - $sQ + 1; | |
911 $eQ = $qrylen - $eQ + 1; | |
912 } | |
913 | |
914 #-- forward file, reverse file, highlight file | |
915 my @fha; | |
916 | |
917 if ( defined $OPT_breaklen && | |
918 ( ($sR - 1 > $OPT_breaklen && | |
919 $sQ - 1 > $OPT_breaklen && | |
920 $reflen - $sR > $OPT_breaklen && | |
921 $qrylen - $sQ > $OPT_breaklen) | |
922 || | |
923 ($eR - 1 > $OPT_breaklen && | |
924 $eQ - 1 > $OPT_breaklen && | |
925 $reflen - $eR > $OPT_breaklen && | |
926 $qrylen - $eQ > $OPT_breaklen) ) ) { | |
927 push @fha, \*HFILE; | |
928 } | |
929 | |
930 push @fha, (($sR < $eR) == ($sQ < $eQ) ? \*FFILE : \*RFILE); | |
931 | |
932 #-- plot it | |
933 $sR += $refoff; $eR += $refoff; | |
934 $sQ += $qryoff; $eQ += $qryoff; | |
935 | |
936 if ( $OPT_coverage ) { | |
937 foreach $fh ( @fha ) { | |
938 print $fh | |
939 "$sR 10 $sim\n", "$eR 10 $sim\n\n\n", | |
940 "$sR $sim 0\n", "$eR $sim 0\n\n\n"; | |
941 } | |
942 } | |
943 else { | |
944 foreach $fh ( @fha ) { | |
945 print $fh "$sR $sQ $sim\n", "$eR $eQ $sim\n\n\n"; | |
946 } | |
947 } | |
948 | |
949 #-- set some flags | |
950 if ( !$ismultiref && $idR ne $pidR ) { $ismultiref = 1; } | |
951 if ( !$ismultiqry && $idQ ne $pidQ ) { $ismultiqry = 1; } | |
952 if ( !$isplotted ) { $isplotted = 1; } | |
953 } | |
954 | |
955 | |
956 #-- highlight the SNPs | |
957 if ( defined $OPT_SNP ) { | |
958 | |
959 print STDERR "Determining SNPs from sequence and alignment data\n"; | |
960 | |
961 open (SNPS, "$BIN_DIR/show-snps -H -T -l $OPT_Mfile |") | |
962 or die "ERROR: Could not open show-snps pipe, $!\n"; | |
963 | |
964 my @snps; | |
965 my ($pR, $pQ, $lenR, $lenQ, $idR, $idQ); | |
966 while ( <SNPS> ) { | |
967 chomp; | |
968 @snps = split "\t"; | |
969 if ( scalar @snps != 14 ) { | |
970 die "ERROR: Could not read show-snps pipe, invalid format\n"; | |
971 } | |
972 | |
973 $pR = $snps[0]; $pQ = $snps[3]; | |
974 $lenR = $snps[8]; $lenQ = $snps[9]; | |
975 $idR = $snps[12]; $idQ = $snps[13]; | |
976 | |
977 #-- set the sequence offset, length, direction, etc... | |
978 my ($refoff, $reflen, $refdir); | |
979 my ($qryoff, $qrylen, $qrydir); | |
980 | |
981 if ( (%$rref) ) { | |
982 #-- skip reference sequence or set atts from hash | |
983 if ( !exists ($rref->{$idR}) ) { next; } | |
984 else { ($refoff, $reflen, $refdir) = @{$rref->{$idR}}; } | |
985 } | |
986 else { | |
987 #-- no reference hash, so default atts | |
988 ($refoff, $reflen, $refdir) = (0, $lenR, 1); | |
989 } | |
990 | |
991 if ( (%$qref) ) { | |
992 #-- skip query sequence or set atts from hash | |
993 if ( !exists ($qref->{$idQ}) ) { next; } | |
994 else { ($qryoff, $qrylen, $qrydir) = @{$qref->{$idQ}}; } | |
995 } | |
996 else { | |
997 #-- no query hash, so default atts | |
998 ($qryoff, $qrylen, $qrydir) = (0, $lenQ, 1); | |
999 } | |
1000 | |
1001 #-- get the orientation right | |
1002 if ( $refdir == -1 ) { $pR = $reflen - $pR + 1; } | |
1003 if ( $qrydir == -1 ) { $pQ = $qrylen - $pQ + 1; } | |
1004 | |
1005 #-- plot it | |
1006 $pR += $refoff; | |
1007 $pQ += $qryoff; | |
1008 | |
1009 if ( $OPT_coverage ) { | |
1010 print HFILE "$pR 10 0\n", "$pR 10 0\n\n\n", | |
1011 } | |
1012 else { | |
1013 print HFILE "$pR $pQ 0\n", "$pR $pQ 0\n\n\n"; | |
1014 } | |
1015 } | |
1016 | |
1017 close (SNPS) | |
1018 or print STDERR "WARNING: Trouble closing show-snps pipe, $!\n"; | |
1019 } | |
1020 | |
1021 | |
1022 close (FFILE) | |
1023 or print STDERR "WARNING: Trouble closing $OPT_Ffile, $!\n"; | |
1024 | |
1025 close (RFILE) | |
1026 or print STDERR "WARNING: Trouble closing $OPT_Rfile, $!\n"; | |
1027 | |
1028 if ( defined $OPT_Hfile ) { | |
1029 close (HFILE) | |
1030 or print STDERR "WARNING: Trouble closing $OPT_Hfile, $!\n"; | |
1031 } | |
1032 | |
1033 | |
1034 if ( !(%$rref) ) { | |
1035 if ( $ismultiref ) { | |
1036 print STDERR | |
1037 "WARNING: Multiple ref sequences overlaid, try -R or -r\n"; | |
1038 } | |
1039 elsif ( defined $pidR ) { | |
1040 $rref->{$pidR} = [ 0, $plenR, 1 ]; | |
1041 } | |
1042 } | |
1043 | |
1044 if ( !(%$qref) ) { | |
1045 if ( $ismultiqry && !$OPT_coverage ) { | |
1046 print STDERR | |
1047 "WARNING: Multiple qry sequences overlaid, try -Q, -q or -c\n"; | |
1048 } | |
1049 elsif ( defined $pidQ ) { | |
1050 $qref->{$pidQ} = [ 0, $plenQ, 1 ]; | |
1051 } | |
1052 } | |
1053 | |
1054 if ( !$isplotted ) { | |
1055 die "ERROR: No alignment data to plot\n"; | |
1056 } | |
1057 } | |
1058 | |
1059 | |
1060 #----------------------------------------------------------------- WriteGP ----# | |
1061 sub WriteGP ($$) | |
1062 { | |
1063 my $rref = shift; | |
1064 my $qref = shift; | |
1065 | |
1066 print STDERR "Writing gnuplot script $OPT_Gfile\n"; | |
1067 | |
1068 open (GFILE, ">$OPT_Gfile") | |
1069 or die "ERROR: Could not open $OPT_Gfile, $!\n"; | |
1070 | |
1071 my ($FWD, $REV, $HLT) = (1, 2, 3); | |
1072 my $SIZE = $TERMSIZE{$OPT_terminal}{$OPT_size}; | |
1073 | |
1074 #-- terminal specific stuff | |
1075 my ($P_TERM, $P_SIZE, %P_PS, %P_LW); | |
1076 foreach ( $OPT_terminal ) { | |
1077 /^$X11/ and do { | |
1078 $P_TERM = $OPT_gpstatus == 0 ? | |
1079 "$X11 font \"$FFACE,$FSIZE\"" : "$X11"; | |
1080 | |
1081 %P_PS = ( $FWD => 1.0, $REV => 1.0, $HLT => 1.0 ); | |
1082 | |
1083 %P_LW = $OPT_coverage || $OPT_color ? | |
1084 ( $FWD => 3.0, $REV => 3.0, $HLT => 3.0 ) : | |
1085 ( $FWD => 2.0, $REV => 2.0, $HLT => 2.0 ); | |
1086 | |
1087 $P_SIZE = $OPT_coverage ? | |
1088 "set size 1,1" : | |
1089 "set size 1,1"; | |
1090 | |
1091 last; | |
1092 }; | |
1093 | |
1094 /^$PS/ and do { | |
1095 $P_TERM = defined $OPT_color && $OPT_color == 0 ? | |
1096 "$PS monochrome" : "$PS color"; | |
1097 $P_TERM .= $OPT_gpstatus == 0 ? | |
1098 " solid \"$FFACE\" $FSIZE" : " solid \"$FFACE\" $FSIZE"; | |
1099 | |
1100 %P_PS = ( $FWD => 0.5, $REV => 0.5, $HLT => 0.5 ); | |
1101 | |
1102 %P_LW = $OPT_coverage || $OPT_color ? | |
1103 ( $FWD => 4.0, $REV => 4.0, $HLT => 4.0 ) : | |
1104 ( $FWD => 2.0, $REV => 2.0, $HLT => 2.0 ); | |
1105 | |
1106 $P_SIZE = $OPT_coverage ? | |
1107 "set size ".(1.0 * $SIZE).",".(0.5 * $SIZE) : | |
1108 "set size ".(1.0 * $SIZE).",".(1.0 * $SIZE); | |
1109 | |
1110 last; | |
1111 }; | |
1112 | |
1113 /^$PNG/ and do { | |
1114 $P_TERM = $OPT_gpstatus == 0 ? | |
1115 "$PNG tiny size $SIZE,$SIZE" : "$PNG small"; | |
1116 if ( defined $OPT_color && $OPT_color == 0 ) { | |
1117 $P_TERM .= " xffffff x000000 x000000"; | |
1118 $P_TERM .= " x000000 x000000 x000000"; | |
1119 $P_TERM .= " x000000 x000000 x000000"; | |
1120 } | |
1121 | |
1122 %P_PS = ( $FWD => 1.0, $REV => 1.0, $HLT => 1.0 ); | |
1123 | |
1124 %P_LW = $OPT_coverage || $OPT_color ? | |
1125 ( $FWD => 3.0, $REV => 3.0, $HLT => 3.0 ) : | |
1126 ( $FWD => 3.0, $REV => 3.0, $HLT => 3.0 ); | |
1127 | |
1128 $P_SIZE = $OPT_coverage ? | |
1129 "set size 1,.375" : | |
1130 "set size 1,1"; | |
1131 | |
1132 last; | |
1133 }; | |
1134 | |
1135 die "ERROR: Don't know how to initialize terminal, $OPT_terminal\n"; | |
1136 } | |
1137 | |
1138 #-- plot commands | |
1139 my ($P_WITH, $P_FORMAT, $P_LS, $P_KEY, %P_PT, %P_LT); | |
1140 | |
1141 %P_PT = ( $FWD => 6, $REV => 6, $HLT => 6 ); | |
1142 %P_LT = defined $OPT_Hfile ? | |
1143 ( $FWD => 2, $REV => 2, $HLT => 1 ) : | |
1144 ( $FWD => 1, $REV => 3, $HLT => 2 ); | |
1145 | |
1146 $P_WITH = $OPT_coverage || $OPT_color ? "w l" : "w lp"; | |
1147 | |
1148 $P_FORMAT = "set format \"$TFORMAT\""; | |
1149 if ( $OPT_gpstatus == 0 ) { | |
1150 $P_LS = "set style line"; | |
1151 $P_KEY = "unset key"; | |
1152 $P_FORMAT .= "\nset mouse format \"$TFORMAT\""; | |
1153 $P_FORMAT .= "\nset mouse mouseformat \"$MFORMAT\""; | |
1154 $P_FORMAT .= "\nif(GPVAL_VERSION < 5) set mouse clipboardformat \"$MFORMAT\""; | |
1155 } | |
1156 else { | |
1157 $P_LS = "set linestyle"; | |
1158 $P_KEY = "set nokey"; | |
1159 } | |
1160 | |
1161 | |
1162 my @refk = keys (%$rref); | |
1163 my @qryk = keys (%$qref); | |
1164 my ($xrange, $yrange); | |
1165 my ($xlabel, $ylabel); | |
1166 my ($tic, $dir); | |
1167 my $border = 0; | |
1168 | |
1169 #-- terminal header and output | |
1170 print GFILE "set terminal $P_TERM\n"; | |
1171 | |
1172 if ( defined $OPT_Pfile ) { | |
1173 print GFILE "set output \"$OPT_Pfile\"\n"; | |
1174 } | |
1175 | |
1176 if ( defined $OPT_title ) { | |
1177 print GFILE "set title \"$OPT_title\"\n"; | |
1178 } | |
1179 | |
1180 #-- set tics, determine labels, ranges (ref) | |
1181 if ( scalar (@refk) == 1 ) { | |
1182 $xlabel = $refk[0]; | |
1183 $xrange = $rref->{$xlabel}[1]; | |
1184 } | |
1185 else { | |
1186 $xrange = 0; | |
1187 print GFILE "set xtics rotate \( \\\n"; | |
1188 foreach $xlabel ( sort { $rref->{$a}[0] <=> $rref->{$b}[0] } @refk ) { | |
1189 $xrange += $rref->{$xlabel}[1]; | |
1190 $tic = $rref->{$xlabel}[0] + 1; | |
1191 $dir = ($rref->{$xlabel}[2] == 1) ? "" : "*"; | |
1192 print GFILE " \"$dir$xlabel\" $tic, \\\n"; | |
1193 } | |
1194 print GFILE " \"\" $xrange \\\n\)\n"; | |
1195 $xlabel = "REF"; | |
1196 } | |
1197 if ( $xrange == 0 ) { $xrange = "*"; } | |
1198 | |
1199 #-- set tics, determine labels, ranges (qry) | |
1200 if ( $OPT_coverage ) { | |
1201 $ylabel = "%SIM"; | |
1202 $yrange = 110; | |
1203 } | |
1204 elsif ( scalar (@qryk) == 1 ) { | |
1205 $ylabel = $qryk[0]; | |
1206 $yrange = $qref->{$ylabel}[1]; | |
1207 } | |
1208 else { | |
1209 $yrange = 0; | |
1210 print GFILE "set ytics \( \\\n"; | |
1211 foreach $ylabel ( sort { $qref->{$a}[0] <=> $qref->{$b}[0] } @qryk ) { | |
1212 $yrange += $qref->{$ylabel}[1]; | |
1213 $tic = $qref->{$ylabel}[0] + 1; | |
1214 $dir = ($qref->{$ylabel}[2] == 1) ? "" : "*"; | |
1215 print GFILE " \"$dir$ylabel\" $tic, \\\n"; | |
1216 } | |
1217 print GFILE " \"\" $yrange \\\n\)\n"; | |
1218 $ylabel = "QRY"; | |
1219 } | |
1220 if ( $yrange == 0 ) { $yrange = "*"; } | |
1221 | |
1222 #-- determine borders | |
1223 if ( $xrange ne "*" && scalar (@refk) == 1 ) { $border |= 10; } | |
1224 if ( $yrange ne "*" && scalar (@qryk) == 1 ) { $border |= 5; } | |
1225 if ( $OPT_coverage ) { $border |= 5; } | |
1226 | |
1227 #-- grid, labels, border | |
1228 print GFILE | |
1229 "$P_SIZE\n", | |
1230 "set grid\n", | |
1231 "$P_KEY\n", | |
1232 "set border $border\n", | |
1233 "set tics scale 0\n", | |
1234 "set xlabel \"$xlabel\"\n", | |
1235 "set ylabel \"$ylabel\"\n", | |
1236 "$P_FORMAT\n"; | |
1237 | |
1238 #-- ranges | |
1239 if ( defined $OPT_xrange ) { print GFILE "set xrange $OPT_xrange\n"; } | |
1240 else { print GFILE "set xrange [1:$xrange]\n"; } | |
1241 | |
1242 if ( defined $OPT_yrange ) { print GFILE "set yrange $OPT_yrange\n"; } | |
1243 else { print GFILE "set yrange [1:$yrange]\n"; } | |
1244 | |
1245 #-- if %sim plot | |
1246 if ( $OPT_color ) { | |
1247 print GFILE | |
1248 "set zrange [0:100]\n", | |
1249 "set colorbox default\n", | |
1250 "set cblabel \"%similarity\"\n", | |
1251 "set cbrange [0:100]\n", | |
1252 "set cbtics 20\n", | |
1253 "set pm3d map\n", | |
1254 "set palette model RGB defined ( \\\n", | |
1255 " 0 \"#000000\", \\\n", | |
1256 " 4 \"#DD00DD\", \\\n", | |
1257 " 6 \"#0000DD\", \\\n", | |
1258 " 7 \"#00DDDD\", \\\n", | |
1259 " 8 \"#00DD00\", \\\n", | |
1260 " 9 \"#DDDD00\", \\\n", | |
1261 " 10 \"#DD0000\" \\\n)\n"; | |
1262 } | |
1263 | |
1264 foreach my $s ( ($FWD, $REV, $HLT) ) { | |
1265 my $ss = "$P_LS $s "; | |
1266 $ss .= $OPT_color ? " palette" : " lt $P_LT{$s}"; | |
1267 $ss .= " lw $P_LW{$s}"; | |
1268 if ( ! $OPT_coverage || $s == $HLT ) { | |
1269 $ss .= " pt $P_PT{$s} ps $P_PS{$s}"; | |
1270 } | |
1271 print GFILE "$ss\n"; | |
1272 } | |
1273 | |
1274 #-- plot it | |
1275 print GFILE | |
1276 ($OPT_color ? "splot \\\n" : "plot \\\n"); | |
1277 print GFILE | |
1278 " \"$OPT_Ffile\" title \"FWD\" $P_WITH ls $FWD, \\\n", | |
1279 " \"$OPT_Rfile\" title \"REV\" $P_WITH ls $REV", | |
1280 (! defined $OPT_Hfile ? "\n" : | |
1281 ", \\\n \"$OPT_Hfile\" title \"HLT\" w lp ls $HLT"); | |
1282 | |
1283 #-- interactive mode | |
1284 if ( $OPT_terminal eq $X11 ) { | |
1285 print GFILE "\n", | |
1286 "print \"-- INTERACTIVE MODE --\"\n", | |
1287 "print \"consult gnuplot docs for command list\"\n", | |
1288 "print \"mouse 1: coords to clipboard\"\n", | |
1289 "print \"mouse 2: mark on plot\"\n", | |
1290 "print \"mouse 3: zoom box\"\n", | |
1291 "print \"'h' for help in plot window\"\n", | |
1292 "print \"enter to exit\"\n", | |
1293 "pause -1\n"; | |
1294 } | |
1295 | |
1296 close (GFILE) | |
1297 or print STDERR "WARNING: Trouble closing $OPT_Gfile, $!\n"; | |
1298 } | |
1299 | |
1300 | |
1301 #------------------------------------------------------------------- RunGP ----# | |
1302 sub RunGP ( ) | |
1303 { | |
1304 if ( defined $OPT_Pfile ) { | |
1305 print STDERR "Rendering plot $OPT_Pfile\n"; | |
1306 } | |
1307 else { | |
1308 print STDERR "Rendering plot to screen\n"; | |
1309 } | |
1310 | |
1311 my $cmd = "gnuplot"; | |
1312 | |
1313 #-- x11 specifics | |
1314 if ( $OPT_terminal eq $X11 ) { | |
1315 my $size = $TERMSIZE{$OPT_terminal}{$OPT_size}; | |
1316 $cmd .= " -geometry ${size}x"; | |
1317 if ( $OPT_coverage ) { $size = sprintf ("%.0f", $size * .375); } | |
1318 $cmd .= "${size}+0+0 -title mummerplot"; | |
1319 | |
1320 if ( defined $OPT_color && $OPT_color == 0 ) { | |
1321 $cmd .= " -mono"; | |
1322 $cmd .= " -xrm 'gnuplot*line1Dashes: 0'"; | |
1323 $cmd .= " -xrm 'gnuplot*line2Dashes: 0'"; | |
1324 $cmd .= " -xrm 'gnuplot*line3Dashes: 0'"; | |
1325 } | |
1326 | |
1327 if ( $OPT_rv ) { | |
1328 $cmd .= " -rv"; | |
1329 $cmd .= " -xrm 'gnuplot*background: black'"; | |
1330 $cmd .= " -xrm 'gnuplot*textColor: white'"; | |
1331 $cmd .= " -xrm 'gnuplot*borderColor: white'"; | |
1332 $cmd .= " -xrm 'gnuplot*axisColor: white'"; | |
1333 } | |
1334 } | |
1335 | |
1336 $cmd .= " $OPT_Gfile"; | |
1337 | |
1338 system ($cmd) | |
1339 and print STDERR "WARNING: Unable to run '$cmd', $!\n"; | |
1340 } | |
1341 | |
1342 | |
1343 #---------------------------------------------------------------- ListenGP ----# | |
1344 sub ListenGP($$) | |
1345 { | |
1346 my $rref = shift; | |
1347 my $qref = shift; | |
1348 | |
1349 my ($refc, $qryc); | |
1350 my ($refid, $qryid); | |
1351 my ($rsock, $qsock); | |
1352 my $oldclip = ""; | |
1353 | |
1354 #-- get IDs sorted by offset | |
1355 my @refo = sort { $rref->{$a}[0] <=> $rref->{$b}[0] } keys %$rref; | |
1356 my @qryo = sort { $qref->{$a}[0] <=> $qref->{$b}[0] } keys %$qref; | |
1357 | |
1358 #-- attempt to connect sockets | |
1359 if ( $OPT_rport ) { | |
1360 $rsock = IO::Socket::INET->new("localhost:$OPT_rport") | |
1361 or print STDERR "WARNING: Could not connect to rport $OPT_rport\n"; | |
1362 } | |
1363 | |
1364 if ( $OPT_qport ) { | |
1365 $qsock = IO::Socket::INET->new("localhost:$OPT_qport") | |
1366 or print STDERR "WARNING: Could not connect to qport $OPT_qport\n"; | |
1367 } | |
1368 | |
1369 #-- while parent still exists | |
1370 while ( getppid != 1 ) { | |
1371 | |
1372 #-- query the clipboard | |
1373 $_ = `xclip -o -silent -selection primary`; | |
1374 if ( $? >> 8 ) { | |
1375 die "WARNING: Unable to query clipboard with xclip\n"; | |
1376 } | |
1377 | |
1378 #-- if cliboard has changed and contains a coordinate | |
1379 if ( $_ ne $oldclip && (($refc, $qryc) = /^\[(\d+), (\d+)\]/) ) { | |
1380 | |
1381 $oldclip = $_; | |
1382 | |
1383 #-- translate the reference position | |
1384 $refid = "NULL"; | |
1385 for ( my $i = 0; $i < (scalar @refo); ++ $i ) { | |
1386 my $aref = $rref->{$refo[$i]}; | |
1387 if ( $i == $#refo || $aref->[0] + $aref->[1] > $refc ) { | |
1388 $refid = $refo[$i]; | |
1389 $refc -= $aref->[0]; | |
1390 if ( $aref->[2] == -1 ) { | |
1391 $refc = $aref->[1] - $refc + 1; | |
1392 } | |
1393 last; | |
1394 } | |
1395 } | |
1396 | |
1397 #-- translate the query position | |
1398 $qryid = "NULL"; | |
1399 for ( my $i = 0; $i < (scalar @qryo); ++ $i ) { | |
1400 my $aref = $qref->{$qryo[$i]}; | |
1401 if ( $i == $#qryo || $aref->[0] + $aref->[1] > $qryc ) { | |
1402 $qryid = $qryo[$i]; | |
1403 $qryc -= $aref->[0]; | |
1404 if ( $aref->[2] == -1 ) { | |
1405 $qryc = $aref->[1] - $qryc + 1; | |
1406 } | |
1407 last; | |
1408 } | |
1409 } | |
1410 | |
1411 #-- print the info to stdout and socket | |
1412 print "$refid\t$qryid\t$refc\t$qryc\n"; | |
1413 | |
1414 if ( $rsock ) { | |
1415 print $rsock "contig I$refid $refc\n"; | |
1416 print "sent \"contig I$refid $refc\" to $OPT_rport\n"; | |
1417 } | |
1418 if ( $qsock ) { | |
1419 print $qsock "contig I$qryid $qryc\n"; | |
1420 print "sent \"contig I$qryid $qryc\" to $OPT_qport\n"; | |
1421 } | |
1422 } | |
1423 | |
1424 #-- sleep for half second | |
1425 select undef, undef, undef, .5; | |
1426 } | |
1427 | |
1428 exit (0); | |
1429 } | |
1430 | |
1431 | |
1432 #------------------------------------------------------------ ParseOptions ----# | |
1433 sub ParseOptions ( ) | |
1434 { | |
1435 my ($opt_small, $opt_medium, $opt_large); | |
1436 my ($opt_ps, $opt_x11, $opt_png); | |
1437 my $cnt; | |
1438 | |
1439 #-- Get options | |
1440 my $err = $tigr -> TIGR_GetOptions | |
1441 ( | |
1442 "b|breaklen:i" => \$OPT_breaklen, | |
1443 "color!" => \$OPT_color, | |
1444 "c|coverage!" => \$OPT_coverage, | |
1445 "f|filter!" => \$OPT_filter, | |
1446 "l|layout!" => \$OPT_layout, | |
1447 "p|prefix=s" => \$OPT_prefix, | |
1448 "rv" => \$OPT_rv, | |
1449 "r|IdR=s" => \$OPT_IdR, | |
1450 "q|IdQ=s" => \$OPT_IdQ, | |
1451 "R|Rfile=s" => \$OPT_IDRfile, | |
1452 "Q|Qfile=s" => \$OPT_IDQfile, | |
1453 "rport=i" => \$OPT_rport, | |
1454 "qport=i" => \$OPT_qport, | |
1455 "s|size=s" => \$OPT_size, | |
1456 "S|SNP" => \$OPT_SNP, | |
1457 "t|terminal=s" => \$OPT_terminal, | |
1458 "title=s" => \$OPT_title, | |
1459 "x|xrange=s" => \$OPT_xrange, | |
1460 "y|yrange=s" => \$OPT_yrange, | |
1461 "x11" => \$opt_x11, | |
1462 "postscript" => \$opt_ps, | |
1463 "png" => \$opt_png, | |
1464 "small" => \$opt_small, | |
1465 "medium" => \$opt_medium, | |
1466 "large" => \$opt_large, | |
1467 "fat" => \$OPT_ONLY_USE_FATTEST, | |
1468 ); | |
1469 | |
1470 if ( !$err || scalar (@ARGV) != 1 ) { | |
1471 $tigr -> printUsageInfo( ); | |
1472 die "Try '$0 -h' for more information.\n"; | |
1473 } | |
1474 | |
1475 $cnt = 0; | |
1476 if ( $opt_png ) { $OPT_terminal = $PNG; $cnt ++; } | |
1477 if ( $opt_ps ) { $OPT_terminal = $PS; $cnt ++; } | |
1478 if ( $opt_x11 ) { $OPT_terminal = $X11; $cnt ++; } | |
1479 if ( $cnt > 1 ) { | |
1480 print STDERR | |
1481 "WARNING: Multiple terminals not allowed, using '$OPT_terminal'\n"; | |
1482 } | |
1483 | |
1484 $cnt = 0; | |
1485 if ( $opt_large ) { $OPT_size = $LARGE; $cnt ++; } | |
1486 if ( $opt_medium ) { $OPT_size = $MEDIUM; $cnt ++; } | |
1487 if ( $opt_small ) { $OPT_size = $SMALL; $cnt ++; } | |
1488 if ( $cnt > 1 ) { | |
1489 print STDERR | |
1490 "WARNING: Multiple sizes now allowed, using '$OPT_size'\n"; | |
1491 } | |
1492 | |
1493 #-- Check that status of gnuplot | |
1494 $OPT_gpstatus = system ("gnuplot --version"); | |
1495 | |
1496 if ( $OPT_gpstatus == -1 ) { | |
1497 print STDERR | |
1498 "WARNING: Could not find gnuplot, plot will not be rendered\n"; | |
1499 } | |
1500 elsif ( $OPT_gpstatus ) { | |
1501 print STDERR | |
1502 "WARNING: Using outdated gnuplot, use v4.0 for best results\n"; | |
1503 | |
1504 if ( $OPT_color ) { | |
1505 print STDERR | |
1506 "WARNING: Turning of --color option for compatibility\n"; | |
1507 undef $OPT_color; | |
1508 } | |
1509 | |
1510 if ( $OPT_terminal eq $PNG && $OPT_size ne $SMALL ) { | |
1511 print STDERR | |
1512 "WARNING: Turning of --size option for compatibility\n"; | |
1513 $OPT_size = $SMALL; | |
1514 } | |
1515 } | |
1516 | |
1517 #-- Check options | |
1518 if ( !exists $TERMSIZE{$OPT_terminal} ) { | |
1519 die "ERROR: Invalid terminal type, $OPT_terminal\n"; | |
1520 } | |
1521 | |
1522 if ( !exists $TERMSIZE{$OPT_terminal}{$OPT_size} ) { | |
1523 die "ERROR: Invalid terminal size, $OPT_size\n"; | |
1524 } | |
1525 | |
1526 if ( $OPT_xrange ) { | |
1527 $OPT_xrange =~ tr/,/:/; | |
1528 $OPT_xrange =~ /^\[\d+:\d+\]$/ | |
1529 or die "ERROR: Invalid xrange format, $OPT_xrange\n"; | |
1530 } | |
1531 | |
1532 if ( $OPT_yrange ) { | |
1533 $OPT_yrange =~ tr/,/:/; | |
1534 $OPT_yrange =~ /^\[\d+:\d+\]$/ | |
1535 or die "ERROR: Invalid yrange format, $OPT_yrange\n"; | |
1536 } | |
1537 | |
1538 #-- Set file names | |
1539 $OPT_Mfile = $ARGV[0]; | |
1540 $tigr->isReadableFile ($OPT_Mfile) | |
1541 or die "ERROR: Could not read $OPT_Mfile, $!\n"; | |
1542 | |
1543 $OPT_Ffile = $OPT_prefix . $SUFFIX{$FWDPLOT}; | |
1544 $tigr->isWritableFile ($OPT_Ffile) or $tigr->isCreatableFile ($OPT_Ffile) | |
1545 or die "ERROR: Could not write $OPT_Ffile, $!\n"; | |
1546 | |
1547 $OPT_Rfile = $OPT_prefix . $SUFFIX{$REVPLOT}; | |
1548 $tigr->isWritableFile ($OPT_Rfile) or $tigr->isCreatableFile ($OPT_Rfile) | |
1549 or die "ERROR: Could not write $OPT_Rfile, $!\n"; | |
1550 | |
1551 if ( defined $OPT_breaklen || defined $OPT_SNP ) { | |
1552 $OPT_Hfile = $OPT_prefix . $SUFFIX{$HLTPLOT}; | |
1553 $tigr->isWritableFile($OPT_Hfile) or $tigr->isCreatableFile($OPT_Hfile) | |
1554 or die "ERROR: Could not write $OPT_Hfile, $!\n"; | |
1555 } | |
1556 | |
1557 if ($OPT_ONLY_USE_FATTEST) | |
1558 { | |
1559 $OPT_layout = 1; | |
1560 } | |
1561 | |
1562 if ( $OPT_filter || $OPT_layout ) { | |
1563 $OPT_Dfile = $OPT_prefix . $SUFFIX{$FILTER}; | |
1564 $tigr->isWritableFile($OPT_Dfile) or $tigr->isCreatableFile($OPT_Dfile) | |
1565 or die "ERROR: Could not write $OPT_Dfile, $!\n"; | |
1566 } | |
1567 | |
1568 $OPT_Gfile = $OPT_prefix . $SUFFIX{$GNUPLOT}; | |
1569 $tigr->isWritableFile ($OPT_Gfile) or $tigr->isCreatableFile ($OPT_Gfile) | |
1570 or die "ERROR: Could not write $OPT_Gfile, $!\n"; | |
1571 | |
1572 if ( exists $SUFFIX{$OPT_terminal} ) { | |
1573 $OPT_Pfile = $OPT_prefix . $SUFFIX{$OPT_terminal}; | |
1574 $tigr->isWritableFile($OPT_Pfile) or $tigr->isCreatableFile($OPT_Pfile) | |
1575 or die "ERROR: Could not write $OPT_Pfile, $!\n"; | |
1576 } | |
1577 | |
1578 if ( defined $OPT_IDRfile ) { | |
1579 $tigr->isReadableFile ($OPT_IDRfile) | |
1580 or die "ERROR: Could not read $OPT_IDRfile, $!\n"; | |
1581 } | |
1582 | |
1583 if ( defined $OPT_IDQfile ) { | |
1584 $tigr->isReadableFile ($OPT_IDQfile) | |
1585 or die "ERROR: Could not read $OPT_IDQfile, $!\n"; | |
1586 } | |
1587 | |
1588 if ( (defined $OPT_rport || defined $OPT_qport) && | |
1589 ($OPT_terminal ne $X11 || $OPT_gpstatus ) ) { | |
1590 print STDERR | |
1591 "WARNING: Port options available only for v4.0 X11 plots\n"; | |
1592 undef $OPT_rport; | |
1593 undef $OPT_qport; | |
1594 } | |
1595 | |
1596 | |
1597 if ( defined $OPT_color && defined $OPT_Hfile ) { | |
1598 print STDERR | |
1599 "WARNING: Turning off --color option so highlighting is visible\n"; | |
1600 undef $OPT_color; | |
1601 } | |
1602 } |