jpayne@69
|
1 #!/usr/bin/env perl
|
jpayne@69
|
2 # (c) Steven Salzberg 2001
|
jpayne@69
|
3 # Make an xfig plot for a comparison of a reference chromosome (or single
|
jpayne@69
|
4 # molecule) versus a multifasta file of contigs from another genome.
|
jpayne@69
|
5 # The input file here is a NUCmer coords file such as:
|
jpayne@69
|
6 # 2551 2577 | 240 266 | 27 27 | 96.30 | 20302755 1424 | 0.00 1.90 | 2R 1972084
|
jpayne@69
|
7 # generated by running 'show-coords -c -l <delta file>'
|
jpayne@69
|
8 # For the above example, D. melanogaster chr 2R is the reference and the query
|
jpayne@69
|
9 # is an assembly with 1000s of contigs from D. pseudoobscura
|
jpayne@69
|
10 # The file needs to be sorted by the smaller contig ids, via:
|
jpayne@69
|
11 # tail +6 Dmel2R-vs-Dpseudo.coords | sort -k 19n -k 4n > Dmel2R-vs-Dpseudo-resort.coords
|
jpayne@69
|
12 # and remember to get rid of top 5 (header) lines.
|
jpayne@69
|
13 # Usage: plot-drosoph-align-xfig.perl Dmel2R-vs-Dpseudo-resort.coords
|
jpayne@69
|
14 unless (open(coordsfile,$ARGV[0])) {
|
jpayne@69
|
15 die ("can't open file $ARGV[0].\n");
|
jpayne@69
|
16 }
|
jpayne@69
|
17 $Xscale = 0.005;
|
jpayne@69
|
18 $Yscale = 20;
|
jpayne@69
|
19 $chrcolor = 4; # 4 is red, 1 is blue, 2 is green
|
jpayne@69
|
20 $green = 2;
|
jpayne@69
|
21 $contigcolor = 1; # contigs are blue
|
jpayne@69
|
22
|
jpayne@69
|
23 # print header info
|
jpayne@69
|
24 print "#FIG 3.2\nLandscape\nCenter\nInches\nLetter \n100.00\nSingle\n-2\n1200 2\n";
|
jpayne@69
|
25 $first_time = 1;
|
jpayne@69
|
26 while (<coordsfile>) {
|
jpayne@69
|
27 ($s1,$e1,$x1,$s2,$e2,$x2,$l1,$l2,$x3,$percent_id,$x4,$Rlen,$Qlen,$x5,$Rcov,$Qcov,$x6,$Rid,$Qid) = split(" ");
|
jpayne@69
|
28 if ($prevQid eq $Qid) { # query contig is same as prev line
|
jpayne@69
|
29 $dist = abs($s1 - $prev_s1);
|
jpayne@69
|
30 if ( $dist > 2 * $Qlen) { # if this match is too far away
|
jpayne@69
|
31 # print the contig here if the matching bit is > 1000
|
jpayne@69
|
32 if ($right_end{$Qid} - $left_end{$Qid} > 1000) {
|
jpayne@69
|
33 # print it at y=50 rather than 100 because the scale is 0-50,
|
jpayne@69
|
34 # where 0=50% identical and 50 is 100% identical
|
jpayne@69
|
35 print_xfig_line($left_end{$Qid},50,$right_end{$Qid},50,$contigcolor);
|
jpayne@69
|
36 print_label($left_end{$Qid},50,$right_end{$Qid},$Qid,5);
|
jpayne@69
|
37 }
|
jpayne@69
|
38 $left_end{$Qid} = $s1; # then re-set the start and end of the contig
|
jpayne@69
|
39 $right_end{$Qid} = $e1; # and we'll print it again later
|
jpayne@69
|
40 }
|
jpayne@69
|
41 else { # extend the boundaries of the match
|
jpayne@69
|
42 if ($s1 < $left_end{$Qid}) { $left_end{$Qid} = $s1; }
|
jpayne@69
|
43 if ($e1 > $right_end{$Qid}) { $right_end{$Qid} = $e1; }
|
jpayne@69
|
44 }
|
jpayne@69
|
45 }
|
jpayne@69
|
46 else { # this is a different contig, first time seeing it
|
jpayne@69
|
47 $left_end{$Qid} = $s1;
|
jpayne@69
|
48 $right_end{$Qid} = $e1;
|
jpayne@69
|
49 # print the previous contig as a line at y=100 for 100%
|
jpayne@69
|
50 if ($first_time < 1) {
|
jpayne@69
|
51 print_xfig_line($left_end{$prevQid},50,$right_end{$prevQid},50,$contigcolor);
|
jpayne@69
|
52 print_label($left_end{$prevQid},50,$right_end{$prevQid},$prevQid,5);
|
jpayne@69
|
53 }
|
jpayne@69
|
54 else { $first_time = 0; }
|
jpayne@69
|
55 }
|
jpayne@69
|
56 $prevQid = $Qid;
|
jpayne@69
|
57 $prev_s1 = $s1;
|
jpayne@69
|
58 $prev_Qlen = $Qlen;
|
jpayne@69
|
59 # next print the matching bit as a separate line, with a separate color,
|
jpayne@69
|
60 # with its height determined by percent match
|
jpayne@69
|
61 $Xleft = int($Xscale * $s1);
|
jpayne@69
|
62 $Xright = int($Xscale * $e1);
|
jpayne@69
|
63 if ($Xleft == $Xright) { $Xright += 1; }
|
jpayne@69
|
64 if ($percent_id < 50) { $percent_id = 50; }
|
jpayne@69
|
65 print_xfig_line($s1,$percent_id - 50,$e1,$percent_id - 50,$green);
|
jpayne@69
|
66 }
|
jpayne@69
|
67 # print very last contig
|
jpayne@69
|
68 $left_end{$Qid} = $s1;
|
jpayne@69
|
69 $right_end{$Qid} = $e1;
|
jpayne@69
|
70 print_xfig_line($s1,50,$e1,50,$contigcolor);
|
jpayne@69
|
71 print_label($s1,50,$e1,$Qid,5);
|
jpayne@69
|
72 close(coordsfile);
|
jpayne@69
|
73
|
jpayne@69
|
74 # now draw the horizontal chr line for the reference
|
jpayne@69
|
75 $label_xpos = $Xscale * $Rlen;
|
jpayne@69
|
76 print "4 0 0 100 0 0 12 0.0000 4 135 405 ";
|
jpayne@69
|
77 printf("%.0f %.0f",$label_xpos, 0);
|
jpayne@69
|
78 print " ", $Rid, "\\001\n";
|
jpayne@69
|
79 print "2 1 0 2 $chrcolor 7 100 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
80 printf("\t %.0f %.0f %.0f %.0f\n", 0, 0,
|
jpayne@69
|
81 $Xscale * $Rlen, 0);
|
jpayne@69
|
82 # print some X-axis coordinates
|
jpayne@69
|
83 $pointsize = 5;
|
jpayne@69
|
84 for ($i = 250000; $i < $Rlen - 50000; $i+= 250000) {
|
jpayne@69
|
85 print "4 0 0 100 0 0 $pointsize 0.0000 4 135 405 ";
|
jpayne@69
|
86 printf("%.0f %.0f",$i * $Xscale + 20, -20 + ($Yscale * -0.5));
|
jpayne@69
|
87 print " $i", "\\001\n";
|
jpayne@69
|
88 #print a vertical tic mark
|
jpayne@69
|
89 print "2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
90 printf("%.0f %.0f %.0f -50\n",
|
jpayne@69
|
91 $i * $Xscale, 0, $i * $Xscale);
|
jpayne@69
|
92 }
|
jpayne@69
|
93 # print tic marks indicating % identity on the y-axis
|
jpayne@69
|
94 for ($percent_id = 50; $percent_id < 101; $percent_id += 10) {
|
jpayne@69
|
95 print "4 0 0 100 0 0 $pointsize 0.0000 4 135 405 ";
|
jpayne@69
|
96 printf("-150 %.0f", ($percent_id - 50) * $Yscale + 10); # shift down 10 pixels
|
jpayne@69
|
97 print " $percent_id", "\\001\n";
|
jpayne@69
|
98 # print the tic mark
|
jpayne@69
|
99 print "2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
100 printf("-50 %.0f 0 %.0f\n",
|
jpayne@69
|
101 ($percent_id - 50) * $Yscale, ($percent_id - 50) * $Yscale);
|
jpayne@69
|
102 }
|
jpayne@69
|
103
|
jpayne@69
|
104 # print a line in the appropriate color, scaled with Xscale,Yscale
|
jpayne@69
|
105 sub print_xfig_line {
|
jpayne@69
|
106 my ($xleft,$yleft,$xright,$yright,$color) = @_;
|
jpayne@69
|
107 # print it at the given coordinates, scaled
|
jpayne@69
|
108 $xleft_scaled = int($Xscale * $xleft);
|
jpayne@69
|
109 $xright_scaled = int($Xscale * $xright);
|
jpayne@69
|
110 # Xfig has a bug: if we re-scale and the resulting X coordinates are equal,
|
jpayne@69
|
111 # then it will print a fixed-size (large) rectangle, rather than a 1-pixel
|
jpayne@69
|
112 # wide one. So check fo this and correct.
|
jpayne@69
|
113 if ($xleft_scaled == $xright_scaled) { $xright_scaled += 1; }
|
jpayne@69
|
114 # set up and print line in xfig format
|
jpayne@69
|
115 print "2 1 0 2 $color 7 100 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
116 printf("\t %.0f %.0f %.0f %.0f\n",
|
jpayne@69
|
117 $xleft_scaled, $Yscale * $yleft, $xright_scaled, $Yscale * $yright);
|
jpayne@69
|
118 }
|
jpayne@69
|
119
|
jpayne@69
|
120 # print a label for each contig using its ID
|
jpayne@69
|
121 sub print_label {
|
jpayne@69
|
122 my ($xleft,$yleft,$xright,$id,$psize) = @_;
|
jpayne@69
|
123 # print it at the left edge of the contig. The angle
|
jpayne@69
|
124 # of 4.7124 means text goes down vertically. 5th argument
|
jpayne@69
|
125 # here is pointsize of the label.
|
jpayne@69
|
126 # bump the label down 20 pixels
|
jpayne@69
|
127 $yposition = $yleft * $Yscale + 20;
|
jpayne@69
|
128 # to keep the display clean, don't print the label unless the
|
jpayne@69
|
129 # contig itself is wider than the width of the text in the label
|
jpayne@69
|
130 $xleft_scaled = $xleft * $Xscale;
|
jpayne@69
|
131 $xright_scaled = $xright * $Xscale;
|
jpayne@69
|
132 # each point of type is 8 pixels
|
jpayne@69
|
133 $textheight = $psize * 8;
|
jpayne@69
|
134 if ($xright_scaled - $xleft_scaled > $textheight) {
|
jpayne@69
|
135 print "4 0 0 50 0 0 $psize 4.7124 4 135 435 ";
|
jpayne@69
|
136 printf("%.0f %.0f",$xleft_scaled, $yposition);
|
jpayne@69
|
137 print " $id", "\\001\n";
|
jpayne@69
|
138 }
|
jpayne@69
|
139 }
|