jpayne@69: #!/usr/bin/env perl jpayne@69: # (c) Steven Salzberg 2001 jpayne@69: # Make an xfig plot for a comparison of a reference chromosome (or single jpayne@69: # molecule) versus a multifasta file of contigs from another genome. jpayne@69: # The input file here is a NUCmer coords file such as: jpayne@69: # 2551 2577 | 240 266 | 27 27 | 96.30 | 20302755 1424 | 0.00 1.90 | 2R 1972084 jpayne@69: # generated by running 'show-coords -c -l ' jpayne@69: # For the above example, D. melanogaster chr 2R is the reference and the query jpayne@69: # is an assembly with 1000s of contigs from D. pseudoobscura jpayne@69: # The file needs to be sorted by the smaller contig ids, via: jpayne@69: # tail +6 Dmel2R-vs-Dpseudo.coords | sort -k 19n -k 4n > Dmel2R-vs-Dpseudo-resort.coords jpayne@69: # and remember to get rid of top 5 (header) lines. jpayne@69: # Usage: plot-drosoph-align-xfig.perl Dmel2R-vs-Dpseudo-resort.coords jpayne@69: unless (open(coordsfile,$ARGV[0])) { jpayne@69: die ("can't open file $ARGV[0].\n"); jpayne@69: } jpayne@69: $Xscale = 0.005; jpayne@69: $Yscale = 20; jpayne@69: $chrcolor = 4; # 4 is red, 1 is blue, 2 is green jpayne@69: $green = 2; jpayne@69: $contigcolor = 1; # contigs are blue jpayne@69: jpayne@69: # print header info jpayne@69: print "#FIG 3.2\nLandscape\nCenter\nInches\nLetter \n100.00\nSingle\n-2\n1200 2\n"; jpayne@69: $first_time = 1; jpayne@69: while () { jpayne@69: ($s1,$e1,$x1,$s2,$e2,$x2,$l1,$l2,$x3,$percent_id,$x4,$Rlen,$Qlen,$x5,$Rcov,$Qcov,$x6,$Rid,$Qid) = split(" "); jpayne@69: if ($prevQid eq $Qid) { # query contig is same as prev line jpayne@69: $dist = abs($s1 - $prev_s1); jpayne@69: if ( $dist > 2 * $Qlen) { # if this match is too far away jpayne@69: # print the contig here if the matching bit is > 1000 jpayne@69: if ($right_end{$Qid} - $left_end{$Qid} > 1000) { jpayne@69: # print it at y=50 rather than 100 because the scale is 0-50, jpayne@69: # where 0=50% identical and 50 is 100% identical jpayne@69: print_xfig_line($left_end{$Qid},50,$right_end{$Qid},50,$contigcolor); jpayne@69: print_label($left_end{$Qid},50,$right_end{$Qid},$Qid,5); jpayne@69: } jpayne@69: $left_end{$Qid} = $s1; # then re-set the start and end of the contig jpayne@69: $right_end{$Qid} = $e1; # and we'll print it again later jpayne@69: } jpayne@69: else { # extend the boundaries of the match jpayne@69: if ($s1 < $left_end{$Qid}) { $left_end{$Qid} = $s1; } jpayne@69: if ($e1 > $right_end{$Qid}) { $right_end{$Qid} = $e1; } jpayne@69: } jpayne@69: } jpayne@69: else { # this is a different contig, first time seeing it jpayne@69: $left_end{$Qid} = $s1; jpayne@69: $right_end{$Qid} = $e1; jpayne@69: # print the previous contig as a line at y=100 for 100% jpayne@69: if ($first_time < 1) { jpayne@69: print_xfig_line($left_end{$prevQid},50,$right_end{$prevQid},50,$contigcolor); jpayne@69: print_label($left_end{$prevQid},50,$right_end{$prevQid},$prevQid,5); jpayne@69: } jpayne@69: else { $first_time = 0; } jpayne@69: } jpayne@69: $prevQid = $Qid; jpayne@69: $prev_s1 = $s1; jpayne@69: $prev_Qlen = $Qlen; jpayne@69: # next print the matching bit as a separate line, with a separate color, jpayne@69: # with its height determined by percent match jpayne@69: $Xleft = int($Xscale * $s1); jpayne@69: $Xright = int($Xscale * $e1); jpayne@69: if ($Xleft == $Xright) { $Xright += 1; } jpayne@69: if ($percent_id < 50) { $percent_id = 50; } jpayne@69: print_xfig_line($s1,$percent_id - 50,$e1,$percent_id - 50,$green); jpayne@69: } jpayne@69: # print very last contig jpayne@69: $left_end{$Qid} = $s1; jpayne@69: $right_end{$Qid} = $e1; jpayne@69: print_xfig_line($s1,50,$e1,50,$contigcolor); jpayne@69: print_label($s1,50,$e1,$Qid,5); jpayne@69: close(coordsfile); jpayne@69: jpayne@69: # now draw the horizontal chr line for the reference jpayne@69: $label_xpos = $Xscale * $Rlen; jpayne@69: print "4 0 0 100 0 0 12 0.0000 4 135 405 "; jpayne@69: printf("%.0f %.0f",$label_xpos, 0); jpayne@69: print " ", $Rid, "\\001\n"; jpayne@69: print "2 1 0 2 $chrcolor 7 100 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: printf("\t %.0f %.0f %.0f %.0f\n", 0, 0, jpayne@69: $Xscale * $Rlen, 0); jpayne@69: # print some X-axis coordinates jpayne@69: $pointsize = 5; jpayne@69: for ($i = 250000; $i < $Rlen - 50000; $i+= 250000) { jpayne@69: print "4 0 0 100 0 0 $pointsize 0.0000 4 135 405 "; jpayne@69: printf("%.0f %.0f",$i * $Xscale + 20, -20 + ($Yscale * -0.5)); jpayne@69: print " $i", "\\001\n"; jpayne@69: #print a vertical tic mark jpayne@69: print "2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: printf("%.0f %.0f %.0f -50\n", jpayne@69: $i * $Xscale, 0, $i * $Xscale); jpayne@69: } jpayne@69: # print tic marks indicating % identity on the y-axis jpayne@69: for ($percent_id = 50; $percent_id < 101; $percent_id += 10) { jpayne@69: print "4 0 0 100 0 0 $pointsize 0.0000 4 135 405 "; jpayne@69: printf("-150 %.0f", ($percent_id - 50) * $Yscale + 10); # shift down 10 pixels jpayne@69: print " $percent_id", "\\001\n"; jpayne@69: # print the tic mark jpayne@69: print "2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: printf("-50 %.0f 0 %.0f\n", jpayne@69: ($percent_id - 50) * $Yscale, ($percent_id - 50) * $Yscale); jpayne@69: } jpayne@69: jpayne@69: # print a line in the appropriate color, scaled with Xscale,Yscale jpayne@69: sub print_xfig_line { jpayne@69: my ($xleft,$yleft,$xright,$yright,$color) = @_; jpayne@69: # print it at the given coordinates, scaled jpayne@69: $xleft_scaled = int($Xscale * $xleft); jpayne@69: $xright_scaled = int($Xscale * $xright); jpayne@69: # Xfig has a bug: if we re-scale and the resulting X coordinates are equal, jpayne@69: # then it will print a fixed-size (large) rectangle, rather than a 1-pixel jpayne@69: # wide one. So check fo this and correct. jpayne@69: if ($xleft_scaled == $xright_scaled) { $xright_scaled += 1; } jpayne@69: # set up and print line in xfig format jpayne@69: print "2 1 0 2 $color 7 100 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: printf("\t %.0f %.0f %.0f %.0f\n", jpayne@69: $xleft_scaled, $Yscale * $yleft, $xright_scaled, $Yscale * $yright); jpayne@69: } jpayne@69: jpayne@69: # print a label for each contig using its ID jpayne@69: sub print_label { jpayne@69: my ($xleft,$yleft,$xright,$id,$psize) = @_; jpayne@69: # print it at the left edge of the contig. The angle jpayne@69: # of 4.7124 means text goes down vertically. 5th argument jpayne@69: # here is pointsize of the label. jpayne@69: # bump the label down 20 pixels jpayne@69: $yposition = $yleft * $Yscale + 20; jpayne@69: # to keep the display clean, don't print the label unless the jpayne@69: # contig itself is wider than the width of the text in the label jpayne@69: $xleft_scaled = $xleft * $Xscale; jpayne@69: $xright_scaled = $xright * $Xscale; jpayne@69: # each point of type is 8 pixels jpayne@69: $textheight = $psize * 8; jpayne@69: if ($xright_scaled - $xleft_scaled > $textheight) { jpayne@69: print "4 0 0 50 0 0 $psize 4.7124 4 135 435 "; jpayne@69: printf("%.0f %.0f",$xleft_scaled, $yposition); jpayne@69: print " $id", "\\001\n"; jpayne@69: } jpayne@69: }