view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/mapview @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
line wrap: on
line source
#!/usr/bin/env perl

use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
use Foundation;

my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";


my $VERSION_INFO = q~
mapview version 1.01
    ~;


my $HELP_INFO = q~
  USAGE: mapview  [options]  <coords file>  [UTR coords]  [CDS coords]

  DESCRIPTION:
    mapview is a utility program for displaying sequence alignments as
    provided by MUMmer, NUCmer, PROmer or Mgaps. mapview takes the output of
    show-coords and converts it to a FIG, PDF or PS file for visual analysis.
    It can also break the output into multiple files for easier viewing and
    printing.

  MANDATORY:
    coords file    The output of 'show-coords -rl[k]' or 'mgaps'

  OPTIONS:
    UTR coords      UTR coordinate file in GFF format
    CDS coords      CDS coordinate file in GFF format

    -d|maxdist      Set the maximum base-pair distance between linked matches
                    (default 50000)
    -f|format       Set the output format to 'pdf', 'ps' or 'fig'
                    (default 'fig')
    -h
    --help          Display help information and exit
    -m|mag          Set the magnification at which the figure is rendered,
                    this is an option for fig2dev which is used to generate
                    the PDF and PS files (default 1.0)
    -n|num          Set the number of output files used to partition the
                    output, this is to avoid generating files that are too
                    large to display (default 10)
    -p|prefix       Set the output file prefix
                    (default "PROMER_graph or NUCMER_graph")
    -v
    --verbose       Verbose logging of the processed files
    -V
    --version       Display the version information and exit
    -x1 coord       Set the lower coordinate bound of the display
    -x2 coord       Set the upper coordinate bound of the display
    -g|ref          If the input file is provided by 'mgaps', set the
                    reference sequence ID (as it appears in the first column
                    of the UTR/CDS coords file)
    -I              Display the name of query sequences
    -Ir             Display the name of reference genes
    ~;


my $USAGE_INFO = q~
  USAGE:  mapview  [options]  <coords file>  [UTR coords]  [CDS coords]
    ~;


my @DEPEND_INFO =
    (
     "fig2dev",
     "$SCRIPT_DIR/Foundation.pm"
     );

my $err_gff = q~
  ERROR in the input files ! The reference seq ID can't be found in GFF files !
      The first column in the GFF file should be the ID of the reference seq. 
      The alignments file should provide the same info in the column before the last one.
      
      Here are some example records for the GFF file:
     
      gnl|FlyBase|X	Dmel3	initial-exon	2155	2413	.	-	.	X_CG3038.1
      gnl|FlyBase|X	Dmel3	last-exon	1182	2077	.	-	.	X_CG3038.1
      ...
      The fields are :
      <seq_ID> <source> <exon type> <start> <end> <score> <strand> <frame> <gene_name>
    ~;

my $tigr;
my $err;

my $alignm;  
my $futr;
my $fcds;

#-- Initialize TIGR::Foundation
$tigr = new TIGR::Foundation;
if ( !defined ($tigr) ) {
    print (STDERR "ERROR: TIGR::Foundation could not be initialized");
    exit (1);
}
        
#-- Set help and usage information
$tigr->setHelpInfo ($HELP_INFO);
$tigr->setUsageInfo ($USAGE_INFO);
$tigr->setVersionInfo ($VERSION_INFO);
$tigr->addDependInfo (@DEPEND_INFO);

$err = $tigr->TIGR_GetOptions
    (
     "d|maxdist=i" => \$match_dist,
     "f|format=s" => \$format,
     "m|mag=f" => \$magn,
     "n|num=i" => \$noOutfiles,
     "p|prefix=s" => \$outfilename,
     "x1=i" => \$x1win,
     "x2=i" => \$x2win,
     "v|verbose" => \$verb,
     "g|ref=s" => \$Mgaps,
     "I" => \$printIDconting,
     "Ir" => \$printIDgenes
     );

if ( $err == 0  ||  scalar(@ARGV) < 1  || scalar(@ARGV) > 3 ) {
    $tigr->printUsageInfo( );
    print (STDERR "Try '$0 -h' for more information.\n");
    exit (1);
}

($alignm,$futr,$fcds)=@ARGV; 

if ((substr($x1win,0,1) eq '-') || (substr($x2win,0,1) eq '-')){
    print "ERROR2 : coords x1,x2 should be positive integers !!\n";
    $info=1;
}
if ($x1win > $x2win) {
    print "ERROR3 : wrong range coords : x1 >= x2 !!!\n";
    $info=1; 
}
if ($Mgaps){ #formating the mgaps output to be similar with show-coords output
   format_mgaps();
}

if  (!$format){$format="fig";}
if (($x1win) and ($x2win)){  $startfind=0; }
else{  $startfind=1; }
$endfind=0; 
if (!$noOutfiles) {$noOutfiles=10;} 
if (!$match_dist){$match_dist=50000;}

#```````init colors````````````````````````````
$color{"2"}=27;#dark pink 5utr
$color{"3"}=2;#green  ex
$color{"4"}=1;#blue 3utr
#``````````````````````
@linkcolors=(31,14,11);
#``````````````````````````````````````````````
open(F,$alignm);
<F>;
$prog=<F>; chomp($prog);
<F>;
$_=<F>;
@a=(m/\s+(\||\[.+?\])/g) ;
for ($ind=0;$ind<=$#a;$ind++){
   if ($a[$ind] eq "[S1]") {
     $ind_s1=$ind;     
   }
   elsif ($a[$ind] eq "[E1]"){
     $ind_e1=$ind;    
   }
  elsif ($a[$ind] eq "[% IDY]"){
     $ind_pidy=$ind;  
   }
   elsif ($a[$ind] eq "[LEN R]"){
     $ind_lenchr=$ind;   
   }
#   elsif ($a[$ind] eq "[TAGS]"){    
#    $ind_tags=$ind+1; #there are two columns for this header col
#   }   
}
<F>;$mref=-1; 
while (<F>){
   chomp;
   @a=split;                                 
   if (!exists $hRefContigId{$a[-2]}) {       # print  $a[-2]."\n";               
     $hRefContigId{$a[-2]}=$a[$ind_lenchr];
     $lenrefseqs+=$a[$ind_lenchr];  
     $mref++;                               
   }
}
$nobpinfile=int($lenrefseqs/$noOutfiles);         

close(F);

#``````````````````````


if (@ARGV > 1) {
  get_cds_ends();              
  get_utrcds_info();
  test_overlap();
} 
elsif(!$mref) {     
  $fileno=$noOutfiles;         
  $startcoord=0;$endcoord=0;
  for ($i=0;$i<$fileno;$i++) {            
   $endcoord=$startcoord+$nobpinfile-1;
   $endcoord=$lenrefseqs if ($endcoord>$lenrefseqs);
   $file[$i]="$startcoord $endcoord";  
   $startcoord=$endcoord+1;
  }
}

    
$Yorig=3000;
$YdistPID=2000;
$yscale=$YdistPID/50;
$Xscale=14.5;
$gap=800;
#$maxfiles = ($fileno < 10) ? $fileno : 10;
#---------------------------------

if (!$mref){ 
  for($i=0; $i < $fileno; $i++) {
    $nrf=$i;
    set_output_fname();  
    ($startcoord,$endcoord)=split(/\s+/,$file[$i]);  
     open(O,">$procfile".$nrf.".fig");
     print_header(); 
     print $procfile.$nrf.".fig\t range : $startcoord\t$endcoord \n" if ($verb && ($format eq "fig"));
    
     $xs=0;
     $xe=int(($endcoord-$startcoord+1)/$Xscale);
    #$xs=200;
    #$xe=$xs+int(($endcoord-$startcoord+1)/$Xscale);
     print_grid($xs,$xe,$startcoord,$endcoord);
   
     $tmpIdQrycontig="";
     $linkcolor=$linkcolors[0];               
 
     open(F,$alignm);
     <F>;<F>;<F>;<F>;<F>;
     while(<F>) {                             
	chomp;
	@a=split;                            
	if($a[$ind_s1] > $endcoord) { last;}
	if($a[$ind_s1]<$startcoord && $a[$ind_e1] > $startcoord ) { $a[$ind_s1]=$startcoord;}
	if($a[$ind_s1] < $endcoord && $endcoord < $a[$ind_e1]) { $a[$ind_e1]=$endcoord;}
	
	if($a[$ind_s1]>=$startcoord && $a[$ind_e1]<=$endcoord) {
          $x1=int(($a[$ind_s1]-$startcoord)/$Xscale);#
          $x2=int(($a[$ind_e1]-$startcoord)/$Xscale);# 
          print_align($x1,$x2);
	}
     } 
     close(F);
     %hQrycontig=();
     print_genes() if ($futr);
     print_legend();
     close(O);
     change_file_format() if ($format ne "fig");   
  }
}
elsif($mref){#multiple ref seqs
  set_output_fname();
  $tmpIdQrycontig="";
  $linkcolor=$linkcolors[0];
  $startdrawX=0;
  $proclen=0;
  $first=1;
  $nrf=0;
  open(F,$alignm);
  <F>;<F>;<F>;<F>;<F>;
  while(<F>) {
    chomp;
    @a=split;
    if ($a[-2] ne $tmpcontig){
       %hQrycontig=();
       $tmpcontig=$a[-2];
       if ($first){
         $first=0;
         $nrf++;
         open(O,">$procfile".$nrf.".fig");
         print_header();
         print $procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
         $len=$hRefContigId{$a[-2]};
       }
       else {
         $startdrawX+=int($len/$Xscale)+$gap;
         $len=$hRefContigId{$a[-2]};
         if (($proclen+$len>$nobpinfile) and ($proclen != 0)){
            print_legend();
            close(O);
            change_file_format() if ($format ne "fig"); 
            $nrf++;         
            open(O,">$procfile".$nrf.".fig");
            print_header();   
            print "\n".$procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
            $proclen=0;
            $startdrawX=0;
         }
       }
       $xs=$startdrawX;
       $xe=$startdrawX+int($len/$Xscale);
       print_grid($xs,$xe,0,$len);
       #print genes from %geneinfo for contig
       print $a[-2]."\t".$hRefContigId{$a[-2]}."\n";
       print_genes_mr() if ($futr);
       $proclen+=$len;      
    }#end if new contig
    $x1=$startdrawX+int($a[$ind_s1]/$Xscale);
    $x2=$startdrawX+int($a[$ind_e1]/$Xscale);   
    print_align($x1,$x2);
  }
  print_legend();
  close(O);
  change_file_format() if ($format ne "fig");

  close(F);
}
#*******************************************************************************
#*******************************************************************************
sub set_output_fname{
  
  if (!$outfilename) {$procfile=$prog."_graph"."_";}
  else {$procfile=$outfilename."_";}
  
  if ($format ne "fig"){ 
     $procfile="tmp".$procfile;
  }
}
#*********************************
sub get_cds_ends{
#3.  print "create \%hcds_ends...\n";
$testGffFormat=0;
 open(F,"<".$fcds);#|| die "can't open \" $fcds cds \" file !";
 while(<F>) {
   chomp;
   if($_) { 
     @a=split;                          
     if (exists $hRefContigId{$a[0]}){#record if at least one of the ref id is the same in GFF and Align files
       $testGffFormat++;
     }
     $genename=$a[8];                                   
     if ($genename ne $tmpname){
        if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
        elsif ($sign eq "-"){ $hcds_ends{$tmpname} = "$cds3 $cds5";} 
        $tmpname=$genename; 
        $sign=$a[6]; 
     }
     if($sign eq "-") {
	$temp=$a[3]; 
	$a[3]=$a[4];
	$a[4]=$temp;
     }
     if ($a[2] eq "single-exon"){ 
        $cds5=$a[3];
        $cds3=$a[4];
     }
     elsif ($a[2] eq "initial-exon"){ 
        $cds5=$a[3];
     }
     elsif ($a[2] eq "last-exon"){
        $cds3=$a[4];
     }   
   }
 }
 if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
 elsif ($sign eq "-"){$hcds_ends{$tmpname} = "$cds3 $cds5";} 
                                                           
 test_formatGFF(); 

 #foreach $k ( keys %hcds_ends){
 #  print "cds_ends: ".$k."\t"."\n"; 
 #} exit;
 
}

#*********************************
sub test_formatGFF{
 if ($testGffFormat==0){         
   print (STDERR  "$err_gff \n"); 
   exit (1);
 }

}

#*********************************
sub get_utrcds_info{
#test for gene overlap $geneinfo{gene_name}->[0]=level,stock gene 5'3' utr ends,
#determina %geneinfo{id gene}->5utr,ex,3utr  
#and @file 
# get_gene_ends();
                $testGffFormat=0;
open(F,"<".$futr);# || die "can't open \" $futr utr \" file !";
while(<F>) {
  chomp;
  if($_) { 
     @a=split;
     #get gene ends-utr
     $genename=$a[8];                      
     if (exists $hRefContigId{$a[0]}){ #check if [align file(col before the last one)] = [GFF file(col 1)]
       $testGffFormat++;
     }
           
     if ($genename ne $tmpname){
        if ($sign eq "+"){ $utr_ends{$tmpname} = "$utr5 $utr3";}
        elsif ($sign eq "-"){ $utr_ends{$tmpname} = "$utr3 $utr5";} 
       
        if ($tmpgene) {# for the distinct_utr_cds
          get_utrcds_ends() ;        
        }
        $tmpgene="$a[3] $a[4]";# 
        
        $tmpname=$genename; 
        $sign=$a[6];  
        $hContig_genes{$a[0]}.=" ".$a[8];  # for multiple ref. seqs  
     }
     else{# for the distinct_utr_cds
        if($sign eq "-") {
	$tmpgene="$a[3] $a[4];".$tmpgene;
        }
        else { $tmpgene.=";$a[3] $a[4]"; }
     }#
     
     if($sign eq "-") {
	$temp=$a[3]; 
	$a[3]=$a[4];
	$a[4]=$temp;
     }
     if ($a[2] eq "single-exon"){  
        $utr5=$a[3];
        $utr3=$a[4];
     }
     elsif ($a[2] eq "initial-exon"){ 
        $utr5=$a[3];
     }
     elsif ($a[2] eq "last-exon"){
        $utr3=$a[4];
     }
    
    #init gene info (level) 
    $geneinfo{$a[8]}->[0]=1 if (!exists $geneinfo{$a[8]});                 
        
  }
}
 # for the distinct_utr_cds
 get_utrcds_ends();  
 %cds_ends=();# 

 if ($sign eq "+"){$utr_ends{$tmpname}="$utr5 $utr3"; }
 elsif ($sign eq "-"){$utr_ends{$tmpname}="$utr3 $utr5"; } 
 $hContig_genes{$a[0]}.=" ".$a[8];

 close(F);
 test_formatGFF();
 
}


#****************************************************************
sub get_utrcds_ends{
 $u5="";$ex="";$u3="";
 
 if ($fcds eq $futr) {
   $ex=$tmpgene;
 } 
 else {  
   @ex=split(";",$tmpgene); 
   @cds=split(" ",$hcds_ends{$tmpname});
 
   for($i=0;$i<=$#ex;$i++){
      @coord=split(" ",$ex[$i]);
    
      if ($cds[0]>$coord[0]){
        if ($cds[0]>$coord[1]){
           $u5.="$coord[0] $coord[1];";      
        }
        else{
           $u5.= "$coord[0] "; $u5.=$cds[0]-1 .";" ; #?
           if ($cds[1]<$coord[1]){
             $ex.="$cds[0] $cds[1];";
             $u3.=$cds[1]+1 ." $coord[1];";
           }
           else{ $ex.="$cds[0] $coord[1];";}       
        }    
      }
      else {
        if ($cds[1]>$coord[0]){
          if ($cds[1]>$coord[1]){ 
            $ex.="$coord[0] $coord[1];";       
          }
          else{
            $ex.="$coord[0] $cds[1];";
            $u3.=$cds[1]+1 ." $coord[1];";
          }     
        }
        else { $u3.="$coord[0] $coord[1];";}    
      }    
   }
   chop($u5, $ex, $u3);
 }
 $geneinfo{$tmpname}->[1]=$sign;
 $geneinfo{$tmpname}->[2]=$u5;
 $geneinfo{$tmpname}->[3]=$ex;
 $geneinfo{$tmpname}->[4]=$u3;
}
#*********************************
sub test_overlap{

 if (!$mref){
   $fileno=0;###
   $endcoord=0;###
 }
 foreach $kcontgid (sort keys %hContig_genes){                       
      @allgenes=split(/\s+/,$hContig_genes{$kcontgid});       
      for ($i=1;$i<=$#allgenes;$i++){
         @g1=split (" ", $utr_ends{$allgenes[$i]});    
         $Utr5End{$allgenes[$i]}=$g1[0]; ###
    
         for ($j=$i+1;$j<=$#allgenes;$j++){ #comparing with the rest of the genes
            @g2=split (" ", $utr_ends{$allgenes[$j]});   
            #if the genes are overpaling and they have the same level ,the second gene is liflet to the next level 
            if ( (($g2[0]>=$g1[0]) and ($g2[0]<=$g1[1])) or (($g2[1]>=$g1[0]) and ($g2[1]<=$g1[1])) or (($g1[0]>=$g2[0]) and ($g1[0]<=$g2[1])) ){
              if ($geneinfo{$allgenes[$i]}->[0] == $geneinfo{$allgenes[$j]}->[0]){
                $geneinfo{$allgenes[$j]}->[0]=$geneinfo{$allgenes[$i]}->[0] + 1 ;
              }
            }   
         }                        
         SetTheRangeForEachFile() if ((!$endfind) and (!$mref)); ###         
      }                                              
  }
  $file[$fileno++]="$startcoord $endcoord" if (!$mref);###                        
  %utr_ends=();###  
 
}  

#**************************************
sub SetTheRangeForEachFile{      
   $currstart=$g1[0];
   $currend=$g1[1];
   #---test range ends intersection
   if (!$startfind) {
     if (($x1win <= $currstart) || ($x1win <= $currend)){
         $currstart = $x1win;
         $startfind = 1;
      }  
   }
   if ( $startfind && $x1win && $x2win){
       if (($x2win <= $currstart) || ($x2win <= $currend) ){
           $currend = $x2win;
           $endfind = 1; 
        } 
   }
#--------------------
   if ($startfind) { 
       if(!$endcoord) {
          #$startcoord=0;
          $startcoord = $x1win ? $x1win : 0;
	  $endcoord=$currend;
       }
       else {
         if($currend > $endcoord) {
	    if($currend-$startcoord < $nobpinfile) {
               $endcoord=$currend; 
	    }
	    else {
	       $file[$fileno++]="$startcoord $endcoord";
	       $startcoord=$endcoord+1;
	       $endcoord=$currend;
	    }
	  }
       }
   }#if startfind        
}
#*********************************
sub print_header{
  print O "#FIG 3.2\nLandscape\nCenter\nInches\nLetter  \n100.00\nMultiple\n-2\n1200 2\n";
}
#*********************************
sub print_align{
  my ($x1,$x2)=@_;
 
  $a[$ind_pidy]=50 if ($a[$ind_pidy]<50);
  $a[$ind_pidy]=int($a[$ind_pidy]);
  if ($Mgaps){
     $y=$Yorig+250+$YdistPID-$yscale*2;
     if($a[$#a]=~/rev$/){$y-=25*$yscale;}      
  }
  else{
    $y=$Yorig+250+$YdistPID-$yscale*($a[$ind_pidy]-50);
  }
  if($x1==$x2) { $x2++;}
     #draw the line between matches. is dif color for each contig
  if ($a[$#a] eq $tmpIdQrycontig) {               
     print_connections($hQrycontig{$tmpIdQrycontig}->[1], $x1,$y);
  }
  else{#new contig
   #remember the start coord for printing the id alignments
   if ($printIDconting){                                            
      if  ( $x1 - $XlastPrint > 400 ) { 
         print O "4 0 0 5 0 0 8 0.0000 4 90 270 ";
         printf O ("\t%.0f %.0f ",$x1,$y); 
         print O  $a[$#a], "\\001\n";
         $XlastPrint=$x1; $YlastPrint=$y;
      }
   }
   # 
     ##if it was seen before,but interrupted by another contig   
     if ((exists $hQrycontig{$a[$#a]}) and ($a[$ind_s1]-$hQrycontig{$a[$#a]}->[2] < $match_dist )) { 
        $linkcolor=$hQrycontig{$a[$#a]}->[0];
        print_connections($hQrycontig{$a[$#a]}->[1], $x1,$y);
     }
     else{
         #change the link color 
         unshift(@linkcolors, pop(@linkcolors)); 
         $linkcolor=$linkcolors[0];
         $hQrycontig{$a[$#a]}->[0] = $linkcolor;
     }
   }
   $tmpIdQrycontig=$a[$#a];
   $hQrycontig{$tmpIdQrycontig}->[1]="$x2 $y";
   $hQrycontig{$tmpIdQrycontig}->[2]=$a[$ind_e1]; 
 
    #the matches line is red
   print O "2 1 0 2 4 0 40 0 -1 0.000 0 0 -1 0 0 2\n";
   print O "\t$x1 $y $x2 $y\n";
   print O "2 1 0 5 20 0 50 0 -1 0.000 0 0 -1 0 0 2\n";		
   printf O ("\t $x1 %.0f $x2 %.0f\n",$Yorig+150 , $Yorig+150);
}
#*********************************
sub print_connections{
 my ($setc1,$setx2,$sety2)=@_;                    # print "\nparam connect  @_\n";
 my ($setx1,$sety1) =split(/ /,$setc1);
 
   if ($Mgaps){      
     if ($setx1>$setx2){
        $tmpsetx1=$setx1;
        $setx1=$setx2;
        $setx2=$tmpsetx1;
     }
       $distx1x2=int(($setx2-$setx1)/2);
       $xcenter= $setx1+$distx1x2;
      
       if ($setx2-$setx1>4000)  { #if the distance is to big then heigh of the arc is set to 20 
         $heightArcUp = 20*$yscale;  
         $yoffcenter=int((($distx1x2**2)+$heightArcUp**2)*(1/(2*$heightArcUp)))-$heightArcUp ; 
       }
       else{
         $heightArcUp = int (0.447 * $distx1x2);#sectorul de cerc la 1/3 din raza.
         $yoffcenter=2*$heightArcUp;
       }
         print O "5 1 0 2 $linkcolor 0 50 0 -1 0.000 0 0 0 0 ";
         printf O ("%.3f %.3f $setx1 $sety1 $xcenter %.0f $setx2 $sety1 \n",$xcenter,$sety1+$yoffcenter,$sety1-$heightArcUp); 
  }
  else{
    print O "2 1 0 1 $linkcolor 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
    print O "\t".$setc1." $setx2 $sety2\n";
  
  }
}
#*********************************
sub print_genes_mr{ 
 %hLastOnLevel=(); 
 @g=split(/\s+/,$hContig_genes{$a[-2]});
 for ($i=1;$i<=$#g;$i++){
 $kname=$g[$i];
   $tmpx2=0;
   $y=$Yorig-100-200*$geneinfo{$kname}->[0];
   #print id gena     
    $xid =$startdrawX+ int($Utr5End{$kname}/$Xscale);
    print_Id_genes() if ($printIDgenes);
   #     
   for ($l=2;$l<5;$l++){
     @c=split(";",$geneinfo{$kname}->[$l] ) ; 
      if (@c){ #print "de unde?@c\n" if (($l==2) or ($l==4));
      $colorend=$color{$l};
      if ($geneinfo{$kname}->[1] eq "-"){
         if ($l==2) { $colorend=$color{"4"}; }
         elsif ($l==4) {$colorend=$color{"2"};}
      }       
      for ($k=0;$k<=$#c;$k++){         
         @e=split (" ",$c[$k]); 
         $x1=$startdrawX+int($e[0]/$Xscale);
         $x2=$startdrawX+int($e[1]/$Xscale);
         if($x1==$x2) { $x2++;}
         if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron 
            print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
            print O "\t $tmpx2 $y $x1 $y\n";
         }
         $tmpx2=$x2;
         print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#		
         print O "\t $x1 $y $x2 $y\n";  
      }
     }       
   }          
   #delete ($geneinfo{$kname});
 }
}

#*********************************
sub print_Id_genes{
 if (exists $hLastOnLevel{$geneinfo{$kname}->[0]}){
    $lastOnlevel=$hLastOnLevel{$geneinfo{$kname}->[0]};
    $printidspace = int(($Utr5End{$kname}-$Utr5End{$lastOnlevel})/$Xscale);
 }
 else{$printidspace=601;}
 if ($printidspace > 600){                
    #print contig name#
    print O "4 0 0 5 0 0 6 0.0000 4 90 270 ";
    printf O ("\t%.0f %.0f ",$xid,$y-50); 
    print O  $kname, "\\001\n";
    $hLastOnLevel{$geneinfo{$kname}->[0]}=$kname;
 } 
}
#*********************************
sub print_genes{
 %hLastOnLevel=();                                 
 foreach $kname  (sort {$Utr5End{$a} <=> $Utr5End{$b}} keys %Utr5End){  
   $tmpx2=0;
   if ($Utr5End{$kname}>$startcoord && $Utr5End{$kname}<$endcoord){       
     $y=$Yorig-100-200*$geneinfo{$kname}->[0];
     #print id gena     
        $xid = int(($Utr5End{$kname}-$startcoord)/$Xscale);
        print_Id_genes() if ($printIDgenes);
     #     
      for ($l=2;$l<5;$l++){
         @c=split(";",$geneinfo{$kname}->[$l] ); 
         $colorend=$color{$l};
         if ($geneinfo{$kname}->[1] eq "-"){
            if ($l==2) { $colorend=$color{"4"}; }
            elsif ($l==4) {$colorend=$color{"2"};}
         }
         
         for ($k=0;$k<=$#c;$k++){         
            @e=split (" ",$c[$k]); 
            $x1=int(($e[0]-$startcoord)/$Xscale);
            $x2=int(($e[1]-$startcoord)/$Xscale);
            if($x1==$x2) { $x2++;}
            if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron 
               print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
               print O "\t $tmpx2 $y $x1 $y\n";
            }
            $tmpx2=$x2;
            print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#		
            print O "\t $x1 $y $x2 $y\n";  
         }      
      }
    }#endif "is in interval"
  }
 # delete ($Utr5End{$kname});
 # delete ($geneinfo{$kname});
 
}
#*********************************
sub print_grid{  

my ($xs,$xe,$startcontg,$endcontg)=@_;

$XlastPrint=0;$YlastPrint=0;

   #print ref contig
    print O "2 1 0 10 11 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
    printf O ("\t $xs %.0f $xe %.0f\n",$Yorig+50,$Yorig+50);
    #print orizontal axes for PId (100%,75%,50%)
    for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
        print O  "2 1 2 1 0 7 60 0 -1 4.000 0 0 -1 0 0 2\n";
        printf O  ("\t$xs %.0f $xe %.0f\n",$Yorig+250+$YdistPID-($percent_id - 50) * $yscale,$Yorig+250+$YdistPID-($percent_id - 50) * $yscale);
        #last if ($Mgaps);    
    }
    #print orizontal markers for bp.
    $increment=10000/$Xscale;
    $no_incr=0;
    $xmark = $xs ;$xmark_float= $xs;
     while ($xmark < $xe){
        print O  "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
        printf O  ("\t$xmark %.0f $xmark %.0f\n",$Yorig+$YdistPID+250,$Yorig+$YdistPID+300);
         #bp scale
        print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
        printf O ("\t %.0f %.0f",$xmark,$Yorig+$YdistPID+400); 
        print O  " $no_incr"."k", "\\001\n";
        $no_incr += 10;
        $xmark_float += $increment;
        $xmark=int($xmark_float);
    }
    
    #coord for chr ends
    print O "4 0 0 50 0 0 14 0.0000 4 135 450 $xs $Yorig $startcontg\\001\n";
    printf O ("4 0 0 50 0 0 14 0.0000 4 135 810 %.0f $Yorig $endcontg\\001\n",$xe-length($xe)*125);
     
    #print contig name#
    if ($mref){
      print O "4 0 0 5 0 0 8 0.0000 4 135 405 ";
      printf O ("\t%.0f %.0f ",$xs,$Yorig+70); 
      print O  $a[-2], "\\001\n";                     
    }
    #print vertical markers for PId scale 
    if (!$Mgaps){
      for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
        #left
        print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
        printf O ("\t%.0f %.0f", $xs-200,$Yorig+$YdistPID+250-($percent_id - 50) * $yscale + 20); 
        print O  " $percent_id%", "\\001\n";
        #right
        print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
        printf O ("\t%.0f %.0f",$xe+20, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale+20 ); 
        print O  " $percent_id%", "\\001\n";
      
        # print the tick mark
        #left
       # print O  "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; 
      #  printf O ("\t%.0f %.0f $xs %.0f\n",$xs-50,
	#   $Yorig+$YdistPID+250-($percent_id - 50) * $yscale, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
      #right
       # print O  "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; 
      #  printf O ("\t$xe %.0f %.0f %.0f\n",
	#   $Yorig+$YdistPID+250-($percent_id - 50) * $yscale,$xe+50, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
      }
    }
    else{ # for Mgaps
      print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
      printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 5 * $yscale + 10); 
      print O  " + qry strand", "\\001\n";

      print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
      printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 30 * $yscale + 10); 
      print O  " - qry strand", "\\001\n";      
    }
    
}
#*********************************
sub print_legend{ 

     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
     printf O ("\t%.0f %.0f ",100,$Yorig+$YdistPID+1100); 
     print O  " Legend ", "\\001\n";
     $y= $Yorig+$YdistPID+1300; #utr
     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
     print O "\t 70 $y 99 $y\n";
     print O "2 1 0 5 27 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
     print O "\t 100 $y 200 $y\n";
     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
     print O "\t 200 $y 230 $y\n";        
     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
     printf O ("\t%.0f %.0f ",300,$y+30); 
     print O  " 5' utr ", "\\001\n";
     $y += 150 ;#cds
     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
     print O "\t 70 $y 99 $y\n";     
     print O "2 1 0 5 2 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
     print O "\t 100 $y 200 $y\n";
     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
     print O "\t 200 $y 230 $y\n";                 
     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
     printf O ("\t%.0f %.0f ",300,$y+30); 
     print O  " cds ", "\\001\n";
     $y += 150;        #3' utr
     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
     print O "\t 70 $y 99 $y\n";     
     print O "2 1 0 5 1 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
     print O "\t 100 $y 200 $y\n";         
     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
     print O "\t 200 $y 230 $y\n";             
     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";     
     printf O ("\t%.0f %.0f ",300,$y+30); 
     print O  " 3' utr ", "\\001\n";
     $y += 150; # match
     print O "2 1 0 2 4 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
     print O "\t100 $y 200 $y\n";
     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";    
     printf O ("\t%.0f %.0f ",300,$y+30); 
     print O  " match found by $prog ", "\\001\n";
}
#*********************************
sub change_file_format{   

  $procfile =~ /^tmp(.+)/;
  $outfile = $1.$nrf.".".$format; 
  $comand = "fig2dev -L $format -x 100";
  $comand .= " -m $magn" if ($magn);
  $comand .= " -M ".$procfile.$nrf.".fig ".$outfile;
   
  $status =system($comand);
   print E "ERROR 1: fig2dev !\n" unless $status == 0;
   
  $status =system("rm $procfile".$nrf.".fig");
  
  if ($verb){
    print "$outfile";
    if ($mref){
      print "\n" ;
    }
    else {
      print "\t range : $startcoord\t$endcoord \n" ;
    }
  }
}
#*********************************
sub format_mgaps{
$tmpfile="tmpmgaps";
$tmpfile2=$alignm."coords" ;
get_ref_len();    #print $maxlenref."\n";
 open(M,">".$tmpfile2) || die "can't open \" $tmpfile2  \" file !";
  print M "$alignm\n";
  print M "Mgaps\n\n";
  print M "    [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  |  [LEN R]  [LEN Q]  |  [COV R]  [COV Q]  | [TAGS]\n";
  print M "===============================================================================================================================\n";

 open(T,">".$tmpfile) || die "can't open \" $tmpfile  \" file !";
 
 open(A,"<".$alignm) || die "can't open \" $alignm  \" file !";
 
 
 while(<A>) {
  chomp;
  @a=split;
  if ($a[0] =~ /^>/){    
    $nr_cluster=1;    
    $idquery=$a[1];
    if ($a[2] eq "Reverse"){$idquery .= "_rev";}
  }
  #elsif ($a[0] eq "#") {$nr_cluster++;}
  elsif($a[0] ne "#"){  
    $e1=$a[0]+$a[2];
    print T $a[0]."\t".$e1."\t"."|";
    print T "\t-\t-\t|"; 
    print T "\t-\t-\t|";
    print T "\t-\t|"; #pid
    print T "\t$maxlenref\t-\t|";#len seqs
   # print "\t-\t-\t|";
   # print "\t-\t-\t|";
   # print T " $Mgaps\t$idquery.$nr_cluster\n"; 
   print T " $Mgaps\t$idquery\n";   
  }
  
 }
 close(A);
 close(T);
 $command="sort -n -k 1 $tmpfile >> $tmpfile2";
 $status =system($command);  
 system("rm $tmpfile");
 close(M);
 $alignm=$tmpfile2;
 print STDERR "ERROR 1:  can't sort $tmpfile \n" unless $status == 0;
 print STDERR "\n**************************************** \n";
 print STDERR "New input file created : $alignm\n";
 print STDERR "**************************************** \n\n";
 
}
#*****************************
sub get_ref_len{ 
 $firstrow=1;
 open(A,"<".$alignm) || die "can't open \" $alignm  \" file !";
 while(<A>) {
   chomp;
   @a=split;
   if ($firstrow) {
     $firstrow=0;
     if ($a[0] !~ /^>/){
        print "\nWrong file format for MGAPS file : $alignm ! \n";
        exit;
     }
  }

  if ($a[0] =~ /^>/){ next; }
  elsif($a[0] ne "#"){  
    $e1=$a[0]+$a[2];
    $maxlenref=($maxlenref < $e1 ? $e1 : $maxlenref);
  }
 }
 close(A);
}