diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/mapview @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/mapview	Tue Mar 18 17:55:14 2025 -0400
@@ -0,0 +1,967 @@
+#!/usr/bin/env perl
+
+use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
+use Foundation;
+
+my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
+
+
+my $VERSION_INFO = q~
+mapview version 1.01
+    ~;
+
+
+my $HELP_INFO = q~
+  USAGE: mapview  [options]  <coords file>  [UTR coords]  [CDS coords]
+
+  DESCRIPTION:
+    mapview is a utility program for displaying sequence alignments as
+    provided by MUMmer, NUCmer, PROmer or Mgaps. mapview takes the output of
+    show-coords and converts it to a FIG, PDF or PS file for visual analysis.
+    It can also break the output into multiple files for easier viewing and
+    printing.
+
+  MANDATORY:
+    coords file    The output of 'show-coords -rl[k]' or 'mgaps'
+
+  OPTIONS:
+    UTR coords      UTR coordinate file in GFF format
+    CDS coords      CDS coordinate file in GFF format
+
+    -d|maxdist      Set the maximum base-pair distance between linked matches
+                    (default 50000)
+    -f|format       Set the output format to 'pdf', 'ps' or 'fig'
+                    (default 'fig')
+    -h
+    --help          Display help information and exit
+    -m|mag          Set the magnification at which the figure is rendered,
+                    this is an option for fig2dev which is used to generate
+                    the PDF and PS files (default 1.0)
+    -n|num          Set the number of output files used to partition the
+                    output, this is to avoid generating files that are too
+                    large to display (default 10)
+    -p|prefix       Set the output file prefix
+                    (default "PROMER_graph or NUCMER_graph")
+    -v
+    --verbose       Verbose logging of the processed files
+    -V
+    --version       Display the version information and exit
+    -x1 coord       Set the lower coordinate bound of the display
+    -x2 coord       Set the upper coordinate bound of the display
+    -g|ref          If the input file is provided by 'mgaps', set the
+                    reference sequence ID (as it appears in the first column
+                    of the UTR/CDS coords file)
+    -I              Display the name of query sequences
+    -Ir             Display the name of reference genes
+    ~;
+
+
+my $USAGE_INFO = q~
+  USAGE:  mapview  [options]  <coords file>  [UTR coords]  [CDS coords]
+    ~;
+
+
+my @DEPEND_INFO =
+    (
+     "fig2dev",
+     "$SCRIPT_DIR/Foundation.pm"
+     );
+
+my $err_gff = q~
+  ERROR in the input files ! The reference seq ID can't be found in GFF files !
+      The first column in the GFF file should be the ID of the reference seq. 
+      The alignments file should provide the same info in the column before the last one.
+      
+      Here are some example records for the GFF file:
+     
+      gnl|FlyBase|X	Dmel3	initial-exon	2155	2413	.	-	.	X_CG3038.1
+      gnl|FlyBase|X	Dmel3	last-exon	1182	2077	.	-	.	X_CG3038.1
+      ...
+      The fields are :
+      <seq_ID> <source> <exon type> <start> <end> <score> <strand> <frame> <gene_name>
+    ~;
+
+my $tigr;
+my $err;
+
+my $alignm;  
+my $futr;
+my $fcds;
+
+#-- Initialize TIGR::Foundation
+$tigr = new TIGR::Foundation;
+if ( !defined ($tigr) ) {
+    print (STDERR "ERROR: TIGR::Foundation could not be initialized");
+    exit (1);
+}
+        
+#-- Set help and usage information
+$tigr->setHelpInfo ($HELP_INFO);
+$tigr->setUsageInfo ($USAGE_INFO);
+$tigr->setVersionInfo ($VERSION_INFO);
+$tigr->addDependInfo (@DEPEND_INFO);
+
+$err = $tigr->TIGR_GetOptions
+    (
+     "d|maxdist=i" => \$match_dist,
+     "f|format=s" => \$format,
+     "m|mag=f" => \$magn,
+     "n|num=i" => \$noOutfiles,
+     "p|prefix=s" => \$outfilename,
+     "x1=i" => \$x1win,
+     "x2=i" => \$x2win,
+     "v|verbose" => \$verb,
+     "g|ref=s" => \$Mgaps,
+     "I" => \$printIDconting,
+     "Ir" => \$printIDgenes
+     );
+
+if ( $err == 0  ||  scalar(@ARGV) < 1  || scalar(@ARGV) > 3 ) {
+    $tigr->printUsageInfo( );
+    print (STDERR "Try '$0 -h' for more information.\n");
+    exit (1);
+}
+
+($alignm,$futr,$fcds)=@ARGV; 
+
+if ((substr($x1win,0,1) eq '-') || (substr($x2win,0,1) eq '-')){
+    print "ERROR2 : coords x1,x2 should be positive integers !!\n";
+    $info=1;
+}
+if ($x1win > $x2win) {
+    print "ERROR3 : wrong range coords : x1 >= x2 !!!\n";
+    $info=1; 
+}
+if ($Mgaps){ #formating the mgaps output to be similar with show-coords output
+   format_mgaps();
+}
+
+if  (!$format){$format="fig";}
+if (($x1win) and ($x2win)){  $startfind=0; }
+else{  $startfind=1; }
+$endfind=0; 
+if (!$noOutfiles) {$noOutfiles=10;} 
+if (!$match_dist){$match_dist=50000;}
+
+#```````init colors````````````````````````````
+$color{"2"}=27;#dark pink 5utr
+$color{"3"}=2;#green  ex
+$color{"4"}=1;#blue 3utr
+#``````````````````````
+@linkcolors=(31,14,11);
+#``````````````````````````````````````````````
+open(F,$alignm);
+<F>;
+$prog=<F>; chomp($prog);
+<F>;
+$_=<F>;
+@a=(m/\s+(\||\[.+?\])/g) ;
+for ($ind=0;$ind<=$#a;$ind++){
+   if ($a[$ind] eq "[S1]") {
+     $ind_s1=$ind;     
+   }
+   elsif ($a[$ind] eq "[E1]"){
+     $ind_e1=$ind;    
+   }
+  elsif ($a[$ind] eq "[% IDY]"){
+     $ind_pidy=$ind;  
+   }
+   elsif ($a[$ind] eq "[LEN R]"){
+     $ind_lenchr=$ind;   
+   }
+#   elsif ($a[$ind] eq "[TAGS]"){    
+#    $ind_tags=$ind+1; #there are two columns for this header col
+#   }   
+}
+<F>;$mref=-1; 
+while (<F>){
+   chomp;
+   @a=split;                                 
+   if (!exists $hRefContigId{$a[-2]}) {       # print  $a[-2]."\n";               
+     $hRefContigId{$a[-2]}=$a[$ind_lenchr];
+     $lenrefseqs+=$a[$ind_lenchr];  
+     $mref++;                               
+   }
+}
+$nobpinfile=int($lenrefseqs/$noOutfiles);         
+
+close(F);
+
+#``````````````````````
+
+
+if (@ARGV > 1) {
+  get_cds_ends();              
+  get_utrcds_info();
+  test_overlap();
+} 
+elsif(!$mref) {     
+  $fileno=$noOutfiles;         
+  $startcoord=0;$endcoord=0;
+  for ($i=0;$i<$fileno;$i++) {            
+   $endcoord=$startcoord+$nobpinfile-1;
+   $endcoord=$lenrefseqs if ($endcoord>$lenrefseqs);
+   $file[$i]="$startcoord $endcoord";  
+   $startcoord=$endcoord+1;
+  }
+}
+
+    
+$Yorig=3000;
+$YdistPID=2000;
+$yscale=$YdistPID/50;
+$Xscale=14.5;
+$gap=800;
+#$maxfiles = ($fileno < 10) ? $fileno : 10;
+#---------------------------------
+
+if (!$mref){ 
+  for($i=0; $i < $fileno; $i++) {
+    $nrf=$i;
+    set_output_fname();  
+    ($startcoord,$endcoord)=split(/\s+/,$file[$i]);  
+     open(O,">$procfile".$nrf.".fig");
+     print_header(); 
+     print $procfile.$nrf.".fig\t range : $startcoord\t$endcoord \n" if ($verb && ($format eq "fig"));
+    
+     $xs=0;
+     $xe=int(($endcoord-$startcoord+1)/$Xscale);
+    #$xs=200;
+    #$xe=$xs+int(($endcoord-$startcoord+1)/$Xscale);
+     print_grid($xs,$xe,$startcoord,$endcoord);
+   
+     $tmpIdQrycontig="";
+     $linkcolor=$linkcolors[0];               
+ 
+     open(F,$alignm);
+     <F>;<F>;<F>;<F>;<F>;
+     while(<F>) {                             
+	chomp;
+	@a=split;                            
+	if($a[$ind_s1] > $endcoord) { last;}
+	if($a[$ind_s1]<$startcoord && $a[$ind_e1] > $startcoord ) { $a[$ind_s1]=$startcoord;}
+	if($a[$ind_s1] < $endcoord && $endcoord < $a[$ind_e1]) { $a[$ind_e1]=$endcoord;}
+	
+	if($a[$ind_s1]>=$startcoord && $a[$ind_e1]<=$endcoord) {
+          $x1=int(($a[$ind_s1]-$startcoord)/$Xscale);#
+          $x2=int(($a[$ind_e1]-$startcoord)/$Xscale);# 
+          print_align($x1,$x2);
+	}
+     } 
+     close(F);
+     %hQrycontig=();
+     print_genes() if ($futr);
+     print_legend();
+     close(O);
+     change_file_format() if ($format ne "fig");   
+  }
+}
+elsif($mref){#multiple ref seqs
+  set_output_fname();
+  $tmpIdQrycontig="";
+  $linkcolor=$linkcolors[0];
+  $startdrawX=0;
+  $proclen=0;
+  $first=1;
+  $nrf=0;
+  open(F,$alignm);
+  <F>;<F>;<F>;<F>;<F>;
+  while(<F>) {
+    chomp;
+    @a=split;
+    if ($a[-2] ne $tmpcontig){
+       %hQrycontig=();
+       $tmpcontig=$a[-2];
+       if ($first){
+         $first=0;
+         $nrf++;
+         open(O,">$procfile".$nrf.".fig");
+         print_header();
+         print $procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
+         $len=$hRefContigId{$a[-2]};
+       }
+       else {
+         $startdrawX+=int($len/$Xscale)+$gap;
+         $len=$hRefContigId{$a[-2]};
+         if (($proclen+$len>$nobpinfile) and ($proclen != 0)){
+            print_legend();
+            close(O);
+            change_file_format() if ($format ne "fig"); 
+            $nrf++;         
+            open(O,">$procfile".$nrf.".fig");
+            print_header();   
+            print "\n".$procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
+            $proclen=0;
+            $startdrawX=0;
+         }
+       }
+       $xs=$startdrawX;
+       $xe=$startdrawX+int($len/$Xscale);
+       print_grid($xs,$xe,0,$len);
+       #print genes from %geneinfo for contig
+       print $a[-2]."\t".$hRefContigId{$a[-2]}."\n";
+       print_genes_mr() if ($futr);
+       $proclen+=$len;      
+    }#end if new contig
+    $x1=$startdrawX+int($a[$ind_s1]/$Xscale);
+    $x2=$startdrawX+int($a[$ind_e1]/$Xscale);   
+    print_align($x1,$x2);
+  }
+  print_legend();
+  close(O);
+  change_file_format() if ($format ne "fig");
+
+  close(F);
+}
+#*******************************************************************************
+#*******************************************************************************
+sub set_output_fname{
+  
+  if (!$outfilename) {$procfile=$prog."_graph"."_";}
+  else {$procfile=$outfilename."_";}
+  
+  if ($format ne "fig"){ 
+     $procfile="tmp".$procfile;
+  }
+}
+#*********************************
+sub get_cds_ends{
+#3.  print "create \%hcds_ends...\n";
+$testGffFormat=0;
+ open(F,"<".$fcds);#|| die "can't open \" $fcds cds \" file !";
+ while(<F>) {
+   chomp;
+   if($_) { 
+     @a=split;                          
+     if (exists $hRefContigId{$a[0]}){#record if at least one of the ref id is the same in GFF and Align files
+       $testGffFormat++;
+     }
+     $genename=$a[8];                                   
+     if ($genename ne $tmpname){
+        if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
+        elsif ($sign eq "-"){ $hcds_ends{$tmpname} = "$cds3 $cds5";} 
+        $tmpname=$genename; 
+        $sign=$a[6]; 
+     }
+     if($sign eq "-") {
+	$temp=$a[3]; 
+	$a[3]=$a[4];
+	$a[4]=$temp;
+     }
+     if ($a[2] eq "single-exon"){ 
+        $cds5=$a[3];
+        $cds3=$a[4];
+     }
+     elsif ($a[2] eq "initial-exon"){ 
+        $cds5=$a[3];
+     }
+     elsif ($a[2] eq "last-exon"){
+        $cds3=$a[4];
+     }   
+   }
+ }
+ if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
+ elsif ($sign eq "-"){$hcds_ends{$tmpname} = "$cds3 $cds5";} 
+                                                           
+ test_formatGFF(); 
+
+ #foreach $k ( keys %hcds_ends){
+ #  print "cds_ends: ".$k."\t"."\n"; 
+ #} exit;
+ 
+}
+
+#*********************************
+sub test_formatGFF{
+ if ($testGffFormat==0){         
+   print (STDERR  "$err_gff \n"); 
+   exit (1);
+ }
+
+}
+
+#*********************************
+sub get_utrcds_info{
+#test for gene overlap $geneinfo{gene_name}->[0]=level,stock gene 5'3' utr ends,
+#determina %geneinfo{id gene}->5utr,ex,3utr  
+#and @file 
+# get_gene_ends();
+                $testGffFormat=0;
+open(F,"<".$futr);# || die "can't open \" $futr utr \" file !";
+while(<F>) {
+  chomp;
+  if($_) { 
+     @a=split;
+     #get gene ends-utr
+     $genename=$a[8];                      
+     if (exists $hRefContigId{$a[0]}){ #check if [align file(col before the last one)] = [GFF file(col 1)]
+       $testGffFormat++;
+     }
+           
+     if ($genename ne $tmpname){
+        if ($sign eq "+"){ $utr_ends{$tmpname} = "$utr5 $utr3";}
+        elsif ($sign eq "-"){ $utr_ends{$tmpname} = "$utr3 $utr5";} 
+       
+        if ($tmpgene) {# for the distinct_utr_cds
+          get_utrcds_ends() ;        
+        }
+        $tmpgene="$a[3] $a[4]";# 
+        
+        $tmpname=$genename; 
+        $sign=$a[6];  
+        $hContig_genes{$a[0]}.=" ".$a[8];  # for multiple ref. seqs  
+     }
+     else{# for the distinct_utr_cds
+        if($sign eq "-") {
+	$tmpgene="$a[3] $a[4];".$tmpgene;
+        }
+        else { $tmpgene.=";$a[3] $a[4]"; }
+     }#
+     
+     if($sign eq "-") {
+	$temp=$a[3]; 
+	$a[3]=$a[4];
+	$a[4]=$temp;
+     }
+     if ($a[2] eq "single-exon"){  
+        $utr5=$a[3];
+        $utr3=$a[4];
+     }
+     elsif ($a[2] eq "initial-exon"){ 
+        $utr5=$a[3];
+     }
+     elsif ($a[2] eq "last-exon"){
+        $utr3=$a[4];
+     }
+    
+    #init gene info (level) 
+    $geneinfo{$a[8]}->[0]=1 if (!exists $geneinfo{$a[8]});                 
+        
+  }
+}
+ # for the distinct_utr_cds
+ get_utrcds_ends();  
+ %cds_ends=();# 
+
+ if ($sign eq "+"){$utr_ends{$tmpname}="$utr5 $utr3"; }
+ elsif ($sign eq "-"){$utr_ends{$tmpname}="$utr3 $utr5"; } 
+ $hContig_genes{$a[0]}.=" ".$a[8];
+
+ close(F);
+ test_formatGFF();
+ 
+}
+
+
+#****************************************************************
+sub get_utrcds_ends{
+ $u5="";$ex="";$u3="";
+ 
+ if ($fcds eq $futr) {
+   $ex=$tmpgene;
+ } 
+ else {  
+   @ex=split(";",$tmpgene); 
+   @cds=split(" ",$hcds_ends{$tmpname});
+ 
+   for($i=0;$i<=$#ex;$i++){
+      @coord=split(" ",$ex[$i]);
+    
+      if ($cds[0]>$coord[0]){
+        if ($cds[0]>$coord[1]){
+           $u5.="$coord[0] $coord[1];";      
+        }
+        else{
+           $u5.= "$coord[0] "; $u5.=$cds[0]-1 .";" ; #?
+           if ($cds[1]<$coord[1]){
+             $ex.="$cds[0] $cds[1];";
+             $u3.=$cds[1]+1 ." $coord[1];";
+           }
+           else{ $ex.="$cds[0] $coord[1];";}       
+        }    
+      }
+      else {
+        if ($cds[1]>$coord[0]){
+          if ($cds[1]>$coord[1]){ 
+            $ex.="$coord[0] $coord[1];";       
+          }
+          else{
+            $ex.="$coord[0] $cds[1];";
+            $u3.=$cds[1]+1 ." $coord[1];";
+          }     
+        }
+        else { $u3.="$coord[0] $coord[1];";}    
+      }    
+   }
+   chop($u5, $ex, $u3);
+ }
+ $geneinfo{$tmpname}->[1]=$sign;
+ $geneinfo{$tmpname}->[2]=$u5;
+ $geneinfo{$tmpname}->[3]=$ex;
+ $geneinfo{$tmpname}->[4]=$u3;
+}
+#*********************************
+sub test_overlap{
+
+ if (!$mref){
+   $fileno=0;###
+   $endcoord=0;###
+ }
+ foreach $kcontgid (sort keys %hContig_genes){                       
+      @allgenes=split(/\s+/,$hContig_genes{$kcontgid});       
+      for ($i=1;$i<=$#allgenes;$i++){
+         @g1=split (" ", $utr_ends{$allgenes[$i]});    
+         $Utr5End{$allgenes[$i]}=$g1[0]; ###
+    
+         for ($j=$i+1;$j<=$#allgenes;$j++){ #comparing with the rest of the genes
+            @g2=split (" ", $utr_ends{$allgenes[$j]});   
+            #if the genes are overpaling and they have the same level ,the second gene is liflet to the next level 
+            if ( (($g2[0]>=$g1[0]) and ($g2[0]<=$g1[1])) or (($g2[1]>=$g1[0]) and ($g2[1]<=$g1[1])) or (($g1[0]>=$g2[0]) and ($g1[0]<=$g2[1])) ){
+              if ($geneinfo{$allgenes[$i]}->[0] == $geneinfo{$allgenes[$j]}->[0]){
+                $geneinfo{$allgenes[$j]}->[0]=$geneinfo{$allgenes[$i]}->[0] + 1 ;
+              }
+            }   
+         }                        
+         SetTheRangeForEachFile() if ((!$endfind) and (!$mref)); ###         
+      }                                              
+  }
+  $file[$fileno++]="$startcoord $endcoord" if (!$mref);###                        
+  %utr_ends=();###  
+ 
+}  
+
+#**************************************
+sub SetTheRangeForEachFile{      
+   $currstart=$g1[0];
+   $currend=$g1[1];
+   #---test range ends intersection
+   if (!$startfind) {
+     if (($x1win <= $currstart) || ($x1win <= $currend)){
+         $currstart = $x1win;
+         $startfind = 1;
+      }  
+   }
+   if ( $startfind && $x1win && $x2win){
+       if (($x2win <= $currstart) || ($x2win <= $currend) ){
+           $currend = $x2win;
+           $endfind = 1; 
+        } 
+   }
+#--------------------
+   if ($startfind) { 
+       if(!$endcoord) {
+          #$startcoord=0;
+          $startcoord = $x1win ? $x1win : 0;
+	  $endcoord=$currend;
+       }
+       else {
+         if($currend > $endcoord) {
+	    if($currend-$startcoord < $nobpinfile) {
+               $endcoord=$currend; 
+	    }
+	    else {
+	       $file[$fileno++]="$startcoord $endcoord";
+	       $startcoord=$endcoord+1;
+	       $endcoord=$currend;
+	    }
+	  }
+       }
+   }#if startfind        
+}
+#*********************************
+sub print_header{
+  print O "#FIG 3.2\nLandscape\nCenter\nInches\nLetter  \n100.00\nMultiple\n-2\n1200 2\n";
+}
+#*********************************
+sub print_align{
+  my ($x1,$x2)=@_;
+ 
+  $a[$ind_pidy]=50 if ($a[$ind_pidy]<50);
+  $a[$ind_pidy]=int($a[$ind_pidy]);
+  if ($Mgaps){
+     $y=$Yorig+250+$YdistPID-$yscale*2;
+     if($a[$#a]=~/rev$/){$y-=25*$yscale;}      
+  }
+  else{
+    $y=$Yorig+250+$YdistPID-$yscale*($a[$ind_pidy]-50);
+  }
+  if($x1==$x2) { $x2++;}
+     #draw the line between matches. is dif color for each contig
+  if ($a[$#a] eq $tmpIdQrycontig) {               
+     print_connections($hQrycontig{$tmpIdQrycontig}->[1], $x1,$y);
+  }
+  else{#new contig
+   #remember the start coord for printing the id alignments
+   if ($printIDconting){                                            
+      if  ( $x1 - $XlastPrint > 400 ) { 
+         print O "4 0 0 5 0 0 8 0.0000 4 90 270 ";
+         printf O ("\t%.0f %.0f ",$x1,$y); 
+         print O  $a[$#a], "\\001\n";
+         $XlastPrint=$x1; $YlastPrint=$y;
+      }
+   }
+   # 
+     ##if it was seen before,but interrupted by another contig   
+     if ((exists $hQrycontig{$a[$#a]}) and ($a[$ind_s1]-$hQrycontig{$a[$#a]}->[2] < $match_dist )) { 
+        $linkcolor=$hQrycontig{$a[$#a]}->[0];
+        print_connections($hQrycontig{$a[$#a]}->[1], $x1,$y);
+     }
+     else{
+         #change the link color 
+         unshift(@linkcolors, pop(@linkcolors)); 
+         $linkcolor=$linkcolors[0];
+         $hQrycontig{$a[$#a]}->[0] = $linkcolor;
+     }
+   }
+   $tmpIdQrycontig=$a[$#a];
+   $hQrycontig{$tmpIdQrycontig}->[1]="$x2 $y";
+   $hQrycontig{$tmpIdQrycontig}->[2]=$a[$ind_e1]; 
+ 
+    #the matches line is red
+   print O "2 1 0 2 4 0 40 0 -1 0.000 0 0 -1 0 0 2\n";
+   print O "\t$x1 $y $x2 $y\n";
+   print O "2 1 0 5 20 0 50 0 -1 0.000 0 0 -1 0 0 2\n";		
+   printf O ("\t $x1 %.0f $x2 %.0f\n",$Yorig+150 , $Yorig+150);
+}
+#*********************************
+sub print_connections{
+ my ($setc1,$setx2,$sety2)=@_;                    # print "\nparam connect  @_\n";
+ my ($setx1,$sety1) =split(/ /,$setc1);
+ 
+   if ($Mgaps){      
+     if ($setx1>$setx2){
+        $tmpsetx1=$setx1;
+        $setx1=$setx2;
+        $setx2=$tmpsetx1;
+     }
+       $distx1x2=int(($setx2-$setx1)/2);
+       $xcenter= $setx1+$distx1x2;
+      
+       if ($setx2-$setx1>4000)  { #if the distance is to big then heigh of the arc is set to 20 
+         $heightArcUp = 20*$yscale;  
+         $yoffcenter=int((($distx1x2**2)+$heightArcUp**2)*(1/(2*$heightArcUp)))-$heightArcUp ; 
+       }
+       else{
+         $heightArcUp = int (0.447 * $distx1x2);#sectorul de cerc la 1/3 din raza.
+         $yoffcenter=2*$heightArcUp;
+       }
+         print O "5 1 0 2 $linkcolor 0 50 0 -1 0.000 0 0 0 0 ";
+         printf O ("%.3f %.3f $setx1 $sety1 $xcenter %.0f $setx2 $sety1 \n",$xcenter,$sety1+$yoffcenter,$sety1-$heightArcUp); 
+  }
+  else{
+    print O "2 1 0 1 $linkcolor 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
+    print O "\t".$setc1." $setx2 $sety2\n";
+  
+  }
+}
+#*********************************
+sub print_genes_mr{ 
+ %hLastOnLevel=(); 
+ @g=split(/\s+/,$hContig_genes{$a[-2]});
+ for ($i=1;$i<=$#g;$i++){
+ $kname=$g[$i];
+   $tmpx2=0;
+   $y=$Yorig-100-200*$geneinfo{$kname}->[0];
+   #print id gena     
+    $xid =$startdrawX+ int($Utr5End{$kname}/$Xscale);
+    print_Id_genes() if ($printIDgenes);
+   #     
+   for ($l=2;$l<5;$l++){
+     @c=split(";",$geneinfo{$kname}->[$l] ) ; 
+      if (@c){ #print "de unde?@c\n" if (($l==2) or ($l==4));
+      $colorend=$color{$l};
+      if ($geneinfo{$kname}->[1] eq "-"){
+         if ($l==2) { $colorend=$color{"4"}; }
+         elsif ($l==4) {$colorend=$color{"2"};}
+      }       
+      for ($k=0;$k<=$#c;$k++){         
+         @e=split (" ",$c[$k]); 
+         $x1=$startdrawX+int($e[0]/$Xscale);
+         $x2=$startdrawX+int($e[1]/$Xscale);
+         if($x1==$x2) { $x2++;}
+         if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron 
+            print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
+            print O "\t $tmpx2 $y $x1 $y\n";
+         }
+         $tmpx2=$x2;
+         print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#		
+         print O "\t $x1 $y $x2 $y\n";  
+      }
+     }       
+   }          
+   #delete ($geneinfo{$kname});
+ }
+}
+
+#*********************************
+sub print_Id_genes{
+ if (exists $hLastOnLevel{$geneinfo{$kname}->[0]}){
+    $lastOnlevel=$hLastOnLevel{$geneinfo{$kname}->[0]};
+    $printidspace = int(($Utr5End{$kname}-$Utr5End{$lastOnlevel})/$Xscale);
+ }
+ else{$printidspace=601;}
+ if ($printidspace > 600){                
+    #print contig name#
+    print O "4 0 0 5 0 0 6 0.0000 4 90 270 ";
+    printf O ("\t%.0f %.0f ",$xid,$y-50); 
+    print O  $kname, "\\001\n";
+    $hLastOnLevel{$geneinfo{$kname}->[0]}=$kname;
+ } 
+}
+#*********************************
+sub print_genes{
+ %hLastOnLevel=();                                 
+ foreach $kname  (sort {$Utr5End{$a} <=> $Utr5End{$b}} keys %Utr5End){  
+   $tmpx2=0;
+   if ($Utr5End{$kname}>$startcoord && $Utr5End{$kname}<$endcoord){       
+     $y=$Yorig-100-200*$geneinfo{$kname}->[0];
+     #print id gena     
+        $xid = int(($Utr5End{$kname}-$startcoord)/$Xscale);
+        print_Id_genes() if ($printIDgenes);
+     #     
+      for ($l=2;$l<5;$l++){
+         @c=split(";",$geneinfo{$kname}->[$l] ); 
+         $colorend=$color{$l};
+         if ($geneinfo{$kname}->[1] eq "-"){
+            if ($l==2) { $colorend=$color{"4"}; }
+            elsif ($l==4) {$colorend=$color{"2"};}
+         }
+         
+         for ($k=0;$k<=$#c;$k++){         
+            @e=split (" ",$c[$k]); 
+            $x1=int(($e[0]-$startcoord)/$Xscale);
+            $x2=int(($e[1]-$startcoord)/$Xscale);
+            if($x1==$x2) { $x2++;}
+            if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron 
+               print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
+               print O "\t $tmpx2 $y $x1 $y\n";
+            }
+            $tmpx2=$x2;
+            print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#		
+            print O "\t $x1 $y $x2 $y\n";  
+         }      
+      }
+    }#endif "is in interval"
+  }
+ # delete ($Utr5End{$kname});
+ # delete ($geneinfo{$kname});
+ 
+}
+#*********************************
+sub print_grid{  
+
+my ($xs,$xe,$startcontg,$endcontg)=@_;
+
+$XlastPrint=0;$YlastPrint=0;
+
+   #print ref contig
+    print O "2 1 0 10 11 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
+    printf O ("\t $xs %.0f $xe %.0f\n",$Yorig+50,$Yorig+50);
+    #print orizontal axes for PId (100%,75%,50%)
+    for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
+        print O  "2 1 2 1 0 7 60 0 -1 4.000 0 0 -1 0 0 2\n";
+        printf O  ("\t$xs %.0f $xe %.0f\n",$Yorig+250+$YdistPID-($percent_id - 50) * $yscale,$Yorig+250+$YdistPID-($percent_id - 50) * $yscale);
+        #last if ($Mgaps);    
+    }
+    #print orizontal markers for bp.
+    $increment=10000/$Xscale;
+    $no_incr=0;
+    $xmark = $xs ;$xmark_float= $xs;
+     while ($xmark < $xe){
+        print O  "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
+        printf O  ("\t$xmark %.0f $xmark %.0f\n",$Yorig+$YdistPID+250,$Yorig+$YdistPID+300);
+         #bp scale
+        print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
+        printf O ("\t %.0f %.0f",$xmark,$Yorig+$YdistPID+400); 
+        print O  " $no_incr"."k", "\\001\n";
+        $no_incr += 10;
+        $xmark_float += $increment;
+        $xmark=int($xmark_float);
+    }
+    
+    #coord for chr ends
+    print O "4 0 0 50 0 0 14 0.0000 4 135 450 $xs $Yorig $startcontg\\001\n";
+    printf O ("4 0 0 50 0 0 14 0.0000 4 135 810 %.0f $Yorig $endcontg\\001\n",$xe-length($xe)*125);
+     
+    #print contig name#
+    if ($mref){
+      print O "4 0 0 5 0 0 8 0.0000 4 135 405 ";
+      printf O ("\t%.0f %.0f ",$xs,$Yorig+70); 
+      print O  $a[-2], "\\001\n";                     
+    }
+    #print vertical markers for PId scale 
+    if (!$Mgaps){
+      for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
+        #left
+        print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
+        printf O ("\t%.0f %.0f", $xs-200,$Yorig+$YdistPID+250-($percent_id - 50) * $yscale + 20); 
+        print O  " $percent_id%", "\\001\n";
+        #right
+        print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
+        printf O ("\t%.0f %.0f",$xe+20, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale+20 ); 
+        print O  " $percent_id%", "\\001\n";
+      
+        # print the tick mark
+        #left
+       # print O  "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; 
+      #  printf O ("\t%.0f %.0f $xs %.0f\n",$xs-50,
+	#   $Yorig+$YdistPID+250-($percent_id - 50) * $yscale, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
+      #right
+       # print O  "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; 
+      #  printf O ("\t$xe %.0f %.0f %.0f\n",
+	#   $Yorig+$YdistPID+250-($percent_id - 50) * $yscale,$xe+50, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
+      }
+    }
+    else{ # for Mgaps
+      print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
+      printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 5 * $yscale + 10); 
+      print O  " + qry strand", "\\001\n";
+
+      print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
+      printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 30 * $yscale + 10); 
+      print O  " - qry strand", "\\001\n";      
+    }
+    
+}
+#*********************************
+sub print_legend{ 
+
+     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
+     printf O ("\t%.0f %.0f ",100,$Yorig+$YdistPID+1100); 
+     print O  " Legend ", "\\001\n";
+     $y= $Yorig+$YdistPID+1300; #utr
+     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
+     print O "\t 70 $y 99 $y\n";
+     print O "2 1 0 5 27 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
+     print O "\t 100 $y 200 $y\n";
+     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
+     print O "\t 200 $y 230 $y\n";        
+     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
+     printf O ("\t%.0f %.0f ",300,$y+30); 
+     print O  " 5' utr ", "\\001\n";
+     $y += 150 ;#cds
+     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
+     print O "\t 70 $y 99 $y\n";     
+     print O "2 1 0 5 2 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
+     print O "\t 100 $y 200 $y\n";
+     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
+     print O "\t 200 $y 230 $y\n";                 
+     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
+     printf O ("\t%.0f %.0f ",300,$y+30); 
+     print O  " cds ", "\\001\n";
+     $y += 150;        #3' utr
+     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
+     print O "\t 70 $y 99 $y\n";     
+     print O "2 1 0 5 1 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
+     print O "\t 100 $y 200 $y\n";         
+     print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
+     print O "\t 200 $y 230 $y\n";             
+     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";     
+     printf O ("\t%.0f %.0f ",300,$y+30); 
+     print O  " 3' utr ", "\\001\n";
+     $y += 150; # match
+     print O "2 1 0 2 4 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
+     print O "\t100 $y 200 $y\n";
+     print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";    
+     printf O ("\t%.0f %.0f ",300,$y+30); 
+     print O  " match found by $prog ", "\\001\n";
+}
+#*********************************
+sub change_file_format{   
+
+  $procfile =~ /^tmp(.+)/;
+  $outfile = $1.$nrf.".".$format; 
+  $comand = "fig2dev -L $format -x 100";
+  $comand .= " -m $magn" if ($magn);
+  $comand .= " -M ".$procfile.$nrf.".fig ".$outfile;
+   
+  $status =system($comand);
+   print E "ERROR 1: fig2dev !\n" unless $status == 0;
+   
+  $status =system("rm $procfile".$nrf.".fig");
+  
+  if ($verb){
+    print "$outfile";
+    if ($mref){
+      print "\n" ;
+    }
+    else {
+      print "\t range : $startcoord\t$endcoord \n" ;
+    }
+  }
+}
+#*********************************
+sub format_mgaps{
+$tmpfile="tmpmgaps";
+$tmpfile2=$alignm."coords" ;
+get_ref_len();    #print $maxlenref."\n";
+ open(M,">".$tmpfile2) || die "can't open \" $tmpfile2  \" file !";
+  print M "$alignm\n";
+  print M "Mgaps\n\n";
+  print M "    [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  |  [LEN R]  [LEN Q]  |  [COV R]  [COV Q]  | [TAGS]\n";
+  print M "===============================================================================================================================\n";
+
+ open(T,">".$tmpfile) || die "can't open \" $tmpfile  \" file !";
+ 
+ open(A,"<".$alignm) || die "can't open \" $alignm  \" file !";
+ 
+ 
+ while(<A>) {
+  chomp;
+  @a=split;
+  if ($a[0] =~ /^>/){    
+    $nr_cluster=1;    
+    $idquery=$a[1];
+    if ($a[2] eq "Reverse"){$idquery .= "_rev";}
+  }
+  #elsif ($a[0] eq "#") {$nr_cluster++;}
+  elsif($a[0] ne "#"){  
+    $e1=$a[0]+$a[2];
+    print T $a[0]."\t".$e1."\t"."|";
+    print T "\t-\t-\t|"; 
+    print T "\t-\t-\t|";
+    print T "\t-\t|"; #pid
+    print T "\t$maxlenref\t-\t|";#len seqs
+   # print "\t-\t-\t|";
+   # print "\t-\t-\t|";
+   # print T " $Mgaps\t$idquery.$nr_cluster\n"; 
+   print T " $Mgaps\t$idquery\n";   
+  }
+  
+ }
+ close(A);
+ close(T);
+ $command="sort -n -k 1 $tmpfile >> $tmpfile2";
+ $status =system($command);  
+ system("rm $tmpfile");
+ close(M);
+ $alignm=$tmpfile2;
+ print STDERR "ERROR 1:  can't sort $tmpfile \n" unless $status == 0;
+ print STDERR "\n**************************************** \n";
+ print STDERR "New input file created : $alignm\n";
+ print STDERR "**************************************** \n\n";
+ 
+}
+#*****************************
+sub get_ref_len{ 
+ $firstrow=1;
+ open(A,"<".$alignm) || die "can't open \" $alignm  \" file !";
+ while(<A>) {
+   chomp;
+   @a=split;
+   if ($firstrow) {
+     $firstrow=0;
+     if ($a[0] !~ /^>/){
+        print "\nWrong file format for MGAPS file : $alignm ! \n";
+        exit;
+     }
+  }
+
+  if ($a[0] =~ /^>/){ next; }
+  elsif($a[0] ne "#"){  
+    $e1=$a[0]+$a[2];
+    $maxlenref=($maxlenref < $e1 ? $e1 : $maxlenref);
+  }
+ }
+ close(A);
+}