comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/mapview @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 #!/usr/bin/env perl
2
3 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
4 use Foundation;
5
6 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
7
8
9 my $VERSION_INFO = q~
10 mapview version 1.01
11 ~;
12
13
14 my $HELP_INFO = q~
15 USAGE: mapview [options] <coords file> [UTR coords] [CDS coords]
16
17 DESCRIPTION:
18 mapview is a utility program for displaying sequence alignments as
19 provided by MUMmer, NUCmer, PROmer or Mgaps. mapview takes the output of
20 show-coords and converts it to a FIG, PDF or PS file for visual analysis.
21 It can also break the output into multiple files for easier viewing and
22 printing.
23
24 MANDATORY:
25 coords file The output of 'show-coords -rl[k]' or 'mgaps'
26
27 OPTIONS:
28 UTR coords UTR coordinate file in GFF format
29 CDS coords CDS coordinate file in GFF format
30
31 -d|maxdist Set the maximum base-pair distance between linked matches
32 (default 50000)
33 -f|format Set the output format to 'pdf', 'ps' or 'fig'
34 (default 'fig')
35 -h
36 --help Display help information and exit
37 -m|mag Set the magnification at which the figure is rendered,
38 this is an option for fig2dev which is used to generate
39 the PDF and PS files (default 1.0)
40 -n|num Set the number of output files used to partition the
41 output, this is to avoid generating files that are too
42 large to display (default 10)
43 -p|prefix Set the output file prefix
44 (default "PROMER_graph or NUCMER_graph")
45 -v
46 --verbose Verbose logging of the processed files
47 -V
48 --version Display the version information and exit
49 -x1 coord Set the lower coordinate bound of the display
50 -x2 coord Set the upper coordinate bound of the display
51 -g|ref If the input file is provided by 'mgaps', set the
52 reference sequence ID (as it appears in the first column
53 of the UTR/CDS coords file)
54 -I Display the name of query sequences
55 -Ir Display the name of reference genes
56 ~;
57
58
59 my $USAGE_INFO = q~
60 USAGE: mapview [options] <coords file> [UTR coords] [CDS coords]
61 ~;
62
63
64 my @DEPEND_INFO =
65 (
66 "fig2dev",
67 "$SCRIPT_DIR/Foundation.pm"
68 );
69
70 my $err_gff = q~
71 ERROR in the input files ! The reference seq ID can't be found in GFF files !
72 The first column in the GFF file should be the ID of the reference seq.
73 The alignments file should provide the same info in the column before the last one.
74
75 Here are some example records for the GFF file:
76
77 gnl|FlyBase|X Dmel3 initial-exon 2155 2413 . - . X_CG3038.1
78 gnl|FlyBase|X Dmel3 last-exon 1182 2077 . - . X_CG3038.1
79 ...
80 The fields are :
81 <seq_ID> <source> <exon type> <start> <end> <score> <strand> <frame> <gene_name>
82 ~;
83
84 my $tigr;
85 my $err;
86
87 my $alignm;
88 my $futr;
89 my $fcds;
90
91 #-- Initialize TIGR::Foundation
92 $tigr = new TIGR::Foundation;
93 if ( !defined ($tigr) ) {
94 print (STDERR "ERROR: TIGR::Foundation could not be initialized");
95 exit (1);
96 }
97
98 #-- Set help and usage information
99 $tigr->setHelpInfo ($HELP_INFO);
100 $tigr->setUsageInfo ($USAGE_INFO);
101 $tigr->setVersionInfo ($VERSION_INFO);
102 $tigr->addDependInfo (@DEPEND_INFO);
103
104 $err = $tigr->TIGR_GetOptions
105 (
106 "d|maxdist=i" => \$match_dist,
107 "f|format=s" => \$format,
108 "m|mag=f" => \$magn,
109 "n|num=i" => \$noOutfiles,
110 "p|prefix=s" => \$outfilename,
111 "x1=i" => \$x1win,
112 "x2=i" => \$x2win,
113 "v|verbose" => \$verb,
114 "g|ref=s" => \$Mgaps,
115 "I" => \$printIDconting,
116 "Ir" => \$printIDgenes
117 );
118
119 if ( $err == 0 || scalar(@ARGV) < 1 || scalar(@ARGV) > 3 ) {
120 $tigr->printUsageInfo( );
121 print (STDERR "Try '$0 -h' for more information.\n");
122 exit (1);
123 }
124
125 ($alignm,$futr,$fcds)=@ARGV;
126
127 if ((substr($x1win,0,1) eq '-') || (substr($x2win,0,1) eq '-')){
128 print "ERROR2 : coords x1,x2 should be positive integers !!\n";
129 $info=1;
130 }
131 if ($x1win > $x2win) {
132 print "ERROR3 : wrong range coords : x1 >= x2 !!!\n";
133 $info=1;
134 }
135 if ($Mgaps){ #formating the mgaps output to be similar with show-coords output
136 format_mgaps();
137 }
138
139 if (!$format){$format="fig";}
140 if (($x1win) and ($x2win)){ $startfind=0; }
141 else{ $startfind=1; }
142 $endfind=0;
143 if (!$noOutfiles) {$noOutfiles=10;}
144 if (!$match_dist){$match_dist=50000;}
145
146 #```````init colors````````````````````````````
147 $color{"2"}=27;#dark pink 5utr
148 $color{"3"}=2;#green ex
149 $color{"4"}=1;#blue 3utr
150 #``````````````````````
151 @linkcolors=(31,14,11);
152 #``````````````````````````````````````````````
153 open(F,$alignm);
154 <F>;
155 $prog=<F>; chomp($prog);
156 <F>;
157 $_=<F>;
158 @a=(m/\s+(\||\[.+?\])/g) ;
159 for ($ind=0;$ind<=$#a;$ind++){
160 if ($a[$ind] eq "[S1]") {
161 $ind_s1=$ind;
162 }
163 elsif ($a[$ind] eq "[E1]"){
164 $ind_e1=$ind;
165 }
166 elsif ($a[$ind] eq "[% IDY]"){
167 $ind_pidy=$ind;
168 }
169 elsif ($a[$ind] eq "[LEN R]"){
170 $ind_lenchr=$ind;
171 }
172 # elsif ($a[$ind] eq "[TAGS]"){
173 # $ind_tags=$ind+1; #there are two columns for this header col
174 # }
175 }
176 <F>;$mref=-1;
177 while (<F>){
178 chomp;
179 @a=split;
180 if (!exists $hRefContigId{$a[-2]}) { # print $a[-2]."\n";
181 $hRefContigId{$a[-2]}=$a[$ind_lenchr];
182 $lenrefseqs+=$a[$ind_lenchr];
183 $mref++;
184 }
185 }
186 $nobpinfile=int($lenrefseqs/$noOutfiles);
187
188 close(F);
189
190 #``````````````````````
191
192
193 if (@ARGV > 1) {
194 get_cds_ends();
195 get_utrcds_info();
196 test_overlap();
197 }
198 elsif(!$mref) {
199 $fileno=$noOutfiles;
200 $startcoord=0;$endcoord=0;
201 for ($i=0;$i<$fileno;$i++) {
202 $endcoord=$startcoord+$nobpinfile-1;
203 $endcoord=$lenrefseqs if ($endcoord>$lenrefseqs);
204 $file[$i]="$startcoord $endcoord";
205 $startcoord=$endcoord+1;
206 }
207 }
208
209
210 $Yorig=3000;
211 $YdistPID=2000;
212 $yscale=$YdistPID/50;
213 $Xscale=14.5;
214 $gap=800;
215 #$maxfiles = ($fileno < 10) ? $fileno : 10;
216 #---------------------------------
217
218 if (!$mref){
219 for($i=0; $i < $fileno; $i++) {
220 $nrf=$i;
221 set_output_fname();
222 ($startcoord,$endcoord)=split(/\s+/,$file[$i]);
223 open(O,">$procfile".$nrf.".fig");
224 print_header();
225 print $procfile.$nrf.".fig\t range : $startcoord\t$endcoord \n" if ($verb && ($format eq "fig"));
226
227 $xs=0;
228 $xe=int(($endcoord-$startcoord+1)/$Xscale);
229 #$xs=200;
230 #$xe=$xs+int(($endcoord-$startcoord+1)/$Xscale);
231 print_grid($xs,$xe,$startcoord,$endcoord);
232
233 $tmpIdQrycontig="";
234 $linkcolor=$linkcolors[0];
235
236 open(F,$alignm);
237 <F>;<F>;<F>;<F>;<F>;
238 while(<F>) {
239 chomp;
240 @a=split;
241 if($a[$ind_s1] > $endcoord) { last;}
242 if($a[$ind_s1]<$startcoord && $a[$ind_e1] > $startcoord ) { $a[$ind_s1]=$startcoord;}
243 if($a[$ind_s1] < $endcoord && $endcoord < $a[$ind_e1]) { $a[$ind_e1]=$endcoord;}
244
245 if($a[$ind_s1]>=$startcoord && $a[$ind_e1]<=$endcoord) {
246 $x1=int(($a[$ind_s1]-$startcoord)/$Xscale);#
247 $x2=int(($a[$ind_e1]-$startcoord)/$Xscale);#
248 print_align($x1,$x2);
249 }
250 }
251 close(F);
252 %hQrycontig=();
253 print_genes() if ($futr);
254 print_legend();
255 close(O);
256 change_file_format() if ($format ne "fig");
257 }
258 }
259 elsif($mref){#multiple ref seqs
260 set_output_fname();
261 $tmpIdQrycontig="";
262 $linkcolor=$linkcolors[0];
263 $startdrawX=0;
264 $proclen=0;
265 $first=1;
266 $nrf=0;
267 open(F,$alignm);
268 <F>;<F>;<F>;<F>;<F>;
269 while(<F>) {
270 chomp;
271 @a=split;
272 if ($a[-2] ne $tmpcontig){
273 %hQrycontig=();
274 $tmpcontig=$a[-2];
275 if ($first){
276 $first=0;
277 $nrf++;
278 open(O,">$procfile".$nrf.".fig");
279 print_header();
280 print $procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
281 $len=$hRefContigId{$a[-2]};
282 }
283 else {
284 $startdrawX+=int($len/$Xscale)+$gap;
285 $len=$hRefContigId{$a[-2]};
286 if (($proclen+$len>$nobpinfile) and ($proclen != 0)){
287 print_legend();
288 close(O);
289 change_file_format() if ($format ne "fig");
290 $nrf++;
291 open(O,">$procfile".$nrf.".fig");
292 print_header();
293 print "\n".$procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
294 $proclen=0;
295 $startdrawX=0;
296 }
297 }
298 $xs=$startdrawX;
299 $xe=$startdrawX+int($len/$Xscale);
300 print_grid($xs,$xe,0,$len);
301 #print genes from %geneinfo for contig
302 print $a[-2]."\t".$hRefContigId{$a[-2]}."\n";
303 print_genes_mr() if ($futr);
304 $proclen+=$len;
305 }#end if new contig
306 $x1=$startdrawX+int($a[$ind_s1]/$Xscale);
307 $x2=$startdrawX+int($a[$ind_e1]/$Xscale);
308 print_align($x1,$x2);
309 }
310 print_legend();
311 close(O);
312 change_file_format() if ($format ne "fig");
313
314 close(F);
315 }
316 #*******************************************************************************
317 #*******************************************************************************
318 sub set_output_fname{
319
320 if (!$outfilename) {$procfile=$prog."_graph"."_";}
321 else {$procfile=$outfilename."_";}
322
323 if ($format ne "fig"){
324 $procfile="tmp".$procfile;
325 }
326 }
327 #*********************************
328 sub get_cds_ends{
329 #3. print "create \%hcds_ends...\n";
330 $testGffFormat=0;
331 open(F,"<".$fcds);#|| die "can't open \" $fcds cds \" file !";
332 while(<F>) {
333 chomp;
334 if($_) {
335 @a=split;
336 if (exists $hRefContigId{$a[0]}){#record if at least one of the ref id is the same in GFF and Align files
337 $testGffFormat++;
338 }
339 $genename=$a[8];
340 if ($genename ne $tmpname){
341 if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
342 elsif ($sign eq "-"){ $hcds_ends{$tmpname} = "$cds3 $cds5";}
343 $tmpname=$genename;
344 $sign=$a[6];
345 }
346 if($sign eq "-") {
347 $temp=$a[3];
348 $a[3]=$a[4];
349 $a[4]=$temp;
350 }
351 if ($a[2] eq "single-exon"){
352 $cds5=$a[3];
353 $cds3=$a[4];
354 }
355 elsif ($a[2] eq "initial-exon"){
356 $cds5=$a[3];
357 }
358 elsif ($a[2] eq "last-exon"){
359 $cds3=$a[4];
360 }
361 }
362 }
363 if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
364 elsif ($sign eq "-"){$hcds_ends{$tmpname} = "$cds3 $cds5";}
365
366 test_formatGFF();
367
368 #foreach $k ( keys %hcds_ends){
369 # print "cds_ends: ".$k."\t"."\n";
370 #} exit;
371
372 }
373
374 #*********************************
375 sub test_formatGFF{
376 if ($testGffFormat==0){
377 print (STDERR "$err_gff \n");
378 exit (1);
379 }
380
381 }
382
383 #*********************************
384 sub get_utrcds_info{
385 #test for gene overlap $geneinfo{gene_name}->[0]=level,stock gene 5'3' utr ends,
386 #determina %geneinfo{id gene}->5utr,ex,3utr
387 #and @file
388 # get_gene_ends();
389 $testGffFormat=0;
390 open(F,"<".$futr);# || die "can't open \" $futr utr \" file !";
391 while(<F>) {
392 chomp;
393 if($_) {
394 @a=split;
395 #get gene ends-utr
396 $genename=$a[8];
397 if (exists $hRefContigId{$a[0]}){ #check if [align file(col before the last one)] = [GFF file(col 1)]
398 $testGffFormat++;
399 }
400
401 if ($genename ne $tmpname){
402 if ($sign eq "+"){ $utr_ends{$tmpname} = "$utr5 $utr3";}
403 elsif ($sign eq "-"){ $utr_ends{$tmpname} = "$utr3 $utr5";}
404
405 if ($tmpgene) {# for the distinct_utr_cds
406 get_utrcds_ends() ;
407 }
408 $tmpgene="$a[3] $a[4]";#
409
410 $tmpname=$genename;
411 $sign=$a[6];
412 $hContig_genes{$a[0]}.=" ".$a[8]; # for multiple ref. seqs
413 }
414 else{# for the distinct_utr_cds
415 if($sign eq "-") {
416 $tmpgene="$a[3] $a[4];".$tmpgene;
417 }
418 else { $tmpgene.=";$a[3] $a[4]"; }
419 }#
420
421 if($sign eq "-") {
422 $temp=$a[3];
423 $a[3]=$a[4];
424 $a[4]=$temp;
425 }
426 if ($a[2] eq "single-exon"){
427 $utr5=$a[3];
428 $utr3=$a[4];
429 }
430 elsif ($a[2] eq "initial-exon"){
431 $utr5=$a[3];
432 }
433 elsif ($a[2] eq "last-exon"){
434 $utr3=$a[4];
435 }
436
437 #init gene info (level)
438 $geneinfo{$a[8]}->[0]=1 if (!exists $geneinfo{$a[8]});
439
440 }
441 }
442 # for the distinct_utr_cds
443 get_utrcds_ends();
444 %cds_ends=();#
445
446 if ($sign eq "+"){$utr_ends{$tmpname}="$utr5 $utr3"; }
447 elsif ($sign eq "-"){$utr_ends{$tmpname}="$utr3 $utr5"; }
448 $hContig_genes{$a[0]}.=" ".$a[8];
449
450 close(F);
451 test_formatGFF();
452
453 }
454
455
456 #****************************************************************
457 sub get_utrcds_ends{
458 $u5="";$ex="";$u3="";
459
460 if ($fcds eq $futr) {
461 $ex=$tmpgene;
462 }
463 else {
464 @ex=split(";",$tmpgene);
465 @cds=split(" ",$hcds_ends{$tmpname});
466
467 for($i=0;$i<=$#ex;$i++){
468 @coord=split(" ",$ex[$i]);
469
470 if ($cds[0]>$coord[0]){
471 if ($cds[0]>$coord[1]){
472 $u5.="$coord[0] $coord[1];";
473 }
474 else{
475 $u5.= "$coord[0] "; $u5.=$cds[0]-1 .";" ; #?
476 if ($cds[1]<$coord[1]){
477 $ex.="$cds[0] $cds[1];";
478 $u3.=$cds[1]+1 ." $coord[1];";
479 }
480 else{ $ex.="$cds[0] $coord[1];";}
481 }
482 }
483 else {
484 if ($cds[1]>$coord[0]){
485 if ($cds[1]>$coord[1]){
486 $ex.="$coord[0] $coord[1];";
487 }
488 else{
489 $ex.="$coord[0] $cds[1];";
490 $u3.=$cds[1]+1 ." $coord[1];";
491 }
492 }
493 else { $u3.="$coord[0] $coord[1];";}
494 }
495 }
496 chop($u5, $ex, $u3);
497 }
498 $geneinfo{$tmpname}->[1]=$sign;
499 $geneinfo{$tmpname}->[2]=$u5;
500 $geneinfo{$tmpname}->[3]=$ex;
501 $geneinfo{$tmpname}->[4]=$u3;
502 }
503 #*********************************
504 sub test_overlap{
505
506 if (!$mref){
507 $fileno=0;###
508 $endcoord=0;###
509 }
510 foreach $kcontgid (sort keys %hContig_genes){
511 @allgenes=split(/\s+/,$hContig_genes{$kcontgid});
512 for ($i=1;$i<=$#allgenes;$i++){
513 @g1=split (" ", $utr_ends{$allgenes[$i]});
514 $Utr5End{$allgenes[$i]}=$g1[0]; ###
515
516 for ($j=$i+1;$j<=$#allgenes;$j++){ #comparing with the rest of the genes
517 @g2=split (" ", $utr_ends{$allgenes[$j]});
518 #if the genes are overpaling and they have the same level ,the second gene is liflet to the next level
519 if ( (($g2[0]>=$g1[0]) and ($g2[0]<=$g1[1])) or (($g2[1]>=$g1[0]) and ($g2[1]<=$g1[1])) or (($g1[0]>=$g2[0]) and ($g1[0]<=$g2[1])) ){
520 if ($geneinfo{$allgenes[$i]}->[0] == $geneinfo{$allgenes[$j]}->[0]){
521 $geneinfo{$allgenes[$j]}->[0]=$geneinfo{$allgenes[$i]}->[0] + 1 ;
522 }
523 }
524 }
525 SetTheRangeForEachFile() if ((!$endfind) and (!$mref)); ###
526 }
527 }
528 $file[$fileno++]="$startcoord $endcoord" if (!$mref);###
529 %utr_ends=();###
530
531 }
532
533 #**************************************
534 sub SetTheRangeForEachFile{
535 $currstart=$g1[0];
536 $currend=$g1[1];
537 #---test range ends intersection
538 if (!$startfind) {
539 if (($x1win <= $currstart) || ($x1win <= $currend)){
540 $currstart = $x1win;
541 $startfind = 1;
542 }
543 }
544 if ( $startfind && $x1win && $x2win){
545 if (($x2win <= $currstart) || ($x2win <= $currend) ){
546 $currend = $x2win;
547 $endfind = 1;
548 }
549 }
550 #--------------------
551 if ($startfind) {
552 if(!$endcoord) {
553 #$startcoord=0;
554 $startcoord = $x1win ? $x1win : 0;
555 $endcoord=$currend;
556 }
557 else {
558 if($currend > $endcoord) {
559 if($currend-$startcoord < $nobpinfile) {
560 $endcoord=$currend;
561 }
562 else {
563 $file[$fileno++]="$startcoord $endcoord";
564 $startcoord=$endcoord+1;
565 $endcoord=$currend;
566 }
567 }
568 }
569 }#if startfind
570 }
571 #*********************************
572 sub print_header{
573 print O "#FIG 3.2\nLandscape\nCenter\nInches\nLetter \n100.00\nMultiple\n-2\n1200 2\n";
574 }
575 #*********************************
576 sub print_align{
577 my ($x1,$x2)=@_;
578
579 $a[$ind_pidy]=50 if ($a[$ind_pidy]<50);
580 $a[$ind_pidy]=int($a[$ind_pidy]);
581 if ($Mgaps){
582 $y=$Yorig+250+$YdistPID-$yscale*2;
583 if($a[$#a]=~/rev$/){$y-=25*$yscale;}
584 }
585 else{
586 $y=$Yorig+250+$YdistPID-$yscale*($a[$ind_pidy]-50);
587 }
588 if($x1==$x2) { $x2++;}
589 #draw the line between matches. is dif color for each contig
590 if ($a[$#a] eq $tmpIdQrycontig) {
591 print_connections($hQrycontig{$tmpIdQrycontig}->[1], $x1,$y);
592 }
593 else{#new contig
594 #remember the start coord for printing the id alignments
595 if ($printIDconting){
596 if ( $x1 - $XlastPrint > 400 ) {
597 print O "4 0 0 5 0 0 8 0.0000 4 90 270 ";
598 printf O ("\t%.0f %.0f ",$x1,$y);
599 print O $a[$#a], "\\001\n";
600 $XlastPrint=$x1; $YlastPrint=$y;
601 }
602 }
603 #
604 ##if it was seen before,but interrupted by another contig
605 if ((exists $hQrycontig{$a[$#a]}) and ($a[$ind_s1]-$hQrycontig{$a[$#a]}->[2] < $match_dist )) {
606 $linkcolor=$hQrycontig{$a[$#a]}->[0];
607 print_connections($hQrycontig{$a[$#a]}->[1], $x1,$y);
608 }
609 else{
610 #change the link color
611 unshift(@linkcolors, pop(@linkcolors));
612 $linkcolor=$linkcolors[0];
613 $hQrycontig{$a[$#a]}->[0] = $linkcolor;
614 }
615 }
616 $tmpIdQrycontig=$a[$#a];
617 $hQrycontig{$tmpIdQrycontig}->[1]="$x2 $y";
618 $hQrycontig{$tmpIdQrycontig}->[2]=$a[$ind_e1];
619
620 #the matches line is red
621 print O "2 1 0 2 4 0 40 0 -1 0.000 0 0 -1 0 0 2\n";
622 print O "\t$x1 $y $x2 $y\n";
623 print O "2 1 0 5 20 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
624 printf O ("\t $x1 %.0f $x2 %.0f\n",$Yorig+150 , $Yorig+150);
625 }
626 #*********************************
627 sub print_connections{
628 my ($setc1,$setx2,$sety2)=@_; # print "\nparam connect @_\n";
629 my ($setx1,$sety1) =split(/ /,$setc1);
630
631 if ($Mgaps){
632 if ($setx1>$setx2){
633 $tmpsetx1=$setx1;
634 $setx1=$setx2;
635 $setx2=$tmpsetx1;
636 }
637 $distx1x2=int(($setx2-$setx1)/2);
638 $xcenter= $setx1+$distx1x2;
639
640 if ($setx2-$setx1>4000) { #if the distance is to big then heigh of the arc is set to 20
641 $heightArcUp = 20*$yscale;
642 $yoffcenter=int((($distx1x2**2)+$heightArcUp**2)*(1/(2*$heightArcUp)))-$heightArcUp ;
643 }
644 else{
645 $heightArcUp = int (0.447 * $distx1x2);#sectorul de cerc la 1/3 din raza.
646 $yoffcenter=2*$heightArcUp;
647 }
648 print O "5 1 0 2 $linkcolor 0 50 0 -1 0.000 0 0 0 0 ";
649 printf O ("%.3f %.3f $setx1 $sety1 $xcenter %.0f $setx2 $sety1 \n",$xcenter,$sety1+$yoffcenter,$sety1-$heightArcUp);
650 }
651 else{
652 print O "2 1 0 1 $linkcolor 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
653 print O "\t".$setc1." $setx2 $sety2\n";
654
655 }
656 }
657 #*********************************
658 sub print_genes_mr{
659 %hLastOnLevel=();
660 @g=split(/\s+/,$hContig_genes{$a[-2]});
661 for ($i=1;$i<=$#g;$i++){
662 $kname=$g[$i];
663 $tmpx2=0;
664 $y=$Yorig-100-200*$geneinfo{$kname}->[0];
665 #print id gena
666 $xid =$startdrawX+ int($Utr5End{$kname}/$Xscale);
667 print_Id_genes() if ($printIDgenes);
668 #
669 for ($l=2;$l<5;$l++){
670 @c=split(";",$geneinfo{$kname}->[$l] ) ;
671 if (@c){ #print "de unde?@c\n" if (($l==2) or ($l==4));
672 $colorend=$color{$l};
673 if ($geneinfo{$kname}->[1] eq "-"){
674 if ($l==2) { $colorend=$color{"4"}; }
675 elsif ($l==4) {$colorend=$color{"2"};}
676 }
677 for ($k=0;$k<=$#c;$k++){
678 @e=split (" ",$c[$k]);
679 $x1=$startdrawX+int($e[0]/$Xscale);
680 $x2=$startdrawX+int($e[1]/$Xscale);
681 if($x1==$x2) { $x2++;}
682 if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron
683 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
684 print O "\t $tmpx2 $y $x1 $y\n";
685 }
686 $tmpx2=$x2;
687 print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#
688 print O "\t $x1 $y $x2 $y\n";
689 }
690 }
691 }
692 #delete ($geneinfo{$kname});
693 }
694 }
695
696 #*********************************
697 sub print_Id_genes{
698 if (exists $hLastOnLevel{$geneinfo{$kname}->[0]}){
699 $lastOnlevel=$hLastOnLevel{$geneinfo{$kname}->[0]};
700 $printidspace = int(($Utr5End{$kname}-$Utr5End{$lastOnlevel})/$Xscale);
701 }
702 else{$printidspace=601;}
703 if ($printidspace > 600){
704 #print contig name#
705 print O "4 0 0 5 0 0 6 0.0000 4 90 270 ";
706 printf O ("\t%.0f %.0f ",$xid,$y-50);
707 print O $kname, "\\001\n";
708 $hLastOnLevel{$geneinfo{$kname}->[0]}=$kname;
709 }
710 }
711 #*********************************
712 sub print_genes{
713 %hLastOnLevel=();
714 foreach $kname (sort {$Utr5End{$a} <=> $Utr5End{$b}} keys %Utr5End){
715 $tmpx2=0;
716 if ($Utr5End{$kname}>$startcoord && $Utr5End{$kname}<$endcoord){
717 $y=$Yorig-100-200*$geneinfo{$kname}->[0];
718 #print id gena
719 $xid = int(($Utr5End{$kname}-$startcoord)/$Xscale);
720 print_Id_genes() if ($printIDgenes);
721 #
722 for ($l=2;$l<5;$l++){
723 @c=split(";",$geneinfo{$kname}->[$l] );
724 $colorend=$color{$l};
725 if ($geneinfo{$kname}->[1] eq "-"){
726 if ($l==2) { $colorend=$color{"4"}; }
727 elsif ($l==4) {$colorend=$color{"2"};}
728 }
729
730 for ($k=0;$k<=$#c;$k++){
731 @e=split (" ",$c[$k]);
732 $x1=int(($e[0]-$startcoord)/$Xscale);
733 $x2=int(($e[1]-$startcoord)/$Xscale);
734 if($x1==$x2) { $x2++;}
735 if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron
736 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
737 print O "\t $tmpx2 $y $x1 $y\n";
738 }
739 $tmpx2=$x2;
740 print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#
741 print O "\t $x1 $y $x2 $y\n";
742 }
743 }
744 }#endif "is in interval"
745 }
746 # delete ($Utr5End{$kname});
747 # delete ($geneinfo{$kname});
748
749 }
750 #*********************************
751 sub print_grid{
752
753 my ($xs,$xe,$startcontg,$endcontg)=@_;
754
755 $XlastPrint=0;$YlastPrint=0;
756
757 #print ref contig
758 print O "2 1 0 10 11 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
759 printf O ("\t $xs %.0f $xe %.0f\n",$Yorig+50,$Yorig+50);
760 #print orizontal axes for PId (100%,75%,50%)
761 for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
762 print O "2 1 2 1 0 7 60 0 -1 4.000 0 0 -1 0 0 2\n";
763 printf O ("\t$xs %.0f $xe %.0f\n",$Yorig+250+$YdistPID-($percent_id - 50) * $yscale,$Yorig+250+$YdistPID-($percent_id - 50) * $yscale);
764 #last if ($Mgaps);
765 }
766 #print orizontal markers for bp.
767 $increment=10000/$Xscale;
768 $no_incr=0;
769 $xmark = $xs ;$xmark_float= $xs;
770 while ($xmark < $xe){
771 print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
772 printf O ("\t$xmark %.0f $xmark %.0f\n",$Yorig+$YdistPID+250,$Yorig+$YdistPID+300);
773 #bp scale
774 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
775 printf O ("\t %.0f %.0f",$xmark,$Yorig+$YdistPID+400);
776 print O " $no_incr"."k", "\\001\n";
777 $no_incr += 10;
778 $xmark_float += $increment;
779 $xmark=int($xmark_float);
780 }
781
782 #coord for chr ends
783 print O "4 0 0 50 0 0 14 0.0000 4 135 450 $xs $Yorig $startcontg\\001\n";
784 printf O ("4 0 0 50 0 0 14 0.0000 4 135 810 %.0f $Yorig $endcontg\\001\n",$xe-length($xe)*125);
785
786 #print contig name#
787 if ($mref){
788 print O "4 0 0 5 0 0 8 0.0000 4 135 405 ";
789 printf O ("\t%.0f %.0f ",$xs,$Yorig+70);
790 print O $a[-2], "\\001\n";
791 }
792 #print vertical markers for PId scale
793 if (!$Mgaps){
794 for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
795 #left
796 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
797 printf O ("\t%.0f %.0f", $xs-200,$Yorig+$YdistPID+250-($percent_id - 50) * $yscale + 20);
798 print O " $percent_id%", "\\001\n";
799 #right
800 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
801 printf O ("\t%.0f %.0f",$xe+20, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale+20 );
802 print O " $percent_id%", "\\001\n";
803
804 # print the tick mark
805 #left
806 # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
807 # printf O ("\t%.0f %.0f $xs %.0f\n",$xs-50,
808 # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
809 #right
810 # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
811 # printf O ("\t$xe %.0f %.0f %.0f\n",
812 # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale,$xe+50, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
813 }
814 }
815 else{ # for Mgaps
816 print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
817 printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 5 * $yscale + 10);
818 print O " + qry strand", "\\001\n";
819
820 print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
821 printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 30 * $yscale + 10);
822 print O " - qry strand", "\\001\n";
823 }
824
825 }
826 #*********************************
827 sub print_legend{
828
829 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
830 printf O ("\t%.0f %.0f ",100,$Yorig+$YdistPID+1100);
831 print O " Legend ", "\\001\n";
832 $y= $Yorig+$YdistPID+1300; #utr
833 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
834 print O "\t 70 $y 99 $y\n";
835 print O "2 1 0 5 27 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
836 print O "\t 100 $y 200 $y\n";
837 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
838 print O "\t 200 $y 230 $y\n";
839 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
840 printf O ("\t%.0f %.0f ",300,$y+30);
841 print O " 5' utr ", "\\001\n";
842 $y += 150 ;#cds
843 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
844 print O "\t 70 $y 99 $y\n";
845 print O "2 1 0 5 2 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
846 print O "\t 100 $y 200 $y\n";
847 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
848 print O "\t 200 $y 230 $y\n";
849 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
850 printf O ("\t%.0f %.0f ",300,$y+30);
851 print O " cds ", "\\001\n";
852 $y += 150; #3' utr
853 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
854 print O "\t 70 $y 99 $y\n";
855 print O "2 1 0 5 1 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
856 print O "\t 100 $y 200 $y\n";
857 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
858 print O "\t 200 $y 230 $y\n";
859 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
860 printf O ("\t%.0f %.0f ",300,$y+30);
861 print O " 3' utr ", "\\001\n";
862 $y += 150; # match
863 print O "2 1 0 2 4 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
864 print O "\t100 $y 200 $y\n";
865 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
866 printf O ("\t%.0f %.0f ",300,$y+30);
867 print O " match found by $prog ", "\\001\n";
868 }
869 #*********************************
870 sub change_file_format{
871
872 $procfile =~ /^tmp(.+)/;
873 $outfile = $1.$nrf.".".$format;
874 $comand = "fig2dev -L $format -x 100";
875 $comand .= " -m $magn" if ($magn);
876 $comand .= " -M ".$procfile.$nrf.".fig ".$outfile;
877
878 $status =system($comand);
879 print E "ERROR 1: fig2dev !\n" unless $status == 0;
880
881 $status =system("rm $procfile".$nrf.".fig");
882
883 if ($verb){
884 print "$outfile";
885 if ($mref){
886 print "\n" ;
887 }
888 else {
889 print "\t range : $startcoord\t$endcoord \n" ;
890 }
891 }
892 }
893 #*********************************
894 sub format_mgaps{
895 $tmpfile="tmpmgaps";
896 $tmpfile2=$alignm."coords" ;
897 get_ref_len(); #print $maxlenref."\n";
898 open(M,">".$tmpfile2) || die "can't open \" $tmpfile2 \" file !";
899 print M "$alignm\n";
900 print M "Mgaps\n\n";
901 print M " [S1] [E1] | [S2] [E2] | [LEN 1] [LEN 2] | [% IDY] | [LEN R] [LEN Q] | [COV R] [COV Q] | [TAGS]\n";
902 print M "===============================================================================================================================\n";
903
904 open(T,">".$tmpfile) || die "can't open \" $tmpfile \" file !";
905
906 open(A,"<".$alignm) || die "can't open \" $alignm \" file !";
907
908
909 while(<A>) {
910 chomp;
911 @a=split;
912 if ($a[0] =~ /^>/){
913 $nr_cluster=1;
914 $idquery=$a[1];
915 if ($a[2] eq "Reverse"){$idquery .= "_rev";}
916 }
917 #elsif ($a[0] eq "#") {$nr_cluster++;}
918 elsif($a[0] ne "#"){
919 $e1=$a[0]+$a[2];
920 print T $a[0]."\t".$e1."\t"."|";
921 print T "\t-\t-\t|";
922 print T "\t-\t-\t|";
923 print T "\t-\t|"; #pid
924 print T "\t$maxlenref\t-\t|";#len seqs
925 # print "\t-\t-\t|";
926 # print "\t-\t-\t|";
927 # print T " $Mgaps\t$idquery.$nr_cluster\n";
928 print T " $Mgaps\t$idquery\n";
929 }
930
931 }
932 close(A);
933 close(T);
934 $command="sort -n -k 1 $tmpfile >> $tmpfile2";
935 $status =system($command);
936 system("rm $tmpfile");
937 close(M);
938 $alignm=$tmpfile2;
939 print STDERR "ERROR 1: can't sort $tmpfile \n" unless $status == 0;
940 print STDERR "\n**************************************** \n";
941 print STDERR "New input file created : $alignm\n";
942 print STDERR "**************************************** \n\n";
943
944 }
945 #*****************************
946 sub get_ref_len{
947 $firstrow=1;
948 open(A,"<".$alignm) || die "can't open \" $alignm \" file !";
949 while(<A>) {
950 chomp;
951 @a=split;
952 if ($firstrow) {
953 $firstrow=0;
954 if ($a[0] !~ /^>/){
955 print "\nWrong file format for MGAPS file : $alignm ! \n";
956 exit;
957 }
958 }
959
960 if ($a[0] =~ /^>/){ next; }
961 elsif($a[0] ne "#"){
962 $e1=$a[0]+$a[2];
963 $maxlenref=($maxlenref < $e1 ? $e1 : $maxlenref);
964 }
965 }
966 close(A);
967 }