Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts/mapview.pl @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #!__PERL_PATH | |
2 | |
3 use lib "__SCRIPT_DIR"; | |
4 use Foundation; | |
5 | |
6 my $SCRIPT_DIR = "__SCRIPT_DIR"; | |
7 | |
8 | |
9 my $VERSION_INFO = q~ | |
10 mapview version 1.01 | |
11 ~; | |
12 | |
13 | |
14 my $HELP_INFO = q~ | |
15 USAGE: mapview [options] <coords file> [UTR coords] [CDS coords] | |
16 | |
17 DESCRIPTION: | |
18 mapview is a utility program for displaying sequence alignments as | |
19 provided by MUMmer, NUCmer, PROmer or Mgaps. mapview takes the output of | |
20 show-coords and converts it to a FIG, PDF or PS file for visual analysis. | |
21 It can also break the output into multiple files for easier viewing and | |
22 printing. | |
23 | |
24 MANDATORY: | |
25 coords file The output of 'show-coords -rl[k]' or 'mgaps' | |
26 | |
27 OPTIONS: | |
28 UTR coords UTR coordinate file in GFF format | |
29 CDS coords CDS coordinate file in GFF format | |
30 | |
31 -d|maxdist Set the maximum base-pair distance between linked matches | |
32 (default 50000) | |
33 -f|format Set the output format to 'pdf', 'ps' or 'fig' | |
34 (default 'fig') | |
35 -h | |
36 --help Display help information and exit | |
37 -m|mag Set the magnification at which the figure is rendered, | |
38 this is an option for fig2dev which is used to generate | |
39 the PDF and PS files (default 1.0) | |
40 -n|num Set the number of output files used to partition the | |
41 output, this is to avoid generating files that are too | |
42 large to display (default 10) | |
43 -p|prefix Set the output file prefix | |
44 (default "PROMER_graph or NUCMER_graph") | |
45 -v | |
46 --verbose Verbose logging of the processed files | |
47 -V | |
48 --version Display the version information and exit | |
49 -x1 coord Set the lower coordinate bound of the display | |
50 -x2 coord Set the upper coordinate bound of the display | |
51 -g|ref If the input file is provided by 'mgaps', set the | |
52 reference sequence ID (as it appears in the first column | |
53 of the UTR/CDS coords file) | |
54 -I Display the name of query sequences | |
55 -Ir Display the name of reference genes | |
56 ~; | |
57 | |
58 | |
59 my $USAGE_INFO = q~ | |
60 USAGE: mapview [options] <coords file> [UTR coords] [CDS coords] | |
61 ~; | |
62 | |
63 | |
64 my @DEPEND_INFO = | |
65 ( | |
66 "fig2dev", | |
67 "$SCRIPT_DIR/Foundation.pm" | |
68 ); | |
69 | |
70 my $err_gff = q~ | |
71 ERROR in the input files ! The reference seq ID can't be found in GFF files ! | |
72 The first column in the GFF file should be the ID of the reference seq. | |
73 The alignments file should provide the same info in the column before the last one. | |
74 | |
75 Here are some example records for the GFF file: | |
76 | |
77 gnl|FlyBase|X Dmel3 initial-exon 2155 2413 . - . X_CG3038.1 | |
78 gnl|FlyBase|X Dmel3 last-exon 1182 2077 . - . X_CG3038.1 | |
79 ... | |
80 The fields are : | |
81 <seq_ID> <source> <exon type> <start> <end> <score> <strand> <frame> <gene_name> | |
82 ~; | |
83 | |
84 my $tigr; | |
85 my $err; | |
86 | |
87 my $alignm; | |
88 my $futr; | |
89 my $fcds; | |
90 | |
91 #-- Initialize TIGR::Foundation | |
92 $tigr = new TIGR::Foundation; | |
93 if ( !defined ($tigr) ) { | |
94 print (STDERR "ERROR: TIGR::Foundation could not be initialized"); | |
95 exit (1); | |
96 } | |
97 | |
98 #-- Set help and usage information | |
99 $tigr->setHelpInfo ($HELP_INFO); | |
100 $tigr->setUsageInfo ($USAGE_INFO); | |
101 $tigr->setVersionInfo ($VERSION_INFO); | |
102 $tigr->addDependInfo (@DEPEND_INFO); | |
103 | |
104 $err = $tigr->TIGR_GetOptions | |
105 ( | |
106 "d|maxdist=i" => \$match_dist, | |
107 "f|format=s" => \$format, | |
108 "m|mag=f" => \$magn, | |
109 "n|num=i" => \$noOutfiles, | |
110 "p|prefix=s" => \$outfilename, | |
111 "x1=i" => \$x1win, | |
112 "x2=i" => \$x2win, | |
113 "v|verbose" => \$verb, | |
114 "g|ref=s" => \$Mgaps, | |
115 "I" => \$printIDconting, | |
116 "Ir" => \$printIDgenes | |
117 ); | |
118 | |
119 if ( $err == 0 || scalar(@ARGV) < 1 || scalar(@ARGV) > 3 ) { | |
120 $tigr->printUsageInfo( ); | |
121 print (STDERR "Try '$0 -h' for more information.\n"); | |
122 exit (1); | |
123 } | |
124 | |
125 ($alignm,$futr,$fcds)=@ARGV; | |
126 | |
127 if ((substr($x1win,0,1) eq '-') || (substr($x2win,0,1) eq '-')){ | |
128 print "ERROR2 : coords x1,x2 should be positive integers !!\n"; | |
129 $info=1; | |
130 } | |
131 if ($x1win > $x2win) { | |
132 print "ERROR3 : wrong range coords : x1 >= x2 !!!\n"; | |
133 $info=1; | |
134 } | |
135 if ($Mgaps){ #formating the mgaps output to be similar with show-coords output | |
136 format_mgaps(); | |
137 } | |
138 | |
139 if (!$format){$format="fig";} | |
140 if (($x1win) and ($x2win)){ $startfind=0; } | |
141 else{ $startfind=1; } | |
142 $endfind=0; | |
143 if (!$noOutfiles) {$noOutfiles=10;} | |
144 if (!$match_dist){$match_dist=50000;} | |
145 | |
146 #```````init colors```````````````````````````` | |
147 $color{"2"}=27;#dark pink 5utr | |
148 $color{"3"}=2;#green ex | |
149 $color{"4"}=1;#blue 3utr | |
150 #`````````````````````` | |
151 @linkcolors=(31,14,11); | |
152 #`````````````````````````````````````````````` | |
153 open(F,$alignm); | |
154 <F>; | |
155 $prog=<F>; chomp($prog); | |
156 <F>; | |
157 $_=<F>; | |
158 @a=(m/\s+(\||\[.+?\])/g) ; | |
159 for ($ind=0;$ind<=$#a;$ind++){ | |
160 if ($a[$ind] eq "[S1]") { | |
161 $ind_s1=$ind; | |
162 } | |
163 elsif ($a[$ind] eq "[E1]"){ | |
164 $ind_e1=$ind; | |
165 } | |
166 elsif ($a[$ind] eq "[% IDY]"){ | |
167 $ind_pidy=$ind; | |
168 } | |
169 elsif ($a[$ind] eq "[LEN R]"){ | |
170 $ind_lenchr=$ind; | |
171 } | |
172 # elsif ($a[$ind] eq "[TAGS]"){ | |
173 # $ind_tags=$ind+1; #there are two columns for this header col | |
174 # } | |
175 } | |
176 <F>;$mref=-1; | |
177 while (<F>){ | |
178 chomp; | |
179 @a=split; | |
180 if (!exists $hRefContigId{$a[-2]}) { # print $a[-2]."\n"; | |
181 $hRefContigId{$a[-2]}=$a[$ind_lenchr]; | |
182 $lenrefseqs+=$a[$ind_lenchr]; | |
183 $mref++; | |
184 } | |
185 } | |
186 $nobpinfile=int($lenrefseqs/$noOutfiles); | |
187 | |
188 close(F); | |
189 | |
190 #`````````````````````` | |
191 | |
192 | |
193 if (@ARGV > 1) { | |
194 get_cds_ends(); | |
195 get_utrcds_info(); | |
196 test_overlap(); | |
197 } | |
198 elsif(!$mref) { | |
199 $fileno=$noOutfiles; | |
200 $startcoord=0;$endcoord=0; | |
201 for ($i=0;$i<$fileno;$i++) { | |
202 $endcoord=$startcoord+$nobpinfile-1; | |
203 $endcoord=$lenrefseqs if ($endcoord>$lenrefseqs); | |
204 $file[$i]="$startcoord $endcoord"; | |
205 $startcoord=$endcoord+1; | |
206 } | |
207 } | |
208 | |
209 | |
210 $Yorig=3000; | |
211 $YdistPID=2000; | |
212 $yscale=$YdistPID/50; | |
213 $Xscale=14.5; | |
214 $gap=800; | |
215 #$maxfiles = ($fileno < 10) ? $fileno : 10; | |
216 #--------------------------------- | |
217 | |
218 if (!$mref){ | |
219 for($i=0; $i < $fileno; $i++) { | |
220 $nrf=$i; | |
221 set_output_fname(); | |
222 ($startcoord,$endcoord)=split(/\s+/,$file[$i]); | |
223 open(O,">$procfile".$nrf.".fig"); | |
224 print_header(); | |
225 print $procfile.$nrf.".fig\t range : $startcoord\t$endcoord \n" if ($verb && ($format eq "fig")); | |
226 | |
227 $xs=0; | |
228 $xe=int(($endcoord-$startcoord+1)/$Xscale); | |
229 #$xs=200; | |
230 #$xe=$xs+int(($endcoord-$startcoord+1)/$Xscale); | |
231 print_grid($xs,$xe,$startcoord,$endcoord); | |
232 | |
233 $tmpIdQrycontig=""; | |
234 $linkcolor=$linkcolors[0]; | |
235 | |
236 open(F,$alignm); | |
237 <F>;<F>;<F>;<F>;<F>; | |
238 while(<F>) { | |
239 chomp; | |
240 @a=split; | |
241 if($a[$ind_s1] > $endcoord) { last;} | |
242 if($a[$ind_s1]<$startcoord && $a[$ind_e1] > $startcoord ) { $a[$ind_s1]=$startcoord;} | |
243 if($a[$ind_s1] < $endcoord && $endcoord < $a[$ind_e1]) { $a[$ind_e1]=$endcoord;} | |
244 | |
245 if($a[$ind_s1]>=$startcoord && $a[$ind_e1]<=$endcoord) { | |
246 $x1=int(($a[$ind_s1]-$startcoord)/$Xscale);# | |
247 $x2=int(($a[$ind_e1]-$startcoord)/$Xscale);# | |
248 print_align($x1,$x2); | |
249 } | |
250 } | |
251 close(F); | |
252 %hQrycontig=(); | |
253 print_genes() if ($futr); | |
254 print_legend(); | |
255 close(O); | |
256 change_file_format() if ($format ne "fig"); | |
257 } | |
258 } | |
259 elsif($mref){#multiple ref seqs | |
260 set_output_fname(); | |
261 $tmpIdQrycontig=""; | |
262 $linkcolor=$linkcolors[0]; | |
263 $startdrawX=0; | |
264 $proclen=0; | |
265 $first=1; | |
266 $nrf=0; | |
267 open(F,$alignm); | |
268 <F>;<F>;<F>;<F>;<F>; | |
269 while(<F>) { | |
270 chomp; | |
271 @a=split; | |
272 if ($a[-2] ne $tmpcontig){ | |
273 %hQrycontig=(); | |
274 $tmpcontig=$a[-2]; | |
275 if ($first){ | |
276 $first=0; | |
277 $nrf++; | |
278 open(O,">$procfile".$nrf.".fig"); | |
279 print_header(); | |
280 print $procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig")); | |
281 $len=$hRefContigId{$a[-2]}; | |
282 } | |
283 else { | |
284 $startdrawX+=int($len/$Xscale)+$gap; | |
285 $len=$hRefContigId{$a[-2]}; | |
286 if (($proclen+$len>$nobpinfile) and ($proclen != 0)){ | |
287 print_legend(); | |
288 close(O); | |
289 change_file_format() if ($format ne "fig"); | |
290 $nrf++; | |
291 open(O,">$procfile".$nrf.".fig"); | |
292 print_header(); | |
293 print "\n".$procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig")); | |
294 $proclen=0; | |
295 $startdrawX=0; | |
296 } | |
297 } | |
298 $xs=$startdrawX; | |
299 $xe=$startdrawX+int($len/$Xscale); | |
300 print_grid($xs,$xe,0,$len); | |
301 #print genes from %geneinfo for contig | |
302 print $a[-2]."\t".$hRefContigId{$a[-2]}."\n"; | |
303 print_genes_mr() if ($futr); | |
304 $proclen+=$len; | |
305 }#end if new contig | |
306 $x1=$startdrawX+int($a[$ind_s1]/$Xscale); | |
307 $x2=$startdrawX+int($a[$ind_e1]/$Xscale); | |
308 print_align($x1,$x2); | |
309 } | |
310 print_legend(); | |
311 close(O); | |
312 change_file_format() if ($format ne "fig"); | |
313 | |
314 close(F); | |
315 } | |
316 #******************************************************************************* | |
317 #******************************************************************************* | |
318 sub set_output_fname{ | |
319 | |
320 if (!$outfilename) {$procfile=$prog."_graph"."_";} | |
321 else {$procfile=$outfilename."_";} | |
322 | |
323 if ($format ne "fig"){ | |
324 $procfile="tmp".$procfile; | |
325 } | |
326 } | |
327 #********************************* | |
328 sub get_cds_ends{ | |
329 #3. print "create \%hcds_ends...\n"; | |
330 $testGffFormat=0; | |
331 open(F,"<".$fcds);#|| die "can't open \" $fcds cds \" file !"; | |
332 while(<F>) { | |
333 chomp; | |
334 if($_) { | |
335 @a=split; | |
336 if (exists $hRefContigId{$a[0]}){#record if at least one of the ref id is the same in GFF and Align files | |
337 $testGffFormat++; | |
338 } | |
339 $genename=$a[8]; | |
340 if ($genename ne $tmpname){ | |
341 if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";} | |
342 elsif ($sign eq "-"){ $hcds_ends{$tmpname} = "$cds3 $cds5";} | |
343 $tmpname=$genename; | |
344 $sign=$a[6]; | |
345 } | |
346 if($sign eq "-") { | |
347 $temp=$a[3]; | |
348 $a[3]=$a[4]; | |
349 $a[4]=$temp; | |
350 } | |
351 if ($a[2] eq "single-exon"){ | |
352 $cds5=$a[3]; | |
353 $cds3=$a[4]; | |
354 } | |
355 elsif ($a[2] eq "initial-exon"){ | |
356 $cds5=$a[3]; | |
357 } | |
358 elsif ($a[2] eq "last-exon"){ | |
359 $cds3=$a[4]; | |
360 } | |
361 } | |
362 } | |
363 if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";} | |
364 elsif ($sign eq "-"){$hcds_ends{$tmpname} = "$cds3 $cds5";} | |
365 | |
366 test_formatGFF(); | |
367 | |
368 #foreach $k ( keys %hcds_ends){ | |
369 # print "cds_ends: ".$k."\t"."\n"; | |
370 #} exit; | |
371 | |
372 } | |
373 | |
374 #********************************* | |
375 sub test_formatGFF{ | |
376 if ($testGffFormat==0){ | |
377 print (STDERR "$err_gff \n"); | |
378 exit (1); | |
379 } | |
380 | |
381 } | |
382 | |
383 #********************************* | |
384 sub get_utrcds_info{ | |
385 #test for gene overlap $geneinfo{gene_name}->[0]=level,stock gene 5'3' utr ends, | |
386 #determina %geneinfo{id gene}->5utr,ex,3utr | |
387 #and @file | |
388 # get_gene_ends(); | |
389 $testGffFormat=0; | |
390 open(F,"<".$futr);# || die "can't open \" $futr utr \" file !"; | |
391 while(<F>) { | |
392 chomp; | |
393 if($_) { | |
394 @a=split; | |
395 #get gene ends-utr | |
396 $genename=$a[8]; | |
397 if (exists $hRefContigId{$a[0]}){ #check if [align file(col before the last one)] = [GFF file(col 1)] | |
398 $testGffFormat++; | |
399 } | |
400 | |
401 if ($genename ne $tmpname){ | |
402 if ($sign eq "+"){ $utr_ends{$tmpname} = "$utr5 $utr3";} | |
403 elsif ($sign eq "-"){ $utr_ends{$tmpname} = "$utr3 $utr5";} | |
404 | |
405 if ($tmpgene) {# for the distinct_utr_cds | |
406 get_utrcds_ends() ; | |
407 } | |
408 $tmpgene="$a[3] $a[4]";# | |
409 | |
410 $tmpname=$genename; | |
411 $sign=$a[6]; | |
412 $hContig_genes{$a[0]}.=" ".$a[8]; # for multiple ref. seqs | |
413 } | |
414 else{# for the distinct_utr_cds | |
415 if($sign eq "-") { | |
416 $tmpgene="$a[3] $a[4];".$tmpgene; | |
417 } | |
418 else { $tmpgene.=";$a[3] $a[4]"; } | |
419 }# | |
420 | |
421 if($sign eq "-") { | |
422 $temp=$a[3]; | |
423 $a[3]=$a[4]; | |
424 $a[4]=$temp; | |
425 } | |
426 if ($a[2] eq "single-exon"){ | |
427 $utr5=$a[3]; | |
428 $utr3=$a[4]; | |
429 } | |
430 elsif ($a[2] eq "initial-exon"){ | |
431 $utr5=$a[3]; | |
432 } | |
433 elsif ($a[2] eq "last-exon"){ | |
434 $utr3=$a[4]; | |
435 } | |
436 | |
437 #init gene info (level) | |
438 $geneinfo{$a[8]}->[0]=1 if (!exists $geneinfo{$a[8]}); | |
439 | |
440 } | |
441 } | |
442 # for the distinct_utr_cds | |
443 get_utrcds_ends(); | |
444 %cds_ends=();# | |
445 | |
446 if ($sign eq "+"){$utr_ends{$tmpname}="$utr5 $utr3"; } | |
447 elsif ($sign eq "-"){$utr_ends{$tmpname}="$utr3 $utr5"; } | |
448 $hContig_genes{$a[0]}.=" ".$a[8]; | |
449 | |
450 close(F); | |
451 test_formatGFF(); | |
452 | |
453 } | |
454 | |
455 | |
456 #**************************************************************** | |
457 sub get_utrcds_ends{ | |
458 $u5="";$ex="";$u3=""; | |
459 | |
460 if ($fcds eq $futr) { | |
461 $ex=$tmpgene; | |
462 } | |
463 else { | |
464 @ex=split(";",$tmpgene); | |
465 @cds=split(" ",$hcds_ends{$tmpname}); | |
466 | |
467 for($i=0;$i<=$#ex;$i++){ | |
468 @coord=split(" ",$ex[$i]); | |
469 | |
470 if ($cds[0]>$coord[0]){ | |
471 if ($cds[0]>$coord[1]){ | |
472 $u5.="$coord[0] $coord[1];"; | |
473 } | |
474 else{ | |
475 $u5.= "$coord[0] "; $u5.=$cds[0]-1 .";" ; #? | |
476 if ($cds[1]<$coord[1]){ | |
477 $ex.="$cds[0] $cds[1];"; | |
478 $u3.=$cds[1]+1 ." $coord[1];"; | |
479 } | |
480 else{ $ex.="$cds[0] $coord[1];";} | |
481 } | |
482 } | |
483 else { | |
484 if ($cds[1]>$coord[0]){ | |
485 if ($cds[1]>$coord[1]){ | |
486 $ex.="$coord[0] $coord[1];"; | |
487 } | |
488 else{ | |
489 $ex.="$coord[0] $cds[1];"; | |
490 $u3.=$cds[1]+1 ." $coord[1];"; | |
491 } | |
492 } | |
493 else { $u3.="$coord[0] $coord[1];";} | |
494 } | |
495 } | |
496 chop($u5, $ex, $u3); | |
497 } | |
498 $geneinfo{$tmpname}->[1]=$sign; | |
499 $geneinfo{$tmpname}->[2]=$u5; | |
500 $geneinfo{$tmpname}->[3]=$ex; | |
501 $geneinfo{$tmpname}->[4]=$u3; | |
502 } | |
503 #********************************* | |
504 sub test_overlap{ | |
505 | |
506 if (!$mref){ | |
507 $fileno=0;### | |
508 $endcoord=0;### | |
509 } | |
510 foreach $kcontgid (sort keys %hContig_genes){ | |
511 @allgenes=split(/\s+/,$hContig_genes{$kcontgid}); | |
512 for ($i=1;$i<=$#allgenes;$i++){ | |
513 @g1=split (" ", $utr_ends{$allgenes[$i]}); | |
514 $Utr5End{$allgenes[$i]}=$g1[0]; ### | |
515 | |
516 for ($j=$i+1;$j<=$#allgenes;$j++){ #comparing with the rest of the genes | |
517 @g2=split (" ", $utr_ends{$allgenes[$j]}); | |
518 #if the genes are overpaling and they have the same level ,the second gene is liflet to the next level | |
519 if ( (($g2[0]>=$g1[0]) and ($g2[0]<=$g1[1])) or (($g2[1]>=$g1[0]) and ($g2[1]<=$g1[1])) or (($g1[0]>=$g2[0]) and ($g1[0]<=$g2[1])) ){ | |
520 if ($geneinfo{$allgenes[$i]}->[0] == $geneinfo{$allgenes[$j]}->[0]){ | |
521 $geneinfo{$allgenes[$j]}->[0]=$geneinfo{$allgenes[$i]}->[0] + 1 ; | |
522 } | |
523 } | |
524 } | |
525 SetTheRangeForEachFile() if ((!$endfind) and (!$mref)); ### | |
526 } | |
527 } | |
528 $file[$fileno++]="$startcoord $endcoord" if (!$mref);### | |
529 %utr_ends=();### | |
530 | |
531 } | |
532 | |
533 #************************************** | |
534 sub SetTheRangeForEachFile{ | |
535 $currstart=$g1[0]; | |
536 $currend=$g1[1]; | |
537 #---test range ends intersection | |
538 if (!$startfind) { | |
539 if (($x1win <= $currstart) || ($x1win <= $currend)){ | |
540 $currstart = $x1win; | |
541 $startfind = 1; | |
542 } | |
543 } | |
544 if ( $startfind && $x1win && $x2win){ | |
545 if (($x2win <= $currstart) || ($x2win <= $currend) ){ | |
546 $currend = $x2win; | |
547 $endfind = 1; | |
548 } | |
549 } | |
550 #-------------------- | |
551 if ($startfind) { | |
552 if(!$endcoord) { | |
553 #$startcoord=0; | |
554 $startcoord = $x1win ? $x1win : 0; | |
555 $endcoord=$currend; | |
556 } | |
557 else { | |
558 if($currend > $endcoord) { | |
559 if($currend-$startcoord < $nobpinfile) { | |
560 $endcoord=$currend; | |
561 } | |
562 else { | |
563 $file[$fileno++]="$startcoord $endcoord"; | |
564 $startcoord=$endcoord+1; | |
565 $endcoord=$currend; | |
566 } | |
567 } | |
568 } | |
569 }#if startfind | |
570 } | |
571 #********************************* | |
572 sub print_header{ | |
573 print O "#FIG 3.2\nLandscape\nCenter\nInches\nLetter \n100.00\nMultiple\n-2\n1200 2\n"; | |
574 } | |
575 #********************************* | |
576 sub print_align{ | |
577 my ($x1,$x2)=@_; | |
578 | |
579 $a[$ind_pidy]=50 if ($a[$ind_pidy]<50); | |
580 $a[$ind_pidy]=int($a[$ind_pidy]); | |
581 if ($Mgaps){ | |
582 $y=$Yorig+250+$YdistPID-$yscale*2; | |
583 if($a[$#a]=~/rev$/){$y-=25*$yscale;} | |
584 } | |
585 else{ | |
586 $y=$Yorig+250+$YdistPID-$yscale*($a[$ind_pidy]-50); | |
587 } | |
588 if($x1==$x2) { $x2++;} | |
589 #draw the line between matches. is dif color for each contig | |
590 if ($a[$#a] eq $tmpIdQrycontig) { | |
591 print_connections($hQrycontig{$tmpIdQrycontig}->[1], $x1,$y); | |
592 } | |
593 else{#new contig | |
594 #remember the start coord for printing the id alignments | |
595 if ($printIDconting){ | |
596 if ( $x1 - $XlastPrint > 400 ) { | |
597 print O "4 0 0 5 0 0 8 0.0000 4 90 270 "; | |
598 printf O ("\t%.0f %.0f ",$x1,$y); | |
599 print O $a[$#a], "\\001\n"; | |
600 $XlastPrint=$x1; $YlastPrint=$y; | |
601 } | |
602 } | |
603 # | |
604 ##if it was seen before,but interrupted by another contig | |
605 if ((exists $hQrycontig{$a[$#a]}) and ($a[$ind_s1]-$hQrycontig{$a[$#a]}->[2] < $match_dist )) { | |
606 $linkcolor=$hQrycontig{$a[$#a]}->[0]; | |
607 print_connections($hQrycontig{$a[$#a]}->[1], $x1,$y); | |
608 } | |
609 else{ | |
610 #change the link color | |
611 unshift(@linkcolors, pop(@linkcolors)); | |
612 $linkcolor=$linkcolors[0]; | |
613 $hQrycontig{$a[$#a]}->[0] = $linkcolor; | |
614 } | |
615 } | |
616 $tmpIdQrycontig=$a[$#a]; | |
617 $hQrycontig{$tmpIdQrycontig}->[1]="$x2 $y"; | |
618 $hQrycontig{$tmpIdQrycontig}->[2]=$a[$ind_e1]; | |
619 | |
620 #the matches line is red | |
621 print O "2 1 0 2 4 0 40 0 -1 0.000 0 0 -1 0 0 2\n"; | |
622 print O "\t$x1 $y $x2 $y\n"; | |
623 print O "2 1 0 5 20 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; | |
624 printf O ("\t $x1 %.0f $x2 %.0f\n",$Yorig+150 , $Yorig+150); | |
625 } | |
626 #********************************* | |
627 sub print_connections{ | |
628 my ($setc1,$setx2,$sety2)=@_; # print "\nparam connect @_\n"; | |
629 my ($setx1,$sety1) =split(/ /,$setc1); | |
630 | |
631 if ($Mgaps){ | |
632 if ($setx1>$setx2){ | |
633 $tmpsetx1=$setx1; | |
634 $setx1=$setx2; | |
635 $setx2=$tmpsetx1; | |
636 } | |
637 $distx1x2=int(($setx2-$setx1)/2); | |
638 $xcenter= $setx1+$distx1x2; | |
639 | |
640 if ($setx2-$setx1>4000) { #if the distance is to big then heigh of the arc is set to 20 | |
641 $heightArcUp = 20*$yscale; | |
642 $yoffcenter=int((($distx1x2**2)+$heightArcUp**2)*(1/(2*$heightArcUp)))-$heightArcUp ; | |
643 } | |
644 else{ | |
645 $heightArcUp = int (0.447 * $distx1x2);#sectorul de cerc la 1/3 din raza. | |
646 $yoffcenter=2*$heightArcUp; | |
647 } | |
648 print O "5 1 0 2 $linkcolor 0 50 0 -1 0.000 0 0 0 0 "; | |
649 printf O ("%.3f %.3f $setx1 $sety1 $xcenter %.0f $setx2 $sety1 \n",$xcenter,$sety1+$yoffcenter,$sety1-$heightArcUp); | |
650 } | |
651 else{ | |
652 print O "2 1 0 1 $linkcolor 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; | |
653 print O "\t".$setc1." $setx2 $sety2\n"; | |
654 | |
655 } | |
656 } | |
657 #********************************* | |
658 sub print_genes_mr{ | |
659 %hLastOnLevel=(); | |
660 @g=split(/\s+/,$hContig_genes{$a[-2]}); | |
661 for ($i=1;$i<=$#g;$i++){ | |
662 $kname=$g[$i]; | |
663 $tmpx2=0; | |
664 $y=$Yorig-100-200*$geneinfo{$kname}->[0]; | |
665 #print id gena | |
666 $xid =$startdrawX+ int($Utr5End{$kname}/$Xscale); | |
667 print_Id_genes() if ($printIDgenes); | |
668 # | |
669 for ($l=2;$l<5;$l++){ | |
670 @c=split(";",$geneinfo{$kname}->[$l] ) ; | |
671 if (@c){ #print "de unde?@c\n" if (($l==2) or ($l==4)); | |
672 $colorend=$color{$l}; | |
673 if ($geneinfo{$kname}->[1] eq "-"){ | |
674 if ($l==2) { $colorend=$color{"4"}; } | |
675 elsif ($l==4) {$colorend=$color{"2"};} | |
676 } | |
677 for ($k=0;$k<=$#c;$k++){ | |
678 @e=split (" ",$c[$k]); | |
679 $x1=$startdrawX+int($e[0]/$Xscale); | |
680 $x2=$startdrawX+int($e[1]/$Xscale); | |
681 if($x1==$x2) { $x2++;} | |
682 if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron | |
683 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; | |
684 print O "\t $tmpx2 $y $x1 $y\n"; | |
685 } | |
686 $tmpx2=$x2; | |
687 print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";# | |
688 print O "\t $x1 $y $x2 $y\n"; | |
689 } | |
690 } | |
691 } | |
692 #delete ($geneinfo{$kname}); | |
693 } | |
694 } | |
695 | |
696 #********************************* | |
697 sub print_Id_genes{ | |
698 if (exists $hLastOnLevel{$geneinfo{$kname}->[0]}){ | |
699 $lastOnlevel=$hLastOnLevel{$geneinfo{$kname}->[0]}; | |
700 $printidspace = int(($Utr5End{$kname}-$Utr5End{$lastOnlevel})/$Xscale); | |
701 } | |
702 else{$printidspace=601;} | |
703 if ($printidspace > 600){ | |
704 #print contig name# | |
705 print O "4 0 0 5 0 0 6 0.0000 4 90 270 "; | |
706 printf O ("\t%.0f %.0f ",$xid,$y-50); | |
707 print O $kname, "\\001\n"; | |
708 $hLastOnLevel{$geneinfo{$kname}->[0]}=$kname; | |
709 } | |
710 } | |
711 #********************************* | |
712 sub print_genes{ | |
713 %hLastOnLevel=(); | |
714 foreach $kname (sort {$Utr5End{$a} <=> $Utr5End{$b}} keys %Utr5End){ | |
715 $tmpx2=0; | |
716 if ($Utr5End{$kname}>$startcoord && $Utr5End{$kname}<$endcoord){ | |
717 $y=$Yorig-100-200*$geneinfo{$kname}->[0]; | |
718 #print id gena | |
719 $xid = int(($Utr5End{$kname}-$startcoord)/$Xscale); | |
720 print_Id_genes() if ($printIDgenes); | |
721 # | |
722 for ($l=2;$l<5;$l++){ | |
723 @c=split(";",$geneinfo{$kname}->[$l] ); | |
724 $colorend=$color{$l}; | |
725 if ($geneinfo{$kname}->[1] eq "-"){ | |
726 if ($l==2) { $colorend=$color{"4"}; } | |
727 elsif ($l==4) {$colorend=$color{"2"};} | |
728 } | |
729 | |
730 for ($k=0;$k<=$#c;$k++){ | |
731 @e=split (" ",$c[$k]); | |
732 $x1=int(($e[0]-$startcoord)/$Xscale); | |
733 $x2=int(($e[1]-$startcoord)/$Xscale); | |
734 if($x1==$x2) { $x2++;} | |
735 if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron | |
736 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; | |
737 print O "\t $tmpx2 $y $x1 $y\n"; | |
738 } | |
739 $tmpx2=$x2; | |
740 print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";# | |
741 print O "\t $x1 $y $x2 $y\n"; | |
742 } | |
743 } | |
744 }#endif "is in interval" | |
745 } | |
746 # delete ($Utr5End{$kname}); | |
747 # delete ($geneinfo{$kname}); | |
748 | |
749 } | |
750 #********************************* | |
751 sub print_grid{ | |
752 | |
753 my ($xs,$xe,$startcontg,$endcontg)=@_; | |
754 | |
755 $XlastPrint=0;$YlastPrint=0; | |
756 | |
757 #print ref contig | |
758 print O "2 1 0 10 11 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; | |
759 printf O ("\t $xs %.0f $xe %.0f\n",$Yorig+50,$Yorig+50); | |
760 #print orizontal axes for PId (100%,75%,50%) | |
761 for ($percent_id = 50; $percent_id < 101; $percent_id += 25) { | |
762 print O "2 1 2 1 0 7 60 0 -1 4.000 0 0 -1 0 0 2\n"; | |
763 printf O ("\t$xs %.0f $xe %.0f\n",$Yorig+250+$YdistPID-($percent_id - 50) * $yscale,$Yorig+250+$YdistPID-($percent_id - 50) * $yscale); | |
764 #last if ($Mgaps); | |
765 } | |
766 #print orizontal markers for bp. | |
767 $increment=10000/$Xscale; | |
768 $no_incr=0; | |
769 $xmark = $xs ;$xmark_float= $xs; | |
770 while ($xmark < $xe){ | |
771 print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; | |
772 printf O ("\t$xmark %.0f $xmark %.0f\n",$Yorig+$YdistPID+250,$Yorig+$YdistPID+300); | |
773 #bp scale | |
774 print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; | |
775 printf O ("\t %.0f %.0f",$xmark,$Yorig+$YdistPID+400); | |
776 print O " $no_incr"."k", "\\001\n"; | |
777 $no_incr += 10; | |
778 $xmark_float += $increment; | |
779 $xmark=int($xmark_float); | |
780 } | |
781 | |
782 #coord for chr ends | |
783 print O "4 0 0 50 0 0 14 0.0000 4 135 450 $xs $Yorig $startcontg\\001\n"; | |
784 printf O ("4 0 0 50 0 0 14 0.0000 4 135 810 %.0f $Yorig $endcontg\\001\n",$xe-length($xe)*125); | |
785 | |
786 #print contig name# | |
787 if ($mref){ | |
788 print O "4 0 0 5 0 0 8 0.0000 4 135 405 "; | |
789 printf O ("\t%.0f %.0f ",$xs,$Yorig+70); | |
790 print O $a[-2], "\\001\n"; | |
791 } | |
792 #print vertical markers for PId scale | |
793 if (!$Mgaps){ | |
794 for ($percent_id = 50; $percent_id < 101; $percent_id += 25) { | |
795 #left | |
796 print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; | |
797 printf O ("\t%.0f %.0f", $xs-200,$Yorig+$YdistPID+250-($percent_id - 50) * $yscale + 20); | |
798 print O " $percent_id%", "\\001\n"; | |
799 #right | |
800 print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; | |
801 printf O ("\t%.0f %.0f",$xe+20, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale+20 ); | |
802 print O " $percent_id%", "\\001\n"; | |
803 | |
804 # print the tick mark | |
805 #left | |
806 # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; | |
807 # printf O ("\t%.0f %.0f $xs %.0f\n",$xs-50, | |
808 # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale); | |
809 #right | |
810 # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; | |
811 # printf O ("\t$xe %.0f %.0f %.0f\n", | |
812 # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale,$xe+50, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale); | |
813 } | |
814 } | |
815 else{ # for Mgaps | |
816 print O "4 0 0 100 0 0 7 1.5710 4 135 405 "; | |
817 printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 5 * $yscale + 10); | |
818 print O " + qry strand", "\\001\n"; | |
819 | |
820 print O "4 0 0 100 0 0 7 1.5710 4 135 405 "; | |
821 printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 30 * $yscale + 10); | |
822 print O " - qry strand", "\\001\n"; | |
823 } | |
824 | |
825 } | |
826 #********************************* | |
827 sub print_legend{ | |
828 | |
829 print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; | |
830 printf O ("\t%.0f %.0f ",100,$Yorig+$YdistPID+1100); | |
831 print O " Legend ", "\\001\n"; | |
832 $y= $Yorig+$YdistPID+1300; #utr | |
833 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron | |
834 print O "\t 70 $y 99 $y\n"; | |
835 print O "2 1 0 5 27 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; | |
836 print O "\t 100 $y 200 $y\n"; | |
837 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron | |
838 print O "\t 200 $y 230 $y\n"; | |
839 print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; | |
840 printf O ("\t%.0f %.0f ",300,$y+30); | |
841 print O " 5' utr ", "\\001\n"; | |
842 $y += 150 ;#cds | |
843 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron | |
844 print O "\t 70 $y 99 $y\n"; | |
845 print O "2 1 0 5 2 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; | |
846 print O "\t 100 $y 200 $y\n"; | |
847 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron | |
848 print O "\t 200 $y 230 $y\n"; | |
849 print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; | |
850 printf O ("\t%.0f %.0f ",300,$y+30); | |
851 print O " cds ", "\\001\n"; | |
852 $y += 150; #3' utr | |
853 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron | |
854 print O "\t 70 $y 99 $y\n"; | |
855 print O "2 1 0 5 1 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; | |
856 print O "\t 100 $y 200 $y\n"; | |
857 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron | |
858 print O "\t 200 $y 230 $y\n"; | |
859 print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; | |
860 printf O ("\t%.0f %.0f ",300,$y+30); | |
861 print O " 3' utr ", "\\001\n"; | |
862 $y += 150; # match | |
863 print O "2 1 0 2 4 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; | |
864 print O "\t100 $y 200 $y\n"; | |
865 print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; | |
866 printf O ("\t%.0f %.0f ",300,$y+30); | |
867 print O " match found by $prog ", "\\001\n"; | |
868 } | |
869 #********************************* | |
870 sub change_file_format{ | |
871 | |
872 $procfile =~ /^tmp(.+)/; | |
873 $outfile = $1.$nrf.".".$format; | |
874 $comand = "fig2dev -L $format -x 100"; | |
875 $comand .= " -m $magn" if ($magn); | |
876 $comand .= " -M ".$procfile.$nrf.".fig ".$outfile; | |
877 | |
878 $status =system($comand); | |
879 print E "ERROR 1: fig2dev !\n" unless $status == 0; | |
880 | |
881 $status =system("rm $procfile".$nrf.".fig"); | |
882 | |
883 if ($verb){ | |
884 print "$outfile"; | |
885 if ($mref){ | |
886 print "\n" ; | |
887 } | |
888 else { | |
889 print "\t range : $startcoord\t$endcoord \n" ; | |
890 } | |
891 } | |
892 } | |
893 #********************************* | |
894 sub format_mgaps{ | |
895 $tmpfile="tmpmgaps"; | |
896 $tmpfile2=$alignm."coords" ; | |
897 get_ref_len(); #print $maxlenref."\n"; | |
898 open(M,">".$tmpfile2) || die "can't open \" $tmpfile2 \" file !"; | |
899 print M "$alignm\n"; | |
900 print M "Mgaps\n\n"; | |
901 print M " [S1] [E1] | [S2] [E2] | [LEN 1] [LEN 2] | [% IDY] | [LEN R] [LEN Q] | [COV R] [COV Q] | [TAGS]\n"; | |
902 print M "===============================================================================================================================\n"; | |
903 | |
904 open(T,">".$tmpfile) || die "can't open \" $tmpfile \" file !"; | |
905 | |
906 open(A,"<".$alignm) || die "can't open \" $alignm \" file !"; | |
907 | |
908 | |
909 while(<A>) { | |
910 chomp; | |
911 @a=split; | |
912 if ($a[0] =~ /^>/){ | |
913 $nr_cluster=1; | |
914 $idquery=$a[1]; | |
915 if ($a[2] eq "Reverse"){$idquery .= "_rev";} | |
916 } | |
917 #elsif ($a[0] eq "#") {$nr_cluster++;} | |
918 elsif($a[0] ne "#"){ | |
919 $e1=$a[0]+$a[2]; | |
920 print T $a[0]."\t".$e1."\t"."|"; | |
921 print T "\t-\t-\t|"; | |
922 print T "\t-\t-\t|"; | |
923 print T "\t-\t|"; #pid | |
924 print T "\t$maxlenref\t-\t|";#len seqs | |
925 # print "\t-\t-\t|"; | |
926 # print "\t-\t-\t|"; | |
927 # print T " $Mgaps\t$idquery.$nr_cluster\n"; | |
928 print T " $Mgaps\t$idquery\n"; | |
929 } | |
930 | |
931 } | |
932 close(A); | |
933 close(T); | |
934 $command="sort -n -k 1 $tmpfile >> $tmpfile2"; | |
935 $status =system($command); | |
936 system("rm $tmpfile"); | |
937 close(M); | |
938 $alignm=$tmpfile2; | |
939 print STDERR "ERROR 1: can't sort $tmpfile \n" unless $status == 0; | |
940 print STDERR "\n**************************************** \n"; | |
941 print STDERR "New input file created : $alignm\n"; | |
942 print STDERR "**************************************** \n\n"; | |
943 | |
944 } | |
945 #***************************** | |
946 sub get_ref_len{ | |
947 $firstrow=1; | |
948 open(A,"<".$alignm) || die "can't open \" $alignm \" file !"; | |
949 while(<A>) { | |
950 chomp; | |
951 @a=split; | |
952 if ($firstrow) { | |
953 $firstrow=0; | |
954 if ($a[0] !~ /^>/){ | |
955 print "\nWrong file format for MGAPS file : $alignm ! \n"; | |
956 exit; | |
957 } | |
958 } | |
959 | |
960 if ($a[0] =~ /^>/){ next; } | |
961 elsif($a[0] ne "#"){ | |
962 $e1=$a[0]+$a[2]; | |
963 $maxlenref=($maxlenref < $e1 ? $e1 : $maxlenref); | |
964 } | |
965 } | |
966 close(A); | |
967 } |