jpayne@69: #!__PERL_PATH jpayne@69: jpayne@69: use lib "__SCRIPT_DIR"; jpayne@69: use Foundation; jpayne@69: jpayne@69: my $SCRIPT_DIR = "__SCRIPT_DIR"; jpayne@69: jpayne@69: jpayne@69: my $VERSION_INFO = q~ jpayne@69: mapview version 1.01 jpayne@69: ~; jpayne@69: jpayne@69: jpayne@69: my $HELP_INFO = q~ jpayne@69: USAGE: mapview [options] [UTR coords] [CDS coords] jpayne@69: jpayne@69: DESCRIPTION: jpayne@69: mapview is a utility program for displaying sequence alignments as jpayne@69: provided by MUMmer, NUCmer, PROmer or Mgaps. mapview takes the output of jpayne@69: show-coords and converts it to a FIG, PDF or PS file for visual analysis. jpayne@69: It can also break the output into multiple files for easier viewing and jpayne@69: printing. jpayne@69: jpayne@69: MANDATORY: jpayne@69: coords file The output of 'show-coords -rl[k]' or 'mgaps' jpayne@69: jpayne@69: OPTIONS: jpayne@69: UTR coords UTR coordinate file in GFF format jpayne@69: CDS coords CDS coordinate file in GFF format jpayne@69: jpayne@69: -d|maxdist Set the maximum base-pair distance between linked matches jpayne@69: (default 50000) jpayne@69: -f|format Set the output format to 'pdf', 'ps' or 'fig' jpayne@69: (default 'fig') jpayne@69: -h jpayne@69: --help Display help information and exit jpayne@69: -m|mag Set the magnification at which the figure is rendered, jpayne@69: this is an option for fig2dev which is used to generate jpayne@69: the PDF and PS files (default 1.0) jpayne@69: -n|num Set the number of output files used to partition the jpayne@69: output, this is to avoid generating files that are too jpayne@69: large to display (default 10) jpayne@69: -p|prefix Set the output file prefix jpayne@69: (default "PROMER_graph or NUCMER_graph") jpayne@69: -v jpayne@69: --verbose Verbose logging of the processed files jpayne@69: -V jpayne@69: --version Display the version information and exit jpayne@69: -x1 coord Set the lower coordinate bound of the display jpayne@69: -x2 coord Set the upper coordinate bound of the display jpayne@69: -g|ref If the input file is provided by 'mgaps', set the jpayne@69: reference sequence ID (as it appears in the first column jpayne@69: of the UTR/CDS coords file) jpayne@69: -I Display the name of query sequences jpayne@69: -Ir Display the name of reference genes jpayne@69: ~; jpayne@69: jpayne@69: jpayne@69: my $USAGE_INFO = q~ jpayne@69: USAGE: mapview [options] [UTR coords] [CDS coords] jpayne@69: ~; jpayne@69: jpayne@69: jpayne@69: my @DEPEND_INFO = jpayne@69: ( jpayne@69: "fig2dev", jpayne@69: "$SCRIPT_DIR/Foundation.pm" jpayne@69: ); jpayne@69: jpayne@69: my $err_gff = q~ jpayne@69: ERROR in the input files ! The reference seq ID can't be found in GFF files ! jpayne@69: The first column in the GFF file should be the ID of the reference seq. jpayne@69: The alignments file should provide the same info in the column before the last one. jpayne@69: jpayne@69: Here are some example records for the GFF file: jpayne@69: jpayne@69: gnl|FlyBase|X Dmel3 initial-exon 2155 2413 . - . X_CG3038.1 jpayne@69: gnl|FlyBase|X Dmel3 last-exon 1182 2077 . - . X_CG3038.1 jpayne@69: ... jpayne@69: The fields are : jpayne@69: jpayne@69: ~; jpayne@69: jpayne@69: my $tigr; jpayne@69: my $err; jpayne@69: jpayne@69: my $alignm; jpayne@69: my $futr; jpayne@69: my $fcds; jpayne@69: jpayne@69: #-- Initialize TIGR::Foundation jpayne@69: $tigr = new TIGR::Foundation; jpayne@69: if ( !defined ($tigr) ) { jpayne@69: print (STDERR "ERROR: TIGR::Foundation could not be initialized"); jpayne@69: exit (1); jpayne@69: } jpayne@69: jpayne@69: #-- Set help and usage information jpayne@69: $tigr->setHelpInfo ($HELP_INFO); jpayne@69: $tigr->setUsageInfo ($USAGE_INFO); jpayne@69: $tigr->setVersionInfo ($VERSION_INFO); jpayne@69: $tigr->addDependInfo (@DEPEND_INFO); jpayne@69: jpayne@69: $err = $tigr->TIGR_GetOptions jpayne@69: ( jpayne@69: "d|maxdist=i" => \$match_dist, jpayne@69: "f|format=s" => \$format, jpayne@69: "m|mag=f" => \$magn, jpayne@69: "n|num=i" => \$noOutfiles, jpayne@69: "p|prefix=s" => \$outfilename, jpayne@69: "x1=i" => \$x1win, jpayne@69: "x2=i" => \$x2win, jpayne@69: "v|verbose" => \$verb, jpayne@69: "g|ref=s" => \$Mgaps, jpayne@69: "I" => \$printIDconting, jpayne@69: "Ir" => \$printIDgenes jpayne@69: ); jpayne@69: jpayne@69: if ( $err == 0 || scalar(@ARGV) < 1 || scalar(@ARGV) > 3 ) { jpayne@69: $tigr->printUsageInfo( ); jpayne@69: print (STDERR "Try '$0 -h' for more information.\n"); jpayne@69: exit (1); jpayne@69: } jpayne@69: jpayne@69: ($alignm,$futr,$fcds)=@ARGV; jpayne@69: jpayne@69: if ((substr($x1win,0,1) eq '-') || (substr($x2win,0,1) eq '-')){ jpayne@69: print "ERROR2 : coords x1,x2 should be positive integers !!\n"; jpayne@69: $info=1; jpayne@69: } jpayne@69: if ($x1win > $x2win) { jpayne@69: print "ERROR3 : wrong range coords : x1 >= x2 !!!\n"; jpayne@69: $info=1; jpayne@69: } jpayne@69: if ($Mgaps){ #formating the mgaps output to be similar with show-coords output jpayne@69: format_mgaps(); jpayne@69: } jpayne@69: jpayne@69: if (!$format){$format="fig";} jpayne@69: if (($x1win) and ($x2win)){ $startfind=0; } jpayne@69: else{ $startfind=1; } jpayne@69: $endfind=0; jpayne@69: if (!$noOutfiles) {$noOutfiles=10;} jpayne@69: if (!$match_dist){$match_dist=50000;} jpayne@69: jpayne@69: #```````init colors```````````````````````````` jpayne@69: $color{"2"}=27;#dark pink 5utr jpayne@69: $color{"3"}=2;#green ex jpayne@69: $color{"4"}=1;#blue 3utr jpayne@69: #`````````````````````` jpayne@69: @linkcolors=(31,14,11); jpayne@69: #`````````````````````````````````````````````` jpayne@69: open(F,$alignm); jpayne@69: ; jpayne@69: $prog=; chomp($prog); jpayne@69: ; jpayne@69: $_=; jpayne@69: @a=(m/\s+(\||\[.+?\])/g) ; jpayne@69: for ($ind=0;$ind<=$#a;$ind++){ jpayne@69: if ($a[$ind] eq "[S1]") { jpayne@69: $ind_s1=$ind; jpayne@69: } jpayne@69: elsif ($a[$ind] eq "[E1]"){ jpayne@69: $ind_e1=$ind; jpayne@69: } jpayne@69: elsif ($a[$ind] eq "[% IDY]"){ jpayne@69: $ind_pidy=$ind; jpayne@69: } jpayne@69: elsif ($a[$ind] eq "[LEN R]"){ jpayne@69: $ind_lenchr=$ind; jpayne@69: } jpayne@69: # elsif ($a[$ind] eq "[TAGS]"){ jpayne@69: # $ind_tags=$ind+1; #there are two columns for this header col jpayne@69: # } jpayne@69: } jpayne@69: ;$mref=-1; jpayne@69: while (){ jpayne@69: chomp; jpayne@69: @a=split; jpayne@69: if (!exists $hRefContigId{$a[-2]}) { # print $a[-2]."\n"; jpayne@69: $hRefContigId{$a[-2]}=$a[$ind_lenchr]; jpayne@69: $lenrefseqs+=$a[$ind_lenchr]; jpayne@69: $mref++; jpayne@69: } jpayne@69: } jpayne@69: $nobpinfile=int($lenrefseqs/$noOutfiles); jpayne@69: jpayne@69: close(F); jpayne@69: jpayne@69: #`````````````````````` jpayne@69: jpayne@69: jpayne@69: if (@ARGV > 1) { jpayne@69: get_cds_ends(); jpayne@69: get_utrcds_info(); jpayne@69: test_overlap(); jpayne@69: } jpayne@69: elsif(!$mref) { jpayne@69: $fileno=$noOutfiles; jpayne@69: $startcoord=0;$endcoord=0; jpayne@69: for ($i=0;$i<$fileno;$i++) { jpayne@69: $endcoord=$startcoord+$nobpinfile-1; jpayne@69: $endcoord=$lenrefseqs if ($endcoord>$lenrefseqs); jpayne@69: $file[$i]="$startcoord $endcoord"; jpayne@69: $startcoord=$endcoord+1; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: $Yorig=3000; jpayne@69: $YdistPID=2000; jpayne@69: $yscale=$YdistPID/50; jpayne@69: $Xscale=14.5; jpayne@69: $gap=800; jpayne@69: #$maxfiles = ($fileno < 10) ? $fileno : 10; jpayne@69: #--------------------------------- jpayne@69: jpayne@69: if (!$mref){ jpayne@69: for($i=0; $i < $fileno; $i++) { jpayne@69: $nrf=$i; jpayne@69: set_output_fname(); jpayne@69: ($startcoord,$endcoord)=split(/\s+/,$file[$i]); jpayne@69: open(O,">$procfile".$nrf.".fig"); jpayne@69: print_header(); jpayne@69: print $procfile.$nrf.".fig\t range : $startcoord\t$endcoord \n" if ($verb && ($format eq "fig")); jpayne@69: jpayne@69: $xs=0; jpayne@69: $xe=int(($endcoord-$startcoord+1)/$Xscale); jpayne@69: #$xs=200; jpayne@69: #$xe=$xs+int(($endcoord-$startcoord+1)/$Xscale); jpayne@69: print_grid($xs,$xe,$startcoord,$endcoord); jpayne@69: jpayne@69: $tmpIdQrycontig=""; jpayne@69: $linkcolor=$linkcolors[0]; jpayne@69: jpayne@69: open(F,$alignm); jpayne@69: ;;;;; jpayne@69: while() { jpayne@69: chomp; jpayne@69: @a=split; jpayne@69: if($a[$ind_s1] > $endcoord) { last;} jpayne@69: if($a[$ind_s1]<$startcoord && $a[$ind_e1] > $startcoord ) { $a[$ind_s1]=$startcoord;} jpayne@69: if($a[$ind_s1] < $endcoord && $endcoord < $a[$ind_e1]) { $a[$ind_e1]=$endcoord;} jpayne@69: jpayne@69: if($a[$ind_s1]>=$startcoord && $a[$ind_e1]<=$endcoord) { jpayne@69: $x1=int(($a[$ind_s1]-$startcoord)/$Xscale);# jpayne@69: $x2=int(($a[$ind_e1]-$startcoord)/$Xscale);# jpayne@69: print_align($x1,$x2); jpayne@69: } jpayne@69: } jpayne@69: close(F); jpayne@69: %hQrycontig=(); jpayne@69: print_genes() if ($futr); jpayne@69: print_legend(); jpayne@69: close(O); jpayne@69: change_file_format() if ($format ne "fig"); jpayne@69: } jpayne@69: } jpayne@69: elsif($mref){#multiple ref seqs jpayne@69: set_output_fname(); jpayne@69: $tmpIdQrycontig=""; jpayne@69: $linkcolor=$linkcolors[0]; jpayne@69: $startdrawX=0; jpayne@69: $proclen=0; jpayne@69: $first=1; jpayne@69: $nrf=0; jpayne@69: open(F,$alignm); jpayne@69: ;;;;; jpayne@69: while() { jpayne@69: chomp; jpayne@69: @a=split; jpayne@69: if ($a[-2] ne $tmpcontig){ jpayne@69: %hQrycontig=(); jpayne@69: $tmpcontig=$a[-2]; jpayne@69: if ($first){ jpayne@69: $first=0; jpayne@69: $nrf++; jpayne@69: open(O,">$procfile".$nrf.".fig"); jpayne@69: print_header(); jpayne@69: print $procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig")); jpayne@69: $len=$hRefContigId{$a[-2]}; jpayne@69: } jpayne@69: else { jpayne@69: $startdrawX+=int($len/$Xscale)+$gap; jpayne@69: $len=$hRefContigId{$a[-2]}; jpayne@69: if (($proclen+$len>$nobpinfile) and ($proclen != 0)){ jpayne@69: print_legend(); jpayne@69: close(O); jpayne@69: change_file_format() if ($format ne "fig"); jpayne@69: $nrf++; jpayne@69: open(O,">$procfile".$nrf.".fig"); jpayne@69: print_header(); jpayne@69: print "\n".$procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig")); jpayne@69: $proclen=0; jpayne@69: $startdrawX=0; jpayne@69: } jpayne@69: } jpayne@69: $xs=$startdrawX; jpayne@69: $xe=$startdrawX+int($len/$Xscale); jpayne@69: print_grid($xs,$xe,0,$len); jpayne@69: #print genes from %geneinfo for contig jpayne@69: print $a[-2]."\t".$hRefContigId{$a[-2]}."\n"; jpayne@69: print_genes_mr() if ($futr); jpayne@69: $proclen+=$len; jpayne@69: }#end if new contig jpayne@69: $x1=$startdrawX+int($a[$ind_s1]/$Xscale); jpayne@69: $x2=$startdrawX+int($a[$ind_e1]/$Xscale); jpayne@69: print_align($x1,$x2); jpayne@69: } jpayne@69: print_legend(); jpayne@69: close(O); jpayne@69: change_file_format() if ($format ne "fig"); jpayne@69: jpayne@69: close(F); jpayne@69: } jpayne@69: #******************************************************************************* jpayne@69: #******************************************************************************* jpayne@69: sub set_output_fname{ jpayne@69: jpayne@69: if (!$outfilename) {$procfile=$prog."_graph"."_";} jpayne@69: else {$procfile=$outfilename."_";} jpayne@69: jpayne@69: if ($format ne "fig"){ jpayne@69: $procfile="tmp".$procfile; jpayne@69: } jpayne@69: } jpayne@69: #********************************* jpayne@69: sub get_cds_ends{ jpayne@69: #3. print "create \%hcds_ends...\n"; jpayne@69: $testGffFormat=0; jpayne@69: open(F,"<".$fcds);#|| die "can't open \" $fcds cds \" file !"; jpayne@69: while() { jpayne@69: chomp; jpayne@69: if($_) { jpayne@69: @a=split; jpayne@69: if (exists $hRefContigId{$a[0]}){#record if at least one of the ref id is the same in GFF and Align files jpayne@69: $testGffFormat++; jpayne@69: } jpayne@69: $genename=$a[8]; jpayne@69: if ($genename ne $tmpname){ jpayne@69: if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";} jpayne@69: elsif ($sign eq "-"){ $hcds_ends{$tmpname} = "$cds3 $cds5";} jpayne@69: $tmpname=$genename; jpayne@69: $sign=$a[6]; jpayne@69: } jpayne@69: if($sign eq "-") { jpayne@69: $temp=$a[3]; jpayne@69: $a[3]=$a[4]; jpayne@69: $a[4]=$temp; jpayne@69: } jpayne@69: if ($a[2] eq "single-exon"){ jpayne@69: $cds5=$a[3]; jpayne@69: $cds3=$a[4]; jpayne@69: } jpayne@69: elsif ($a[2] eq "initial-exon"){ jpayne@69: $cds5=$a[3]; jpayne@69: } jpayne@69: elsif ($a[2] eq "last-exon"){ jpayne@69: $cds3=$a[4]; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";} jpayne@69: elsif ($sign eq "-"){$hcds_ends{$tmpname} = "$cds3 $cds5";} jpayne@69: jpayne@69: test_formatGFF(); jpayne@69: jpayne@69: #foreach $k ( keys %hcds_ends){ jpayne@69: # print "cds_ends: ".$k."\t"."\n"; jpayne@69: #} exit; jpayne@69: jpayne@69: } jpayne@69: jpayne@69: #********************************* jpayne@69: sub test_formatGFF{ jpayne@69: if ($testGffFormat==0){ jpayne@69: print (STDERR "$err_gff \n"); jpayne@69: exit (1); jpayne@69: } jpayne@69: jpayne@69: } jpayne@69: jpayne@69: #********************************* jpayne@69: sub get_utrcds_info{ jpayne@69: #test for gene overlap $geneinfo{gene_name}->[0]=level,stock gene 5'3' utr ends, jpayne@69: #determina %geneinfo{id gene}->5utr,ex,3utr jpayne@69: #and @file jpayne@69: # get_gene_ends(); jpayne@69: $testGffFormat=0; jpayne@69: open(F,"<".$futr);# || die "can't open \" $futr utr \" file !"; jpayne@69: while() { jpayne@69: chomp; jpayne@69: if($_) { jpayne@69: @a=split; jpayne@69: #get gene ends-utr jpayne@69: $genename=$a[8]; jpayne@69: if (exists $hRefContigId{$a[0]}){ #check if [align file(col before the last one)] = [GFF file(col 1)] jpayne@69: $testGffFormat++; jpayne@69: } jpayne@69: jpayne@69: if ($genename ne $tmpname){ jpayne@69: if ($sign eq "+"){ $utr_ends{$tmpname} = "$utr5 $utr3";} jpayne@69: elsif ($sign eq "-"){ $utr_ends{$tmpname} = "$utr3 $utr5";} jpayne@69: jpayne@69: if ($tmpgene) {# for the distinct_utr_cds jpayne@69: get_utrcds_ends() ; jpayne@69: } jpayne@69: $tmpgene="$a[3] $a[4]";# jpayne@69: jpayne@69: $tmpname=$genename; jpayne@69: $sign=$a[6]; jpayne@69: $hContig_genes{$a[0]}.=" ".$a[8]; # for multiple ref. seqs jpayne@69: } jpayne@69: else{# for the distinct_utr_cds jpayne@69: if($sign eq "-") { jpayne@69: $tmpgene="$a[3] $a[4];".$tmpgene; jpayne@69: } jpayne@69: else { $tmpgene.=";$a[3] $a[4]"; } jpayne@69: }# jpayne@69: jpayne@69: if($sign eq "-") { jpayne@69: $temp=$a[3]; jpayne@69: $a[3]=$a[4]; jpayne@69: $a[4]=$temp; jpayne@69: } jpayne@69: if ($a[2] eq "single-exon"){ jpayne@69: $utr5=$a[3]; jpayne@69: $utr3=$a[4]; jpayne@69: } jpayne@69: elsif ($a[2] eq "initial-exon"){ jpayne@69: $utr5=$a[3]; jpayne@69: } jpayne@69: elsif ($a[2] eq "last-exon"){ jpayne@69: $utr3=$a[4]; jpayne@69: } jpayne@69: jpayne@69: #init gene info (level) jpayne@69: $geneinfo{$a[8]}->[0]=1 if (!exists $geneinfo{$a[8]}); jpayne@69: jpayne@69: } jpayne@69: } jpayne@69: # for the distinct_utr_cds jpayne@69: get_utrcds_ends(); jpayne@69: %cds_ends=();# jpayne@69: jpayne@69: if ($sign eq "+"){$utr_ends{$tmpname}="$utr5 $utr3"; } jpayne@69: elsif ($sign eq "-"){$utr_ends{$tmpname}="$utr3 $utr5"; } jpayne@69: $hContig_genes{$a[0]}.=" ".$a[8]; jpayne@69: jpayne@69: close(F); jpayne@69: test_formatGFF(); jpayne@69: jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #**************************************************************** jpayne@69: sub get_utrcds_ends{ jpayne@69: $u5="";$ex="";$u3=""; jpayne@69: jpayne@69: if ($fcds eq $futr) { jpayne@69: $ex=$tmpgene; jpayne@69: } jpayne@69: else { jpayne@69: @ex=split(";",$tmpgene); jpayne@69: @cds=split(" ",$hcds_ends{$tmpname}); jpayne@69: jpayne@69: for($i=0;$i<=$#ex;$i++){ jpayne@69: @coord=split(" ",$ex[$i]); jpayne@69: jpayne@69: if ($cds[0]>$coord[0]){ jpayne@69: if ($cds[0]>$coord[1]){ jpayne@69: $u5.="$coord[0] $coord[1];"; jpayne@69: } jpayne@69: else{ jpayne@69: $u5.= "$coord[0] "; $u5.=$cds[0]-1 .";" ; #? jpayne@69: if ($cds[1]<$coord[1]){ jpayne@69: $ex.="$cds[0] $cds[1];"; jpayne@69: $u3.=$cds[1]+1 ." $coord[1];"; jpayne@69: } jpayne@69: else{ $ex.="$cds[0] $coord[1];";} jpayne@69: } jpayne@69: } jpayne@69: else { jpayne@69: if ($cds[1]>$coord[0]){ jpayne@69: if ($cds[1]>$coord[1]){ jpayne@69: $ex.="$coord[0] $coord[1];"; jpayne@69: } jpayne@69: else{ jpayne@69: $ex.="$coord[0] $cds[1];"; jpayne@69: $u3.=$cds[1]+1 ." $coord[1];"; jpayne@69: } jpayne@69: } jpayne@69: else { $u3.="$coord[0] $coord[1];";} jpayne@69: } jpayne@69: } jpayne@69: chop($u5, $ex, $u3); jpayne@69: } jpayne@69: $geneinfo{$tmpname}->[1]=$sign; jpayne@69: $geneinfo{$tmpname}->[2]=$u5; jpayne@69: $geneinfo{$tmpname}->[3]=$ex; jpayne@69: $geneinfo{$tmpname}->[4]=$u3; jpayne@69: } jpayne@69: #********************************* jpayne@69: sub test_overlap{ jpayne@69: jpayne@69: if (!$mref){ jpayne@69: $fileno=0;### jpayne@69: $endcoord=0;### jpayne@69: } jpayne@69: foreach $kcontgid (sort keys %hContig_genes){ jpayne@69: @allgenes=split(/\s+/,$hContig_genes{$kcontgid}); jpayne@69: for ($i=1;$i<=$#allgenes;$i++){ jpayne@69: @g1=split (" ", $utr_ends{$allgenes[$i]}); jpayne@69: $Utr5End{$allgenes[$i]}=$g1[0]; ### jpayne@69: jpayne@69: for ($j=$i+1;$j<=$#allgenes;$j++){ #comparing with the rest of the genes jpayne@69: @g2=split (" ", $utr_ends{$allgenes[$j]}); jpayne@69: #if the genes are overpaling and they have the same level ,the second gene is liflet to the next level jpayne@69: if ( (($g2[0]>=$g1[0]) and ($g2[0]<=$g1[1])) or (($g2[1]>=$g1[0]) and ($g2[1]<=$g1[1])) or (($g1[0]>=$g2[0]) and ($g1[0]<=$g2[1])) ){ jpayne@69: if ($geneinfo{$allgenes[$i]}->[0] == $geneinfo{$allgenes[$j]}->[0]){ jpayne@69: $geneinfo{$allgenes[$j]}->[0]=$geneinfo{$allgenes[$i]}->[0] + 1 ; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: SetTheRangeForEachFile() if ((!$endfind) and (!$mref)); ### jpayne@69: } jpayne@69: } jpayne@69: $file[$fileno++]="$startcoord $endcoord" if (!$mref);### jpayne@69: %utr_ends=();### jpayne@69: jpayne@69: } jpayne@69: jpayne@69: #************************************** jpayne@69: sub SetTheRangeForEachFile{ jpayne@69: $currstart=$g1[0]; jpayne@69: $currend=$g1[1]; jpayne@69: #---test range ends intersection jpayne@69: if (!$startfind) { jpayne@69: if (($x1win <= $currstart) || ($x1win <= $currend)){ jpayne@69: $currstart = $x1win; jpayne@69: $startfind = 1; jpayne@69: } jpayne@69: } jpayne@69: if ( $startfind && $x1win && $x2win){ jpayne@69: if (($x2win <= $currstart) || ($x2win <= $currend) ){ jpayne@69: $currend = $x2win; jpayne@69: $endfind = 1; jpayne@69: } jpayne@69: } jpayne@69: #-------------------- jpayne@69: if ($startfind) { jpayne@69: if(!$endcoord) { jpayne@69: #$startcoord=0; jpayne@69: $startcoord = $x1win ? $x1win : 0; jpayne@69: $endcoord=$currend; jpayne@69: } jpayne@69: else { jpayne@69: if($currend > $endcoord) { jpayne@69: if($currend-$startcoord < $nobpinfile) { jpayne@69: $endcoord=$currend; jpayne@69: } jpayne@69: else { jpayne@69: $file[$fileno++]="$startcoord $endcoord"; jpayne@69: $startcoord=$endcoord+1; jpayne@69: $endcoord=$currend; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: }#if startfind jpayne@69: } jpayne@69: #********************************* jpayne@69: sub print_header{ jpayne@69: print O "#FIG 3.2\nLandscape\nCenter\nInches\nLetter \n100.00\nMultiple\n-2\n1200 2\n"; jpayne@69: } jpayne@69: #********************************* jpayne@69: sub print_align{ jpayne@69: my ($x1,$x2)=@_; jpayne@69: jpayne@69: $a[$ind_pidy]=50 if ($a[$ind_pidy]<50); jpayne@69: $a[$ind_pidy]=int($a[$ind_pidy]); jpayne@69: if ($Mgaps){ jpayne@69: $y=$Yorig+250+$YdistPID-$yscale*2; jpayne@69: if($a[$#a]=~/rev$/){$y-=25*$yscale;} jpayne@69: } jpayne@69: else{ jpayne@69: $y=$Yorig+250+$YdistPID-$yscale*($a[$ind_pidy]-50); jpayne@69: } jpayne@69: if($x1==$x2) { $x2++;} jpayne@69: #draw the line between matches. is dif color for each contig jpayne@69: if ($a[$#a] eq $tmpIdQrycontig) { jpayne@69: print_connections($hQrycontig{$tmpIdQrycontig}->[1], $x1,$y); jpayne@69: } jpayne@69: else{#new contig jpayne@69: #remember the start coord for printing the id alignments jpayne@69: if ($printIDconting){ jpayne@69: if ( $x1 - $XlastPrint > 400 ) { jpayne@69: print O "4 0 0 5 0 0 8 0.0000 4 90 270 "; jpayne@69: printf O ("\t%.0f %.0f ",$x1,$y); jpayne@69: print O $a[$#a], "\\001\n"; jpayne@69: $XlastPrint=$x1; $YlastPrint=$y; jpayne@69: } jpayne@69: } jpayne@69: # jpayne@69: ##if it was seen before,but interrupted by another contig jpayne@69: if ((exists $hQrycontig{$a[$#a]}) and ($a[$ind_s1]-$hQrycontig{$a[$#a]}->[2] < $match_dist )) { jpayne@69: $linkcolor=$hQrycontig{$a[$#a]}->[0]; jpayne@69: print_connections($hQrycontig{$a[$#a]}->[1], $x1,$y); jpayne@69: } jpayne@69: else{ jpayne@69: #change the link color jpayne@69: unshift(@linkcolors, pop(@linkcolors)); jpayne@69: $linkcolor=$linkcolors[0]; jpayne@69: $hQrycontig{$a[$#a]}->[0] = $linkcolor; jpayne@69: } jpayne@69: } jpayne@69: $tmpIdQrycontig=$a[$#a]; jpayne@69: $hQrycontig{$tmpIdQrycontig}->[1]="$x2 $y"; jpayne@69: $hQrycontig{$tmpIdQrycontig}->[2]=$a[$ind_e1]; jpayne@69: jpayne@69: #the matches line is red jpayne@69: print O "2 1 0 2 4 0 40 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: print O "\t$x1 $y $x2 $y\n"; jpayne@69: print O "2 1 0 5 20 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: printf O ("\t $x1 %.0f $x2 %.0f\n",$Yorig+150 , $Yorig+150); jpayne@69: } jpayne@69: #********************************* jpayne@69: sub print_connections{ jpayne@69: my ($setc1,$setx2,$sety2)=@_; # print "\nparam connect @_\n"; jpayne@69: my ($setx1,$sety1) =split(/ /,$setc1); jpayne@69: jpayne@69: if ($Mgaps){ jpayne@69: if ($setx1>$setx2){ jpayne@69: $tmpsetx1=$setx1; jpayne@69: $setx1=$setx2; jpayne@69: $setx2=$tmpsetx1; jpayne@69: } jpayne@69: $distx1x2=int(($setx2-$setx1)/2); jpayne@69: $xcenter= $setx1+$distx1x2; jpayne@69: jpayne@69: if ($setx2-$setx1>4000) { #if the distance is to big then heigh of the arc is set to 20 jpayne@69: $heightArcUp = 20*$yscale; jpayne@69: $yoffcenter=int((($distx1x2**2)+$heightArcUp**2)*(1/(2*$heightArcUp)))-$heightArcUp ; jpayne@69: } jpayne@69: else{ jpayne@69: $heightArcUp = int (0.447 * $distx1x2);#sectorul de cerc la 1/3 din raza. jpayne@69: $yoffcenter=2*$heightArcUp; jpayne@69: } jpayne@69: print O "5 1 0 2 $linkcolor 0 50 0 -1 0.000 0 0 0 0 "; jpayne@69: printf O ("%.3f %.3f $setx1 $sety1 $xcenter %.0f $setx2 $sety1 \n",$xcenter,$sety1+$yoffcenter,$sety1-$heightArcUp); jpayne@69: } jpayne@69: else{ jpayne@69: print O "2 1 0 1 $linkcolor 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: print O "\t".$setc1." $setx2 $sety2\n"; jpayne@69: jpayne@69: } jpayne@69: } jpayne@69: #********************************* jpayne@69: sub print_genes_mr{ jpayne@69: %hLastOnLevel=(); jpayne@69: @g=split(/\s+/,$hContig_genes{$a[-2]}); jpayne@69: for ($i=1;$i<=$#g;$i++){ jpayne@69: $kname=$g[$i]; jpayne@69: $tmpx2=0; jpayne@69: $y=$Yorig-100-200*$geneinfo{$kname}->[0]; jpayne@69: #print id gena jpayne@69: $xid =$startdrawX+ int($Utr5End{$kname}/$Xscale); jpayne@69: print_Id_genes() if ($printIDgenes); jpayne@69: # jpayne@69: for ($l=2;$l<5;$l++){ jpayne@69: @c=split(";",$geneinfo{$kname}->[$l] ) ; jpayne@69: if (@c){ #print "de unde?@c\n" if (($l==2) or ($l==4)); jpayne@69: $colorend=$color{$l}; jpayne@69: if ($geneinfo{$kname}->[1] eq "-"){ jpayne@69: if ($l==2) { $colorend=$color{"4"}; } jpayne@69: elsif ($l==4) {$colorend=$color{"2"};} jpayne@69: } jpayne@69: for ($k=0;$k<=$#c;$k++){ jpayne@69: @e=split (" ",$c[$k]); jpayne@69: $x1=$startdrawX+int($e[0]/$Xscale); jpayne@69: $x2=$startdrawX+int($e[1]/$Xscale); jpayne@69: if($x1==$x2) { $x2++;} jpayne@69: if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron jpayne@69: print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: print O "\t $tmpx2 $y $x1 $y\n"; jpayne@69: } jpayne@69: $tmpx2=$x2; jpayne@69: print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";# jpayne@69: print O "\t $x1 $y $x2 $y\n"; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: #delete ($geneinfo{$kname}); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: #********************************* jpayne@69: sub print_Id_genes{ jpayne@69: if (exists $hLastOnLevel{$geneinfo{$kname}->[0]}){ jpayne@69: $lastOnlevel=$hLastOnLevel{$geneinfo{$kname}->[0]}; jpayne@69: $printidspace = int(($Utr5End{$kname}-$Utr5End{$lastOnlevel})/$Xscale); jpayne@69: } jpayne@69: else{$printidspace=601;} jpayne@69: if ($printidspace > 600){ jpayne@69: #print contig name# jpayne@69: print O "4 0 0 5 0 0 6 0.0000 4 90 270 "; jpayne@69: printf O ("\t%.0f %.0f ",$xid,$y-50); jpayne@69: print O $kname, "\\001\n"; jpayne@69: $hLastOnLevel{$geneinfo{$kname}->[0]}=$kname; jpayne@69: } jpayne@69: } jpayne@69: #********************************* jpayne@69: sub print_genes{ jpayne@69: %hLastOnLevel=(); jpayne@69: foreach $kname (sort {$Utr5End{$a} <=> $Utr5End{$b}} keys %Utr5End){ jpayne@69: $tmpx2=0; jpayne@69: if ($Utr5End{$kname}>$startcoord && $Utr5End{$kname}<$endcoord){ jpayne@69: $y=$Yorig-100-200*$geneinfo{$kname}->[0]; jpayne@69: #print id gena jpayne@69: $xid = int(($Utr5End{$kname}-$startcoord)/$Xscale); jpayne@69: print_Id_genes() if ($printIDgenes); jpayne@69: # jpayne@69: for ($l=2;$l<5;$l++){ jpayne@69: @c=split(";",$geneinfo{$kname}->[$l] ); jpayne@69: $colorend=$color{$l}; jpayne@69: if ($geneinfo{$kname}->[1] eq "-"){ jpayne@69: if ($l==2) { $colorend=$color{"4"}; } jpayne@69: elsif ($l==4) {$colorend=$color{"2"};} jpayne@69: } jpayne@69: jpayne@69: for ($k=0;$k<=$#c;$k++){ jpayne@69: @e=split (" ",$c[$k]); jpayne@69: $x1=int(($e[0]-$startcoord)/$Xscale); jpayne@69: $x2=int(($e[1]-$startcoord)/$Xscale); jpayne@69: if($x1==$x2) { $x2++;} jpayne@69: if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron jpayne@69: print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: print O "\t $tmpx2 $y $x1 $y\n"; jpayne@69: } jpayne@69: $tmpx2=$x2; jpayne@69: print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";# jpayne@69: print O "\t $x1 $y $x2 $y\n"; jpayne@69: } jpayne@69: } jpayne@69: }#endif "is in interval" jpayne@69: } jpayne@69: # delete ($Utr5End{$kname}); jpayne@69: # delete ($geneinfo{$kname}); jpayne@69: jpayne@69: } jpayne@69: #********************************* jpayne@69: sub print_grid{ jpayne@69: jpayne@69: my ($xs,$xe,$startcontg,$endcontg)=@_; jpayne@69: jpayne@69: $XlastPrint=0;$YlastPrint=0; jpayne@69: jpayne@69: #print ref contig jpayne@69: print O "2 1 0 10 11 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: printf O ("\t $xs %.0f $xe %.0f\n",$Yorig+50,$Yorig+50); jpayne@69: #print orizontal axes for PId (100%,75%,50%) jpayne@69: for ($percent_id = 50; $percent_id < 101; $percent_id += 25) { jpayne@69: print O "2 1 2 1 0 7 60 0 -1 4.000 0 0 -1 0 0 2\n"; jpayne@69: printf O ("\t$xs %.0f $xe %.0f\n",$Yorig+250+$YdistPID-($percent_id - 50) * $yscale,$Yorig+250+$YdistPID-($percent_id - 50) * $yscale); jpayne@69: #last if ($Mgaps); jpayne@69: } jpayne@69: #print orizontal markers for bp. jpayne@69: $increment=10000/$Xscale; jpayne@69: $no_incr=0; jpayne@69: $xmark = $xs ;$xmark_float= $xs; jpayne@69: while ($xmark < $xe){ jpayne@69: print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: printf O ("\t$xmark %.0f $xmark %.0f\n",$Yorig+$YdistPID+250,$Yorig+$YdistPID+300); jpayne@69: #bp scale jpayne@69: print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; jpayne@69: printf O ("\t %.0f %.0f",$xmark,$Yorig+$YdistPID+400); jpayne@69: print O " $no_incr"."k", "\\001\n"; jpayne@69: $no_incr += 10; jpayne@69: $xmark_float += $increment; jpayne@69: $xmark=int($xmark_float); jpayne@69: } jpayne@69: jpayne@69: #coord for chr ends jpayne@69: print O "4 0 0 50 0 0 14 0.0000 4 135 450 $xs $Yorig $startcontg\\001\n"; jpayne@69: printf O ("4 0 0 50 0 0 14 0.0000 4 135 810 %.0f $Yorig $endcontg\\001\n",$xe-length($xe)*125); jpayne@69: jpayne@69: #print contig name# jpayne@69: if ($mref){ jpayne@69: print O "4 0 0 5 0 0 8 0.0000 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f ",$xs,$Yorig+70); jpayne@69: print O $a[-2], "\\001\n"; jpayne@69: } jpayne@69: #print vertical markers for PId scale jpayne@69: if (!$Mgaps){ jpayne@69: for ($percent_id = 50; $percent_id < 101; $percent_id += 25) { jpayne@69: #left jpayne@69: print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f", $xs-200,$Yorig+$YdistPID+250-($percent_id - 50) * $yscale + 20); jpayne@69: print O " $percent_id%", "\\001\n"; jpayne@69: #right jpayne@69: print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f",$xe+20, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale+20 ); jpayne@69: print O " $percent_id%", "\\001\n"; jpayne@69: jpayne@69: # print the tick mark jpayne@69: #left jpayne@69: # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: # printf O ("\t%.0f %.0f $xs %.0f\n",$xs-50, jpayne@69: # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale); jpayne@69: #right jpayne@69: # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: # printf O ("\t$xe %.0f %.0f %.0f\n", jpayne@69: # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale,$xe+50, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale); jpayne@69: } jpayne@69: } jpayne@69: else{ # for Mgaps jpayne@69: print O "4 0 0 100 0 0 7 1.5710 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 5 * $yscale + 10); jpayne@69: print O " + qry strand", "\\001\n"; jpayne@69: jpayne@69: print O "4 0 0 100 0 0 7 1.5710 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 30 * $yscale + 10); jpayne@69: print O " - qry strand", "\\001\n"; jpayne@69: } jpayne@69: jpayne@69: } jpayne@69: #********************************* jpayne@69: sub print_legend{ jpayne@69: jpayne@69: print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f ",100,$Yorig+$YdistPID+1100); jpayne@69: print O " Legend ", "\\001\n"; jpayne@69: $y= $Yorig+$YdistPID+1300; #utr jpayne@69: print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron jpayne@69: print O "\t 70 $y 99 $y\n"; jpayne@69: print O "2 1 0 5 27 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: print O "\t 100 $y 200 $y\n"; jpayne@69: print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron jpayne@69: print O "\t 200 $y 230 $y\n"; jpayne@69: print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f ",300,$y+30); jpayne@69: print O " 5' utr ", "\\001\n"; jpayne@69: $y += 150 ;#cds jpayne@69: print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron jpayne@69: print O "\t 70 $y 99 $y\n"; jpayne@69: print O "2 1 0 5 2 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: print O "\t 100 $y 200 $y\n"; jpayne@69: print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron jpayne@69: print O "\t 200 $y 230 $y\n"; jpayne@69: print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f ",300,$y+30); jpayne@69: print O " cds ", "\\001\n"; jpayne@69: $y += 150; #3' utr jpayne@69: print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron jpayne@69: print O "\t 70 $y 99 $y\n"; jpayne@69: print O "2 1 0 5 1 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: print O "\t 100 $y 200 $y\n"; jpayne@69: print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron jpayne@69: print O "\t 200 $y 230 $y\n"; jpayne@69: print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f ",300,$y+30); jpayne@69: print O " 3' utr ", "\\001\n"; jpayne@69: $y += 150; # match jpayne@69: print O "2 1 0 2 4 0 50 0 -1 0.000 0 0 -1 0 0 2\n"; jpayne@69: print O "\t100 $y 200 $y\n"; jpayne@69: print O "4 0 0 100 0 0 8 0.0000 4 135 405 "; jpayne@69: printf O ("\t%.0f %.0f ",300,$y+30); jpayne@69: print O " match found by $prog ", "\\001\n"; jpayne@69: } jpayne@69: #********************************* jpayne@69: sub change_file_format{ jpayne@69: jpayne@69: $procfile =~ /^tmp(.+)/; jpayne@69: $outfile = $1.$nrf.".".$format; jpayne@69: $comand = "fig2dev -L $format -x 100"; jpayne@69: $comand .= " -m $magn" if ($magn); jpayne@69: $comand .= " -M ".$procfile.$nrf.".fig ".$outfile; jpayne@69: jpayne@69: $status =system($comand); jpayne@69: print E "ERROR 1: fig2dev !\n" unless $status == 0; jpayne@69: jpayne@69: $status =system("rm $procfile".$nrf.".fig"); jpayne@69: jpayne@69: if ($verb){ jpayne@69: print "$outfile"; jpayne@69: if ($mref){ jpayne@69: print "\n" ; jpayne@69: } jpayne@69: else { jpayne@69: print "\t range : $startcoord\t$endcoord \n" ; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: #********************************* jpayne@69: sub format_mgaps{ jpayne@69: $tmpfile="tmpmgaps"; jpayne@69: $tmpfile2=$alignm."coords" ; jpayne@69: get_ref_len(); #print $maxlenref."\n"; jpayne@69: open(M,">".$tmpfile2) || die "can't open \" $tmpfile2 \" file !"; jpayne@69: print M "$alignm\n"; jpayne@69: print M "Mgaps\n\n"; jpayne@69: print M " [S1] [E1] | [S2] [E2] | [LEN 1] [LEN 2] | [% IDY] | [LEN R] [LEN Q] | [COV R] [COV Q] | [TAGS]\n"; jpayne@69: print M "===============================================================================================================================\n"; jpayne@69: jpayne@69: open(T,">".$tmpfile) || die "can't open \" $tmpfile \" file !"; jpayne@69: jpayne@69: open(A,"<".$alignm) || die "can't open \" $alignm \" file !"; jpayne@69: jpayne@69: jpayne@69: while() { jpayne@69: chomp; jpayne@69: @a=split; jpayne@69: if ($a[0] =~ /^>/){ jpayne@69: $nr_cluster=1; jpayne@69: $idquery=$a[1]; jpayne@69: if ($a[2] eq "Reverse"){$idquery .= "_rev";} jpayne@69: } jpayne@69: #elsif ($a[0] eq "#") {$nr_cluster++;} jpayne@69: elsif($a[0] ne "#"){ jpayne@69: $e1=$a[0]+$a[2]; jpayne@69: print T $a[0]."\t".$e1."\t"."|"; jpayne@69: print T "\t-\t-\t|"; jpayne@69: print T "\t-\t-\t|"; jpayne@69: print T "\t-\t|"; #pid jpayne@69: print T "\t$maxlenref\t-\t|";#len seqs jpayne@69: # print "\t-\t-\t|"; jpayne@69: # print "\t-\t-\t|"; jpayne@69: # print T " $Mgaps\t$idquery.$nr_cluster\n"; jpayne@69: print T " $Mgaps\t$idquery\n"; jpayne@69: } jpayne@69: jpayne@69: } jpayne@69: close(A); jpayne@69: close(T); jpayne@69: $command="sort -n -k 1 $tmpfile >> $tmpfile2"; jpayne@69: $status =system($command); jpayne@69: system("rm $tmpfile"); jpayne@69: close(M); jpayne@69: $alignm=$tmpfile2; jpayne@69: print STDERR "ERROR 1: can't sort $tmpfile \n" unless $status == 0; jpayne@69: print STDERR "\n**************************************** \n"; jpayne@69: print STDERR "New input file created : $alignm\n"; jpayne@69: print STDERR "**************************************** \n\n"; jpayne@69: jpayne@69: } jpayne@69: #***************************** jpayne@69: sub get_ref_len{ jpayne@69: $firstrow=1; jpayne@69: open(A,"<".$alignm) || die "can't open \" $alignm \" file !"; jpayne@69: while() { jpayne@69: chomp; jpayne@69: @a=split; jpayne@69: if ($firstrow) { jpayne@69: $firstrow=0; jpayne@69: if ($a[0] !~ /^>/){ jpayne@69: print "\nWrong file format for MGAPS file : $alignm ! \n"; jpayne@69: exit; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: if ($a[0] =~ /^>/){ next; } jpayne@69: elsif($a[0] ne "#"){ jpayne@69: $e1=$a[0]+$a[2]; jpayne@69: $maxlenref=($maxlenref < $e1 ? $e1 : $maxlenref); jpayne@69: } jpayne@69: } jpayne@69: close(A); jpayne@69: }