annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/mapview @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 #!/usr/bin/env perl
jpayne@69 2
jpayne@69 3 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
jpayne@69 4 use Foundation;
jpayne@69 5
jpayne@69 6 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
jpayne@69 7
jpayne@69 8
jpayne@69 9 my $VERSION_INFO = q~
jpayne@69 10 mapview version 1.01
jpayne@69 11 ~;
jpayne@69 12
jpayne@69 13
jpayne@69 14 my $HELP_INFO = q~
jpayne@69 15 USAGE: mapview [options] <coords file> [UTR coords] [CDS coords]
jpayne@69 16
jpayne@69 17 DESCRIPTION:
jpayne@69 18 mapview is a utility program for displaying sequence alignments as
jpayne@69 19 provided by MUMmer, NUCmer, PROmer or Mgaps. mapview takes the output of
jpayne@69 20 show-coords and converts it to a FIG, PDF or PS file for visual analysis.
jpayne@69 21 It can also break the output into multiple files for easier viewing and
jpayne@69 22 printing.
jpayne@69 23
jpayne@69 24 MANDATORY:
jpayne@69 25 coords file The output of 'show-coords -rl[k]' or 'mgaps'
jpayne@69 26
jpayne@69 27 OPTIONS:
jpayne@69 28 UTR coords UTR coordinate file in GFF format
jpayne@69 29 CDS coords CDS coordinate file in GFF format
jpayne@69 30
jpayne@69 31 -d|maxdist Set the maximum base-pair distance between linked matches
jpayne@69 32 (default 50000)
jpayne@69 33 -f|format Set the output format to 'pdf', 'ps' or 'fig'
jpayne@69 34 (default 'fig')
jpayne@69 35 -h
jpayne@69 36 --help Display help information and exit
jpayne@69 37 -m|mag Set the magnification at which the figure is rendered,
jpayne@69 38 this is an option for fig2dev which is used to generate
jpayne@69 39 the PDF and PS files (default 1.0)
jpayne@69 40 -n|num Set the number of output files used to partition the
jpayne@69 41 output, this is to avoid generating files that are too
jpayne@69 42 large to display (default 10)
jpayne@69 43 -p|prefix Set the output file prefix
jpayne@69 44 (default "PROMER_graph or NUCMER_graph")
jpayne@69 45 -v
jpayne@69 46 --verbose Verbose logging of the processed files
jpayne@69 47 -V
jpayne@69 48 --version Display the version information and exit
jpayne@69 49 -x1 coord Set the lower coordinate bound of the display
jpayne@69 50 -x2 coord Set the upper coordinate bound of the display
jpayne@69 51 -g|ref If the input file is provided by 'mgaps', set the
jpayne@69 52 reference sequence ID (as it appears in the first column
jpayne@69 53 of the UTR/CDS coords file)
jpayne@69 54 -I Display the name of query sequences
jpayne@69 55 -Ir Display the name of reference genes
jpayne@69 56 ~;
jpayne@69 57
jpayne@69 58
jpayne@69 59 my $USAGE_INFO = q~
jpayne@69 60 USAGE: mapview [options] <coords file> [UTR coords] [CDS coords]
jpayne@69 61 ~;
jpayne@69 62
jpayne@69 63
jpayne@69 64 my @DEPEND_INFO =
jpayne@69 65 (
jpayne@69 66 "fig2dev",
jpayne@69 67 "$SCRIPT_DIR/Foundation.pm"
jpayne@69 68 );
jpayne@69 69
jpayne@69 70 my $err_gff = q~
jpayne@69 71 ERROR in the input files ! The reference seq ID can't be found in GFF files !
jpayne@69 72 The first column in the GFF file should be the ID of the reference seq.
jpayne@69 73 The alignments file should provide the same info in the column before the last one.
jpayne@69 74
jpayne@69 75 Here are some example records for the GFF file:
jpayne@69 76
jpayne@69 77 gnl|FlyBase|X Dmel3 initial-exon 2155 2413 . - . X_CG3038.1
jpayne@69 78 gnl|FlyBase|X Dmel3 last-exon 1182 2077 . - . X_CG3038.1
jpayne@69 79 ...
jpayne@69 80 The fields are :
jpayne@69 81 <seq_ID> <source> <exon type> <start> <end> <score> <strand> <frame> <gene_name>
jpayne@69 82 ~;
jpayne@69 83
jpayne@69 84 my $tigr;
jpayne@69 85 my $err;
jpayne@69 86
jpayne@69 87 my $alignm;
jpayne@69 88 my $futr;
jpayne@69 89 my $fcds;
jpayne@69 90
jpayne@69 91 #-- Initialize TIGR::Foundation
jpayne@69 92 $tigr = new TIGR::Foundation;
jpayne@69 93 if ( !defined ($tigr) ) {
jpayne@69 94 print (STDERR "ERROR: TIGR::Foundation could not be initialized");
jpayne@69 95 exit (1);
jpayne@69 96 }
jpayne@69 97
jpayne@69 98 #-- Set help and usage information
jpayne@69 99 $tigr->setHelpInfo ($HELP_INFO);
jpayne@69 100 $tigr->setUsageInfo ($USAGE_INFO);
jpayne@69 101 $tigr->setVersionInfo ($VERSION_INFO);
jpayne@69 102 $tigr->addDependInfo (@DEPEND_INFO);
jpayne@69 103
jpayne@69 104 $err = $tigr->TIGR_GetOptions
jpayne@69 105 (
jpayne@69 106 "d|maxdist=i" => \$match_dist,
jpayne@69 107 "f|format=s" => \$format,
jpayne@69 108 "m|mag=f" => \$magn,
jpayne@69 109 "n|num=i" => \$noOutfiles,
jpayne@69 110 "p|prefix=s" => \$outfilename,
jpayne@69 111 "x1=i" => \$x1win,
jpayne@69 112 "x2=i" => \$x2win,
jpayne@69 113 "v|verbose" => \$verb,
jpayne@69 114 "g|ref=s" => \$Mgaps,
jpayne@69 115 "I" => \$printIDconting,
jpayne@69 116 "Ir" => \$printIDgenes
jpayne@69 117 );
jpayne@69 118
jpayne@69 119 if ( $err == 0 || scalar(@ARGV) < 1 || scalar(@ARGV) > 3 ) {
jpayne@69 120 $tigr->printUsageInfo( );
jpayne@69 121 print (STDERR "Try '$0 -h' for more information.\n");
jpayne@69 122 exit (1);
jpayne@69 123 }
jpayne@69 124
jpayne@69 125 ($alignm,$futr,$fcds)=@ARGV;
jpayne@69 126
jpayne@69 127 if ((substr($x1win,0,1) eq '-') || (substr($x2win,0,1) eq '-')){
jpayne@69 128 print "ERROR2 : coords x1,x2 should be positive integers !!\n";
jpayne@69 129 $info=1;
jpayne@69 130 }
jpayne@69 131 if ($x1win > $x2win) {
jpayne@69 132 print "ERROR3 : wrong range coords : x1 >= x2 !!!\n";
jpayne@69 133 $info=1;
jpayne@69 134 }
jpayne@69 135 if ($Mgaps){ #formating the mgaps output to be similar with show-coords output
jpayne@69 136 format_mgaps();
jpayne@69 137 }
jpayne@69 138
jpayne@69 139 if (!$format){$format="fig";}
jpayne@69 140 if (($x1win) and ($x2win)){ $startfind=0; }
jpayne@69 141 else{ $startfind=1; }
jpayne@69 142 $endfind=0;
jpayne@69 143 if (!$noOutfiles) {$noOutfiles=10;}
jpayne@69 144 if (!$match_dist){$match_dist=50000;}
jpayne@69 145
jpayne@69 146 #```````init colors````````````````````````````
jpayne@69 147 $color{"2"}=27;#dark pink 5utr
jpayne@69 148 $color{"3"}=2;#green ex
jpayne@69 149 $color{"4"}=1;#blue 3utr
jpayne@69 150 #``````````````````````
jpayne@69 151 @linkcolors=(31,14,11);
jpayne@69 152 #``````````````````````````````````````````````
jpayne@69 153 open(F,$alignm);
jpayne@69 154 <F>;
jpayne@69 155 $prog=<F>; chomp($prog);
jpayne@69 156 <F>;
jpayne@69 157 $_=<F>;
jpayne@69 158 @a=(m/\s+(\||\[.+?\])/g) ;
jpayne@69 159 for ($ind=0;$ind<=$#a;$ind++){
jpayne@69 160 if ($a[$ind] eq "[S1]") {
jpayne@69 161 $ind_s1=$ind;
jpayne@69 162 }
jpayne@69 163 elsif ($a[$ind] eq "[E1]"){
jpayne@69 164 $ind_e1=$ind;
jpayne@69 165 }
jpayne@69 166 elsif ($a[$ind] eq "[% IDY]"){
jpayne@69 167 $ind_pidy=$ind;
jpayne@69 168 }
jpayne@69 169 elsif ($a[$ind] eq "[LEN R]"){
jpayne@69 170 $ind_lenchr=$ind;
jpayne@69 171 }
jpayne@69 172 # elsif ($a[$ind] eq "[TAGS]"){
jpayne@69 173 # $ind_tags=$ind+1; #there are two columns for this header col
jpayne@69 174 # }
jpayne@69 175 }
jpayne@69 176 <F>;$mref=-1;
jpayne@69 177 while (<F>){
jpayne@69 178 chomp;
jpayne@69 179 @a=split;
jpayne@69 180 if (!exists $hRefContigId{$a[-2]}) { # print $a[-2]."\n";
jpayne@69 181 $hRefContigId{$a[-2]}=$a[$ind_lenchr];
jpayne@69 182 $lenrefseqs+=$a[$ind_lenchr];
jpayne@69 183 $mref++;
jpayne@69 184 }
jpayne@69 185 }
jpayne@69 186 $nobpinfile=int($lenrefseqs/$noOutfiles);
jpayne@69 187
jpayne@69 188 close(F);
jpayne@69 189
jpayne@69 190 #``````````````````````
jpayne@69 191
jpayne@69 192
jpayne@69 193 if (@ARGV > 1) {
jpayne@69 194 get_cds_ends();
jpayne@69 195 get_utrcds_info();
jpayne@69 196 test_overlap();
jpayne@69 197 }
jpayne@69 198 elsif(!$mref) {
jpayne@69 199 $fileno=$noOutfiles;
jpayne@69 200 $startcoord=0;$endcoord=0;
jpayne@69 201 for ($i=0;$i<$fileno;$i++) {
jpayne@69 202 $endcoord=$startcoord+$nobpinfile-1;
jpayne@69 203 $endcoord=$lenrefseqs if ($endcoord>$lenrefseqs);
jpayne@69 204 $file[$i]="$startcoord $endcoord";
jpayne@69 205 $startcoord=$endcoord+1;
jpayne@69 206 }
jpayne@69 207 }
jpayne@69 208
jpayne@69 209
jpayne@69 210 $Yorig=3000;
jpayne@69 211 $YdistPID=2000;
jpayne@69 212 $yscale=$YdistPID/50;
jpayne@69 213 $Xscale=14.5;
jpayne@69 214 $gap=800;
jpayne@69 215 #$maxfiles = ($fileno < 10) ? $fileno : 10;
jpayne@69 216 #---------------------------------
jpayne@69 217
jpayne@69 218 if (!$mref){
jpayne@69 219 for($i=0; $i < $fileno; $i++) {
jpayne@69 220 $nrf=$i;
jpayne@69 221 set_output_fname();
jpayne@69 222 ($startcoord,$endcoord)=split(/\s+/,$file[$i]);
jpayne@69 223 open(O,">$procfile".$nrf.".fig");
jpayne@69 224 print_header();
jpayne@69 225 print $procfile.$nrf.".fig\t range : $startcoord\t$endcoord \n" if ($verb && ($format eq "fig"));
jpayne@69 226
jpayne@69 227 $xs=0;
jpayne@69 228 $xe=int(($endcoord-$startcoord+1)/$Xscale);
jpayne@69 229 #$xs=200;
jpayne@69 230 #$xe=$xs+int(($endcoord-$startcoord+1)/$Xscale);
jpayne@69 231 print_grid($xs,$xe,$startcoord,$endcoord);
jpayne@69 232
jpayne@69 233 $tmpIdQrycontig="";
jpayne@69 234 $linkcolor=$linkcolors[0];
jpayne@69 235
jpayne@69 236 open(F,$alignm);
jpayne@69 237 <F>;<F>;<F>;<F>;<F>;
jpayne@69 238 while(<F>) {
jpayne@69 239 chomp;
jpayne@69 240 @a=split;
jpayne@69 241 if($a[$ind_s1] > $endcoord) { last;}
jpayne@69 242 if($a[$ind_s1]<$startcoord && $a[$ind_e1] > $startcoord ) { $a[$ind_s1]=$startcoord;}
jpayne@69 243 if($a[$ind_s1] < $endcoord && $endcoord < $a[$ind_e1]) { $a[$ind_e1]=$endcoord;}
jpayne@69 244
jpayne@69 245 if($a[$ind_s1]>=$startcoord && $a[$ind_e1]<=$endcoord) {
jpayne@69 246 $x1=int(($a[$ind_s1]-$startcoord)/$Xscale);#
jpayne@69 247 $x2=int(($a[$ind_e1]-$startcoord)/$Xscale);#
jpayne@69 248 print_align($x1,$x2);
jpayne@69 249 }
jpayne@69 250 }
jpayne@69 251 close(F);
jpayne@69 252 %hQrycontig=();
jpayne@69 253 print_genes() if ($futr);
jpayne@69 254 print_legend();
jpayne@69 255 close(O);
jpayne@69 256 change_file_format() if ($format ne "fig");
jpayne@69 257 }
jpayne@69 258 }
jpayne@69 259 elsif($mref){#multiple ref seqs
jpayne@69 260 set_output_fname();
jpayne@69 261 $tmpIdQrycontig="";
jpayne@69 262 $linkcolor=$linkcolors[0];
jpayne@69 263 $startdrawX=0;
jpayne@69 264 $proclen=0;
jpayne@69 265 $first=1;
jpayne@69 266 $nrf=0;
jpayne@69 267 open(F,$alignm);
jpayne@69 268 <F>;<F>;<F>;<F>;<F>;
jpayne@69 269 while(<F>) {
jpayne@69 270 chomp;
jpayne@69 271 @a=split;
jpayne@69 272 if ($a[-2] ne $tmpcontig){
jpayne@69 273 %hQrycontig=();
jpayne@69 274 $tmpcontig=$a[-2];
jpayne@69 275 if ($first){
jpayne@69 276 $first=0;
jpayne@69 277 $nrf++;
jpayne@69 278 open(O,">$procfile".$nrf.".fig");
jpayne@69 279 print_header();
jpayne@69 280 print $procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
jpayne@69 281 $len=$hRefContigId{$a[-2]};
jpayne@69 282 }
jpayne@69 283 else {
jpayne@69 284 $startdrawX+=int($len/$Xscale)+$gap;
jpayne@69 285 $len=$hRefContigId{$a[-2]};
jpayne@69 286 if (($proclen+$len>$nobpinfile) and ($proclen != 0)){
jpayne@69 287 print_legend();
jpayne@69 288 close(O);
jpayne@69 289 change_file_format() if ($format ne "fig");
jpayne@69 290 $nrf++;
jpayne@69 291 open(O,">$procfile".$nrf.".fig");
jpayne@69 292 print_header();
jpayne@69 293 print "\n".$procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
jpayne@69 294 $proclen=0;
jpayne@69 295 $startdrawX=0;
jpayne@69 296 }
jpayne@69 297 }
jpayne@69 298 $xs=$startdrawX;
jpayne@69 299 $xe=$startdrawX+int($len/$Xscale);
jpayne@69 300 print_grid($xs,$xe,0,$len);
jpayne@69 301 #print genes from %geneinfo for contig
jpayne@69 302 print $a[-2]."\t".$hRefContigId{$a[-2]}."\n";
jpayne@69 303 print_genes_mr() if ($futr);
jpayne@69 304 $proclen+=$len;
jpayne@69 305 }#end if new contig
jpayne@69 306 $x1=$startdrawX+int($a[$ind_s1]/$Xscale);
jpayne@69 307 $x2=$startdrawX+int($a[$ind_e1]/$Xscale);
jpayne@69 308 print_align($x1,$x2);
jpayne@69 309 }
jpayne@69 310 print_legend();
jpayne@69 311 close(O);
jpayne@69 312 change_file_format() if ($format ne "fig");
jpayne@69 313
jpayne@69 314 close(F);
jpayne@69 315 }
jpayne@69 316 #*******************************************************************************
jpayne@69 317 #*******************************************************************************
jpayne@69 318 sub set_output_fname{
jpayne@69 319
jpayne@69 320 if (!$outfilename) {$procfile=$prog."_graph"."_";}
jpayne@69 321 else {$procfile=$outfilename."_";}
jpayne@69 322
jpayne@69 323 if ($format ne "fig"){
jpayne@69 324 $procfile="tmp".$procfile;
jpayne@69 325 }
jpayne@69 326 }
jpayne@69 327 #*********************************
jpayne@69 328 sub get_cds_ends{
jpayne@69 329 #3. print "create \%hcds_ends...\n";
jpayne@69 330 $testGffFormat=0;
jpayne@69 331 open(F,"<".$fcds);#|| die "can't open \" $fcds cds \" file !";
jpayne@69 332 while(<F>) {
jpayne@69 333 chomp;
jpayne@69 334 if($_) {
jpayne@69 335 @a=split;
jpayne@69 336 if (exists $hRefContigId{$a[0]}){#record if at least one of the ref id is the same in GFF and Align files
jpayne@69 337 $testGffFormat++;
jpayne@69 338 }
jpayne@69 339 $genename=$a[8];
jpayne@69 340 if ($genename ne $tmpname){
jpayne@69 341 if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
jpayne@69 342 elsif ($sign eq "-"){ $hcds_ends{$tmpname} = "$cds3 $cds5";}
jpayne@69 343 $tmpname=$genename;
jpayne@69 344 $sign=$a[6];
jpayne@69 345 }
jpayne@69 346 if($sign eq "-") {
jpayne@69 347 $temp=$a[3];
jpayne@69 348 $a[3]=$a[4];
jpayne@69 349 $a[4]=$temp;
jpayne@69 350 }
jpayne@69 351 if ($a[2] eq "single-exon"){
jpayne@69 352 $cds5=$a[3];
jpayne@69 353 $cds3=$a[4];
jpayne@69 354 }
jpayne@69 355 elsif ($a[2] eq "initial-exon"){
jpayne@69 356 $cds5=$a[3];
jpayne@69 357 }
jpayne@69 358 elsif ($a[2] eq "last-exon"){
jpayne@69 359 $cds3=$a[4];
jpayne@69 360 }
jpayne@69 361 }
jpayne@69 362 }
jpayne@69 363 if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
jpayne@69 364 elsif ($sign eq "-"){$hcds_ends{$tmpname} = "$cds3 $cds5";}
jpayne@69 365
jpayne@69 366 test_formatGFF();
jpayne@69 367
jpayne@69 368 #foreach $k ( keys %hcds_ends){
jpayne@69 369 # print "cds_ends: ".$k."\t"."\n";
jpayne@69 370 #} exit;
jpayne@69 371
jpayne@69 372 }
jpayne@69 373
jpayne@69 374 #*********************************
jpayne@69 375 sub test_formatGFF{
jpayne@69 376 if ($testGffFormat==0){
jpayne@69 377 print (STDERR "$err_gff \n");
jpayne@69 378 exit (1);
jpayne@69 379 }
jpayne@69 380
jpayne@69 381 }
jpayne@69 382
jpayne@69 383 #*********************************
jpayne@69 384 sub get_utrcds_info{
jpayne@69 385 #test for gene overlap $geneinfo{gene_name}->[0]=level,stock gene 5'3' utr ends,
jpayne@69 386 #determina %geneinfo{id gene}->5utr,ex,3utr
jpayne@69 387 #and @file
jpayne@69 388 # get_gene_ends();
jpayne@69 389 $testGffFormat=0;
jpayne@69 390 open(F,"<".$futr);# || die "can't open \" $futr utr \" file !";
jpayne@69 391 while(<F>) {
jpayne@69 392 chomp;
jpayne@69 393 if($_) {
jpayne@69 394 @a=split;
jpayne@69 395 #get gene ends-utr
jpayne@69 396 $genename=$a[8];
jpayne@69 397 if (exists $hRefContigId{$a[0]}){ #check if [align file(col before the last one)] = [GFF file(col 1)]
jpayne@69 398 $testGffFormat++;
jpayne@69 399 }
jpayne@69 400
jpayne@69 401 if ($genename ne $tmpname){
jpayne@69 402 if ($sign eq "+"){ $utr_ends{$tmpname} = "$utr5 $utr3";}
jpayne@69 403 elsif ($sign eq "-"){ $utr_ends{$tmpname} = "$utr3 $utr5";}
jpayne@69 404
jpayne@69 405 if ($tmpgene) {# for the distinct_utr_cds
jpayne@69 406 get_utrcds_ends() ;
jpayne@69 407 }
jpayne@69 408 $tmpgene="$a[3] $a[4]";#
jpayne@69 409
jpayne@69 410 $tmpname=$genename;
jpayne@69 411 $sign=$a[6];
jpayne@69 412 $hContig_genes{$a[0]}.=" ".$a[8]; # for multiple ref. seqs
jpayne@69 413 }
jpayne@69 414 else{# for the distinct_utr_cds
jpayne@69 415 if($sign eq "-") {
jpayne@69 416 $tmpgene="$a[3] $a[4];".$tmpgene;
jpayne@69 417 }
jpayne@69 418 else { $tmpgene.=";$a[3] $a[4]"; }
jpayne@69 419 }#
jpayne@69 420
jpayne@69 421 if($sign eq "-") {
jpayne@69 422 $temp=$a[3];
jpayne@69 423 $a[3]=$a[4];
jpayne@69 424 $a[4]=$temp;
jpayne@69 425 }
jpayne@69 426 if ($a[2] eq "single-exon"){
jpayne@69 427 $utr5=$a[3];
jpayne@69 428 $utr3=$a[4];
jpayne@69 429 }
jpayne@69 430 elsif ($a[2] eq "initial-exon"){
jpayne@69 431 $utr5=$a[3];
jpayne@69 432 }
jpayne@69 433 elsif ($a[2] eq "last-exon"){
jpayne@69 434 $utr3=$a[4];
jpayne@69 435 }
jpayne@69 436
jpayne@69 437 #init gene info (level)
jpayne@69 438 $geneinfo{$a[8]}->[0]=1 if (!exists $geneinfo{$a[8]});
jpayne@69 439
jpayne@69 440 }
jpayne@69 441 }
jpayne@69 442 # for the distinct_utr_cds
jpayne@69 443 get_utrcds_ends();
jpayne@69 444 %cds_ends=();#
jpayne@69 445
jpayne@69 446 if ($sign eq "+"){$utr_ends{$tmpname}="$utr5 $utr3"; }
jpayne@69 447 elsif ($sign eq "-"){$utr_ends{$tmpname}="$utr3 $utr5"; }
jpayne@69 448 $hContig_genes{$a[0]}.=" ".$a[8];
jpayne@69 449
jpayne@69 450 close(F);
jpayne@69 451 test_formatGFF();
jpayne@69 452
jpayne@69 453 }
jpayne@69 454
jpayne@69 455
jpayne@69 456 #****************************************************************
jpayne@69 457 sub get_utrcds_ends{
jpayne@69 458 $u5="";$ex="";$u3="";
jpayne@69 459
jpayne@69 460 if ($fcds eq $futr) {
jpayne@69 461 $ex=$tmpgene;
jpayne@69 462 }
jpayne@69 463 else {
jpayne@69 464 @ex=split(";",$tmpgene);
jpayne@69 465 @cds=split(" ",$hcds_ends{$tmpname});
jpayne@69 466
jpayne@69 467 for($i=0;$i<=$#ex;$i++){
jpayne@69 468 @coord=split(" ",$ex[$i]);
jpayne@69 469
jpayne@69 470 if ($cds[0]>$coord[0]){
jpayne@69 471 if ($cds[0]>$coord[1]){
jpayne@69 472 $u5.="$coord[0] $coord[1];";
jpayne@69 473 }
jpayne@69 474 else{
jpayne@69 475 $u5.= "$coord[0] "; $u5.=$cds[0]-1 .";" ; #?
jpayne@69 476 if ($cds[1]<$coord[1]){
jpayne@69 477 $ex.="$cds[0] $cds[1];";
jpayne@69 478 $u3.=$cds[1]+1 ." $coord[1];";
jpayne@69 479 }
jpayne@69 480 else{ $ex.="$cds[0] $coord[1];";}
jpayne@69 481 }
jpayne@69 482 }
jpayne@69 483 else {
jpayne@69 484 if ($cds[1]>$coord[0]){
jpayne@69 485 if ($cds[1]>$coord[1]){
jpayne@69 486 $ex.="$coord[0] $coord[1];";
jpayne@69 487 }
jpayne@69 488 else{
jpayne@69 489 $ex.="$coord[0] $cds[1];";
jpayne@69 490 $u3.=$cds[1]+1 ." $coord[1];";
jpayne@69 491 }
jpayne@69 492 }
jpayne@69 493 else { $u3.="$coord[0] $coord[1];";}
jpayne@69 494 }
jpayne@69 495 }
jpayne@69 496 chop($u5, $ex, $u3);
jpayne@69 497 }
jpayne@69 498 $geneinfo{$tmpname}->[1]=$sign;
jpayne@69 499 $geneinfo{$tmpname}->[2]=$u5;
jpayne@69 500 $geneinfo{$tmpname}->[3]=$ex;
jpayne@69 501 $geneinfo{$tmpname}->[4]=$u3;
jpayne@69 502 }
jpayne@69 503 #*********************************
jpayne@69 504 sub test_overlap{
jpayne@69 505
jpayne@69 506 if (!$mref){
jpayne@69 507 $fileno=0;###
jpayne@69 508 $endcoord=0;###
jpayne@69 509 }
jpayne@69 510 foreach $kcontgid (sort keys %hContig_genes){
jpayne@69 511 @allgenes=split(/\s+/,$hContig_genes{$kcontgid});
jpayne@69 512 for ($i=1;$i<=$#allgenes;$i++){
jpayne@69 513 @g1=split (" ", $utr_ends{$allgenes[$i]});
jpayne@69 514 $Utr5End{$allgenes[$i]}=$g1[0]; ###
jpayne@69 515
jpayne@69 516 for ($j=$i+1;$j<=$#allgenes;$j++){ #comparing with the rest of the genes
jpayne@69 517 @g2=split (" ", $utr_ends{$allgenes[$j]});
jpayne@69 518 #if the genes are overpaling and they have the same level ,the second gene is liflet to the next level
jpayne@69 519 if ( (($g2[0]>=$g1[0]) and ($g2[0]<=$g1[1])) or (($g2[1]>=$g1[0]) and ($g2[1]<=$g1[1])) or (($g1[0]>=$g2[0]) and ($g1[0]<=$g2[1])) ){
jpayne@69 520 if ($geneinfo{$allgenes[$i]}->[0] == $geneinfo{$allgenes[$j]}->[0]){
jpayne@69 521 $geneinfo{$allgenes[$j]}->[0]=$geneinfo{$allgenes[$i]}->[0] + 1 ;
jpayne@69 522 }
jpayne@69 523 }
jpayne@69 524 }
jpayne@69 525 SetTheRangeForEachFile() if ((!$endfind) and (!$mref)); ###
jpayne@69 526 }
jpayne@69 527 }
jpayne@69 528 $file[$fileno++]="$startcoord $endcoord" if (!$mref);###
jpayne@69 529 %utr_ends=();###
jpayne@69 530
jpayne@69 531 }
jpayne@69 532
jpayne@69 533 #**************************************
jpayne@69 534 sub SetTheRangeForEachFile{
jpayne@69 535 $currstart=$g1[0];
jpayne@69 536 $currend=$g1[1];
jpayne@69 537 #---test range ends intersection
jpayne@69 538 if (!$startfind) {
jpayne@69 539 if (($x1win <= $currstart) || ($x1win <= $currend)){
jpayne@69 540 $currstart = $x1win;
jpayne@69 541 $startfind = 1;
jpayne@69 542 }
jpayne@69 543 }
jpayne@69 544 if ( $startfind && $x1win && $x2win){
jpayne@69 545 if (($x2win <= $currstart) || ($x2win <= $currend) ){
jpayne@69 546 $currend = $x2win;
jpayne@69 547 $endfind = 1;
jpayne@69 548 }
jpayne@69 549 }
jpayne@69 550 #--------------------
jpayne@69 551 if ($startfind) {
jpayne@69 552 if(!$endcoord) {
jpayne@69 553 #$startcoord=0;
jpayne@69 554 $startcoord = $x1win ? $x1win : 0;
jpayne@69 555 $endcoord=$currend;
jpayne@69 556 }
jpayne@69 557 else {
jpayne@69 558 if($currend > $endcoord) {
jpayne@69 559 if($currend-$startcoord < $nobpinfile) {
jpayne@69 560 $endcoord=$currend;
jpayne@69 561 }
jpayne@69 562 else {
jpayne@69 563 $file[$fileno++]="$startcoord $endcoord";
jpayne@69 564 $startcoord=$endcoord+1;
jpayne@69 565 $endcoord=$currend;
jpayne@69 566 }
jpayne@69 567 }
jpayne@69 568 }
jpayne@69 569 }#if startfind
jpayne@69 570 }
jpayne@69 571 #*********************************
jpayne@69 572 sub print_header{
jpayne@69 573 print O "#FIG 3.2\nLandscape\nCenter\nInches\nLetter \n100.00\nMultiple\n-2\n1200 2\n";
jpayne@69 574 }
jpayne@69 575 #*********************************
jpayne@69 576 sub print_align{
jpayne@69 577 my ($x1,$x2)=@_;
jpayne@69 578
jpayne@69 579 $a[$ind_pidy]=50 if ($a[$ind_pidy]<50);
jpayne@69 580 $a[$ind_pidy]=int($a[$ind_pidy]);
jpayne@69 581 if ($Mgaps){
jpayne@69 582 $y=$Yorig+250+$YdistPID-$yscale*2;
jpayne@69 583 if($a[$#a]=~/rev$/){$y-=25*$yscale;}
jpayne@69 584 }
jpayne@69 585 else{
jpayne@69 586 $y=$Yorig+250+$YdistPID-$yscale*($a[$ind_pidy]-50);
jpayne@69 587 }
jpayne@69 588 if($x1==$x2) { $x2++;}
jpayne@69 589 #draw the line between matches. is dif color for each contig
jpayne@69 590 if ($a[$#a] eq $tmpIdQrycontig) {
jpayne@69 591 print_connections($hQrycontig{$tmpIdQrycontig}->[1], $x1,$y);
jpayne@69 592 }
jpayne@69 593 else{#new contig
jpayne@69 594 #remember the start coord for printing the id alignments
jpayne@69 595 if ($printIDconting){
jpayne@69 596 if ( $x1 - $XlastPrint > 400 ) {
jpayne@69 597 print O "4 0 0 5 0 0 8 0.0000 4 90 270 ";
jpayne@69 598 printf O ("\t%.0f %.0f ",$x1,$y);
jpayne@69 599 print O $a[$#a], "\\001\n";
jpayne@69 600 $XlastPrint=$x1; $YlastPrint=$y;
jpayne@69 601 }
jpayne@69 602 }
jpayne@69 603 #
jpayne@69 604 ##if it was seen before,but interrupted by another contig
jpayne@69 605 if ((exists $hQrycontig{$a[$#a]}) and ($a[$ind_s1]-$hQrycontig{$a[$#a]}->[2] < $match_dist )) {
jpayne@69 606 $linkcolor=$hQrycontig{$a[$#a]}->[0];
jpayne@69 607 print_connections($hQrycontig{$a[$#a]}->[1], $x1,$y);
jpayne@69 608 }
jpayne@69 609 else{
jpayne@69 610 #change the link color
jpayne@69 611 unshift(@linkcolors, pop(@linkcolors));
jpayne@69 612 $linkcolor=$linkcolors[0];
jpayne@69 613 $hQrycontig{$a[$#a]}->[0] = $linkcolor;
jpayne@69 614 }
jpayne@69 615 }
jpayne@69 616 $tmpIdQrycontig=$a[$#a];
jpayne@69 617 $hQrycontig{$tmpIdQrycontig}->[1]="$x2 $y";
jpayne@69 618 $hQrycontig{$tmpIdQrycontig}->[2]=$a[$ind_e1];
jpayne@69 619
jpayne@69 620 #the matches line is red
jpayne@69 621 print O "2 1 0 2 4 0 40 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 622 print O "\t$x1 $y $x2 $y\n";
jpayne@69 623 print O "2 1 0 5 20 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 624 printf O ("\t $x1 %.0f $x2 %.0f\n",$Yorig+150 , $Yorig+150);
jpayne@69 625 }
jpayne@69 626 #*********************************
jpayne@69 627 sub print_connections{
jpayne@69 628 my ($setc1,$setx2,$sety2)=@_; # print "\nparam connect @_\n";
jpayne@69 629 my ($setx1,$sety1) =split(/ /,$setc1);
jpayne@69 630
jpayne@69 631 if ($Mgaps){
jpayne@69 632 if ($setx1>$setx2){
jpayne@69 633 $tmpsetx1=$setx1;
jpayne@69 634 $setx1=$setx2;
jpayne@69 635 $setx2=$tmpsetx1;
jpayne@69 636 }
jpayne@69 637 $distx1x2=int(($setx2-$setx1)/2);
jpayne@69 638 $xcenter= $setx1+$distx1x2;
jpayne@69 639
jpayne@69 640 if ($setx2-$setx1>4000) { #if the distance is to big then heigh of the arc is set to 20
jpayne@69 641 $heightArcUp = 20*$yscale;
jpayne@69 642 $yoffcenter=int((($distx1x2**2)+$heightArcUp**2)*(1/(2*$heightArcUp)))-$heightArcUp ;
jpayne@69 643 }
jpayne@69 644 else{
jpayne@69 645 $heightArcUp = int (0.447 * $distx1x2);#sectorul de cerc la 1/3 din raza.
jpayne@69 646 $yoffcenter=2*$heightArcUp;
jpayne@69 647 }
jpayne@69 648 print O "5 1 0 2 $linkcolor 0 50 0 -1 0.000 0 0 0 0 ";
jpayne@69 649 printf O ("%.3f %.3f $setx1 $sety1 $xcenter %.0f $setx2 $sety1 \n",$xcenter,$sety1+$yoffcenter,$sety1-$heightArcUp);
jpayne@69 650 }
jpayne@69 651 else{
jpayne@69 652 print O "2 1 0 1 $linkcolor 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 653 print O "\t".$setc1." $setx2 $sety2\n";
jpayne@69 654
jpayne@69 655 }
jpayne@69 656 }
jpayne@69 657 #*********************************
jpayne@69 658 sub print_genes_mr{
jpayne@69 659 %hLastOnLevel=();
jpayne@69 660 @g=split(/\s+/,$hContig_genes{$a[-2]});
jpayne@69 661 for ($i=1;$i<=$#g;$i++){
jpayne@69 662 $kname=$g[$i];
jpayne@69 663 $tmpx2=0;
jpayne@69 664 $y=$Yorig-100-200*$geneinfo{$kname}->[0];
jpayne@69 665 #print id gena
jpayne@69 666 $xid =$startdrawX+ int($Utr5End{$kname}/$Xscale);
jpayne@69 667 print_Id_genes() if ($printIDgenes);
jpayne@69 668 #
jpayne@69 669 for ($l=2;$l<5;$l++){
jpayne@69 670 @c=split(";",$geneinfo{$kname}->[$l] ) ;
jpayne@69 671 if (@c){ #print "de unde?@c\n" if (($l==2) or ($l==4));
jpayne@69 672 $colorend=$color{$l};
jpayne@69 673 if ($geneinfo{$kname}->[1] eq "-"){
jpayne@69 674 if ($l==2) { $colorend=$color{"4"}; }
jpayne@69 675 elsif ($l==4) {$colorend=$color{"2"};}
jpayne@69 676 }
jpayne@69 677 for ($k=0;$k<=$#c;$k++){
jpayne@69 678 @e=split (" ",$c[$k]);
jpayne@69 679 $x1=$startdrawX+int($e[0]/$Xscale);
jpayne@69 680 $x2=$startdrawX+int($e[1]/$Xscale);
jpayne@69 681 if($x1==$x2) { $x2++;}
jpayne@69 682 if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron
jpayne@69 683 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 684 print O "\t $tmpx2 $y $x1 $y\n";
jpayne@69 685 }
jpayne@69 686 $tmpx2=$x2;
jpayne@69 687 print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#
jpayne@69 688 print O "\t $x1 $y $x2 $y\n";
jpayne@69 689 }
jpayne@69 690 }
jpayne@69 691 }
jpayne@69 692 #delete ($geneinfo{$kname});
jpayne@69 693 }
jpayne@69 694 }
jpayne@69 695
jpayne@69 696 #*********************************
jpayne@69 697 sub print_Id_genes{
jpayne@69 698 if (exists $hLastOnLevel{$geneinfo{$kname}->[0]}){
jpayne@69 699 $lastOnlevel=$hLastOnLevel{$geneinfo{$kname}->[0]};
jpayne@69 700 $printidspace = int(($Utr5End{$kname}-$Utr5End{$lastOnlevel})/$Xscale);
jpayne@69 701 }
jpayne@69 702 else{$printidspace=601;}
jpayne@69 703 if ($printidspace > 600){
jpayne@69 704 #print contig name#
jpayne@69 705 print O "4 0 0 5 0 0 6 0.0000 4 90 270 ";
jpayne@69 706 printf O ("\t%.0f %.0f ",$xid,$y-50);
jpayne@69 707 print O $kname, "\\001\n";
jpayne@69 708 $hLastOnLevel{$geneinfo{$kname}->[0]}=$kname;
jpayne@69 709 }
jpayne@69 710 }
jpayne@69 711 #*********************************
jpayne@69 712 sub print_genes{
jpayne@69 713 %hLastOnLevel=();
jpayne@69 714 foreach $kname (sort {$Utr5End{$a} <=> $Utr5End{$b}} keys %Utr5End){
jpayne@69 715 $tmpx2=0;
jpayne@69 716 if ($Utr5End{$kname}>$startcoord && $Utr5End{$kname}<$endcoord){
jpayne@69 717 $y=$Yorig-100-200*$geneinfo{$kname}->[0];
jpayne@69 718 #print id gena
jpayne@69 719 $xid = int(($Utr5End{$kname}-$startcoord)/$Xscale);
jpayne@69 720 print_Id_genes() if ($printIDgenes);
jpayne@69 721 #
jpayne@69 722 for ($l=2;$l<5;$l++){
jpayne@69 723 @c=split(";",$geneinfo{$kname}->[$l] );
jpayne@69 724 $colorend=$color{$l};
jpayne@69 725 if ($geneinfo{$kname}->[1] eq "-"){
jpayne@69 726 if ($l==2) { $colorend=$color{"4"}; }
jpayne@69 727 elsif ($l==4) {$colorend=$color{"2"};}
jpayne@69 728 }
jpayne@69 729
jpayne@69 730 for ($k=0;$k<=$#c;$k++){
jpayne@69 731 @e=split (" ",$c[$k]);
jpayne@69 732 $x1=int(($e[0]-$startcoord)/$Xscale);
jpayne@69 733 $x2=int(($e[1]-$startcoord)/$Xscale);
jpayne@69 734 if($x1==$x2) { $x2++;}
jpayne@69 735 if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron
jpayne@69 736 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 737 print O "\t $tmpx2 $y $x1 $y\n";
jpayne@69 738 }
jpayne@69 739 $tmpx2=$x2;
jpayne@69 740 print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#
jpayne@69 741 print O "\t $x1 $y $x2 $y\n";
jpayne@69 742 }
jpayne@69 743 }
jpayne@69 744 }#endif "is in interval"
jpayne@69 745 }
jpayne@69 746 # delete ($Utr5End{$kname});
jpayne@69 747 # delete ($geneinfo{$kname});
jpayne@69 748
jpayne@69 749 }
jpayne@69 750 #*********************************
jpayne@69 751 sub print_grid{
jpayne@69 752
jpayne@69 753 my ($xs,$xe,$startcontg,$endcontg)=@_;
jpayne@69 754
jpayne@69 755 $XlastPrint=0;$YlastPrint=0;
jpayne@69 756
jpayne@69 757 #print ref contig
jpayne@69 758 print O "2 1 0 10 11 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 759 printf O ("\t $xs %.0f $xe %.0f\n",$Yorig+50,$Yorig+50);
jpayne@69 760 #print orizontal axes for PId (100%,75%,50%)
jpayne@69 761 for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
jpayne@69 762 print O "2 1 2 1 0 7 60 0 -1 4.000 0 0 -1 0 0 2\n";
jpayne@69 763 printf O ("\t$xs %.0f $xe %.0f\n",$Yorig+250+$YdistPID-($percent_id - 50) * $yscale,$Yorig+250+$YdistPID-($percent_id - 50) * $yscale);
jpayne@69 764 #last if ($Mgaps);
jpayne@69 765 }
jpayne@69 766 #print orizontal markers for bp.
jpayne@69 767 $increment=10000/$Xscale;
jpayne@69 768 $no_incr=0;
jpayne@69 769 $xmark = $xs ;$xmark_float= $xs;
jpayne@69 770 while ($xmark < $xe){
jpayne@69 771 print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 772 printf O ("\t$xmark %.0f $xmark %.0f\n",$Yorig+$YdistPID+250,$Yorig+$YdistPID+300);
jpayne@69 773 #bp scale
jpayne@69 774 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
jpayne@69 775 printf O ("\t %.0f %.0f",$xmark,$Yorig+$YdistPID+400);
jpayne@69 776 print O " $no_incr"."k", "\\001\n";
jpayne@69 777 $no_incr += 10;
jpayne@69 778 $xmark_float += $increment;
jpayne@69 779 $xmark=int($xmark_float);
jpayne@69 780 }
jpayne@69 781
jpayne@69 782 #coord for chr ends
jpayne@69 783 print O "4 0 0 50 0 0 14 0.0000 4 135 450 $xs $Yorig $startcontg\\001\n";
jpayne@69 784 printf O ("4 0 0 50 0 0 14 0.0000 4 135 810 %.0f $Yorig $endcontg\\001\n",$xe-length($xe)*125);
jpayne@69 785
jpayne@69 786 #print contig name#
jpayne@69 787 if ($mref){
jpayne@69 788 print O "4 0 0 5 0 0 8 0.0000 4 135 405 ";
jpayne@69 789 printf O ("\t%.0f %.0f ",$xs,$Yorig+70);
jpayne@69 790 print O $a[-2], "\\001\n";
jpayne@69 791 }
jpayne@69 792 #print vertical markers for PId scale
jpayne@69 793 if (!$Mgaps){
jpayne@69 794 for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
jpayne@69 795 #left
jpayne@69 796 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
jpayne@69 797 printf O ("\t%.0f %.0f", $xs-200,$Yorig+$YdistPID+250-($percent_id - 50) * $yscale + 20);
jpayne@69 798 print O " $percent_id%", "\\001\n";
jpayne@69 799 #right
jpayne@69 800 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
jpayne@69 801 printf O ("\t%.0f %.0f",$xe+20, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale+20 );
jpayne@69 802 print O " $percent_id%", "\\001\n";
jpayne@69 803
jpayne@69 804 # print the tick mark
jpayne@69 805 #left
jpayne@69 806 # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 807 # printf O ("\t%.0f %.0f $xs %.0f\n",$xs-50,
jpayne@69 808 # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
jpayne@69 809 #right
jpayne@69 810 # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 811 # printf O ("\t$xe %.0f %.0f %.0f\n",
jpayne@69 812 # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale,$xe+50, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
jpayne@69 813 }
jpayne@69 814 }
jpayne@69 815 else{ # for Mgaps
jpayne@69 816 print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
jpayne@69 817 printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 5 * $yscale + 10);
jpayne@69 818 print O " + qry strand", "\\001\n";
jpayne@69 819
jpayne@69 820 print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
jpayne@69 821 printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 30 * $yscale + 10);
jpayne@69 822 print O " - qry strand", "\\001\n";
jpayne@69 823 }
jpayne@69 824
jpayne@69 825 }
jpayne@69 826 #*********************************
jpayne@69 827 sub print_legend{
jpayne@69 828
jpayne@69 829 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
jpayne@69 830 printf O ("\t%.0f %.0f ",100,$Yorig+$YdistPID+1100);
jpayne@69 831 print O " Legend ", "\\001\n";
jpayne@69 832 $y= $Yorig+$YdistPID+1300; #utr
jpayne@69 833 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
jpayne@69 834 print O "\t 70 $y 99 $y\n";
jpayne@69 835 print O "2 1 0 5 27 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 836 print O "\t 100 $y 200 $y\n";
jpayne@69 837 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
jpayne@69 838 print O "\t 200 $y 230 $y\n";
jpayne@69 839 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
jpayne@69 840 printf O ("\t%.0f %.0f ",300,$y+30);
jpayne@69 841 print O " 5' utr ", "\\001\n";
jpayne@69 842 $y += 150 ;#cds
jpayne@69 843 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
jpayne@69 844 print O "\t 70 $y 99 $y\n";
jpayne@69 845 print O "2 1 0 5 2 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 846 print O "\t 100 $y 200 $y\n";
jpayne@69 847 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
jpayne@69 848 print O "\t 200 $y 230 $y\n";
jpayne@69 849 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
jpayne@69 850 printf O ("\t%.0f %.0f ",300,$y+30);
jpayne@69 851 print O " cds ", "\\001\n";
jpayne@69 852 $y += 150; #3' utr
jpayne@69 853 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
jpayne@69 854 print O "\t 70 $y 99 $y\n";
jpayne@69 855 print O "2 1 0 5 1 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 856 print O "\t 100 $y 200 $y\n";
jpayne@69 857 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
jpayne@69 858 print O "\t 200 $y 230 $y\n";
jpayne@69 859 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
jpayne@69 860 printf O ("\t%.0f %.0f ",300,$y+30);
jpayne@69 861 print O " 3' utr ", "\\001\n";
jpayne@69 862 $y += 150; # match
jpayne@69 863 print O "2 1 0 2 4 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
jpayne@69 864 print O "\t100 $y 200 $y\n";
jpayne@69 865 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
jpayne@69 866 printf O ("\t%.0f %.0f ",300,$y+30);
jpayne@69 867 print O " match found by $prog ", "\\001\n";
jpayne@69 868 }
jpayne@69 869 #*********************************
jpayne@69 870 sub change_file_format{
jpayne@69 871
jpayne@69 872 $procfile =~ /^tmp(.+)/;
jpayne@69 873 $outfile = $1.$nrf.".".$format;
jpayne@69 874 $comand = "fig2dev -L $format -x 100";
jpayne@69 875 $comand .= " -m $magn" if ($magn);
jpayne@69 876 $comand .= " -M ".$procfile.$nrf.".fig ".$outfile;
jpayne@69 877
jpayne@69 878 $status =system($comand);
jpayne@69 879 print E "ERROR 1: fig2dev !\n" unless $status == 0;
jpayne@69 880
jpayne@69 881 $status =system("rm $procfile".$nrf.".fig");
jpayne@69 882
jpayne@69 883 if ($verb){
jpayne@69 884 print "$outfile";
jpayne@69 885 if ($mref){
jpayne@69 886 print "\n" ;
jpayne@69 887 }
jpayne@69 888 else {
jpayne@69 889 print "\t range : $startcoord\t$endcoord \n" ;
jpayne@69 890 }
jpayne@69 891 }
jpayne@69 892 }
jpayne@69 893 #*********************************
jpayne@69 894 sub format_mgaps{
jpayne@69 895 $tmpfile="tmpmgaps";
jpayne@69 896 $tmpfile2=$alignm."coords" ;
jpayne@69 897 get_ref_len(); #print $maxlenref."\n";
jpayne@69 898 open(M,">".$tmpfile2) || die "can't open \" $tmpfile2 \" file !";
jpayne@69 899 print M "$alignm\n";
jpayne@69 900 print M "Mgaps\n\n";
jpayne@69 901 print M " [S1] [E1] | [S2] [E2] | [LEN 1] [LEN 2] | [% IDY] | [LEN R] [LEN Q] | [COV R] [COV Q] | [TAGS]\n";
jpayne@69 902 print M "===============================================================================================================================\n";
jpayne@69 903
jpayne@69 904 open(T,">".$tmpfile) || die "can't open \" $tmpfile \" file !";
jpayne@69 905
jpayne@69 906 open(A,"<".$alignm) || die "can't open \" $alignm \" file !";
jpayne@69 907
jpayne@69 908
jpayne@69 909 while(<A>) {
jpayne@69 910 chomp;
jpayne@69 911 @a=split;
jpayne@69 912 if ($a[0] =~ /^>/){
jpayne@69 913 $nr_cluster=1;
jpayne@69 914 $idquery=$a[1];
jpayne@69 915 if ($a[2] eq "Reverse"){$idquery .= "_rev";}
jpayne@69 916 }
jpayne@69 917 #elsif ($a[0] eq "#") {$nr_cluster++;}
jpayne@69 918 elsif($a[0] ne "#"){
jpayne@69 919 $e1=$a[0]+$a[2];
jpayne@69 920 print T $a[0]."\t".$e1."\t"."|";
jpayne@69 921 print T "\t-\t-\t|";
jpayne@69 922 print T "\t-\t-\t|";
jpayne@69 923 print T "\t-\t|"; #pid
jpayne@69 924 print T "\t$maxlenref\t-\t|";#len seqs
jpayne@69 925 # print "\t-\t-\t|";
jpayne@69 926 # print "\t-\t-\t|";
jpayne@69 927 # print T " $Mgaps\t$idquery.$nr_cluster\n";
jpayne@69 928 print T " $Mgaps\t$idquery\n";
jpayne@69 929 }
jpayne@69 930
jpayne@69 931 }
jpayne@69 932 close(A);
jpayne@69 933 close(T);
jpayne@69 934 $command="sort -n -k 1 $tmpfile >> $tmpfile2";
jpayne@69 935 $status =system($command);
jpayne@69 936 system("rm $tmpfile");
jpayne@69 937 close(M);
jpayne@69 938 $alignm=$tmpfile2;
jpayne@69 939 print STDERR "ERROR 1: can't sort $tmpfile \n" unless $status == 0;
jpayne@69 940 print STDERR "\n**************************************** \n";
jpayne@69 941 print STDERR "New input file created : $alignm\n";
jpayne@69 942 print STDERR "**************************************** \n\n";
jpayne@69 943
jpayne@69 944 }
jpayne@69 945 #*****************************
jpayne@69 946 sub get_ref_len{
jpayne@69 947 $firstrow=1;
jpayne@69 948 open(A,"<".$alignm) || die "can't open \" $alignm \" file !";
jpayne@69 949 while(<A>) {
jpayne@69 950 chomp;
jpayne@69 951 @a=split;
jpayne@69 952 if ($firstrow) {
jpayne@69 953 $firstrow=0;
jpayne@69 954 if ($a[0] !~ /^>/){
jpayne@69 955 print "\nWrong file format for MGAPS file : $alignm ! \n";
jpayne@69 956 exit;
jpayne@69 957 }
jpayne@69 958 }
jpayne@69 959
jpayne@69 960 if ($a[0] =~ /^>/){ next; }
jpayne@69 961 elsif($a[0] ne "#"){
jpayne@69 962 $e1=$a[0]+$a[2];
jpayne@69 963 $maxlenref=($maxlenref < $e1 ? $e1 : $maxlenref);
jpayne@69 964 }
jpayne@69 965 }
jpayne@69 966 close(A);
jpayne@69 967 }