jpayne@69
|
1 #!/usr/bin/env perl
|
jpayne@69
|
2
|
jpayne@69
|
3 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
|
jpayne@69
|
4 use Foundation;
|
jpayne@69
|
5
|
jpayne@69
|
6 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
|
jpayne@69
|
7
|
jpayne@69
|
8
|
jpayne@69
|
9 my $VERSION_INFO = q~
|
jpayne@69
|
10 mapview version 1.01
|
jpayne@69
|
11 ~;
|
jpayne@69
|
12
|
jpayne@69
|
13
|
jpayne@69
|
14 my $HELP_INFO = q~
|
jpayne@69
|
15 USAGE: mapview [options] <coords file> [UTR coords] [CDS coords]
|
jpayne@69
|
16
|
jpayne@69
|
17 DESCRIPTION:
|
jpayne@69
|
18 mapview is a utility program for displaying sequence alignments as
|
jpayne@69
|
19 provided by MUMmer, NUCmer, PROmer or Mgaps. mapview takes the output of
|
jpayne@69
|
20 show-coords and converts it to a FIG, PDF or PS file for visual analysis.
|
jpayne@69
|
21 It can also break the output into multiple files for easier viewing and
|
jpayne@69
|
22 printing.
|
jpayne@69
|
23
|
jpayne@69
|
24 MANDATORY:
|
jpayne@69
|
25 coords file The output of 'show-coords -rl[k]' or 'mgaps'
|
jpayne@69
|
26
|
jpayne@69
|
27 OPTIONS:
|
jpayne@69
|
28 UTR coords UTR coordinate file in GFF format
|
jpayne@69
|
29 CDS coords CDS coordinate file in GFF format
|
jpayne@69
|
30
|
jpayne@69
|
31 -d|maxdist Set the maximum base-pair distance between linked matches
|
jpayne@69
|
32 (default 50000)
|
jpayne@69
|
33 -f|format Set the output format to 'pdf', 'ps' or 'fig'
|
jpayne@69
|
34 (default 'fig')
|
jpayne@69
|
35 -h
|
jpayne@69
|
36 --help Display help information and exit
|
jpayne@69
|
37 -m|mag Set the magnification at which the figure is rendered,
|
jpayne@69
|
38 this is an option for fig2dev which is used to generate
|
jpayne@69
|
39 the PDF and PS files (default 1.0)
|
jpayne@69
|
40 -n|num Set the number of output files used to partition the
|
jpayne@69
|
41 output, this is to avoid generating files that are too
|
jpayne@69
|
42 large to display (default 10)
|
jpayne@69
|
43 -p|prefix Set the output file prefix
|
jpayne@69
|
44 (default "PROMER_graph or NUCMER_graph")
|
jpayne@69
|
45 -v
|
jpayne@69
|
46 --verbose Verbose logging of the processed files
|
jpayne@69
|
47 -V
|
jpayne@69
|
48 --version Display the version information and exit
|
jpayne@69
|
49 -x1 coord Set the lower coordinate bound of the display
|
jpayne@69
|
50 -x2 coord Set the upper coordinate bound of the display
|
jpayne@69
|
51 -g|ref If the input file is provided by 'mgaps', set the
|
jpayne@69
|
52 reference sequence ID (as it appears in the first column
|
jpayne@69
|
53 of the UTR/CDS coords file)
|
jpayne@69
|
54 -I Display the name of query sequences
|
jpayne@69
|
55 -Ir Display the name of reference genes
|
jpayne@69
|
56 ~;
|
jpayne@69
|
57
|
jpayne@69
|
58
|
jpayne@69
|
59 my $USAGE_INFO = q~
|
jpayne@69
|
60 USAGE: mapview [options] <coords file> [UTR coords] [CDS coords]
|
jpayne@69
|
61 ~;
|
jpayne@69
|
62
|
jpayne@69
|
63
|
jpayne@69
|
64 my @DEPEND_INFO =
|
jpayne@69
|
65 (
|
jpayne@69
|
66 "fig2dev",
|
jpayne@69
|
67 "$SCRIPT_DIR/Foundation.pm"
|
jpayne@69
|
68 );
|
jpayne@69
|
69
|
jpayne@69
|
70 my $err_gff = q~
|
jpayne@69
|
71 ERROR in the input files ! The reference seq ID can't be found in GFF files !
|
jpayne@69
|
72 The first column in the GFF file should be the ID of the reference seq.
|
jpayne@69
|
73 The alignments file should provide the same info in the column before the last one.
|
jpayne@69
|
74
|
jpayne@69
|
75 Here are some example records for the GFF file:
|
jpayne@69
|
76
|
jpayne@69
|
77 gnl|FlyBase|X Dmel3 initial-exon 2155 2413 . - . X_CG3038.1
|
jpayne@69
|
78 gnl|FlyBase|X Dmel3 last-exon 1182 2077 . - . X_CG3038.1
|
jpayne@69
|
79 ...
|
jpayne@69
|
80 The fields are :
|
jpayne@69
|
81 <seq_ID> <source> <exon type> <start> <end> <score> <strand> <frame> <gene_name>
|
jpayne@69
|
82 ~;
|
jpayne@69
|
83
|
jpayne@69
|
84 my $tigr;
|
jpayne@69
|
85 my $err;
|
jpayne@69
|
86
|
jpayne@69
|
87 my $alignm;
|
jpayne@69
|
88 my $futr;
|
jpayne@69
|
89 my $fcds;
|
jpayne@69
|
90
|
jpayne@69
|
91 #-- Initialize TIGR::Foundation
|
jpayne@69
|
92 $tigr = new TIGR::Foundation;
|
jpayne@69
|
93 if ( !defined ($tigr) ) {
|
jpayne@69
|
94 print (STDERR "ERROR: TIGR::Foundation could not be initialized");
|
jpayne@69
|
95 exit (1);
|
jpayne@69
|
96 }
|
jpayne@69
|
97
|
jpayne@69
|
98 #-- Set help and usage information
|
jpayne@69
|
99 $tigr->setHelpInfo ($HELP_INFO);
|
jpayne@69
|
100 $tigr->setUsageInfo ($USAGE_INFO);
|
jpayne@69
|
101 $tigr->setVersionInfo ($VERSION_INFO);
|
jpayne@69
|
102 $tigr->addDependInfo (@DEPEND_INFO);
|
jpayne@69
|
103
|
jpayne@69
|
104 $err = $tigr->TIGR_GetOptions
|
jpayne@69
|
105 (
|
jpayne@69
|
106 "d|maxdist=i" => \$match_dist,
|
jpayne@69
|
107 "f|format=s" => \$format,
|
jpayne@69
|
108 "m|mag=f" => \$magn,
|
jpayne@69
|
109 "n|num=i" => \$noOutfiles,
|
jpayne@69
|
110 "p|prefix=s" => \$outfilename,
|
jpayne@69
|
111 "x1=i" => \$x1win,
|
jpayne@69
|
112 "x2=i" => \$x2win,
|
jpayne@69
|
113 "v|verbose" => \$verb,
|
jpayne@69
|
114 "g|ref=s" => \$Mgaps,
|
jpayne@69
|
115 "I" => \$printIDconting,
|
jpayne@69
|
116 "Ir" => \$printIDgenes
|
jpayne@69
|
117 );
|
jpayne@69
|
118
|
jpayne@69
|
119 if ( $err == 0 || scalar(@ARGV) < 1 || scalar(@ARGV) > 3 ) {
|
jpayne@69
|
120 $tigr->printUsageInfo( );
|
jpayne@69
|
121 print (STDERR "Try '$0 -h' for more information.\n");
|
jpayne@69
|
122 exit (1);
|
jpayne@69
|
123 }
|
jpayne@69
|
124
|
jpayne@69
|
125 ($alignm,$futr,$fcds)=@ARGV;
|
jpayne@69
|
126
|
jpayne@69
|
127 if ((substr($x1win,0,1) eq '-') || (substr($x2win,0,1) eq '-')){
|
jpayne@69
|
128 print "ERROR2 : coords x1,x2 should be positive integers !!\n";
|
jpayne@69
|
129 $info=1;
|
jpayne@69
|
130 }
|
jpayne@69
|
131 if ($x1win > $x2win) {
|
jpayne@69
|
132 print "ERROR3 : wrong range coords : x1 >= x2 !!!\n";
|
jpayne@69
|
133 $info=1;
|
jpayne@69
|
134 }
|
jpayne@69
|
135 if ($Mgaps){ #formating the mgaps output to be similar with show-coords output
|
jpayne@69
|
136 format_mgaps();
|
jpayne@69
|
137 }
|
jpayne@69
|
138
|
jpayne@69
|
139 if (!$format){$format="fig";}
|
jpayne@69
|
140 if (($x1win) and ($x2win)){ $startfind=0; }
|
jpayne@69
|
141 else{ $startfind=1; }
|
jpayne@69
|
142 $endfind=0;
|
jpayne@69
|
143 if (!$noOutfiles) {$noOutfiles=10;}
|
jpayne@69
|
144 if (!$match_dist){$match_dist=50000;}
|
jpayne@69
|
145
|
jpayne@69
|
146 #```````init colors````````````````````````````
|
jpayne@69
|
147 $color{"2"}=27;#dark pink 5utr
|
jpayne@69
|
148 $color{"3"}=2;#green ex
|
jpayne@69
|
149 $color{"4"}=1;#blue 3utr
|
jpayne@69
|
150 #``````````````````````
|
jpayne@69
|
151 @linkcolors=(31,14,11);
|
jpayne@69
|
152 #``````````````````````````````````````````````
|
jpayne@69
|
153 open(F,$alignm);
|
jpayne@69
|
154 <F>;
|
jpayne@69
|
155 $prog=<F>; chomp($prog);
|
jpayne@69
|
156 <F>;
|
jpayne@69
|
157 $_=<F>;
|
jpayne@69
|
158 @a=(m/\s+(\||\[.+?\])/g) ;
|
jpayne@69
|
159 for ($ind=0;$ind<=$#a;$ind++){
|
jpayne@69
|
160 if ($a[$ind] eq "[S1]") {
|
jpayne@69
|
161 $ind_s1=$ind;
|
jpayne@69
|
162 }
|
jpayne@69
|
163 elsif ($a[$ind] eq "[E1]"){
|
jpayne@69
|
164 $ind_e1=$ind;
|
jpayne@69
|
165 }
|
jpayne@69
|
166 elsif ($a[$ind] eq "[% IDY]"){
|
jpayne@69
|
167 $ind_pidy=$ind;
|
jpayne@69
|
168 }
|
jpayne@69
|
169 elsif ($a[$ind] eq "[LEN R]"){
|
jpayne@69
|
170 $ind_lenchr=$ind;
|
jpayne@69
|
171 }
|
jpayne@69
|
172 # elsif ($a[$ind] eq "[TAGS]"){
|
jpayne@69
|
173 # $ind_tags=$ind+1; #there are two columns for this header col
|
jpayne@69
|
174 # }
|
jpayne@69
|
175 }
|
jpayne@69
|
176 <F>;$mref=-1;
|
jpayne@69
|
177 while (<F>){
|
jpayne@69
|
178 chomp;
|
jpayne@69
|
179 @a=split;
|
jpayne@69
|
180 if (!exists $hRefContigId{$a[-2]}) { # print $a[-2]."\n";
|
jpayne@69
|
181 $hRefContigId{$a[-2]}=$a[$ind_lenchr];
|
jpayne@69
|
182 $lenrefseqs+=$a[$ind_lenchr];
|
jpayne@69
|
183 $mref++;
|
jpayne@69
|
184 }
|
jpayne@69
|
185 }
|
jpayne@69
|
186 $nobpinfile=int($lenrefseqs/$noOutfiles);
|
jpayne@69
|
187
|
jpayne@69
|
188 close(F);
|
jpayne@69
|
189
|
jpayne@69
|
190 #``````````````````````
|
jpayne@69
|
191
|
jpayne@69
|
192
|
jpayne@69
|
193 if (@ARGV > 1) {
|
jpayne@69
|
194 get_cds_ends();
|
jpayne@69
|
195 get_utrcds_info();
|
jpayne@69
|
196 test_overlap();
|
jpayne@69
|
197 }
|
jpayne@69
|
198 elsif(!$mref) {
|
jpayne@69
|
199 $fileno=$noOutfiles;
|
jpayne@69
|
200 $startcoord=0;$endcoord=0;
|
jpayne@69
|
201 for ($i=0;$i<$fileno;$i++) {
|
jpayne@69
|
202 $endcoord=$startcoord+$nobpinfile-1;
|
jpayne@69
|
203 $endcoord=$lenrefseqs if ($endcoord>$lenrefseqs);
|
jpayne@69
|
204 $file[$i]="$startcoord $endcoord";
|
jpayne@69
|
205 $startcoord=$endcoord+1;
|
jpayne@69
|
206 }
|
jpayne@69
|
207 }
|
jpayne@69
|
208
|
jpayne@69
|
209
|
jpayne@69
|
210 $Yorig=3000;
|
jpayne@69
|
211 $YdistPID=2000;
|
jpayne@69
|
212 $yscale=$YdistPID/50;
|
jpayne@69
|
213 $Xscale=14.5;
|
jpayne@69
|
214 $gap=800;
|
jpayne@69
|
215 #$maxfiles = ($fileno < 10) ? $fileno : 10;
|
jpayne@69
|
216 #---------------------------------
|
jpayne@69
|
217
|
jpayne@69
|
218 if (!$mref){
|
jpayne@69
|
219 for($i=0; $i < $fileno; $i++) {
|
jpayne@69
|
220 $nrf=$i;
|
jpayne@69
|
221 set_output_fname();
|
jpayne@69
|
222 ($startcoord,$endcoord)=split(/\s+/,$file[$i]);
|
jpayne@69
|
223 open(O,">$procfile".$nrf.".fig");
|
jpayne@69
|
224 print_header();
|
jpayne@69
|
225 print $procfile.$nrf.".fig\t range : $startcoord\t$endcoord \n" if ($verb && ($format eq "fig"));
|
jpayne@69
|
226
|
jpayne@69
|
227 $xs=0;
|
jpayne@69
|
228 $xe=int(($endcoord-$startcoord+1)/$Xscale);
|
jpayne@69
|
229 #$xs=200;
|
jpayne@69
|
230 #$xe=$xs+int(($endcoord-$startcoord+1)/$Xscale);
|
jpayne@69
|
231 print_grid($xs,$xe,$startcoord,$endcoord);
|
jpayne@69
|
232
|
jpayne@69
|
233 $tmpIdQrycontig="";
|
jpayne@69
|
234 $linkcolor=$linkcolors[0];
|
jpayne@69
|
235
|
jpayne@69
|
236 open(F,$alignm);
|
jpayne@69
|
237 <F>;<F>;<F>;<F>;<F>;
|
jpayne@69
|
238 while(<F>) {
|
jpayne@69
|
239 chomp;
|
jpayne@69
|
240 @a=split;
|
jpayne@69
|
241 if($a[$ind_s1] > $endcoord) { last;}
|
jpayne@69
|
242 if($a[$ind_s1]<$startcoord && $a[$ind_e1] > $startcoord ) { $a[$ind_s1]=$startcoord;}
|
jpayne@69
|
243 if($a[$ind_s1] < $endcoord && $endcoord < $a[$ind_e1]) { $a[$ind_e1]=$endcoord;}
|
jpayne@69
|
244
|
jpayne@69
|
245 if($a[$ind_s1]>=$startcoord && $a[$ind_e1]<=$endcoord) {
|
jpayne@69
|
246 $x1=int(($a[$ind_s1]-$startcoord)/$Xscale);#
|
jpayne@69
|
247 $x2=int(($a[$ind_e1]-$startcoord)/$Xscale);#
|
jpayne@69
|
248 print_align($x1,$x2);
|
jpayne@69
|
249 }
|
jpayne@69
|
250 }
|
jpayne@69
|
251 close(F);
|
jpayne@69
|
252 %hQrycontig=();
|
jpayne@69
|
253 print_genes() if ($futr);
|
jpayne@69
|
254 print_legend();
|
jpayne@69
|
255 close(O);
|
jpayne@69
|
256 change_file_format() if ($format ne "fig");
|
jpayne@69
|
257 }
|
jpayne@69
|
258 }
|
jpayne@69
|
259 elsif($mref){#multiple ref seqs
|
jpayne@69
|
260 set_output_fname();
|
jpayne@69
|
261 $tmpIdQrycontig="";
|
jpayne@69
|
262 $linkcolor=$linkcolors[0];
|
jpayne@69
|
263 $startdrawX=0;
|
jpayne@69
|
264 $proclen=0;
|
jpayne@69
|
265 $first=1;
|
jpayne@69
|
266 $nrf=0;
|
jpayne@69
|
267 open(F,$alignm);
|
jpayne@69
|
268 <F>;<F>;<F>;<F>;<F>;
|
jpayne@69
|
269 while(<F>) {
|
jpayne@69
|
270 chomp;
|
jpayne@69
|
271 @a=split;
|
jpayne@69
|
272 if ($a[-2] ne $tmpcontig){
|
jpayne@69
|
273 %hQrycontig=();
|
jpayne@69
|
274 $tmpcontig=$a[-2];
|
jpayne@69
|
275 if ($first){
|
jpayne@69
|
276 $first=0;
|
jpayne@69
|
277 $nrf++;
|
jpayne@69
|
278 open(O,">$procfile".$nrf.".fig");
|
jpayne@69
|
279 print_header();
|
jpayne@69
|
280 print $procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
|
jpayne@69
|
281 $len=$hRefContigId{$a[-2]};
|
jpayne@69
|
282 }
|
jpayne@69
|
283 else {
|
jpayne@69
|
284 $startdrawX+=int($len/$Xscale)+$gap;
|
jpayne@69
|
285 $len=$hRefContigId{$a[-2]};
|
jpayne@69
|
286 if (($proclen+$len>$nobpinfile) and ($proclen != 0)){
|
jpayne@69
|
287 print_legend();
|
jpayne@69
|
288 close(O);
|
jpayne@69
|
289 change_file_format() if ($format ne "fig");
|
jpayne@69
|
290 $nrf++;
|
jpayne@69
|
291 open(O,">$procfile".$nrf.".fig");
|
jpayne@69
|
292 print_header();
|
jpayne@69
|
293 print "\n".$procfile.$nrf.".fig"."\n" if ($verb && ($format eq "fig"));
|
jpayne@69
|
294 $proclen=0;
|
jpayne@69
|
295 $startdrawX=0;
|
jpayne@69
|
296 }
|
jpayne@69
|
297 }
|
jpayne@69
|
298 $xs=$startdrawX;
|
jpayne@69
|
299 $xe=$startdrawX+int($len/$Xscale);
|
jpayne@69
|
300 print_grid($xs,$xe,0,$len);
|
jpayne@69
|
301 #print genes from %geneinfo for contig
|
jpayne@69
|
302 print $a[-2]."\t".$hRefContigId{$a[-2]}."\n";
|
jpayne@69
|
303 print_genes_mr() if ($futr);
|
jpayne@69
|
304 $proclen+=$len;
|
jpayne@69
|
305 }#end if new contig
|
jpayne@69
|
306 $x1=$startdrawX+int($a[$ind_s1]/$Xscale);
|
jpayne@69
|
307 $x2=$startdrawX+int($a[$ind_e1]/$Xscale);
|
jpayne@69
|
308 print_align($x1,$x2);
|
jpayne@69
|
309 }
|
jpayne@69
|
310 print_legend();
|
jpayne@69
|
311 close(O);
|
jpayne@69
|
312 change_file_format() if ($format ne "fig");
|
jpayne@69
|
313
|
jpayne@69
|
314 close(F);
|
jpayne@69
|
315 }
|
jpayne@69
|
316 #*******************************************************************************
|
jpayne@69
|
317 #*******************************************************************************
|
jpayne@69
|
318 sub set_output_fname{
|
jpayne@69
|
319
|
jpayne@69
|
320 if (!$outfilename) {$procfile=$prog."_graph"."_";}
|
jpayne@69
|
321 else {$procfile=$outfilename."_";}
|
jpayne@69
|
322
|
jpayne@69
|
323 if ($format ne "fig"){
|
jpayne@69
|
324 $procfile="tmp".$procfile;
|
jpayne@69
|
325 }
|
jpayne@69
|
326 }
|
jpayne@69
|
327 #*********************************
|
jpayne@69
|
328 sub get_cds_ends{
|
jpayne@69
|
329 #3. print "create \%hcds_ends...\n";
|
jpayne@69
|
330 $testGffFormat=0;
|
jpayne@69
|
331 open(F,"<".$fcds);#|| die "can't open \" $fcds cds \" file !";
|
jpayne@69
|
332 while(<F>) {
|
jpayne@69
|
333 chomp;
|
jpayne@69
|
334 if($_) {
|
jpayne@69
|
335 @a=split;
|
jpayne@69
|
336 if (exists $hRefContigId{$a[0]}){#record if at least one of the ref id is the same in GFF and Align files
|
jpayne@69
|
337 $testGffFormat++;
|
jpayne@69
|
338 }
|
jpayne@69
|
339 $genename=$a[8];
|
jpayne@69
|
340 if ($genename ne $tmpname){
|
jpayne@69
|
341 if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
|
jpayne@69
|
342 elsif ($sign eq "-"){ $hcds_ends{$tmpname} = "$cds3 $cds5";}
|
jpayne@69
|
343 $tmpname=$genename;
|
jpayne@69
|
344 $sign=$a[6];
|
jpayne@69
|
345 }
|
jpayne@69
|
346 if($sign eq "-") {
|
jpayne@69
|
347 $temp=$a[3];
|
jpayne@69
|
348 $a[3]=$a[4];
|
jpayne@69
|
349 $a[4]=$temp;
|
jpayne@69
|
350 }
|
jpayne@69
|
351 if ($a[2] eq "single-exon"){
|
jpayne@69
|
352 $cds5=$a[3];
|
jpayne@69
|
353 $cds3=$a[4];
|
jpayne@69
|
354 }
|
jpayne@69
|
355 elsif ($a[2] eq "initial-exon"){
|
jpayne@69
|
356 $cds5=$a[3];
|
jpayne@69
|
357 }
|
jpayne@69
|
358 elsif ($a[2] eq "last-exon"){
|
jpayne@69
|
359 $cds3=$a[4];
|
jpayne@69
|
360 }
|
jpayne@69
|
361 }
|
jpayne@69
|
362 }
|
jpayne@69
|
363 if ($sign eq "+"){ $hcds_ends{$tmpname} = "$cds5 $cds3";}
|
jpayne@69
|
364 elsif ($sign eq "-"){$hcds_ends{$tmpname} = "$cds3 $cds5";}
|
jpayne@69
|
365
|
jpayne@69
|
366 test_formatGFF();
|
jpayne@69
|
367
|
jpayne@69
|
368 #foreach $k ( keys %hcds_ends){
|
jpayne@69
|
369 # print "cds_ends: ".$k."\t"."\n";
|
jpayne@69
|
370 #} exit;
|
jpayne@69
|
371
|
jpayne@69
|
372 }
|
jpayne@69
|
373
|
jpayne@69
|
374 #*********************************
|
jpayne@69
|
375 sub test_formatGFF{
|
jpayne@69
|
376 if ($testGffFormat==0){
|
jpayne@69
|
377 print (STDERR "$err_gff \n");
|
jpayne@69
|
378 exit (1);
|
jpayne@69
|
379 }
|
jpayne@69
|
380
|
jpayne@69
|
381 }
|
jpayne@69
|
382
|
jpayne@69
|
383 #*********************************
|
jpayne@69
|
384 sub get_utrcds_info{
|
jpayne@69
|
385 #test for gene overlap $geneinfo{gene_name}->[0]=level,stock gene 5'3' utr ends,
|
jpayne@69
|
386 #determina %geneinfo{id gene}->5utr,ex,3utr
|
jpayne@69
|
387 #and @file
|
jpayne@69
|
388 # get_gene_ends();
|
jpayne@69
|
389 $testGffFormat=0;
|
jpayne@69
|
390 open(F,"<".$futr);# || die "can't open \" $futr utr \" file !";
|
jpayne@69
|
391 while(<F>) {
|
jpayne@69
|
392 chomp;
|
jpayne@69
|
393 if($_) {
|
jpayne@69
|
394 @a=split;
|
jpayne@69
|
395 #get gene ends-utr
|
jpayne@69
|
396 $genename=$a[8];
|
jpayne@69
|
397 if (exists $hRefContigId{$a[0]}){ #check if [align file(col before the last one)] = [GFF file(col 1)]
|
jpayne@69
|
398 $testGffFormat++;
|
jpayne@69
|
399 }
|
jpayne@69
|
400
|
jpayne@69
|
401 if ($genename ne $tmpname){
|
jpayne@69
|
402 if ($sign eq "+"){ $utr_ends{$tmpname} = "$utr5 $utr3";}
|
jpayne@69
|
403 elsif ($sign eq "-"){ $utr_ends{$tmpname} = "$utr3 $utr5";}
|
jpayne@69
|
404
|
jpayne@69
|
405 if ($tmpgene) {# for the distinct_utr_cds
|
jpayne@69
|
406 get_utrcds_ends() ;
|
jpayne@69
|
407 }
|
jpayne@69
|
408 $tmpgene="$a[3] $a[4]";#
|
jpayne@69
|
409
|
jpayne@69
|
410 $tmpname=$genename;
|
jpayne@69
|
411 $sign=$a[6];
|
jpayne@69
|
412 $hContig_genes{$a[0]}.=" ".$a[8]; # for multiple ref. seqs
|
jpayne@69
|
413 }
|
jpayne@69
|
414 else{# for the distinct_utr_cds
|
jpayne@69
|
415 if($sign eq "-") {
|
jpayne@69
|
416 $tmpgene="$a[3] $a[4];".$tmpgene;
|
jpayne@69
|
417 }
|
jpayne@69
|
418 else { $tmpgene.=";$a[3] $a[4]"; }
|
jpayne@69
|
419 }#
|
jpayne@69
|
420
|
jpayne@69
|
421 if($sign eq "-") {
|
jpayne@69
|
422 $temp=$a[3];
|
jpayne@69
|
423 $a[3]=$a[4];
|
jpayne@69
|
424 $a[4]=$temp;
|
jpayne@69
|
425 }
|
jpayne@69
|
426 if ($a[2] eq "single-exon"){
|
jpayne@69
|
427 $utr5=$a[3];
|
jpayne@69
|
428 $utr3=$a[4];
|
jpayne@69
|
429 }
|
jpayne@69
|
430 elsif ($a[2] eq "initial-exon"){
|
jpayne@69
|
431 $utr5=$a[3];
|
jpayne@69
|
432 }
|
jpayne@69
|
433 elsif ($a[2] eq "last-exon"){
|
jpayne@69
|
434 $utr3=$a[4];
|
jpayne@69
|
435 }
|
jpayne@69
|
436
|
jpayne@69
|
437 #init gene info (level)
|
jpayne@69
|
438 $geneinfo{$a[8]}->[0]=1 if (!exists $geneinfo{$a[8]});
|
jpayne@69
|
439
|
jpayne@69
|
440 }
|
jpayne@69
|
441 }
|
jpayne@69
|
442 # for the distinct_utr_cds
|
jpayne@69
|
443 get_utrcds_ends();
|
jpayne@69
|
444 %cds_ends=();#
|
jpayne@69
|
445
|
jpayne@69
|
446 if ($sign eq "+"){$utr_ends{$tmpname}="$utr5 $utr3"; }
|
jpayne@69
|
447 elsif ($sign eq "-"){$utr_ends{$tmpname}="$utr3 $utr5"; }
|
jpayne@69
|
448 $hContig_genes{$a[0]}.=" ".$a[8];
|
jpayne@69
|
449
|
jpayne@69
|
450 close(F);
|
jpayne@69
|
451 test_formatGFF();
|
jpayne@69
|
452
|
jpayne@69
|
453 }
|
jpayne@69
|
454
|
jpayne@69
|
455
|
jpayne@69
|
456 #****************************************************************
|
jpayne@69
|
457 sub get_utrcds_ends{
|
jpayne@69
|
458 $u5="";$ex="";$u3="";
|
jpayne@69
|
459
|
jpayne@69
|
460 if ($fcds eq $futr) {
|
jpayne@69
|
461 $ex=$tmpgene;
|
jpayne@69
|
462 }
|
jpayne@69
|
463 else {
|
jpayne@69
|
464 @ex=split(";",$tmpgene);
|
jpayne@69
|
465 @cds=split(" ",$hcds_ends{$tmpname});
|
jpayne@69
|
466
|
jpayne@69
|
467 for($i=0;$i<=$#ex;$i++){
|
jpayne@69
|
468 @coord=split(" ",$ex[$i]);
|
jpayne@69
|
469
|
jpayne@69
|
470 if ($cds[0]>$coord[0]){
|
jpayne@69
|
471 if ($cds[0]>$coord[1]){
|
jpayne@69
|
472 $u5.="$coord[0] $coord[1];";
|
jpayne@69
|
473 }
|
jpayne@69
|
474 else{
|
jpayne@69
|
475 $u5.= "$coord[0] "; $u5.=$cds[0]-1 .";" ; #?
|
jpayne@69
|
476 if ($cds[1]<$coord[1]){
|
jpayne@69
|
477 $ex.="$cds[0] $cds[1];";
|
jpayne@69
|
478 $u3.=$cds[1]+1 ." $coord[1];";
|
jpayne@69
|
479 }
|
jpayne@69
|
480 else{ $ex.="$cds[0] $coord[1];";}
|
jpayne@69
|
481 }
|
jpayne@69
|
482 }
|
jpayne@69
|
483 else {
|
jpayne@69
|
484 if ($cds[1]>$coord[0]){
|
jpayne@69
|
485 if ($cds[1]>$coord[1]){
|
jpayne@69
|
486 $ex.="$coord[0] $coord[1];";
|
jpayne@69
|
487 }
|
jpayne@69
|
488 else{
|
jpayne@69
|
489 $ex.="$coord[0] $cds[1];";
|
jpayne@69
|
490 $u3.=$cds[1]+1 ." $coord[1];";
|
jpayne@69
|
491 }
|
jpayne@69
|
492 }
|
jpayne@69
|
493 else { $u3.="$coord[0] $coord[1];";}
|
jpayne@69
|
494 }
|
jpayne@69
|
495 }
|
jpayne@69
|
496 chop($u5, $ex, $u3);
|
jpayne@69
|
497 }
|
jpayne@69
|
498 $geneinfo{$tmpname}->[1]=$sign;
|
jpayne@69
|
499 $geneinfo{$tmpname}->[2]=$u5;
|
jpayne@69
|
500 $geneinfo{$tmpname}->[3]=$ex;
|
jpayne@69
|
501 $geneinfo{$tmpname}->[4]=$u3;
|
jpayne@69
|
502 }
|
jpayne@69
|
503 #*********************************
|
jpayne@69
|
504 sub test_overlap{
|
jpayne@69
|
505
|
jpayne@69
|
506 if (!$mref){
|
jpayne@69
|
507 $fileno=0;###
|
jpayne@69
|
508 $endcoord=0;###
|
jpayne@69
|
509 }
|
jpayne@69
|
510 foreach $kcontgid (sort keys %hContig_genes){
|
jpayne@69
|
511 @allgenes=split(/\s+/,$hContig_genes{$kcontgid});
|
jpayne@69
|
512 for ($i=1;$i<=$#allgenes;$i++){
|
jpayne@69
|
513 @g1=split (" ", $utr_ends{$allgenes[$i]});
|
jpayne@69
|
514 $Utr5End{$allgenes[$i]}=$g1[0]; ###
|
jpayne@69
|
515
|
jpayne@69
|
516 for ($j=$i+1;$j<=$#allgenes;$j++){ #comparing with the rest of the genes
|
jpayne@69
|
517 @g2=split (" ", $utr_ends{$allgenes[$j]});
|
jpayne@69
|
518 #if the genes are overpaling and they have the same level ,the second gene is liflet to the next level
|
jpayne@69
|
519 if ( (($g2[0]>=$g1[0]) and ($g2[0]<=$g1[1])) or (($g2[1]>=$g1[0]) and ($g2[1]<=$g1[1])) or (($g1[0]>=$g2[0]) and ($g1[0]<=$g2[1])) ){
|
jpayne@69
|
520 if ($geneinfo{$allgenes[$i]}->[0] == $geneinfo{$allgenes[$j]}->[0]){
|
jpayne@69
|
521 $geneinfo{$allgenes[$j]}->[0]=$geneinfo{$allgenes[$i]}->[0] + 1 ;
|
jpayne@69
|
522 }
|
jpayne@69
|
523 }
|
jpayne@69
|
524 }
|
jpayne@69
|
525 SetTheRangeForEachFile() if ((!$endfind) and (!$mref)); ###
|
jpayne@69
|
526 }
|
jpayne@69
|
527 }
|
jpayne@69
|
528 $file[$fileno++]="$startcoord $endcoord" if (!$mref);###
|
jpayne@69
|
529 %utr_ends=();###
|
jpayne@69
|
530
|
jpayne@69
|
531 }
|
jpayne@69
|
532
|
jpayne@69
|
533 #**************************************
|
jpayne@69
|
534 sub SetTheRangeForEachFile{
|
jpayne@69
|
535 $currstart=$g1[0];
|
jpayne@69
|
536 $currend=$g1[1];
|
jpayne@69
|
537 #---test range ends intersection
|
jpayne@69
|
538 if (!$startfind) {
|
jpayne@69
|
539 if (($x1win <= $currstart) || ($x1win <= $currend)){
|
jpayne@69
|
540 $currstart = $x1win;
|
jpayne@69
|
541 $startfind = 1;
|
jpayne@69
|
542 }
|
jpayne@69
|
543 }
|
jpayne@69
|
544 if ( $startfind && $x1win && $x2win){
|
jpayne@69
|
545 if (($x2win <= $currstart) || ($x2win <= $currend) ){
|
jpayne@69
|
546 $currend = $x2win;
|
jpayne@69
|
547 $endfind = 1;
|
jpayne@69
|
548 }
|
jpayne@69
|
549 }
|
jpayne@69
|
550 #--------------------
|
jpayne@69
|
551 if ($startfind) {
|
jpayne@69
|
552 if(!$endcoord) {
|
jpayne@69
|
553 #$startcoord=0;
|
jpayne@69
|
554 $startcoord = $x1win ? $x1win : 0;
|
jpayne@69
|
555 $endcoord=$currend;
|
jpayne@69
|
556 }
|
jpayne@69
|
557 else {
|
jpayne@69
|
558 if($currend > $endcoord) {
|
jpayne@69
|
559 if($currend-$startcoord < $nobpinfile) {
|
jpayne@69
|
560 $endcoord=$currend;
|
jpayne@69
|
561 }
|
jpayne@69
|
562 else {
|
jpayne@69
|
563 $file[$fileno++]="$startcoord $endcoord";
|
jpayne@69
|
564 $startcoord=$endcoord+1;
|
jpayne@69
|
565 $endcoord=$currend;
|
jpayne@69
|
566 }
|
jpayne@69
|
567 }
|
jpayne@69
|
568 }
|
jpayne@69
|
569 }#if startfind
|
jpayne@69
|
570 }
|
jpayne@69
|
571 #*********************************
|
jpayne@69
|
572 sub print_header{
|
jpayne@69
|
573 print O "#FIG 3.2\nLandscape\nCenter\nInches\nLetter \n100.00\nMultiple\n-2\n1200 2\n";
|
jpayne@69
|
574 }
|
jpayne@69
|
575 #*********************************
|
jpayne@69
|
576 sub print_align{
|
jpayne@69
|
577 my ($x1,$x2)=@_;
|
jpayne@69
|
578
|
jpayne@69
|
579 $a[$ind_pidy]=50 if ($a[$ind_pidy]<50);
|
jpayne@69
|
580 $a[$ind_pidy]=int($a[$ind_pidy]);
|
jpayne@69
|
581 if ($Mgaps){
|
jpayne@69
|
582 $y=$Yorig+250+$YdistPID-$yscale*2;
|
jpayne@69
|
583 if($a[$#a]=~/rev$/){$y-=25*$yscale;}
|
jpayne@69
|
584 }
|
jpayne@69
|
585 else{
|
jpayne@69
|
586 $y=$Yorig+250+$YdistPID-$yscale*($a[$ind_pidy]-50);
|
jpayne@69
|
587 }
|
jpayne@69
|
588 if($x1==$x2) { $x2++;}
|
jpayne@69
|
589 #draw the line between matches. is dif color for each contig
|
jpayne@69
|
590 if ($a[$#a] eq $tmpIdQrycontig) {
|
jpayne@69
|
591 print_connections($hQrycontig{$tmpIdQrycontig}->[1], $x1,$y);
|
jpayne@69
|
592 }
|
jpayne@69
|
593 else{#new contig
|
jpayne@69
|
594 #remember the start coord for printing the id alignments
|
jpayne@69
|
595 if ($printIDconting){
|
jpayne@69
|
596 if ( $x1 - $XlastPrint > 400 ) {
|
jpayne@69
|
597 print O "4 0 0 5 0 0 8 0.0000 4 90 270 ";
|
jpayne@69
|
598 printf O ("\t%.0f %.0f ",$x1,$y);
|
jpayne@69
|
599 print O $a[$#a], "\\001\n";
|
jpayne@69
|
600 $XlastPrint=$x1; $YlastPrint=$y;
|
jpayne@69
|
601 }
|
jpayne@69
|
602 }
|
jpayne@69
|
603 #
|
jpayne@69
|
604 ##if it was seen before,but interrupted by another contig
|
jpayne@69
|
605 if ((exists $hQrycontig{$a[$#a]}) and ($a[$ind_s1]-$hQrycontig{$a[$#a]}->[2] < $match_dist )) {
|
jpayne@69
|
606 $linkcolor=$hQrycontig{$a[$#a]}->[0];
|
jpayne@69
|
607 print_connections($hQrycontig{$a[$#a]}->[1], $x1,$y);
|
jpayne@69
|
608 }
|
jpayne@69
|
609 else{
|
jpayne@69
|
610 #change the link color
|
jpayne@69
|
611 unshift(@linkcolors, pop(@linkcolors));
|
jpayne@69
|
612 $linkcolor=$linkcolors[0];
|
jpayne@69
|
613 $hQrycontig{$a[$#a]}->[0] = $linkcolor;
|
jpayne@69
|
614 }
|
jpayne@69
|
615 }
|
jpayne@69
|
616 $tmpIdQrycontig=$a[$#a];
|
jpayne@69
|
617 $hQrycontig{$tmpIdQrycontig}->[1]="$x2 $y";
|
jpayne@69
|
618 $hQrycontig{$tmpIdQrycontig}->[2]=$a[$ind_e1];
|
jpayne@69
|
619
|
jpayne@69
|
620 #the matches line is red
|
jpayne@69
|
621 print O "2 1 0 2 4 0 40 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
622 print O "\t$x1 $y $x2 $y\n";
|
jpayne@69
|
623 print O "2 1 0 5 20 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
624 printf O ("\t $x1 %.0f $x2 %.0f\n",$Yorig+150 , $Yorig+150);
|
jpayne@69
|
625 }
|
jpayne@69
|
626 #*********************************
|
jpayne@69
|
627 sub print_connections{
|
jpayne@69
|
628 my ($setc1,$setx2,$sety2)=@_; # print "\nparam connect @_\n";
|
jpayne@69
|
629 my ($setx1,$sety1) =split(/ /,$setc1);
|
jpayne@69
|
630
|
jpayne@69
|
631 if ($Mgaps){
|
jpayne@69
|
632 if ($setx1>$setx2){
|
jpayne@69
|
633 $tmpsetx1=$setx1;
|
jpayne@69
|
634 $setx1=$setx2;
|
jpayne@69
|
635 $setx2=$tmpsetx1;
|
jpayne@69
|
636 }
|
jpayne@69
|
637 $distx1x2=int(($setx2-$setx1)/2);
|
jpayne@69
|
638 $xcenter= $setx1+$distx1x2;
|
jpayne@69
|
639
|
jpayne@69
|
640 if ($setx2-$setx1>4000) { #if the distance is to big then heigh of the arc is set to 20
|
jpayne@69
|
641 $heightArcUp = 20*$yscale;
|
jpayne@69
|
642 $yoffcenter=int((($distx1x2**2)+$heightArcUp**2)*(1/(2*$heightArcUp)))-$heightArcUp ;
|
jpayne@69
|
643 }
|
jpayne@69
|
644 else{
|
jpayne@69
|
645 $heightArcUp = int (0.447 * $distx1x2);#sectorul de cerc la 1/3 din raza.
|
jpayne@69
|
646 $yoffcenter=2*$heightArcUp;
|
jpayne@69
|
647 }
|
jpayne@69
|
648 print O "5 1 0 2 $linkcolor 0 50 0 -1 0.000 0 0 0 0 ";
|
jpayne@69
|
649 printf O ("%.3f %.3f $setx1 $sety1 $xcenter %.0f $setx2 $sety1 \n",$xcenter,$sety1+$yoffcenter,$sety1-$heightArcUp);
|
jpayne@69
|
650 }
|
jpayne@69
|
651 else{
|
jpayne@69
|
652 print O "2 1 0 1 $linkcolor 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
653 print O "\t".$setc1." $setx2 $sety2\n";
|
jpayne@69
|
654
|
jpayne@69
|
655 }
|
jpayne@69
|
656 }
|
jpayne@69
|
657 #*********************************
|
jpayne@69
|
658 sub print_genes_mr{
|
jpayne@69
|
659 %hLastOnLevel=();
|
jpayne@69
|
660 @g=split(/\s+/,$hContig_genes{$a[-2]});
|
jpayne@69
|
661 for ($i=1;$i<=$#g;$i++){
|
jpayne@69
|
662 $kname=$g[$i];
|
jpayne@69
|
663 $tmpx2=0;
|
jpayne@69
|
664 $y=$Yorig-100-200*$geneinfo{$kname}->[0];
|
jpayne@69
|
665 #print id gena
|
jpayne@69
|
666 $xid =$startdrawX+ int($Utr5End{$kname}/$Xscale);
|
jpayne@69
|
667 print_Id_genes() if ($printIDgenes);
|
jpayne@69
|
668 #
|
jpayne@69
|
669 for ($l=2;$l<5;$l++){
|
jpayne@69
|
670 @c=split(";",$geneinfo{$kname}->[$l] ) ;
|
jpayne@69
|
671 if (@c){ #print "de unde?@c\n" if (($l==2) or ($l==4));
|
jpayne@69
|
672 $colorend=$color{$l};
|
jpayne@69
|
673 if ($geneinfo{$kname}->[1] eq "-"){
|
jpayne@69
|
674 if ($l==2) { $colorend=$color{"4"}; }
|
jpayne@69
|
675 elsif ($l==4) {$colorend=$color{"2"};}
|
jpayne@69
|
676 }
|
jpayne@69
|
677 for ($k=0;$k<=$#c;$k++){
|
jpayne@69
|
678 @e=split (" ",$c[$k]);
|
jpayne@69
|
679 $x1=$startdrawX+int($e[0]/$Xscale);
|
jpayne@69
|
680 $x2=$startdrawX+int($e[1]/$Xscale);
|
jpayne@69
|
681 if($x1==$x2) { $x2++;}
|
jpayne@69
|
682 if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron
|
jpayne@69
|
683 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
684 print O "\t $tmpx2 $y $x1 $y\n";
|
jpayne@69
|
685 }
|
jpayne@69
|
686 $tmpx2=$x2;
|
jpayne@69
|
687 print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#
|
jpayne@69
|
688 print O "\t $x1 $y $x2 $y\n";
|
jpayne@69
|
689 }
|
jpayne@69
|
690 }
|
jpayne@69
|
691 }
|
jpayne@69
|
692 #delete ($geneinfo{$kname});
|
jpayne@69
|
693 }
|
jpayne@69
|
694 }
|
jpayne@69
|
695
|
jpayne@69
|
696 #*********************************
|
jpayne@69
|
697 sub print_Id_genes{
|
jpayne@69
|
698 if (exists $hLastOnLevel{$geneinfo{$kname}->[0]}){
|
jpayne@69
|
699 $lastOnlevel=$hLastOnLevel{$geneinfo{$kname}->[0]};
|
jpayne@69
|
700 $printidspace = int(($Utr5End{$kname}-$Utr5End{$lastOnlevel})/$Xscale);
|
jpayne@69
|
701 }
|
jpayne@69
|
702 else{$printidspace=601;}
|
jpayne@69
|
703 if ($printidspace > 600){
|
jpayne@69
|
704 #print contig name#
|
jpayne@69
|
705 print O "4 0 0 5 0 0 6 0.0000 4 90 270 ";
|
jpayne@69
|
706 printf O ("\t%.0f %.0f ",$xid,$y-50);
|
jpayne@69
|
707 print O $kname, "\\001\n";
|
jpayne@69
|
708 $hLastOnLevel{$geneinfo{$kname}->[0]}=$kname;
|
jpayne@69
|
709 }
|
jpayne@69
|
710 }
|
jpayne@69
|
711 #*********************************
|
jpayne@69
|
712 sub print_genes{
|
jpayne@69
|
713 %hLastOnLevel=();
|
jpayne@69
|
714 foreach $kname (sort {$Utr5End{$a} <=> $Utr5End{$b}} keys %Utr5End){
|
jpayne@69
|
715 $tmpx2=0;
|
jpayne@69
|
716 if ($Utr5End{$kname}>$startcoord && $Utr5End{$kname}<$endcoord){
|
jpayne@69
|
717 $y=$Yorig-100-200*$geneinfo{$kname}->[0];
|
jpayne@69
|
718 #print id gena
|
jpayne@69
|
719 $xid = int(($Utr5End{$kname}-$startcoord)/$Xscale);
|
jpayne@69
|
720 print_Id_genes() if ($printIDgenes);
|
jpayne@69
|
721 #
|
jpayne@69
|
722 for ($l=2;$l<5;$l++){
|
jpayne@69
|
723 @c=split(";",$geneinfo{$kname}->[$l] );
|
jpayne@69
|
724 $colorend=$color{$l};
|
jpayne@69
|
725 if ($geneinfo{$kname}->[1] eq "-"){
|
jpayne@69
|
726 if ($l==2) { $colorend=$color{"4"}; }
|
jpayne@69
|
727 elsif ($l==4) {$colorend=$color{"2"};}
|
jpayne@69
|
728 }
|
jpayne@69
|
729
|
jpayne@69
|
730 for ($k=0;$k<=$#c;$k++){
|
jpayne@69
|
731 @e=split (" ",$c[$k]);
|
jpayne@69
|
732 $x1=int(($e[0]-$startcoord)/$Xscale);
|
jpayne@69
|
733 $x2=int(($e[1]-$startcoord)/$Xscale);
|
jpayne@69
|
734 if($x1==$x2) { $x2++;}
|
jpayne@69
|
735 if ( ($tmpx2) and ($x1-$tmpx2>1)){ #print the intron
|
jpayne@69
|
736 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
737 print O "\t $tmpx2 $y $x1 $y\n";
|
jpayne@69
|
738 }
|
jpayne@69
|
739 $tmpx2=$x2;
|
jpayne@69
|
740 print O "2 1 0 5 $colorend 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#
|
jpayne@69
|
741 print O "\t $x1 $y $x2 $y\n";
|
jpayne@69
|
742 }
|
jpayne@69
|
743 }
|
jpayne@69
|
744 }#endif "is in interval"
|
jpayne@69
|
745 }
|
jpayne@69
|
746 # delete ($Utr5End{$kname});
|
jpayne@69
|
747 # delete ($geneinfo{$kname});
|
jpayne@69
|
748
|
jpayne@69
|
749 }
|
jpayne@69
|
750 #*********************************
|
jpayne@69
|
751 sub print_grid{
|
jpayne@69
|
752
|
jpayne@69
|
753 my ($xs,$xe,$startcontg,$endcontg)=@_;
|
jpayne@69
|
754
|
jpayne@69
|
755 $XlastPrint=0;$YlastPrint=0;
|
jpayne@69
|
756
|
jpayne@69
|
757 #print ref contig
|
jpayne@69
|
758 print O "2 1 0 10 11 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
759 printf O ("\t $xs %.0f $xe %.0f\n",$Yorig+50,$Yorig+50);
|
jpayne@69
|
760 #print orizontal axes for PId (100%,75%,50%)
|
jpayne@69
|
761 for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
|
jpayne@69
|
762 print O "2 1 2 1 0 7 60 0 -1 4.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
763 printf O ("\t$xs %.0f $xe %.0f\n",$Yorig+250+$YdistPID-($percent_id - 50) * $yscale,$Yorig+250+$YdistPID-($percent_id - 50) * $yscale);
|
jpayne@69
|
764 #last if ($Mgaps);
|
jpayne@69
|
765 }
|
jpayne@69
|
766 #print orizontal markers for bp.
|
jpayne@69
|
767 $increment=10000/$Xscale;
|
jpayne@69
|
768 $no_incr=0;
|
jpayne@69
|
769 $xmark = $xs ;$xmark_float= $xs;
|
jpayne@69
|
770 while ($xmark < $xe){
|
jpayne@69
|
771 print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
772 printf O ("\t$xmark %.0f $xmark %.0f\n",$Yorig+$YdistPID+250,$Yorig+$YdistPID+300);
|
jpayne@69
|
773 #bp scale
|
jpayne@69
|
774 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
|
jpayne@69
|
775 printf O ("\t %.0f %.0f",$xmark,$Yorig+$YdistPID+400);
|
jpayne@69
|
776 print O " $no_incr"."k", "\\001\n";
|
jpayne@69
|
777 $no_incr += 10;
|
jpayne@69
|
778 $xmark_float += $increment;
|
jpayne@69
|
779 $xmark=int($xmark_float);
|
jpayne@69
|
780 }
|
jpayne@69
|
781
|
jpayne@69
|
782 #coord for chr ends
|
jpayne@69
|
783 print O "4 0 0 50 0 0 14 0.0000 4 135 450 $xs $Yorig $startcontg\\001\n";
|
jpayne@69
|
784 printf O ("4 0 0 50 0 0 14 0.0000 4 135 810 %.0f $Yorig $endcontg\\001\n",$xe-length($xe)*125);
|
jpayne@69
|
785
|
jpayne@69
|
786 #print contig name#
|
jpayne@69
|
787 if ($mref){
|
jpayne@69
|
788 print O "4 0 0 5 0 0 8 0.0000 4 135 405 ";
|
jpayne@69
|
789 printf O ("\t%.0f %.0f ",$xs,$Yorig+70);
|
jpayne@69
|
790 print O $a[-2], "\\001\n";
|
jpayne@69
|
791 }
|
jpayne@69
|
792 #print vertical markers for PId scale
|
jpayne@69
|
793 if (!$Mgaps){
|
jpayne@69
|
794 for ($percent_id = 50; $percent_id < 101; $percent_id += 25) {
|
jpayne@69
|
795 #left
|
jpayne@69
|
796 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
|
jpayne@69
|
797 printf O ("\t%.0f %.0f", $xs-200,$Yorig+$YdistPID+250-($percent_id - 50) * $yscale + 20);
|
jpayne@69
|
798 print O " $percent_id%", "\\001\n";
|
jpayne@69
|
799 #right
|
jpayne@69
|
800 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
|
jpayne@69
|
801 printf O ("\t%.0f %.0f",$xe+20, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale+20 );
|
jpayne@69
|
802 print O " $percent_id%", "\\001\n";
|
jpayne@69
|
803
|
jpayne@69
|
804 # print the tick mark
|
jpayne@69
|
805 #left
|
jpayne@69
|
806 # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
807 # printf O ("\t%.0f %.0f $xs %.0f\n",$xs-50,
|
jpayne@69
|
808 # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
|
jpayne@69
|
809 #right
|
jpayne@69
|
810 # print O "2 1 0 1 0 7 60 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
811 # printf O ("\t$xe %.0f %.0f %.0f\n",
|
jpayne@69
|
812 # $Yorig+$YdistPID+250-($percent_id - 50) * $yscale,$xe+50, $Yorig+$YdistPID+250-($percent_id - 50) * $yscale);
|
jpayne@69
|
813 }
|
jpayne@69
|
814 }
|
jpayne@69
|
815 else{ # for Mgaps
|
jpayne@69
|
816 print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
|
jpayne@69
|
817 printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 5 * $yscale + 10);
|
jpayne@69
|
818 print O " + qry strand", "\\001\n";
|
jpayne@69
|
819
|
jpayne@69
|
820 print O "4 0 0 100 0 0 7 1.5710 4 135 405 ";
|
jpayne@69
|
821 printf O ("\t%.0f %.0f", $xs-50,$Yorig+$YdistPID+250 - 30 * $yscale + 10);
|
jpayne@69
|
822 print O " - qry strand", "\\001\n";
|
jpayne@69
|
823 }
|
jpayne@69
|
824
|
jpayne@69
|
825 }
|
jpayne@69
|
826 #*********************************
|
jpayne@69
|
827 sub print_legend{
|
jpayne@69
|
828
|
jpayne@69
|
829 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
|
jpayne@69
|
830 printf O ("\t%.0f %.0f ",100,$Yorig+$YdistPID+1100);
|
jpayne@69
|
831 print O " Legend ", "\\001\n";
|
jpayne@69
|
832 $y= $Yorig+$YdistPID+1300; #utr
|
jpayne@69
|
833 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
|
jpayne@69
|
834 print O "\t 70 $y 99 $y\n";
|
jpayne@69
|
835 print O "2 1 0 5 27 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
836 print O "\t 100 $y 200 $y\n";
|
jpayne@69
|
837 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
|
jpayne@69
|
838 print O "\t 200 $y 230 $y\n";
|
jpayne@69
|
839 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
|
jpayne@69
|
840 printf O ("\t%.0f %.0f ",300,$y+30);
|
jpayne@69
|
841 print O " 5' utr ", "\\001\n";
|
jpayne@69
|
842 $y += 150 ;#cds
|
jpayne@69
|
843 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
|
jpayne@69
|
844 print O "\t 70 $y 99 $y\n";
|
jpayne@69
|
845 print O "2 1 0 5 2 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
846 print O "\t 100 $y 200 $y\n";
|
jpayne@69
|
847 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
|
jpayne@69
|
848 print O "\t 200 $y 230 $y\n";
|
jpayne@69
|
849 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
|
jpayne@69
|
850 printf O ("\t%.0f %.0f ",300,$y+30);
|
jpayne@69
|
851 print O " cds ", "\\001\n";
|
jpayne@69
|
852 $y += 150; #3' utr
|
jpayne@69
|
853 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
|
jpayne@69
|
854 print O "\t 70 $y 99 $y\n";
|
jpayne@69
|
855 print O "2 1 0 5 1 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
856 print O "\t 100 $y 200 $y\n";
|
jpayne@69
|
857 print O "2 1 0 1 0 0 50 0 -1 0.000 0 0 -1 0 0 2\n";#intron
|
jpayne@69
|
858 print O "\t 200 $y 230 $y\n";
|
jpayne@69
|
859 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
|
jpayne@69
|
860 printf O ("\t%.0f %.0f ",300,$y+30);
|
jpayne@69
|
861 print O " 3' utr ", "\\001\n";
|
jpayne@69
|
862 $y += 150; # match
|
jpayne@69
|
863 print O "2 1 0 2 4 0 50 0 -1 0.000 0 0 -1 0 0 2\n";
|
jpayne@69
|
864 print O "\t100 $y 200 $y\n";
|
jpayne@69
|
865 print O "4 0 0 100 0 0 8 0.0000 4 135 405 ";
|
jpayne@69
|
866 printf O ("\t%.0f %.0f ",300,$y+30);
|
jpayne@69
|
867 print O " match found by $prog ", "\\001\n";
|
jpayne@69
|
868 }
|
jpayne@69
|
869 #*********************************
|
jpayne@69
|
870 sub change_file_format{
|
jpayne@69
|
871
|
jpayne@69
|
872 $procfile =~ /^tmp(.+)/;
|
jpayne@69
|
873 $outfile = $1.$nrf.".".$format;
|
jpayne@69
|
874 $comand = "fig2dev -L $format -x 100";
|
jpayne@69
|
875 $comand .= " -m $magn" if ($magn);
|
jpayne@69
|
876 $comand .= " -M ".$procfile.$nrf.".fig ".$outfile;
|
jpayne@69
|
877
|
jpayne@69
|
878 $status =system($comand);
|
jpayne@69
|
879 print E "ERROR 1: fig2dev !\n" unless $status == 0;
|
jpayne@69
|
880
|
jpayne@69
|
881 $status =system("rm $procfile".$nrf.".fig");
|
jpayne@69
|
882
|
jpayne@69
|
883 if ($verb){
|
jpayne@69
|
884 print "$outfile";
|
jpayne@69
|
885 if ($mref){
|
jpayne@69
|
886 print "\n" ;
|
jpayne@69
|
887 }
|
jpayne@69
|
888 else {
|
jpayne@69
|
889 print "\t range : $startcoord\t$endcoord \n" ;
|
jpayne@69
|
890 }
|
jpayne@69
|
891 }
|
jpayne@69
|
892 }
|
jpayne@69
|
893 #*********************************
|
jpayne@69
|
894 sub format_mgaps{
|
jpayne@69
|
895 $tmpfile="tmpmgaps";
|
jpayne@69
|
896 $tmpfile2=$alignm."coords" ;
|
jpayne@69
|
897 get_ref_len(); #print $maxlenref."\n";
|
jpayne@69
|
898 open(M,">".$tmpfile2) || die "can't open \" $tmpfile2 \" file !";
|
jpayne@69
|
899 print M "$alignm\n";
|
jpayne@69
|
900 print M "Mgaps\n\n";
|
jpayne@69
|
901 print M " [S1] [E1] | [S2] [E2] | [LEN 1] [LEN 2] | [% IDY] | [LEN R] [LEN Q] | [COV R] [COV Q] | [TAGS]\n";
|
jpayne@69
|
902 print M "===============================================================================================================================\n";
|
jpayne@69
|
903
|
jpayne@69
|
904 open(T,">".$tmpfile) || die "can't open \" $tmpfile \" file !";
|
jpayne@69
|
905
|
jpayne@69
|
906 open(A,"<".$alignm) || die "can't open \" $alignm \" file !";
|
jpayne@69
|
907
|
jpayne@69
|
908
|
jpayne@69
|
909 while(<A>) {
|
jpayne@69
|
910 chomp;
|
jpayne@69
|
911 @a=split;
|
jpayne@69
|
912 if ($a[0] =~ /^>/){
|
jpayne@69
|
913 $nr_cluster=1;
|
jpayne@69
|
914 $idquery=$a[1];
|
jpayne@69
|
915 if ($a[2] eq "Reverse"){$idquery .= "_rev";}
|
jpayne@69
|
916 }
|
jpayne@69
|
917 #elsif ($a[0] eq "#") {$nr_cluster++;}
|
jpayne@69
|
918 elsif($a[0] ne "#"){
|
jpayne@69
|
919 $e1=$a[0]+$a[2];
|
jpayne@69
|
920 print T $a[0]."\t".$e1."\t"."|";
|
jpayne@69
|
921 print T "\t-\t-\t|";
|
jpayne@69
|
922 print T "\t-\t-\t|";
|
jpayne@69
|
923 print T "\t-\t|"; #pid
|
jpayne@69
|
924 print T "\t$maxlenref\t-\t|";#len seqs
|
jpayne@69
|
925 # print "\t-\t-\t|";
|
jpayne@69
|
926 # print "\t-\t-\t|";
|
jpayne@69
|
927 # print T " $Mgaps\t$idquery.$nr_cluster\n";
|
jpayne@69
|
928 print T " $Mgaps\t$idquery\n";
|
jpayne@69
|
929 }
|
jpayne@69
|
930
|
jpayne@69
|
931 }
|
jpayne@69
|
932 close(A);
|
jpayne@69
|
933 close(T);
|
jpayne@69
|
934 $command="sort -n -k 1 $tmpfile >> $tmpfile2";
|
jpayne@69
|
935 $status =system($command);
|
jpayne@69
|
936 system("rm $tmpfile");
|
jpayne@69
|
937 close(M);
|
jpayne@69
|
938 $alignm=$tmpfile2;
|
jpayne@69
|
939 print STDERR "ERROR 1: can't sort $tmpfile \n" unless $status == 0;
|
jpayne@69
|
940 print STDERR "\n**************************************** \n";
|
jpayne@69
|
941 print STDERR "New input file created : $alignm\n";
|
jpayne@69
|
942 print STDERR "**************************************** \n\n";
|
jpayne@69
|
943
|
jpayne@69
|
944 }
|
jpayne@69
|
945 #*****************************
|
jpayne@69
|
946 sub get_ref_len{
|
jpayne@69
|
947 $firstrow=1;
|
jpayne@69
|
948 open(A,"<".$alignm) || die "can't open \" $alignm \" file !";
|
jpayne@69
|
949 while(<A>) {
|
jpayne@69
|
950 chomp;
|
jpayne@69
|
951 @a=split;
|
jpayne@69
|
952 if ($firstrow) {
|
jpayne@69
|
953 $firstrow=0;
|
jpayne@69
|
954 if ($a[0] !~ /^>/){
|
jpayne@69
|
955 print "\nWrong file format for MGAPS file : $alignm ! \n";
|
jpayne@69
|
956 exit;
|
jpayne@69
|
957 }
|
jpayne@69
|
958 }
|
jpayne@69
|
959
|
jpayne@69
|
960 if ($a[0] =~ /^>/){ next; }
|
jpayne@69
|
961 elsif($a[0] ne "#"){
|
jpayne@69
|
962 $e1=$a[0]+$a[2];
|
jpayne@69
|
963 $maxlenref=($maxlenref < $e1 ? $e1 : $maxlenref);
|
jpayne@69
|
964 }
|
jpayne@69
|
965 }
|
jpayne@69
|
966 close(A);
|
jpayne@69
|
967 }
|