Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/dnadiff @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 #------------------------------------------------------------------------------- | |
4 # Programmer: Adam M Phillippy, University of Maryland | |
5 # File: dnadiff | |
6 # Date: 11 / 29 / 06 | |
7 # | |
8 # Try 'dnadiff -h' for more information. | |
9 # | |
10 #------------------------------------------------------------------------------- | |
11 | |
12 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; | |
13 use Foundation; | |
14 use File::Spec::Functions; | |
15 use strict; | |
16 | |
17 my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23"; | |
18 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; | |
19 | |
20 my $VERSION_INFO = q~ | |
21 DNAdiff version 1.3 | |
22 ~; | |
23 | |
24 my $HELP_INFO = q~ | |
25 USAGE: dnadiff [options] <reference> <query> | |
26 or dnadiff [options] -d <delta file> | |
27 | |
28 DESCRIPTION: | |
29 Run comparative analysis of two sequence sets using nucmer and its | |
30 associated utilities with recommended parameters. See MUMmer | |
31 documentation for a more detailed description of the | |
32 output. Produces the following output files: | |
33 | |
34 .report - Summary of alignments, differences and SNPs | |
35 .delta - Standard nucmer alignment output | |
36 .1delta - 1-to-1 alignment from delta-filter -1 | |
37 .mdelta - M-to-M alignment from delta-filter -m | |
38 .1coords - 1-to-1 coordinates from show-coords -THrcl .1delta | |
39 .mcoords - M-to-M coordinates from show-coords -THrcl .mdelta | |
40 .snps - SNPs from show-snps -rlTHC .1delta | |
41 .rdiff - Classified ref breakpoints from show-diff -rH .mdelta | |
42 .qdiff - Classified qry breakpoints from show-diff -qH .mdelta | |
43 .unref - Unaligned reference IDs and lengths (if applicable) | |
44 .unqry - Unaligned query IDs and lengths (if applicable) | |
45 | |
46 MANDATORY: | |
47 reference Set the input reference multi-FASTA filename | |
48 query Set the input query multi-FASTA filename | |
49 or | |
50 delta file Unfiltered .delta alignment file from nucmer | |
51 | |
52 OPTIONS: | |
53 -d|delta Provide precomputed delta file for analysis | |
54 -h | |
55 --help Display help information and exit | |
56 -p|prefix Set the prefix of the output files (default "out") | |
57 -V | |
58 --version Display the version information and exit | |
59 ~; | |
60 | |
61 | |
62 my $USAGE_INFO = q~ | |
63 USAGE: dnadiff [options] <reference> <query> | |
64 or dnadiff [options] -d <delta file> | |
65 ~; | |
66 | |
67 | |
68 my @DEPEND_INFO = | |
69 ( | |
70 "$BIN_DIR/delta-filter", | |
71 "$BIN_DIR/show-diff", | |
72 "$BIN_DIR/show-snps", | |
73 "$BIN_DIR/show-coords", | |
74 "$BIN_DIR/nucmer", | |
75 "$SCRIPT_DIR/Foundation.pm" | |
76 ); | |
77 | |
78 my $DELTA_FILTER = "$BIN_DIR/delta-filter"; | |
79 my $SHOW_DIFF = "$BIN_DIR/show-diff"; | |
80 my $SHOW_SNPS = "$BIN_DIR/show-snps"; | |
81 my $SHOW_COORDS = "$BIN_DIR/show-coords"; | |
82 my $NUCMER = "$BIN_DIR/nucmer"; | |
83 | |
84 my $SNPBuff = 20; # required buffer around "good" snps | |
85 my $OPT_Prefix = "out"; # prefix for all output files | |
86 my $OPT_RefFile; # reference file | |
87 my $OPT_QryFile; # query file | |
88 my $OPT_DeltaFile; # unfiltered alignment file | |
89 my $OPT_ReportFile = ".report"; # report file | |
90 my $OPT_DeltaFile1 = ".1delta"; # 1-to-1 delta alignment | |
91 my $OPT_DeltaFileM = ".mdelta"; # M-to-M delta alignment | |
92 my $OPT_CoordsFile1 = ".1coords"; # 1-to-1 alignment coords | |
93 my $OPT_CoordsFileM = ".mcoords"; # M-to-M alignment coords | |
94 my $OPT_SnpsFile = ".snps"; # snps output file | |
95 my $OPT_DiffRFile = ".rdiff"; # diffile for R | |
96 my $OPT_DiffQFile = ".qdiff"; # diffile for Q | |
97 my $OPT_UnRefFile = ".unref"; # unaligned ref IDs and lengths | |
98 my $OPT_UnQryFile = ".unqry"; # unaligned qry IDs and lengths | |
99 | |
100 my $TIGR; # TIGR Foundation object | |
101 | |
102 | |
103 sub RunAlignment(); | |
104 sub RunFilter(); | |
105 sub RunCoords(); | |
106 sub RunSNPs(); | |
107 sub RunDiff(); | |
108 sub MakeReport(); | |
109 | |
110 sub FastaSizes($$); | |
111 | |
112 sub FileOpen($$); | |
113 sub FileClose($$); | |
114 | |
115 sub GetOpt(); | |
116 | |
117 | |
118 #--------------------------------------------------------------------- main ---- | |
119 main: | |
120 { | |
121 GetOpt(); | |
122 | |
123 RunAlignment() unless defined($OPT_DeltaFile); | |
124 RunFilter(); | |
125 RunCoords(); | |
126 RunSNPs(); | |
127 RunDiff(); | |
128 MakeReport(); | |
129 | |
130 exit(0); | |
131 } | |
132 | |
133 | |
134 #------------------------------------------------------------- RunAlignment ---- | |
135 # Run nucmer | |
136 sub RunAlignment() | |
137 { | |
138 print STDERR "Building alignments\n"; | |
139 my $cmd = "$NUCMER --maxmatch -p $OPT_Prefix $OPT_RefFile $OPT_QryFile"; | |
140 my $err = "ERROR: Failed to run nucmer, aborting.\n"; | |
141 | |
142 system($cmd) == 0 or die $err; | |
143 $OPT_DeltaFile = $OPT_Prefix . ".delta"; | |
144 } | |
145 | |
146 | |
147 #---------------------------------------------------------------- RunFilter ---- | |
148 # Run delta-filter | |
149 sub RunFilter() | |
150 { | |
151 print STDERR "Filtering alignments\n"; | |
152 my $cmd1 = "$DELTA_FILTER -1 $OPT_DeltaFile > $OPT_DeltaFile1"; | |
153 my $cmd2 = "$DELTA_FILTER -m $OPT_DeltaFile > $OPT_DeltaFileM"; | |
154 my $err = "ERROR: Failed to run delta-filter, aborting.\n"; | |
155 | |
156 system($cmd1) == 0 or die $err; | |
157 system($cmd2) == 0 or die $err; | |
158 } | |
159 | |
160 | |
161 #------------------------------------------------------------------ RunSNPs ---- | |
162 # Run show-snps | |
163 sub RunSNPs() | |
164 { | |
165 print STDERR "Analyzing SNPs\n"; | |
166 my $cmd = "$SHOW_SNPS -rlTHC $OPT_DeltaFile1 > $OPT_SnpsFile"; | |
167 my $err = "ERROR: Failed to run show-snps, aborting.\n"; | |
168 | |
169 system($cmd) == 0 or die $err; | |
170 } | |
171 | |
172 | |
173 #---------------------------------------------------------------- RunCoords ---- | |
174 # Run show-coords | |
175 sub RunCoords() | |
176 { | |
177 print STDERR "Extracting alignment coordinates\n"; | |
178 my $cmd1 = "$SHOW_COORDS -rclTH $OPT_DeltaFile1 > $OPT_CoordsFile1"; | |
179 my $cmd2 = "$SHOW_COORDS -rclTH $OPT_DeltaFileM > $OPT_CoordsFileM"; | |
180 my $err = "ERROR: Failed to run show-coords, aborting.\n"; | |
181 | |
182 system($cmd1) == 0 or die $err; | |
183 system($cmd2) == 0 or die $err; | |
184 } | |
185 | |
186 | |
187 #------------------------------------------------------------------ RunDiff ---- | |
188 # Run show-diff | |
189 sub RunDiff() | |
190 { | |
191 print STDERR "Extracting alignment breakpoints\n"; | |
192 my $cmd1 = "$SHOW_DIFF -rH $OPT_DeltaFileM > $OPT_DiffRFile"; | |
193 my $cmd2 = "$SHOW_DIFF -qH $OPT_DeltaFileM > $OPT_DiffQFile"; | |
194 my $err = "ERROR: Failed to run show-diff, aborting.\n"; | |
195 | |
196 system($cmd1) == 0 or die $err; | |
197 system($cmd2) == 0 or die $err; | |
198 } | |
199 | |
200 | |
201 #--------------------------------------------------------------- MakeReport ---- | |
202 # Output alignment report | |
203 sub MakeReport() | |
204 { | |
205 print STDERR "Generating report file\n"; | |
206 | |
207 my ($fhi, $fho); # filehandle-in and filehandle-out | |
208 my (%refs, %qrys) = ((),()); # R and Q ID->length | |
209 my ($rqnAligns1, $rqnAlignsM) = (0,0); # alignment counter | |
210 my ($rSumLen1, $qSumLen1) = (0,0); # alignment length sum | |
211 my ($rSumLenM, $qSumLenM) = (0,0); # alignment length sum | |
212 my ($rqSumLen1, $rqSumLenM) = (0,0); # combined alignment length sum | |
213 my ($rqSumIdy1, $rqSumIdyM) = (0,0); # weighted alignment identity sum | |
214 my ($qnIns, $rnIns) = (0,0); # insertion count | |
215 my ($qSumIns, $rSumIns) = (0,0); # insertion length sum | |
216 my ($qnTIns, $rnTIns) = (0,0); # tandem insertion count | |
217 my ($qSumTIns, $rSumTIns) = (0,0); # tandem insertion length sum | |
218 my ($qnInv, $rnInv) = (0,0); # inversion count | |
219 my ($qnRel, $rnRel) = (0,0); # relocation count | |
220 my ($qnTrn, $rnTrn) = (0,0); # translocation count | |
221 my ($rnSeqs, $qnSeqs) = (0,0); # sequence count | |
222 my ($rnASeqs, $qnASeqs) = (0,0); # aligned sequence count | |
223 my ($rnBases, $qnBases) = (0,0); # bases count | |
224 my ($rnABases, $qnABases) = (0,0); # aligned bases count | |
225 my ($rnBrk, $qnBrk) = (0,0); # breakpoint count | |
226 my ($rqnSNPs, $rqnIndels) = (0,0); # snp and indel counts | |
227 my ($rqnGSNPs, $rqnGIndels) = (0,0); # good snp and indel counts | |
228 my %rqSNPs = # SNP hash | |
229 ( "."=>{"A"=>0,"C"=>0,"G"=>0,"T"=>0}, | |
230 "A"=>{"."=>0,"C"=>0,"G"=>0,"T"=>0}, | |
231 "C"=>{"."=>0,"A"=>0,"G"=>0,"T"=>0}, | |
232 "G"=>{"."=>0,"A"=>0,"C"=>0,"T"=>0}, | |
233 "T"=>{"."=>0,"A"=>0,"C"=>0,"G"=>0} ); | |
234 my %rqGSNPs = # good SNP hash | |
235 ( "."=>{"A"=>0,"C"=>0,"G"=>0,"T"=>0}, | |
236 "A"=>{"."=>0,"C"=>0,"G"=>0,"T"=>0}, | |
237 "C"=>{"."=>0,"A"=>0,"G"=>0,"T"=>0}, | |
238 "G"=>{"."=>0,"A"=>0,"C"=>0,"T"=>0}, | |
239 "T"=>{"."=>0,"A"=>0,"C"=>0,"G"=>0} ); | |
240 | |
241 my $header; # delta header | |
242 | |
243 #-- Get delta header | |
244 $fhi = FileOpen("<", $OPT_DeltaFile); | |
245 $header .= <$fhi>; | |
246 $header .= <$fhi>; | |
247 $header .= "\n"; | |
248 FileClose($fhi, $OPT_DeltaFile); | |
249 | |
250 #-- Collect all reference and query IDs and lengths | |
251 FastaSizes($OPT_RefFile, \%refs); | |
252 FastaSizes($OPT_QryFile, \%qrys); | |
253 | |
254 #-- Count ref and qry seqs and lengths | |
255 foreach ( values(%refs) ) { | |
256 $rnSeqs++; | |
257 $rnBases += $_; | |
258 } | |
259 foreach ( values(%qrys) ) { | |
260 $qnSeqs++; | |
261 $qnBases += $_; | |
262 } | |
263 | |
264 #-- Count aligned seqs, aligned bases, and breakpoints for each R and Q | |
265 $fhi = FileOpen("<", $OPT_CoordsFileM); | |
266 while (<$fhi>) { | |
267 chomp; | |
268 my @A = split "\t"; | |
269 scalar(@A) == 13 | |
270 or die "ERROR: Unrecognized format $OPT_CoordsFileM, aborting.\n"; | |
271 | |
272 #-- Add to M-to-M alignment counts | |
273 $rqnAlignsM++; | |
274 $rSumLenM += $A[4]; | |
275 $qSumLenM += $A[5]; | |
276 $rqSumIdyM += ($A[6] / 100.0) * ($A[4] + $A[5]); | |
277 $rqSumLenM += ($A[4] + $A[5]); | |
278 | |
279 #-- If new ID, add to sequence and base count | |
280 if ( $refs{$A[11]} > 0 ) { | |
281 $rnASeqs++; | |
282 $rnABases += $refs{$A[11]}; | |
283 $refs{$A[11]} *= -1; # If ref has alignment, length will be -neg | |
284 } | |
285 if ( $qrys{$A[12]} > 0 ) { | |
286 $qnASeqs++; | |
287 $qnABases += $qrys{$A[12]}; | |
288 $qrys{$A[12]} *= -1; # If qry has alignment, length will be -neg | |
289 } | |
290 | |
291 #-- Add to breakpoint counts | |
292 my ($lo, $hi); | |
293 if ( $A[0] < $A[1] ) { $lo = $A[0]; $hi = $A[1]; } | |
294 else { $lo = $A[1]; $hi = $A[0]; } | |
295 $rnBrk++ if ( $lo != 1 ); | |
296 $rnBrk++ if ( $hi != $A[7] ); | |
297 | |
298 if ( $A[2] < $A[3] ) { $lo = $A[2]; $hi = $A[3]; } | |
299 else { $lo = $A[3]; $hi = $A[2]; } | |
300 $qnBrk++ if ( $lo != 1 ); | |
301 $qnBrk++ if ( $hi != $A[8] ); | |
302 } | |
303 FileClose($fhi, $OPT_CoordsFileM); | |
304 | |
305 #-- Calculate average %idy, length, etc. | |
306 $fhi = FileOpen("<", $OPT_CoordsFile1); | |
307 while (<$fhi>) { | |
308 chomp; | |
309 my @A = split "\t"; | |
310 scalar(@A) == 13 | |
311 or die "ERROR: Unrecognized format $OPT_CoordsFile1, aborting.\n"; | |
312 | |
313 #-- Add to 1-to-1 alignment counts | |
314 $rqnAligns1++; | |
315 $rSumLen1 += $A[4]; | |
316 $qSumLen1 += $A[5]; | |
317 $rqSumIdy1 += ($A[6] / 100.0) * ($A[4] + $A[5]); | |
318 $rqSumLen1 += ($A[4] + $A[5]); | |
319 } | |
320 FileClose($fhi, $OPT_CoordsFile1); | |
321 | |
322 #-- If you are reading this, you need to get out more... | |
323 | |
324 #-- Count reference diff features and indels | |
325 $fhi = FileOpen("<", $OPT_DiffRFile); | |
326 while (<$fhi>) { | |
327 chomp; | |
328 my @A = split "\t"; | |
329 defined($A[4]) | |
330 or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n"; | |
331 my $gap = $A[4]; | |
332 my $ins = $gap; | |
333 | |
334 #-- Add to tandem insertion counts | |
335 if ( $A[1] eq "GAP" ) { | |
336 scalar(@A) == 7 | |
337 or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n"; | |
338 $ins = $A[6] if ( $A[6] > $gap ); | |
339 if ( $A[4] <= 0 && $A[5] <= 0 && $A[6] > 0 ) { | |
340 $rnTIns++; | |
341 $rSumTIns += $A[6]; | |
342 } | |
343 } | |
344 | |
345 #-- Remove unaligned sequence from count | |
346 if ( $A[1] ne "DUP" ) { | |
347 $rnABases -= $gap if ( $gap > 0 ); | |
348 } | |
349 | |
350 #-- Add to insertion count | |
351 if ( $ins > 0 ) { | |
352 $rnIns++; | |
353 $rSumIns += $ins; | |
354 } | |
355 | |
356 #-- Add to rearrangement counts | |
357 $rnInv++ if ( $A[1] eq "INV" ); | |
358 $rnRel++ if ( $A[1] eq "JMP" ); | |
359 $rnTrn++ if ( $A[1] eq "SEQ" ); | |
360 } | |
361 FileClose($fhi, $OPT_DiffRFile); | |
362 | |
363 #-- Count query diff features and indels | |
364 $fhi = FileOpen("<", $OPT_DiffQFile); | |
365 while (<$fhi>) { | |
366 chomp; | |
367 my @A = split "\t"; | |
368 defined($A[4]) | |
369 or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n"; | |
370 my $gap = $A[4]; | |
371 my $ins = $gap; | |
372 | |
373 #-- Add to tandem insertion counts | |
374 if ( $A[1] eq "GAP" ) { | |
375 scalar(@A) == 7 | |
376 or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n"; | |
377 $ins = $A[6] if ( $A[6] > $gap ); | |
378 if ( $A[4] <= 0 && $A[5] <= 0 && $A[6] > 0 ) { | |
379 $qnTIns++; | |
380 $qSumTIns += $A[6]; | |
381 } | |
382 } | |
383 | |
384 #-- Remove unaligned sequence from count | |
385 if ( $A[1] ne "DUP" ) { | |
386 $qnABases -= $gap if ( $gap > 0 ); | |
387 } | |
388 | |
389 #-- Add to insertion count | |
390 if ( $ins > 0 ) { | |
391 $qnIns++; | |
392 $qSumIns += $ins; | |
393 } | |
394 | |
395 #-- Add to rearrangement counts | |
396 $qnInv++ if ( $A[1] eq "INV" ); | |
397 $qnRel++ if ( $A[1] eq "JMP" ); | |
398 $qnTrn++ if ( $A[1] eq "SEQ" ); | |
399 } | |
400 FileClose($fhi, $OPT_DiffQFile); | |
401 | |
402 #-- Count SNPs | |
403 $fhi = FileOpen("<", $OPT_SnpsFile); | |
404 while(<$fhi>) { | |
405 chomp; | |
406 my @A = split "\t"; | |
407 scalar(@A) == 12 | |
408 or die "ERROR: Unrecognized format $OPT_SnpsFile, aborting\n"; | |
409 | |
410 my $r = uc($A[1]); | |
411 my $q = uc($A[2]); | |
412 | |
413 #-- Plain SNPs | |
414 $rqSNPs{$r}{$q}++; | |
415 if ( !exists($rqSNPs{$q}{$r}) ) { $rqSNPs{$q}{$r} = 0; } | |
416 if ( $r eq '.' || $q eq '.' ) { $rqnIndels++; } | |
417 else { $rqnSNPs++; } | |
418 | |
419 #-- Good SNPs with sufficient match buffer | |
420 if ( $A[4] >= $SNPBuff ) { | |
421 $rqGSNPs{$r}{$q}++; | |
422 if ( !exists($rqGSNPs{$q}{$r}) ) { $rqGSNPs{$q}{$r} = 0; } | |
423 if ( $r eq '.' || $q eq '.' ) { $rqnGIndels++; } | |
424 else { $rqnGSNPs++; } | |
425 } | |
426 } | |
427 FileClose($fhi, $OPT_SnpsFile); | |
428 | |
429 | |
430 #-- Output report | |
431 $fho = FileOpen(">", $OPT_ReportFile); | |
432 | |
433 print $fho $header; | |
434 printf $fho "%-15s %20s %20s\n", "", "[REF]", "[QRY]"; | |
435 | |
436 print $fho "[Sequences]\n"; | |
437 | |
438 printf $fho "%-15s %20d %20d\n", | |
439 "TotalSeqs", $rnSeqs, $qnSeqs; | |
440 printf $fho "%-15s %20s %20s\n", | |
441 "AlignedSeqs", | |
442 ( sprintf "%10d(%.2f%%)", | |
443 $rnASeqs, ($rnSeqs ? $rnASeqs / $rnSeqs * 100.0 : 0) ), | |
444 ( sprintf "%10d(%.2f%%)", | |
445 $qnASeqs, ($rnSeqs ? $qnASeqs / $qnSeqs * 100.0 : 0) ); | |
446 printf $fho "%-15s %20s %20s\n", | |
447 "UnalignedSeqs", | |
448 ( sprintf "%10d(%.2f%%)", | |
449 $rnSeqs - $rnASeqs, | |
450 ($rnSeqs ? ($rnSeqs - $rnASeqs) / $rnSeqs * 100.0 : 0) ), | |
451 ( sprintf "%10d(%.2f%%)", | |
452 $qnSeqs - $qnASeqs, | |
453 ($qnSeqs ? ($qnSeqs - $qnASeqs) / $qnSeqs * 100.0 : 0) ); | |
454 | |
455 print $fho "\n[Bases]\n"; | |
456 | |
457 printf $fho "%-15s %20d %20d\n", | |
458 "TotalBases", $rnBases, $qnBases; | |
459 printf $fho "%-15s %20s %20s\n", | |
460 "AlignedBases", | |
461 ( sprintf "%10d(%.2f%%)", | |
462 $rnABases, ($rnBases ? $rnABases / $rnBases * 100.0 : 0) ), | |
463 ( sprintf "%10d(%.2f%%)", | |
464 $qnABases, ($qnBases ? $qnABases / $qnBases * 100.0 : 0) ); | |
465 printf $fho "%-15s %20s %20s\n", | |
466 "UnalignedBases", | |
467 ( sprintf "%10d(%.2f%%)", | |
468 $rnBases - $rnABases, | |
469 ($rnBases ? ($rnBases - $rnABases) / $rnBases * 100.0 : 0) ), | |
470 ( sprintf "%10d(%.2f%%)", | |
471 $qnBases - $qnABases, | |
472 ($qnBases ? ($qnBases - $qnABases) / $qnBases * 100.0 : 0) ); | |
473 | |
474 print $fho "\n[Alignments]\n"; | |
475 | |
476 printf $fho "%-15s %20d %20d\n", | |
477 "1-to-1", $rqnAligns1, $rqnAligns1; | |
478 printf $fho "%-15s %20d %20d\n", | |
479 "TotalLength", $rSumLen1, $qSumLen1; | |
480 printf $fho "%-15s %20.2f %20.2f\n", | |
481 "AvgLength", | |
482 ($rqnAligns1 ? $rSumLen1 / $rqnAligns1 : 0), | |
483 ($rqnAligns1 ? $qSumLen1 / $rqnAligns1 : 0); | |
484 printf $fho "%-15s %20.2f %20.2f\n", | |
485 "AvgIdentity", | |
486 ($rqSumLen1 ? $rqSumIdy1 / $rqSumLen1 * 100.0 : 0), | |
487 ($rqSumLen1 ? $rqSumIdy1 / $rqSumLen1 * 100.0 : 0); | |
488 | |
489 print $fho "\n"; | |
490 | |
491 printf $fho "%-15s %20d %20d\n", | |
492 "M-to-M", $rqnAlignsM, $rqnAlignsM; | |
493 printf $fho "%-15s %20d %20d\n", | |
494 "TotalLength", $rSumLenM, $qSumLenM; | |
495 printf $fho "%-15s %20.2f %20.2f\n", | |
496 "AvgLength", | |
497 ($rqnAlignsM ? $rSumLenM / $rqnAlignsM : 0), | |
498 ($rqnAlignsM ? $qSumLenM / $rqnAlignsM : 0); | |
499 printf $fho "%-15s %20.2f %20.2f\n", | |
500 "AvgIdentity", | |
501 ($rqSumLenM ? $rqSumIdyM / $rqSumLenM * 100.0 : 0), | |
502 ($rqSumLenM ? $rqSumIdyM / $rqSumLenM * 100.0 : 0); | |
503 | |
504 print $fho "\n[Feature Estimates]\n"; | |
505 | |
506 printf $fho "%-15s %20d %20d\n", | |
507 "Breakpoints", $rnBrk, $qnBrk; | |
508 printf $fho "%-15s %20d %20d\n", | |
509 "Relocations", $rnRel, $qnRel; | |
510 printf $fho "%-15s %20d %20d\n", | |
511 "Translocations", $rnTrn, $qnTrn; | |
512 printf $fho "%-15s %20d %20d\n", | |
513 "Inversions", $rnInv, $qnInv; | |
514 | |
515 print $fho "\n"; | |
516 | |
517 printf $fho "%-15s %20d %20d\n", | |
518 "Insertions", $rnIns, $qnIns; | |
519 printf $fho "%-15s %20d %20d\n", | |
520 "InsertionSum", $rSumIns, $qSumIns; | |
521 printf $fho "%-15s %20.2f %20.2f\n", | |
522 "InsertionAvg", | |
523 ($rnIns ? $rSumIns / $rnIns : 0), | |
524 ($qnIns ? $qSumIns / $qnIns : 0); | |
525 | |
526 print $fho "\n"; | |
527 | |
528 printf $fho "%-15s %20d %20d\n", | |
529 "TandemIns", $rnTIns, $qnTIns; | |
530 printf $fho "%-15s %20d %20d\n", | |
531 "TandemInsSum", $rSumTIns, $qSumTIns; | |
532 printf $fho "%-15s %20.2f %20.2f\n", | |
533 "TandemInsAvg", | |
534 ($rnTIns ? $rSumTIns / $rnTIns : 0), | |
535 ($qnTIns ? $qSumTIns / $qnTIns : 0); | |
536 | |
537 print $fho "\n[SNPs]\n"; | |
538 | |
539 printf $fho "%-15s %20d %20d\n", | |
540 "TotalSNPs", $rqnSNPs, $rqnSNPs; | |
541 foreach my $r (keys %rqSNPs) { | |
542 foreach my $q (keys %{$rqSNPs{$r}}) { | |
543 if ( $r ne "." && $q ne "." ) { | |
544 printf $fho "%-15s %20s %20s\n", | |
545 "$r$q", | |
546 ( sprintf "%10d(%.2f%%)", | |
547 $rqSNPs{$r}{$q}, | |
548 ($rqnSNPs ? $rqSNPs{$r}{$q} / $rqnSNPs * 100.0 : 0) ), | |
549 ( sprintf "%10d(%.2f%%)", | |
550 $rqSNPs{$q}{$r}, | |
551 ($rqnSNPs ? $rqSNPs{$q}{$r} / $rqnSNPs * 100.0 : 0) ); | |
552 } | |
553 } | |
554 } | |
555 | |
556 print $fho "\n"; | |
557 | |
558 printf $fho "%-15s %20d %20d\n", | |
559 "TotalGSNPs", $rqnGSNPs, $rqnGSNPs; | |
560 foreach my $r (keys %rqGSNPs) { | |
561 foreach my $q (keys %{$rqGSNPs{$r}}) { | |
562 if ( $r ne "." && $q ne "." ) { | |
563 printf $fho "%-15s %20s %20s\n", | |
564 "$r$q", | |
565 ( sprintf "%10d(%.2f%%)", | |
566 $rqGSNPs{$r}{$q}, | |
567 ($rqnGSNPs ? $rqGSNPs{$r}{$q} / $rqnGSNPs * 100.0 : 0) ), | |
568 ( sprintf "%10d(%.2f%%)", | |
569 $rqGSNPs{$q}{$r}, | |
570 ($rqnGSNPs ? $rqGSNPs{$q}{$r} / $rqnGSNPs * 100.0 : 0) ); | |
571 } | |
572 } | |
573 } | |
574 | |
575 print $fho "\n"; | |
576 | |
577 printf $fho "%-15s %20d %20d\n", | |
578 "TotalIndels", $rqnIndels, $rqnIndels; | |
579 foreach my $r (keys %rqSNPs) { | |
580 foreach my $q (keys %{$rqSNPs{$r}}) { | |
581 if ( $q eq "." ) { | |
582 printf $fho "%-15s %20s %20s\n", | |
583 "$r$q", | |
584 ( sprintf "%10d(%.2f%%)", | |
585 $rqSNPs{$r}{$q}, | |
586 ($rqnIndels ? $rqSNPs{$r}{$q} / $rqnIndels * 100.0 : 0) ), | |
587 ( sprintf "%10d(%.2f%%)", | |
588 $rqSNPs{$q}{$r}, | |
589 ($rqnIndels ? $rqSNPs{$q}{$r} / $rqnIndels * 100.0 : 0) ); | |
590 } | |
591 } | |
592 } | |
593 foreach my $r (keys %rqSNPs) { | |
594 foreach my $q (keys %{$rqSNPs{$r}}) { | |
595 if ( $r eq "." ) { | |
596 printf $fho "%-15s %20s %20s\n", | |
597 "$r$q", | |
598 ( sprintf "%10d(%.2f%%)", | |
599 $rqSNPs{$r}{$q}, | |
600 ($rqnIndels ? $rqSNPs{$r}{$q} / $rqnIndels * 100.0 : 0) ), | |
601 ( sprintf "%10d(%.2f%%)", | |
602 $rqSNPs{$q}{$r}, | |
603 ($rqnIndels ? $rqSNPs{$q}{$r} / $rqnIndels * 100.0 : 0) ); | |
604 } | |
605 } | |
606 } | |
607 | |
608 print $fho "\n"; | |
609 | |
610 printf $fho "%-15s %20d %20d\n", | |
611 "TotalGIndels", $rqnGIndels, $rqnGIndels; | |
612 foreach my $r (keys %rqGSNPs) { | |
613 foreach my $q (keys %{$rqGSNPs{$r}}) { | |
614 if ( $q eq "." ) { | |
615 printf $fho "%-15s %20s %20s\n", | |
616 "$r$q", | |
617 ( sprintf "%10d(%.2f%%)", | |
618 $rqGSNPs{$r}{$q}, | |
619 ($rqnGIndels ? $rqGSNPs{$r}{$q} / $rqnGIndels * 100.0 : 0) ), | |
620 ( sprintf "%10d(%.2f%%)", | |
621 $rqGSNPs{$q}{$r}, | |
622 ($rqnGIndels ? $rqGSNPs{$q}{$r} / $rqnGIndels * 100.0 : 0) ); | |
623 } | |
624 } | |
625 } | |
626 foreach my $r (keys %rqGSNPs) { | |
627 foreach my $q (keys %{$rqGSNPs{$r}}) { | |
628 if ( $r eq "." ) { | |
629 printf $fho "%-15s %20s %20s\n", | |
630 "$r$q", | |
631 ( sprintf "%10d(%.2f%%)", | |
632 $rqGSNPs{$r}{$q}, | |
633 ($rqnGIndels ? $rqGSNPs{$r}{$q} / $rqnGIndels * 100.0 : 0) ), | |
634 ( sprintf "%10d(%.2f%%)", | |
635 $rqGSNPs{$q}{$r}, | |
636 ($rqnGIndels ? $rqGSNPs{$q}{$r} / $rqnGIndels * 100.0 : 0) ); | |
637 } | |
638 } | |
639 } | |
640 | |
641 FileClose($fho, $OPT_ReportFile); | |
642 | |
643 | |
644 #-- Output unaligned reference and query IDs, if applicable | |
645 if ( $rnSeqs != $rnASeqs ) { | |
646 $fho = FileOpen(">", $OPT_UnRefFile); | |
647 while ( my ($key, $val) = each(%refs) ) { | |
648 print $fho "$key\tUNI\t1\t$val\t$val\n" unless $val < 0; | |
649 } | |
650 FileClose($fho, $OPT_UnRefFile); | |
651 } | |
652 if ( $qnSeqs != $qnASeqs ) { | |
653 $fho = FileOpen(">", $OPT_UnQryFile); | |
654 while ( my ($key, $val) = each(%qrys) ) { | |
655 print $fho "$key\tUNI\t1\t$val\t$val\n" unless $val < 0; | |
656 } | |
657 FileClose($fho, $OPT_UnQryFile); | |
658 } | |
659 } | |
660 | |
661 | |
662 #--------------------------------------------------------------- FastaSizes ---- | |
663 # Compute lengths for a multi-fasta file and store in hash reference | |
664 sub FastaSizes($$) | |
665 { | |
666 | |
667 my $file = shift; | |
668 my $href = shift; | |
669 my ($tag, $len); | |
670 | |
671 my $fhi = FileOpen("<", $file); | |
672 while (<$fhi>) { | |
673 chomp; | |
674 | |
675 if ( /^>/ ) { | |
676 $href->{$tag} = $len if defined($tag); | |
677 ($tag) = /^>(\S+)/; | |
678 $len = 0; | |
679 } else { | |
680 if ( /\s/ ) { | |
681 die "ERROR: Whitespace found in FastA $file, aborting.\n"; | |
682 } | |
683 $len += length; | |
684 } | |
685 } | |
686 $href->{$tag} = $len if defined($tag); | |
687 FileClose($fhi, $file); | |
688 } | |
689 | |
690 | |
691 #----------------------------------------------------------------- FileOpen ---- | |
692 # Open file, return filehandle, or die | |
693 sub FileOpen($$) | |
694 { | |
695 my ($mode, $name) = @_; | |
696 my $fhi; | |
697 open($fhi, $mode, $name) | |
698 or die "ERROR: Could not open $name, aborting. $!\n"; | |
699 return $fhi; | |
700 } | |
701 | |
702 | |
703 #---------------------------------------------------------------- FileClose ---- | |
704 # Close file, or die | |
705 sub FileClose($$) | |
706 { | |
707 my ($fho, $name) = @_; | |
708 close($fho) or die "ERROR: Could not close $name, aborting. $!\n" | |
709 } | |
710 | |
711 | |
712 #------------------------------------------------------------------- GetOpt ---- | |
713 # Get command options and check file permissions | |
714 sub GetOpt() | |
715 { | |
716 #-- Initialize TIGR::Foundation | |
717 $TIGR = new TIGR::Foundation; | |
718 if ( !defined($TIGR) ) { | |
719 print STDERR "ERROR: TIGR::Foundation could not be initialized"; | |
720 exit(1); | |
721 } | |
722 | |
723 #-- Set help and usage information | |
724 $TIGR->setHelpInfo($HELP_INFO); | |
725 $TIGR->setUsageInfo($USAGE_INFO); | |
726 $TIGR->setVersionInfo($VERSION_INFO); | |
727 $TIGR->addDependInfo(@DEPEND_INFO); | |
728 | |
729 #-- Get options | |
730 my $err = !$TIGR->TIGR_GetOptions | |
731 ( | |
732 "d|delta=s" => \$OPT_DeltaFile, | |
733 "p|prefix=s" => \$OPT_Prefix, | |
734 ); | |
735 | |
736 #-- Check if the parsing was successful | |
737 if ( $err | |
738 || (defined($OPT_DeltaFile) && scalar(@ARGV) != 0) | |
739 || (!defined($OPT_DeltaFile) && scalar(@ARGV) != 2) ) { | |
740 $TIGR->printUsageInfo(); | |
741 print STDERR "Try '$0 -h' for more information.\n"; | |
742 exit(1); | |
743 } | |
744 | |
745 my @errs; | |
746 | |
747 $TIGR->isExecutableFile($DELTA_FILTER) | |
748 or push(@errs, $DELTA_FILTER); | |
749 | |
750 $TIGR->isExecutableFile($SHOW_DIFF) | |
751 or push(@errs, $SHOW_DIFF); | |
752 | |
753 $TIGR->isExecutableFile($SHOW_SNPS) | |
754 or push(@errs, $SHOW_SNPS); | |
755 | |
756 $TIGR->isExecutableFile($SHOW_COORDS) | |
757 or push(@errs, $SHOW_COORDS); | |
758 | |
759 $TIGR->isExecutableFile($NUCMER) | |
760 or push(@errs, $NUCMER); | |
761 | |
762 if ( defined($OPT_DeltaFile) ) { | |
763 $TIGR->isReadableFile($OPT_DeltaFile) | |
764 or push(@errs, $OPT_DeltaFile); | |
765 | |
766 my $fhi = FileOpen("<", $OPT_DeltaFile); | |
767 $_ = <$fhi>; | |
768 FileClose($fhi, $OPT_DeltaFile); | |
769 | |
770 ($OPT_RefFile, $OPT_QryFile) = /^(.+) (.+)$/; | |
771 } | |
772 else { | |
773 $OPT_RefFile = File::Spec->rel2abs($ARGV[0]); | |
774 $OPT_QryFile = File::Spec->rel2abs($ARGV[1]); | |
775 } | |
776 | |
777 $TIGR->isReadableFile($OPT_RefFile) | |
778 or push(@errs, $OPT_RefFile); | |
779 | |
780 $TIGR->isReadableFile($OPT_QryFile) | |
781 or push(@errs, $OPT_QryFile); | |
782 | |
783 $OPT_ReportFile = $OPT_Prefix . $OPT_ReportFile; | |
784 $TIGR->isCreatableFile("$OPT_ReportFile") | |
785 or $TIGR->isWritableFile("$OPT_ReportFile") | |
786 or push(@errs, "$OPT_ReportFile"); | |
787 | |
788 $OPT_DeltaFile1 = $OPT_Prefix . $OPT_DeltaFile1; | |
789 $TIGR->isCreatableFile("$OPT_DeltaFile1") | |
790 or $TIGR->isWritableFile("$OPT_DeltaFile1") | |
791 or push(@errs, "$OPT_DeltaFile1"); | |
792 | |
793 $OPT_DeltaFileM = $OPT_Prefix . $OPT_DeltaFileM; | |
794 $TIGR->isCreatableFile("$OPT_DeltaFileM") | |
795 or $TIGR->isWritableFile("$OPT_DeltaFileM") | |
796 or push(@errs, "$OPT_DeltaFileM"); | |
797 | |
798 $OPT_CoordsFile1 = $OPT_Prefix . $OPT_CoordsFile1; | |
799 $TIGR->isCreatableFile("$OPT_CoordsFile1") | |
800 or $TIGR->isWritableFile("$OPT_CoordsFile1") | |
801 or push(@errs, "$OPT_CoordsFile1"); | |
802 | |
803 $OPT_CoordsFileM = $OPT_Prefix . $OPT_CoordsFileM; | |
804 $TIGR->isCreatableFile("$OPT_CoordsFileM") | |
805 or $TIGR->isWritableFile("$OPT_CoordsFileM") | |
806 or push(@errs, "$OPT_CoordsFileM"); | |
807 | |
808 $OPT_SnpsFile = $OPT_Prefix . $OPT_SnpsFile; | |
809 $TIGR->isCreatableFile("$OPT_SnpsFile") | |
810 or $TIGR->isWritableFile("$OPT_SnpsFile") | |
811 or push(@errs, "$OPT_SnpsFile"); | |
812 | |
813 $OPT_DiffRFile = $OPT_Prefix . $OPT_DiffRFile; | |
814 $TIGR->isCreatableFile("$OPT_DiffRFile") | |
815 or $TIGR->isWritableFile("$OPT_DiffRFile") | |
816 or push(@errs, "$OPT_DiffRFile"); | |
817 | |
818 $OPT_DiffQFile = $OPT_Prefix . $OPT_DiffQFile; | |
819 $TIGR->isCreatableFile("$OPT_DiffQFile") | |
820 or $TIGR->isWritableFile("$OPT_DiffQFile") | |
821 or push(@errs, "$OPT_DiffQFile"); | |
822 | |
823 $OPT_UnRefFile = $OPT_Prefix . $OPT_UnRefFile; | |
824 $TIGR->isCreatableFile("$OPT_UnRefFile") | |
825 or $TIGR->isWritableFile("$OPT_UnRefFile") | |
826 or push(@errs, "$OPT_UnRefFile"); | |
827 | |
828 $OPT_UnQryFile = $OPT_Prefix . $OPT_UnQryFile; | |
829 $TIGR->isCreatableFile("$OPT_UnQryFile") | |
830 or $TIGR->isWritableFile("$OPT_UnQryFile") | |
831 or push(@errs, "$OPT_UnQryFile"); | |
832 | |
833 if ( scalar(@errs) ) { | |
834 print STDERR "ERROR: The following critical files could not be used\n"; | |
835 while ( scalar(@errs) ) { print(STDERR pop(@errs),"\n"); } | |
836 print STDERR "Check your paths and file permissions and try again\n"; | |
837 exit(1); | |
838 } | |
839 } |