comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/dnadiff @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 #!/usr/bin/env perl
2
3 #-------------------------------------------------------------------------------
4 # Programmer: Adam M Phillippy, University of Maryland
5 # File: dnadiff
6 # Date: 11 / 29 / 06
7 #
8 # Try 'dnadiff -h' for more information.
9 #
10 #-------------------------------------------------------------------------------
11
12 use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
13 use Foundation;
14 use File::Spec::Functions;
15 use strict;
16
17 my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23";
18 my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts";
19
20 my $VERSION_INFO = q~
21 DNAdiff version 1.3
22 ~;
23
24 my $HELP_INFO = q~
25 USAGE: dnadiff [options] <reference> <query>
26 or dnadiff [options] -d <delta file>
27
28 DESCRIPTION:
29 Run comparative analysis of two sequence sets using nucmer and its
30 associated utilities with recommended parameters. See MUMmer
31 documentation for a more detailed description of the
32 output. Produces the following output files:
33
34 .report - Summary of alignments, differences and SNPs
35 .delta - Standard nucmer alignment output
36 .1delta - 1-to-1 alignment from delta-filter -1
37 .mdelta - M-to-M alignment from delta-filter -m
38 .1coords - 1-to-1 coordinates from show-coords -THrcl .1delta
39 .mcoords - M-to-M coordinates from show-coords -THrcl .mdelta
40 .snps - SNPs from show-snps -rlTHC .1delta
41 .rdiff - Classified ref breakpoints from show-diff -rH .mdelta
42 .qdiff - Classified qry breakpoints from show-diff -qH .mdelta
43 .unref - Unaligned reference IDs and lengths (if applicable)
44 .unqry - Unaligned query IDs and lengths (if applicable)
45
46 MANDATORY:
47 reference Set the input reference multi-FASTA filename
48 query Set the input query multi-FASTA filename
49 or
50 delta file Unfiltered .delta alignment file from nucmer
51
52 OPTIONS:
53 -d|delta Provide precomputed delta file for analysis
54 -h
55 --help Display help information and exit
56 -p|prefix Set the prefix of the output files (default "out")
57 -V
58 --version Display the version information and exit
59 ~;
60
61
62 my $USAGE_INFO = q~
63 USAGE: dnadiff [options] <reference> <query>
64 or dnadiff [options] -d <delta file>
65 ~;
66
67
68 my @DEPEND_INFO =
69 (
70 "$BIN_DIR/delta-filter",
71 "$BIN_DIR/show-diff",
72 "$BIN_DIR/show-snps",
73 "$BIN_DIR/show-coords",
74 "$BIN_DIR/nucmer",
75 "$SCRIPT_DIR/Foundation.pm"
76 );
77
78 my $DELTA_FILTER = "$BIN_DIR/delta-filter";
79 my $SHOW_DIFF = "$BIN_DIR/show-diff";
80 my $SHOW_SNPS = "$BIN_DIR/show-snps";
81 my $SHOW_COORDS = "$BIN_DIR/show-coords";
82 my $NUCMER = "$BIN_DIR/nucmer";
83
84 my $SNPBuff = 20; # required buffer around "good" snps
85 my $OPT_Prefix = "out"; # prefix for all output files
86 my $OPT_RefFile; # reference file
87 my $OPT_QryFile; # query file
88 my $OPT_DeltaFile; # unfiltered alignment file
89 my $OPT_ReportFile = ".report"; # report file
90 my $OPT_DeltaFile1 = ".1delta"; # 1-to-1 delta alignment
91 my $OPT_DeltaFileM = ".mdelta"; # M-to-M delta alignment
92 my $OPT_CoordsFile1 = ".1coords"; # 1-to-1 alignment coords
93 my $OPT_CoordsFileM = ".mcoords"; # M-to-M alignment coords
94 my $OPT_SnpsFile = ".snps"; # snps output file
95 my $OPT_DiffRFile = ".rdiff"; # diffile for R
96 my $OPT_DiffQFile = ".qdiff"; # diffile for Q
97 my $OPT_UnRefFile = ".unref"; # unaligned ref IDs and lengths
98 my $OPT_UnQryFile = ".unqry"; # unaligned qry IDs and lengths
99
100 my $TIGR; # TIGR Foundation object
101
102
103 sub RunAlignment();
104 sub RunFilter();
105 sub RunCoords();
106 sub RunSNPs();
107 sub RunDiff();
108 sub MakeReport();
109
110 sub FastaSizes($$);
111
112 sub FileOpen($$);
113 sub FileClose($$);
114
115 sub GetOpt();
116
117
118 #--------------------------------------------------------------------- main ----
119 main:
120 {
121 GetOpt();
122
123 RunAlignment() unless defined($OPT_DeltaFile);
124 RunFilter();
125 RunCoords();
126 RunSNPs();
127 RunDiff();
128 MakeReport();
129
130 exit(0);
131 }
132
133
134 #------------------------------------------------------------- RunAlignment ----
135 # Run nucmer
136 sub RunAlignment()
137 {
138 print STDERR "Building alignments\n";
139 my $cmd = "$NUCMER --maxmatch -p $OPT_Prefix $OPT_RefFile $OPT_QryFile";
140 my $err = "ERROR: Failed to run nucmer, aborting.\n";
141
142 system($cmd) == 0 or die $err;
143 $OPT_DeltaFile = $OPT_Prefix . ".delta";
144 }
145
146
147 #---------------------------------------------------------------- RunFilter ----
148 # Run delta-filter
149 sub RunFilter()
150 {
151 print STDERR "Filtering alignments\n";
152 my $cmd1 = "$DELTA_FILTER -1 $OPT_DeltaFile > $OPT_DeltaFile1";
153 my $cmd2 = "$DELTA_FILTER -m $OPT_DeltaFile > $OPT_DeltaFileM";
154 my $err = "ERROR: Failed to run delta-filter, aborting.\n";
155
156 system($cmd1) == 0 or die $err;
157 system($cmd2) == 0 or die $err;
158 }
159
160
161 #------------------------------------------------------------------ RunSNPs ----
162 # Run show-snps
163 sub RunSNPs()
164 {
165 print STDERR "Analyzing SNPs\n";
166 my $cmd = "$SHOW_SNPS -rlTHC $OPT_DeltaFile1 > $OPT_SnpsFile";
167 my $err = "ERROR: Failed to run show-snps, aborting.\n";
168
169 system($cmd) == 0 or die $err;
170 }
171
172
173 #---------------------------------------------------------------- RunCoords ----
174 # Run show-coords
175 sub RunCoords()
176 {
177 print STDERR "Extracting alignment coordinates\n";
178 my $cmd1 = "$SHOW_COORDS -rclTH $OPT_DeltaFile1 > $OPT_CoordsFile1";
179 my $cmd2 = "$SHOW_COORDS -rclTH $OPT_DeltaFileM > $OPT_CoordsFileM";
180 my $err = "ERROR: Failed to run show-coords, aborting.\n";
181
182 system($cmd1) == 0 or die $err;
183 system($cmd2) == 0 or die $err;
184 }
185
186
187 #------------------------------------------------------------------ RunDiff ----
188 # Run show-diff
189 sub RunDiff()
190 {
191 print STDERR "Extracting alignment breakpoints\n";
192 my $cmd1 = "$SHOW_DIFF -rH $OPT_DeltaFileM > $OPT_DiffRFile";
193 my $cmd2 = "$SHOW_DIFF -qH $OPT_DeltaFileM > $OPT_DiffQFile";
194 my $err = "ERROR: Failed to run show-diff, aborting.\n";
195
196 system($cmd1) == 0 or die $err;
197 system($cmd2) == 0 or die $err;
198 }
199
200
201 #--------------------------------------------------------------- MakeReport ----
202 # Output alignment report
203 sub MakeReport()
204 {
205 print STDERR "Generating report file\n";
206
207 my ($fhi, $fho); # filehandle-in and filehandle-out
208 my (%refs, %qrys) = ((),()); # R and Q ID->length
209 my ($rqnAligns1, $rqnAlignsM) = (0,0); # alignment counter
210 my ($rSumLen1, $qSumLen1) = (0,0); # alignment length sum
211 my ($rSumLenM, $qSumLenM) = (0,0); # alignment length sum
212 my ($rqSumLen1, $rqSumLenM) = (0,0); # combined alignment length sum
213 my ($rqSumIdy1, $rqSumIdyM) = (0,0); # weighted alignment identity sum
214 my ($qnIns, $rnIns) = (0,0); # insertion count
215 my ($qSumIns, $rSumIns) = (0,0); # insertion length sum
216 my ($qnTIns, $rnTIns) = (0,0); # tandem insertion count
217 my ($qSumTIns, $rSumTIns) = (0,0); # tandem insertion length sum
218 my ($qnInv, $rnInv) = (0,0); # inversion count
219 my ($qnRel, $rnRel) = (0,0); # relocation count
220 my ($qnTrn, $rnTrn) = (0,0); # translocation count
221 my ($rnSeqs, $qnSeqs) = (0,0); # sequence count
222 my ($rnASeqs, $qnASeqs) = (0,0); # aligned sequence count
223 my ($rnBases, $qnBases) = (0,0); # bases count
224 my ($rnABases, $qnABases) = (0,0); # aligned bases count
225 my ($rnBrk, $qnBrk) = (0,0); # breakpoint count
226 my ($rqnSNPs, $rqnIndels) = (0,0); # snp and indel counts
227 my ($rqnGSNPs, $rqnGIndels) = (0,0); # good snp and indel counts
228 my %rqSNPs = # SNP hash
229 ( "."=>{"A"=>0,"C"=>0,"G"=>0,"T"=>0},
230 "A"=>{"."=>0,"C"=>0,"G"=>0,"T"=>0},
231 "C"=>{"."=>0,"A"=>0,"G"=>0,"T"=>0},
232 "G"=>{"."=>0,"A"=>0,"C"=>0,"T"=>0},
233 "T"=>{"."=>0,"A"=>0,"C"=>0,"G"=>0} );
234 my %rqGSNPs = # good SNP hash
235 ( "."=>{"A"=>0,"C"=>0,"G"=>0,"T"=>0},
236 "A"=>{"."=>0,"C"=>0,"G"=>0,"T"=>0},
237 "C"=>{"."=>0,"A"=>0,"G"=>0,"T"=>0},
238 "G"=>{"."=>0,"A"=>0,"C"=>0,"T"=>0},
239 "T"=>{"."=>0,"A"=>0,"C"=>0,"G"=>0} );
240
241 my $header; # delta header
242
243 #-- Get delta header
244 $fhi = FileOpen("<", $OPT_DeltaFile);
245 $header .= <$fhi>;
246 $header .= <$fhi>;
247 $header .= "\n";
248 FileClose($fhi, $OPT_DeltaFile);
249
250 #-- Collect all reference and query IDs and lengths
251 FastaSizes($OPT_RefFile, \%refs);
252 FastaSizes($OPT_QryFile, \%qrys);
253
254 #-- Count ref and qry seqs and lengths
255 foreach ( values(%refs) ) {
256 $rnSeqs++;
257 $rnBases += $_;
258 }
259 foreach ( values(%qrys) ) {
260 $qnSeqs++;
261 $qnBases += $_;
262 }
263
264 #-- Count aligned seqs, aligned bases, and breakpoints for each R and Q
265 $fhi = FileOpen("<", $OPT_CoordsFileM);
266 while (<$fhi>) {
267 chomp;
268 my @A = split "\t";
269 scalar(@A) == 13
270 or die "ERROR: Unrecognized format $OPT_CoordsFileM, aborting.\n";
271
272 #-- Add to M-to-M alignment counts
273 $rqnAlignsM++;
274 $rSumLenM += $A[4];
275 $qSumLenM += $A[5];
276 $rqSumIdyM += ($A[6] / 100.0) * ($A[4] + $A[5]);
277 $rqSumLenM += ($A[4] + $A[5]);
278
279 #-- If new ID, add to sequence and base count
280 if ( $refs{$A[11]} > 0 ) {
281 $rnASeqs++;
282 $rnABases += $refs{$A[11]};
283 $refs{$A[11]} *= -1; # If ref has alignment, length will be -neg
284 }
285 if ( $qrys{$A[12]} > 0 ) {
286 $qnASeqs++;
287 $qnABases += $qrys{$A[12]};
288 $qrys{$A[12]} *= -1; # If qry has alignment, length will be -neg
289 }
290
291 #-- Add to breakpoint counts
292 my ($lo, $hi);
293 if ( $A[0] < $A[1] ) { $lo = $A[0]; $hi = $A[1]; }
294 else { $lo = $A[1]; $hi = $A[0]; }
295 $rnBrk++ if ( $lo != 1 );
296 $rnBrk++ if ( $hi != $A[7] );
297
298 if ( $A[2] < $A[3] ) { $lo = $A[2]; $hi = $A[3]; }
299 else { $lo = $A[3]; $hi = $A[2]; }
300 $qnBrk++ if ( $lo != 1 );
301 $qnBrk++ if ( $hi != $A[8] );
302 }
303 FileClose($fhi, $OPT_CoordsFileM);
304
305 #-- Calculate average %idy, length, etc.
306 $fhi = FileOpen("<", $OPT_CoordsFile1);
307 while (<$fhi>) {
308 chomp;
309 my @A = split "\t";
310 scalar(@A) == 13
311 or die "ERROR: Unrecognized format $OPT_CoordsFile1, aborting.\n";
312
313 #-- Add to 1-to-1 alignment counts
314 $rqnAligns1++;
315 $rSumLen1 += $A[4];
316 $qSumLen1 += $A[5];
317 $rqSumIdy1 += ($A[6] / 100.0) * ($A[4] + $A[5]);
318 $rqSumLen1 += ($A[4] + $A[5]);
319 }
320 FileClose($fhi, $OPT_CoordsFile1);
321
322 #-- If you are reading this, you need to get out more...
323
324 #-- Count reference diff features and indels
325 $fhi = FileOpen("<", $OPT_DiffRFile);
326 while (<$fhi>) {
327 chomp;
328 my @A = split "\t";
329 defined($A[4])
330 or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n";
331 my $gap = $A[4];
332 my $ins = $gap;
333
334 #-- Add to tandem insertion counts
335 if ( $A[1] eq "GAP" ) {
336 scalar(@A) == 7
337 or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n";
338 $ins = $A[6] if ( $A[6] > $gap );
339 if ( $A[4] <= 0 && $A[5] <= 0 && $A[6] > 0 ) {
340 $rnTIns++;
341 $rSumTIns += $A[6];
342 }
343 }
344
345 #-- Remove unaligned sequence from count
346 if ( $A[1] ne "DUP" ) {
347 $rnABases -= $gap if ( $gap > 0 );
348 }
349
350 #-- Add to insertion count
351 if ( $ins > 0 ) {
352 $rnIns++;
353 $rSumIns += $ins;
354 }
355
356 #-- Add to rearrangement counts
357 $rnInv++ if ( $A[1] eq "INV" );
358 $rnRel++ if ( $A[1] eq "JMP" );
359 $rnTrn++ if ( $A[1] eq "SEQ" );
360 }
361 FileClose($fhi, $OPT_DiffRFile);
362
363 #-- Count query diff features and indels
364 $fhi = FileOpen("<", $OPT_DiffQFile);
365 while (<$fhi>) {
366 chomp;
367 my @A = split "\t";
368 defined($A[4])
369 or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n";
370 my $gap = $A[4];
371 my $ins = $gap;
372
373 #-- Add to tandem insertion counts
374 if ( $A[1] eq "GAP" ) {
375 scalar(@A) == 7
376 or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n";
377 $ins = $A[6] if ( $A[6] > $gap );
378 if ( $A[4] <= 0 && $A[5] <= 0 && $A[6] > 0 ) {
379 $qnTIns++;
380 $qSumTIns += $A[6];
381 }
382 }
383
384 #-- Remove unaligned sequence from count
385 if ( $A[1] ne "DUP" ) {
386 $qnABases -= $gap if ( $gap > 0 );
387 }
388
389 #-- Add to insertion count
390 if ( $ins > 0 ) {
391 $qnIns++;
392 $qSumIns += $ins;
393 }
394
395 #-- Add to rearrangement counts
396 $qnInv++ if ( $A[1] eq "INV" );
397 $qnRel++ if ( $A[1] eq "JMP" );
398 $qnTrn++ if ( $A[1] eq "SEQ" );
399 }
400 FileClose($fhi, $OPT_DiffQFile);
401
402 #-- Count SNPs
403 $fhi = FileOpen("<", $OPT_SnpsFile);
404 while(<$fhi>) {
405 chomp;
406 my @A = split "\t";
407 scalar(@A) == 12
408 or die "ERROR: Unrecognized format $OPT_SnpsFile, aborting\n";
409
410 my $r = uc($A[1]);
411 my $q = uc($A[2]);
412
413 #-- Plain SNPs
414 $rqSNPs{$r}{$q}++;
415 if ( !exists($rqSNPs{$q}{$r}) ) { $rqSNPs{$q}{$r} = 0; }
416 if ( $r eq '.' || $q eq '.' ) { $rqnIndels++; }
417 else { $rqnSNPs++; }
418
419 #-- Good SNPs with sufficient match buffer
420 if ( $A[4] >= $SNPBuff ) {
421 $rqGSNPs{$r}{$q}++;
422 if ( !exists($rqGSNPs{$q}{$r}) ) { $rqGSNPs{$q}{$r} = 0; }
423 if ( $r eq '.' || $q eq '.' ) { $rqnGIndels++; }
424 else { $rqnGSNPs++; }
425 }
426 }
427 FileClose($fhi, $OPT_SnpsFile);
428
429
430 #-- Output report
431 $fho = FileOpen(">", $OPT_ReportFile);
432
433 print $fho $header;
434 printf $fho "%-15s %20s %20s\n", "", "[REF]", "[QRY]";
435
436 print $fho "[Sequences]\n";
437
438 printf $fho "%-15s %20d %20d\n",
439 "TotalSeqs", $rnSeqs, $qnSeqs;
440 printf $fho "%-15s %20s %20s\n",
441 "AlignedSeqs",
442 ( sprintf "%10d(%.2f%%)",
443 $rnASeqs, ($rnSeqs ? $rnASeqs / $rnSeqs * 100.0 : 0) ),
444 ( sprintf "%10d(%.2f%%)",
445 $qnASeqs, ($rnSeqs ? $qnASeqs / $qnSeqs * 100.0 : 0) );
446 printf $fho "%-15s %20s %20s\n",
447 "UnalignedSeqs",
448 ( sprintf "%10d(%.2f%%)",
449 $rnSeqs - $rnASeqs,
450 ($rnSeqs ? ($rnSeqs - $rnASeqs) / $rnSeqs * 100.0 : 0) ),
451 ( sprintf "%10d(%.2f%%)",
452 $qnSeqs - $qnASeqs,
453 ($qnSeqs ? ($qnSeqs - $qnASeqs) / $qnSeqs * 100.0 : 0) );
454
455 print $fho "\n[Bases]\n";
456
457 printf $fho "%-15s %20d %20d\n",
458 "TotalBases", $rnBases, $qnBases;
459 printf $fho "%-15s %20s %20s\n",
460 "AlignedBases",
461 ( sprintf "%10d(%.2f%%)",
462 $rnABases, ($rnBases ? $rnABases / $rnBases * 100.0 : 0) ),
463 ( sprintf "%10d(%.2f%%)",
464 $qnABases, ($qnBases ? $qnABases / $qnBases * 100.0 : 0) );
465 printf $fho "%-15s %20s %20s\n",
466 "UnalignedBases",
467 ( sprintf "%10d(%.2f%%)",
468 $rnBases - $rnABases,
469 ($rnBases ? ($rnBases - $rnABases) / $rnBases * 100.0 : 0) ),
470 ( sprintf "%10d(%.2f%%)",
471 $qnBases - $qnABases,
472 ($qnBases ? ($qnBases - $qnABases) / $qnBases * 100.0 : 0) );
473
474 print $fho "\n[Alignments]\n";
475
476 printf $fho "%-15s %20d %20d\n",
477 "1-to-1", $rqnAligns1, $rqnAligns1;
478 printf $fho "%-15s %20d %20d\n",
479 "TotalLength", $rSumLen1, $qSumLen1;
480 printf $fho "%-15s %20.2f %20.2f\n",
481 "AvgLength",
482 ($rqnAligns1 ? $rSumLen1 / $rqnAligns1 : 0),
483 ($rqnAligns1 ? $qSumLen1 / $rqnAligns1 : 0);
484 printf $fho "%-15s %20.2f %20.2f\n",
485 "AvgIdentity",
486 ($rqSumLen1 ? $rqSumIdy1 / $rqSumLen1 * 100.0 : 0),
487 ($rqSumLen1 ? $rqSumIdy1 / $rqSumLen1 * 100.0 : 0);
488
489 print $fho "\n";
490
491 printf $fho "%-15s %20d %20d\n",
492 "M-to-M", $rqnAlignsM, $rqnAlignsM;
493 printf $fho "%-15s %20d %20d\n",
494 "TotalLength", $rSumLenM, $qSumLenM;
495 printf $fho "%-15s %20.2f %20.2f\n",
496 "AvgLength",
497 ($rqnAlignsM ? $rSumLenM / $rqnAlignsM : 0),
498 ($rqnAlignsM ? $qSumLenM / $rqnAlignsM : 0);
499 printf $fho "%-15s %20.2f %20.2f\n",
500 "AvgIdentity",
501 ($rqSumLenM ? $rqSumIdyM / $rqSumLenM * 100.0 : 0),
502 ($rqSumLenM ? $rqSumIdyM / $rqSumLenM * 100.0 : 0);
503
504 print $fho "\n[Feature Estimates]\n";
505
506 printf $fho "%-15s %20d %20d\n",
507 "Breakpoints", $rnBrk, $qnBrk;
508 printf $fho "%-15s %20d %20d\n",
509 "Relocations", $rnRel, $qnRel;
510 printf $fho "%-15s %20d %20d\n",
511 "Translocations", $rnTrn, $qnTrn;
512 printf $fho "%-15s %20d %20d\n",
513 "Inversions", $rnInv, $qnInv;
514
515 print $fho "\n";
516
517 printf $fho "%-15s %20d %20d\n",
518 "Insertions", $rnIns, $qnIns;
519 printf $fho "%-15s %20d %20d\n",
520 "InsertionSum", $rSumIns, $qSumIns;
521 printf $fho "%-15s %20.2f %20.2f\n",
522 "InsertionAvg",
523 ($rnIns ? $rSumIns / $rnIns : 0),
524 ($qnIns ? $qSumIns / $qnIns : 0);
525
526 print $fho "\n";
527
528 printf $fho "%-15s %20d %20d\n",
529 "TandemIns", $rnTIns, $qnTIns;
530 printf $fho "%-15s %20d %20d\n",
531 "TandemInsSum", $rSumTIns, $qSumTIns;
532 printf $fho "%-15s %20.2f %20.2f\n",
533 "TandemInsAvg",
534 ($rnTIns ? $rSumTIns / $rnTIns : 0),
535 ($qnTIns ? $qSumTIns / $qnTIns : 0);
536
537 print $fho "\n[SNPs]\n";
538
539 printf $fho "%-15s %20d %20d\n",
540 "TotalSNPs", $rqnSNPs, $rqnSNPs;
541 foreach my $r (keys %rqSNPs) {
542 foreach my $q (keys %{$rqSNPs{$r}}) {
543 if ( $r ne "." && $q ne "." ) {
544 printf $fho "%-15s %20s %20s\n",
545 "$r$q",
546 ( sprintf "%10d(%.2f%%)",
547 $rqSNPs{$r}{$q},
548 ($rqnSNPs ? $rqSNPs{$r}{$q} / $rqnSNPs * 100.0 : 0) ),
549 ( sprintf "%10d(%.2f%%)",
550 $rqSNPs{$q}{$r},
551 ($rqnSNPs ? $rqSNPs{$q}{$r} / $rqnSNPs * 100.0 : 0) );
552 }
553 }
554 }
555
556 print $fho "\n";
557
558 printf $fho "%-15s %20d %20d\n",
559 "TotalGSNPs", $rqnGSNPs, $rqnGSNPs;
560 foreach my $r (keys %rqGSNPs) {
561 foreach my $q (keys %{$rqGSNPs{$r}}) {
562 if ( $r ne "." && $q ne "." ) {
563 printf $fho "%-15s %20s %20s\n",
564 "$r$q",
565 ( sprintf "%10d(%.2f%%)",
566 $rqGSNPs{$r}{$q},
567 ($rqnGSNPs ? $rqGSNPs{$r}{$q} / $rqnGSNPs * 100.0 : 0) ),
568 ( sprintf "%10d(%.2f%%)",
569 $rqGSNPs{$q}{$r},
570 ($rqnGSNPs ? $rqGSNPs{$q}{$r} / $rqnGSNPs * 100.0 : 0) );
571 }
572 }
573 }
574
575 print $fho "\n";
576
577 printf $fho "%-15s %20d %20d\n",
578 "TotalIndels", $rqnIndels, $rqnIndels;
579 foreach my $r (keys %rqSNPs) {
580 foreach my $q (keys %{$rqSNPs{$r}}) {
581 if ( $q eq "." ) {
582 printf $fho "%-15s %20s %20s\n",
583 "$r$q",
584 ( sprintf "%10d(%.2f%%)",
585 $rqSNPs{$r}{$q},
586 ($rqnIndels ? $rqSNPs{$r}{$q} / $rqnIndels * 100.0 : 0) ),
587 ( sprintf "%10d(%.2f%%)",
588 $rqSNPs{$q}{$r},
589 ($rqnIndels ? $rqSNPs{$q}{$r} / $rqnIndels * 100.0 : 0) );
590 }
591 }
592 }
593 foreach my $r (keys %rqSNPs) {
594 foreach my $q (keys %{$rqSNPs{$r}}) {
595 if ( $r eq "." ) {
596 printf $fho "%-15s %20s %20s\n",
597 "$r$q",
598 ( sprintf "%10d(%.2f%%)",
599 $rqSNPs{$r}{$q},
600 ($rqnIndels ? $rqSNPs{$r}{$q} / $rqnIndels * 100.0 : 0) ),
601 ( sprintf "%10d(%.2f%%)",
602 $rqSNPs{$q}{$r},
603 ($rqnIndels ? $rqSNPs{$q}{$r} / $rqnIndels * 100.0 : 0) );
604 }
605 }
606 }
607
608 print $fho "\n";
609
610 printf $fho "%-15s %20d %20d\n",
611 "TotalGIndels", $rqnGIndels, $rqnGIndels;
612 foreach my $r (keys %rqGSNPs) {
613 foreach my $q (keys %{$rqGSNPs{$r}}) {
614 if ( $q eq "." ) {
615 printf $fho "%-15s %20s %20s\n",
616 "$r$q",
617 ( sprintf "%10d(%.2f%%)",
618 $rqGSNPs{$r}{$q},
619 ($rqnGIndels ? $rqGSNPs{$r}{$q} / $rqnGIndels * 100.0 : 0) ),
620 ( sprintf "%10d(%.2f%%)",
621 $rqGSNPs{$q}{$r},
622 ($rqnGIndels ? $rqGSNPs{$q}{$r} / $rqnGIndels * 100.0 : 0) );
623 }
624 }
625 }
626 foreach my $r (keys %rqGSNPs) {
627 foreach my $q (keys %{$rqGSNPs{$r}}) {
628 if ( $r eq "." ) {
629 printf $fho "%-15s %20s %20s\n",
630 "$r$q",
631 ( sprintf "%10d(%.2f%%)",
632 $rqGSNPs{$r}{$q},
633 ($rqnGIndels ? $rqGSNPs{$r}{$q} / $rqnGIndels * 100.0 : 0) ),
634 ( sprintf "%10d(%.2f%%)",
635 $rqGSNPs{$q}{$r},
636 ($rqnGIndels ? $rqGSNPs{$q}{$r} / $rqnGIndels * 100.0 : 0) );
637 }
638 }
639 }
640
641 FileClose($fho, $OPT_ReportFile);
642
643
644 #-- Output unaligned reference and query IDs, if applicable
645 if ( $rnSeqs != $rnASeqs ) {
646 $fho = FileOpen(">", $OPT_UnRefFile);
647 while ( my ($key, $val) = each(%refs) ) {
648 print $fho "$key\tUNI\t1\t$val\t$val\n" unless $val < 0;
649 }
650 FileClose($fho, $OPT_UnRefFile);
651 }
652 if ( $qnSeqs != $qnASeqs ) {
653 $fho = FileOpen(">", $OPT_UnQryFile);
654 while ( my ($key, $val) = each(%qrys) ) {
655 print $fho "$key\tUNI\t1\t$val\t$val\n" unless $val < 0;
656 }
657 FileClose($fho, $OPT_UnQryFile);
658 }
659 }
660
661
662 #--------------------------------------------------------------- FastaSizes ----
663 # Compute lengths for a multi-fasta file and store in hash reference
664 sub FastaSizes($$)
665 {
666
667 my $file = shift;
668 my $href = shift;
669 my ($tag, $len);
670
671 my $fhi = FileOpen("<", $file);
672 while (<$fhi>) {
673 chomp;
674
675 if ( /^>/ ) {
676 $href->{$tag} = $len if defined($tag);
677 ($tag) = /^>(\S+)/;
678 $len = 0;
679 } else {
680 if ( /\s/ ) {
681 die "ERROR: Whitespace found in FastA $file, aborting.\n";
682 }
683 $len += length;
684 }
685 }
686 $href->{$tag} = $len if defined($tag);
687 FileClose($fhi, $file);
688 }
689
690
691 #----------------------------------------------------------------- FileOpen ----
692 # Open file, return filehandle, or die
693 sub FileOpen($$)
694 {
695 my ($mode, $name) = @_;
696 my $fhi;
697 open($fhi, $mode, $name)
698 or die "ERROR: Could not open $name, aborting. $!\n";
699 return $fhi;
700 }
701
702
703 #---------------------------------------------------------------- FileClose ----
704 # Close file, or die
705 sub FileClose($$)
706 {
707 my ($fho, $name) = @_;
708 close($fho) or die "ERROR: Could not close $name, aborting. $!\n"
709 }
710
711
712 #------------------------------------------------------------------- GetOpt ----
713 # Get command options and check file permissions
714 sub GetOpt()
715 {
716 #-- Initialize TIGR::Foundation
717 $TIGR = new TIGR::Foundation;
718 if ( !defined($TIGR) ) {
719 print STDERR "ERROR: TIGR::Foundation could not be initialized";
720 exit(1);
721 }
722
723 #-- Set help and usage information
724 $TIGR->setHelpInfo($HELP_INFO);
725 $TIGR->setUsageInfo($USAGE_INFO);
726 $TIGR->setVersionInfo($VERSION_INFO);
727 $TIGR->addDependInfo(@DEPEND_INFO);
728
729 #-- Get options
730 my $err = !$TIGR->TIGR_GetOptions
731 (
732 "d|delta=s" => \$OPT_DeltaFile,
733 "p|prefix=s" => \$OPT_Prefix,
734 );
735
736 #-- Check if the parsing was successful
737 if ( $err
738 || (defined($OPT_DeltaFile) && scalar(@ARGV) != 0)
739 || (!defined($OPT_DeltaFile) && scalar(@ARGV) != 2) ) {
740 $TIGR->printUsageInfo();
741 print STDERR "Try '$0 -h' for more information.\n";
742 exit(1);
743 }
744
745 my @errs;
746
747 $TIGR->isExecutableFile($DELTA_FILTER)
748 or push(@errs, $DELTA_FILTER);
749
750 $TIGR->isExecutableFile($SHOW_DIFF)
751 or push(@errs, $SHOW_DIFF);
752
753 $TIGR->isExecutableFile($SHOW_SNPS)
754 or push(@errs, $SHOW_SNPS);
755
756 $TIGR->isExecutableFile($SHOW_COORDS)
757 or push(@errs, $SHOW_COORDS);
758
759 $TIGR->isExecutableFile($NUCMER)
760 or push(@errs, $NUCMER);
761
762 if ( defined($OPT_DeltaFile) ) {
763 $TIGR->isReadableFile($OPT_DeltaFile)
764 or push(@errs, $OPT_DeltaFile);
765
766 my $fhi = FileOpen("<", $OPT_DeltaFile);
767 $_ = <$fhi>;
768 FileClose($fhi, $OPT_DeltaFile);
769
770 ($OPT_RefFile, $OPT_QryFile) = /^(.+) (.+)$/;
771 }
772 else {
773 $OPT_RefFile = File::Spec->rel2abs($ARGV[0]);
774 $OPT_QryFile = File::Spec->rel2abs($ARGV[1]);
775 }
776
777 $TIGR->isReadableFile($OPT_RefFile)
778 or push(@errs, $OPT_RefFile);
779
780 $TIGR->isReadableFile($OPT_QryFile)
781 or push(@errs, $OPT_QryFile);
782
783 $OPT_ReportFile = $OPT_Prefix . $OPT_ReportFile;
784 $TIGR->isCreatableFile("$OPT_ReportFile")
785 or $TIGR->isWritableFile("$OPT_ReportFile")
786 or push(@errs, "$OPT_ReportFile");
787
788 $OPT_DeltaFile1 = $OPT_Prefix . $OPT_DeltaFile1;
789 $TIGR->isCreatableFile("$OPT_DeltaFile1")
790 or $TIGR->isWritableFile("$OPT_DeltaFile1")
791 or push(@errs, "$OPT_DeltaFile1");
792
793 $OPT_DeltaFileM = $OPT_Prefix . $OPT_DeltaFileM;
794 $TIGR->isCreatableFile("$OPT_DeltaFileM")
795 or $TIGR->isWritableFile("$OPT_DeltaFileM")
796 or push(@errs, "$OPT_DeltaFileM");
797
798 $OPT_CoordsFile1 = $OPT_Prefix . $OPT_CoordsFile1;
799 $TIGR->isCreatableFile("$OPT_CoordsFile1")
800 or $TIGR->isWritableFile("$OPT_CoordsFile1")
801 or push(@errs, "$OPT_CoordsFile1");
802
803 $OPT_CoordsFileM = $OPT_Prefix . $OPT_CoordsFileM;
804 $TIGR->isCreatableFile("$OPT_CoordsFileM")
805 or $TIGR->isWritableFile("$OPT_CoordsFileM")
806 or push(@errs, "$OPT_CoordsFileM");
807
808 $OPT_SnpsFile = $OPT_Prefix . $OPT_SnpsFile;
809 $TIGR->isCreatableFile("$OPT_SnpsFile")
810 or $TIGR->isWritableFile("$OPT_SnpsFile")
811 or push(@errs, "$OPT_SnpsFile");
812
813 $OPT_DiffRFile = $OPT_Prefix . $OPT_DiffRFile;
814 $TIGR->isCreatableFile("$OPT_DiffRFile")
815 or $TIGR->isWritableFile("$OPT_DiffRFile")
816 or push(@errs, "$OPT_DiffRFile");
817
818 $OPT_DiffQFile = $OPT_Prefix . $OPT_DiffQFile;
819 $TIGR->isCreatableFile("$OPT_DiffQFile")
820 or $TIGR->isWritableFile("$OPT_DiffQFile")
821 or push(@errs, "$OPT_DiffQFile");
822
823 $OPT_UnRefFile = $OPT_Prefix . $OPT_UnRefFile;
824 $TIGR->isCreatableFile("$OPT_UnRefFile")
825 or $TIGR->isWritableFile("$OPT_UnRefFile")
826 or push(@errs, "$OPT_UnRefFile");
827
828 $OPT_UnQryFile = $OPT_Prefix . $OPT_UnQryFile;
829 $TIGR->isCreatableFile("$OPT_UnQryFile")
830 or $TIGR->isWritableFile("$OPT_UnQryFile")
831 or push(@errs, "$OPT_UnQryFile");
832
833 if ( scalar(@errs) ) {
834 print STDERR "ERROR: The following critical files could not be used\n";
835 while ( scalar(@errs) ) { print(STDERR pop(@errs),"\n"); }
836 print STDERR "Check your paths and file permissions and try again\n";
837 exit(1);
838 }
839 }