jpayne@69: #!/usr/bin/env perl jpayne@69: jpayne@69: #------------------------------------------------------------------------------- jpayne@69: # Programmer: Adam M Phillippy, University of Maryland jpayne@69: # File: dnadiff jpayne@69: # Date: 11 / 29 / 06 jpayne@69: # jpayne@69: # Try 'dnadiff -h' for more information. jpayne@69: # jpayne@69: #------------------------------------------------------------------------------- jpayne@69: jpayne@69: use lib "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; jpayne@69: use Foundation; jpayne@69: use File::Spec::Functions; jpayne@69: use strict; jpayne@69: jpayne@69: my $BIN_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23"; jpayne@69: my $SCRIPT_DIR = "/mnt/c/Users/crash/Documents/BobLiterman/CSP2_Galaxy/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/scripts"; jpayne@69: jpayne@69: my $VERSION_INFO = q~ jpayne@69: DNAdiff version 1.3 jpayne@69: ~; jpayne@69: jpayne@69: my $HELP_INFO = q~ jpayne@69: USAGE: dnadiff [options] jpayne@69: or dnadiff [options] -d jpayne@69: jpayne@69: DESCRIPTION: jpayne@69: Run comparative analysis of two sequence sets using nucmer and its jpayne@69: associated utilities with recommended parameters. See MUMmer jpayne@69: documentation for a more detailed description of the jpayne@69: output. Produces the following output files: jpayne@69: jpayne@69: .report - Summary of alignments, differences and SNPs jpayne@69: .delta - Standard nucmer alignment output jpayne@69: .1delta - 1-to-1 alignment from delta-filter -1 jpayne@69: .mdelta - M-to-M alignment from delta-filter -m jpayne@69: .1coords - 1-to-1 coordinates from show-coords -THrcl .1delta jpayne@69: .mcoords - M-to-M coordinates from show-coords -THrcl .mdelta jpayne@69: .snps - SNPs from show-snps -rlTHC .1delta jpayne@69: .rdiff - Classified ref breakpoints from show-diff -rH .mdelta jpayne@69: .qdiff - Classified qry breakpoints from show-diff -qH .mdelta jpayne@69: .unref - Unaligned reference IDs and lengths (if applicable) jpayne@69: .unqry - Unaligned query IDs and lengths (if applicable) jpayne@69: jpayne@69: MANDATORY: jpayne@69: reference Set the input reference multi-FASTA filename jpayne@69: query Set the input query multi-FASTA filename jpayne@69: or jpayne@69: delta file Unfiltered .delta alignment file from nucmer jpayne@69: jpayne@69: OPTIONS: jpayne@69: -d|delta Provide precomputed delta file for analysis jpayne@69: -h jpayne@69: --help Display help information and exit jpayne@69: -p|prefix Set the prefix of the output files (default "out") jpayne@69: -V jpayne@69: --version Display the version information and exit jpayne@69: ~; jpayne@69: jpayne@69: jpayne@69: my $USAGE_INFO = q~ jpayne@69: USAGE: dnadiff [options] jpayne@69: or dnadiff [options] -d jpayne@69: ~; jpayne@69: jpayne@69: jpayne@69: my @DEPEND_INFO = jpayne@69: ( jpayne@69: "$BIN_DIR/delta-filter", jpayne@69: "$BIN_DIR/show-diff", jpayne@69: "$BIN_DIR/show-snps", jpayne@69: "$BIN_DIR/show-coords", jpayne@69: "$BIN_DIR/nucmer", jpayne@69: "$SCRIPT_DIR/Foundation.pm" jpayne@69: ); jpayne@69: jpayne@69: my $DELTA_FILTER = "$BIN_DIR/delta-filter"; jpayne@69: my $SHOW_DIFF = "$BIN_DIR/show-diff"; jpayne@69: my $SHOW_SNPS = "$BIN_DIR/show-snps"; jpayne@69: my $SHOW_COORDS = "$BIN_DIR/show-coords"; jpayne@69: my $NUCMER = "$BIN_DIR/nucmer"; jpayne@69: jpayne@69: my $SNPBuff = 20; # required buffer around "good" snps jpayne@69: my $OPT_Prefix = "out"; # prefix for all output files jpayne@69: my $OPT_RefFile; # reference file jpayne@69: my $OPT_QryFile; # query file jpayne@69: my $OPT_DeltaFile; # unfiltered alignment file jpayne@69: my $OPT_ReportFile = ".report"; # report file jpayne@69: my $OPT_DeltaFile1 = ".1delta"; # 1-to-1 delta alignment jpayne@69: my $OPT_DeltaFileM = ".mdelta"; # M-to-M delta alignment jpayne@69: my $OPT_CoordsFile1 = ".1coords"; # 1-to-1 alignment coords jpayne@69: my $OPT_CoordsFileM = ".mcoords"; # M-to-M alignment coords jpayne@69: my $OPT_SnpsFile = ".snps"; # snps output file jpayne@69: my $OPT_DiffRFile = ".rdiff"; # diffile for R jpayne@69: my $OPT_DiffQFile = ".qdiff"; # diffile for Q jpayne@69: my $OPT_UnRefFile = ".unref"; # unaligned ref IDs and lengths jpayne@69: my $OPT_UnQryFile = ".unqry"; # unaligned qry IDs and lengths jpayne@69: jpayne@69: my $TIGR; # TIGR Foundation object jpayne@69: jpayne@69: jpayne@69: sub RunAlignment(); jpayne@69: sub RunFilter(); jpayne@69: sub RunCoords(); jpayne@69: sub RunSNPs(); jpayne@69: sub RunDiff(); jpayne@69: sub MakeReport(); jpayne@69: jpayne@69: sub FastaSizes($$); jpayne@69: jpayne@69: sub FileOpen($$); jpayne@69: sub FileClose($$); jpayne@69: jpayne@69: sub GetOpt(); jpayne@69: jpayne@69: jpayne@69: #--------------------------------------------------------------------- main ---- jpayne@69: main: jpayne@69: { jpayne@69: GetOpt(); jpayne@69: jpayne@69: RunAlignment() unless defined($OPT_DeltaFile); jpayne@69: RunFilter(); jpayne@69: RunCoords(); jpayne@69: RunSNPs(); jpayne@69: RunDiff(); jpayne@69: MakeReport(); jpayne@69: jpayne@69: exit(0); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #------------------------------------------------------------- RunAlignment ---- jpayne@69: # Run nucmer jpayne@69: sub RunAlignment() jpayne@69: { jpayne@69: print STDERR "Building alignments\n"; jpayne@69: my $cmd = "$NUCMER --maxmatch -p $OPT_Prefix $OPT_RefFile $OPT_QryFile"; jpayne@69: my $err = "ERROR: Failed to run nucmer, aborting.\n"; jpayne@69: jpayne@69: system($cmd) == 0 or die $err; jpayne@69: $OPT_DeltaFile = $OPT_Prefix . ".delta"; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #---------------------------------------------------------------- RunFilter ---- jpayne@69: # Run delta-filter jpayne@69: sub RunFilter() jpayne@69: { jpayne@69: print STDERR "Filtering alignments\n"; jpayne@69: my $cmd1 = "$DELTA_FILTER -1 $OPT_DeltaFile > $OPT_DeltaFile1"; jpayne@69: my $cmd2 = "$DELTA_FILTER -m $OPT_DeltaFile > $OPT_DeltaFileM"; jpayne@69: my $err = "ERROR: Failed to run delta-filter, aborting.\n"; jpayne@69: jpayne@69: system($cmd1) == 0 or die $err; jpayne@69: system($cmd2) == 0 or die $err; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #------------------------------------------------------------------ RunSNPs ---- jpayne@69: # Run show-snps jpayne@69: sub RunSNPs() jpayne@69: { jpayne@69: print STDERR "Analyzing SNPs\n"; jpayne@69: my $cmd = "$SHOW_SNPS -rlTHC $OPT_DeltaFile1 > $OPT_SnpsFile"; jpayne@69: my $err = "ERROR: Failed to run show-snps, aborting.\n"; jpayne@69: jpayne@69: system($cmd) == 0 or die $err; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #---------------------------------------------------------------- RunCoords ---- jpayne@69: # Run show-coords jpayne@69: sub RunCoords() jpayne@69: { jpayne@69: print STDERR "Extracting alignment coordinates\n"; jpayne@69: my $cmd1 = "$SHOW_COORDS -rclTH $OPT_DeltaFile1 > $OPT_CoordsFile1"; jpayne@69: my $cmd2 = "$SHOW_COORDS -rclTH $OPT_DeltaFileM > $OPT_CoordsFileM"; jpayne@69: my $err = "ERROR: Failed to run show-coords, aborting.\n"; jpayne@69: jpayne@69: system($cmd1) == 0 or die $err; jpayne@69: system($cmd2) == 0 or die $err; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #------------------------------------------------------------------ RunDiff ---- jpayne@69: # Run show-diff jpayne@69: sub RunDiff() jpayne@69: { jpayne@69: print STDERR "Extracting alignment breakpoints\n"; jpayne@69: my $cmd1 = "$SHOW_DIFF -rH $OPT_DeltaFileM > $OPT_DiffRFile"; jpayne@69: my $cmd2 = "$SHOW_DIFF -qH $OPT_DeltaFileM > $OPT_DiffQFile"; jpayne@69: my $err = "ERROR: Failed to run show-diff, aborting.\n"; jpayne@69: jpayne@69: system($cmd1) == 0 or die $err; jpayne@69: system($cmd2) == 0 or die $err; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #--------------------------------------------------------------- MakeReport ---- jpayne@69: # Output alignment report jpayne@69: sub MakeReport() jpayne@69: { jpayne@69: print STDERR "Generating report file\n"; jpayne@69: jpayne@69: my ($fhi, $fho); # filehandle-in and filehandle-out jpayne@69: my (%refs, %qrys) = ((),()); # R and Q ID->length jpayne@69: my ($rqnAligns1, $rqnAlignsM) = (0,0); # alignment counter jpayne@69: my ($rSumLen1, $qSumLen1) = (0,0); # alignment length sum jpayne@69: my ($rSumLenM, $qSumLenM) = (0,0); # alignment length sum jpayne@69: my ($rqSumLen1, $rqSumLenM) = (0,0); # combined alignment length sum jpayne@69: my ($rqSumIdy1, $rqSumIdyM) = (0,0); # weighted alignment identity sum jpayne@69: my ($qnIns, $rnIns) = (0,0); # insertion count jpayne@69: my ($qSumIns, $rSumIns) = (0,0); # insertion length sum jpayne@69: my ($qnTIns, $rnTIns) = (0,0); # tandem insertion count jpayne@69: my ($qSumTIns, $rSumTIns) = (0,0); # tandem insertion length sum jpayne@69: my ($qnInv, $rnInv) = (0,0); # inversion count jpayne@69: my ($qnRel, $rnRel) = (0,0); # relocation count jpayne@69: my ($qnTrn, $rnTrn) = (0,0); # translocation count jpayne@69: my ($rnSeqs, $qnSeqs) = (0,0); # sequence count jpayne@69: my ($rnASeqs, $qnASeqs) = (0,0); # aligned sequence count jpayne@69: my ($rnBases, $qnBases) = (0,0); # bases count jpayne@69: my ($rnABases, $qnABases) = (0,0); # aligned bases count jpayne@69: my ($rnBrk, $qnBrk) = (0,0); # breakpoint count jpayne@69: my ($rqnSNPs, $rqnIndels) = (0,0); # snp and indel counts jpayne@69: my ($rqnGSNPs, $rqnGIndels) = (0,0); # good snp and indel counts jpayne@69: my %rqSNPs = # SNP hash jpayne@69: ( "."=>{"A"=>0,"C"=>0,"G"=>0,"T"=>0}, jpayne@69: "A"=>{"."=>0,"C"=>0,"G"=>0,"T"=>0}, jpayne@69: "C"=>{"."=>0,"A"=>0,"G"=>0,"T"=>0}, jpayne@69: "G"=>{"."=>0,"A"=>0,"C"=>0,"T"=>0}, jpayne@69: "T"=>{"."=>0,"A"=>0,"C"=>0,"G"=>0} ); jpayne@69: my %rqGSNPs = # good SNP hash jpayne@69: ( "."=>{"A"=>0,"C"=>0,"G"=>0,"T"=>0}, jpayne@69: "A"=>{"."=>0,"C"=>0,"G"=>0,"T"=>0}, jpayne@69: "C"=>{"."=>0,"A"=>0,"G"=>0,"T"=>0}, jpayne@69: "G"=>{"."=>0,"A"=>0,"C"=>0,"T"=>0}, jpayne@69: "T"=>{"."=>0,"A"=>0,"C"=>0,"G"=>0} ); jpayne@69: jpayne@69: my $header; # delta header jpayne@69: jpayne@69: #-- Get delta header jpayne@69: $fhi = FileOpen("<", $OPT_DeltaFile); jpayne@69: $header .= <$fhi>; jpayne@69: $header .= <$fhi>; jpayne@69: $header .= "\n"; jpayne@69: FileClose($fhi, $OPT_DeltaFile); jpayne@69: jpayne@69: #-- Collect all reference and query IDs and lengths jpayne@69: FastaSizes($OPT_RefFile, \%refs); jpayne@69: FastaSizes($OPT_QryFile, \%qrys); jpayne@69: jpayne@69: #-- Count ref and qry seqs and lengths jpayne@69: foreach ( values(%refs) ) { jpayne@69: $rnSeqs++; jpayne@69: $rnBases += $_; jpayne@69: } jpayne@69: foreach ( values(%qrys) ) { jpayne@69: $qnSeqs++; jpayne@69: $qnBases += $_; jpayne@69: } jpayne@69: jpayne@69: #-- Count aligned seqs, aligned bases, and breakpoints for each R and Q jpayne@69: $fhi = FileOpen("<", $OPT_CoordsFileM); jpayne@69: while (<$fhi>) { jpayne@69: chomp; jpayne@69: my @A = split "\t"; jpayne@69: scalar(@A) == 13 jpayne@69: or die "ERROR: Unrecognized format $OPT_CoordsFileM, aborting.\n"; jpayne@69: jpayne@69: #-- Add to M-to-M alignment counts jpayne@69: $rqnAlignsM++; jpayne@69: $rSumLenM += $A[4]; jpayne@69: $qSumLenM += $A[5]; jpayne@69: $rqSumIdyM += ($A[6] / 100.0) * ($A[4] + $A[5]); jpayne@69: $rqSumLenM += ($A[4] + $A[5]); jpayne@69: jpayne@69: #-- If new ID, add to sequence and base count jpayne@69: if ( $refs{$A[11]} > 0 ) { jpayne@69: $rnASeqs++; jpayne@69: $rnABases += $refs{$A[11]}; jpayne@69: $refs{$A[11]} *= -1; # If ref has alignment, length will be -neg jpayne@69: } jpayne@69: if ( $qrys{$A[12]} > 0 ) { jpayne@69: $qnASeqs++; jpayne@69: $qnABases += $qrys{$A[12]}; jpayne@69: $qrys{$A[12]} *= -1; # If qry has alignment, length will be -neg jpayne@69: } jpayne@69: jpayne@69: #-- Add to breakpoint counts jpayne@69: my ($lo, $hi); jpayne@69: if ( $A[0] < $A[1] ) { $lo = $A[0]; $hi = $A[1]; } jpayne@69: else { $lo = $A[1]; $hi = $A[0]; } jpayne@69: $rnBrk++ if ( $lo != 1 ); jpayne@69: $rnBrk++ if ( $hi != $A[7] ); jpayne@69: jpayne@69: if ( $A[2] < $A[3] ) { $lo = $A[2]; $hi = $A[3]; } jpayne@69: else { $lo = $A[3]; $hi = $A[2]; } jpayne@69: $qnBrk++ if ( $lo != 1 ); jpayne@69: $qnBrk++ if ( $hi != $A[8] ); jpayne@69: } jpayne@69: FileClose($fhi, $OPT_CoordsFileM); jpayne@69: jpayne@69: #-- Calculate average %idy, length, etc. jpayne@69: $fhi = FileOpen("<", $OPT_CoordsFile1); jpayne@69: while (<$fhi>) { jpayne@69: chomp; jpayne@69: my @A = split "\t"; jpayne@69: scalar(@A) == 13 jpayne@69: or die "ERROR: Unrecognized format $OPT_CoordsFile1, aborting.\n"; jpayne@69: jpayne@69: #-- Add to 1-to-1 alignment counts jpayne@69: $rqnAligns1++; jpayne@69: $rSumLen1 += $A[4]; jpayne@69: $qSumLen1 += $A[5]; jpayne@69: $rqSumIdy1 += ($A[6] / 100.0) * ($A[4] + $A[5]); jpayne@69: $rqSumLen1 += ($A[4] + $A[5]); jpayne@69: } jpayne@69: FileClose($fhi, $OPT_CoordsFile1); jpayne@69: jpayne@69: #-- If you are reading this, you need to get out more... jpayne@69: jpayne@69: #-- Count reference diff features and indels jpayne@69: $fhi = FileOpen("<", $OPT_DiffRFile); jpayne@69: while (<$fhi>) { jpayne@69: chomp; jpayne@69: my @A = split "\t"; jpayne@69: defined($A[4]) jpayne@69: or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n"; jpayne@69: my $gap = $A[4]; jpayne@69: my $ins = $gap; jpayne@69: jpayne@69: #-- Add to tandem insertion counts jpayne@69: if ( $A[1] eq "GAP" ) { jpayne@69: scalar(@A) == 7 jpayne@69: or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n"; jpayne@69: $ins = $A[6] if ( $A[6] > $gap ); jpayne@69: if ( $A[4] <= 0 && $A[5] <= 0 && $A[6] > 0 ) { jpayne@69: $rnTIns++; jpayne@69: $rSumTIns += $A[6]; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: #-- Remove unaligned sequence from count jpayne@69: if ( $A[1] ne "DUP" ) { jpayne@69: $rnABases -= $gap if ( $gap > 0 ); jpayne@69: } jpayne@69: jpayne@69: #-- Add to insertion count jpayne@69: if ( $ins > 0 ) { jpayne@69: $rnIns++; jpayne@69: $rSumIns += $ins; jpayne@69: } jpayne@69: jpayne@69: #-- Add to rearrangement counts jpayne@69: $rnInv++ if ( $A[1] eq "INV" ); jpayne@69: $rnRel++ if ( $A[1] eq "JMP" ); jpayne@69: $rnTrn++ if ( $A[1] eq "SEQ" ); jpayne@69: } jpayne@69: FileClose($fhi, $OPT_DiffRFile); jpayne@69: jpayne@69: #-- Count query diff features and indels jpayne@69: $fhi = FileOpen("<", $OPT_DiffQFile); jpayne@69: while (<$fhi>) { jpayne@69: chomp; jpayne@69: my @A = split "\t"; jpayne@69: defined($A[4]) jpayne@69: or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n"; jpayne@69: my $gap = $A[4]; jpayne@69: my $ins = $gap; jpayne@69: jpayne@69: #-- Add to tandem insertion counts jpayne@69: if ( $A[1] eq "GAP" ) { jpayne@69: scalar(@A) == 7 jpayne@69: or die "ERROR: Unrecognized format $OPT_DiffRFile, aborting.\n"; jpayne@69: $ins = $A[6] if ( $A[6] > $gap ); jpayne@69: if ( $A[4] <= 0 && $A[5] <= 0 && $A[6] > 0 ) { jpayne@69: $qnTIns++; jpayne@69: $qSumTIns += $A[6]; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: #-- Remove unaligned sequence from count jpayne@69: if ( $A[1] ne "DUP" ) { jpayne@69: $qnABases -= $gap if ( $gap > 0 ); jpayne@69: } jpayne@69: jpayne@69: #-- Add to insertion count jpayne@69: if ( $ins > 0 ) { jpayne@69: $qnIns++; jpayne@69: $qSumIns += $ins; jpayne@69: } jpayne@69: jpayne@69: #-- Add to rearrangement counts jpayne@69: $qnInv++ if ( $A[1] eq "INV" ); jpayne@69: $qnRel++ if ( $A[1] eq "JMP" ); jpayne@69: $qnTrn++ if ( $A[1] eq "SEQ" ); jpayne@69: } jpayne@69: FileClose($fhi, $OPT_DiffQFile); jpayne@69: jpayne@69: #-- Count SNPs jpayne@69: $fhi = FileOpen("<", $OPT_SnpsFile); jpayne@69: while(<$fhi>) { jpayne@69: chomp; jpayne@69: my @A = split "\t"; jpayne@69: scalar(@A) == 12 jpayne@69: or die "ERROR: Unrecognized format $OPT_SnpsFile, aborting\n"; jpayne@69: jpayne@69: my $r = uc($A[1]); jpayne@69: my $q = uc($A[2]); jpayne@69: jpayne@69: #-- Plain SNPs jpayne@69: $rqSNPs{$r}{$q}++; jpayne@69: if ( !exists($rqSNPs{$q}{$r}) ) { $rqSNPs{$q}{$r} = 0; } jpayne@69: if ( $r eq '.' || $q eq '.' ) { $rqnIndels++; } jpayne@69: else { $rqnSNPs++; } jpayne@69: jpayne@69: #-- Good SNPs with sufficient match buffer jpayne@69: if ( $A[4] >= $SNPBuff ) { jpayne@69: $rqGSNPs{$r}{$q}++; jpayne@69: if ( !exists($rqGSNPs{$q}{$r}) ) { $rqGSNPs{$q}{$r} = 0; } jpayne@69: if ( $r eq '.' || $q eq '.' ) { $rqnGIndels++; } jpayne@69: else { $rqnGSNPs++; } jpayne@69: } jpayne@69: } jpayne@69: FileClose($fhi, $OPT_SnpsFile); jpayne@69: jpayne@69: jpayne@69: #-- Output report jpayne@69: $fho = FileOpen(">", $OPT_ReportFile); jpayne@69: jpayne@69: print $fho $header; jpayne@69: printf $fho "%-15s %20s %20s\n", "", "[REF]", "[QRY]"; jpayne@69: jpayne@69: print $fho "[Sequences]\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TotalSeqs", $rnSeqs, $qnSeqs; jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "AlignedSeqs", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rnASeqs, ($rnSeqs ? $rnASeqs / $rnSeqs * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $qnASeqs, ($rnSeqs ? $qnASeqs / $qnSeqs * 100.0 : 0) ); jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "UnalignedSeqs", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rnSeqs - $rnASeqs, jpayne@69: ($rnSeqs ? ($rnSeqs - $rnASeqs) / $rnSeqs * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $qnSeqs - $qnASeqs, jpayne@69: ($qnSeqs ? ($qnSeqs - $qnASeqs) / $qnSeqs * 100.0 : 0) ); jpayne@69: jpayne@69: print $fho "\n[Bases]\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TotalBases", $rnBases, $qnBases; jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "AlignedBases", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rnABases, ($rnBases ? $rnABases / $rnBases * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $qnABases, ($qnBases ? $qnABases / $qnBases * 100.0 : 0) ); jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "UnalignedBases", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rnBases - $rnABases, jpayne@69: ($rnBases ? ($rnBases - $rnABases) / $rnBases * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $qnBases - $qnABases, jpayne@69: ($qnBases ? ($qnBases - $qnABases) / $qnBases * 100.0 : 0) ); jpayne@69: jpayne@69: print $fho "\n[Alignments]\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "1-to-1", $rqnAligns1, $rqnAligns1; jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TotalLength", $rSumLen1, $qSumLen1; jpayne@69: printf $fho "%-15s %20.2f %20.2f\n", jpayne@69: "AvgLength", jpayne@69: ($rqnAligns1 ? $rSumLen1 / $rqnAligns1 : 0), jpayne@69: ($rqnAligns1 ? $qSumLen1 / $rqnAligns1 : 0); jpayne@69: printf $fho "%-15s %20.2f %20.2f\n", jpayne@69: "AvgIdentity", jpayne@69: ($rqSumLen1 ? $rqSumIdy1 / $rqSumLen1 * 100.0 : 0), jpayne@69: ($rqSumLen1 ? $rqSumIdy1 / $rqSumLen1 * 100.0 : 0); jpayne@69: jpayne@69: print $fho "\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "M-to-M", $rqnAlignsM, $rqnAlignsM; jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TotalLength", $rSumLenM, $qSumLenM; jpayne@69: printf $fho "%-15s %20.2f %20.2f\n", jpayne@69: "AvgLength", jpayne@69: ($rqnAlignsM ? $rSumLenM / $rqnAlignsM : 0), jpayne@69: ($rqnAlignsM ? $qSumLenM / $rqnAlignsM : 0); jpayne@69: printf $fho "%-15s %20.2f %20.2f\n", jpayne@69: "AvgIdentity", jpayne@69: ($rqSumLenM ? $rqSumIdyM / $rqSumLenM * 100.0 : 0), jpayne@69: ($rqSumLenM ? $rqSumIdyM / $rqSumLenM * 100.0 : 0); jpayne@69: jpayne@69: print $fho "\n[Feature Estimates]\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "Breakpoints", $rnBrk, $qnBrk; jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "Relocations", $rnRel, $qnRel; jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "Translocations", $rnTrn, $qnTrn; jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "Inversions", $rnInv, $qnInv; jpayne@69: jpayne@69: print $fho "\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "Insertions", $rnIns, $qnIns; jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "InsertionSum", $rSumIns, $qSumIns; jpayne@69: printf $fho "%-15s %20.2f %20.2f\n", jpayne@69: "InsertionAvg", jpayne@69: ($rnIns ? $rSumIns / $rnIns : 0), jpayne@69: ($qnIns ? $qSumIns / $qnIns : 0); jpayne@69: jpayne@69: print $fho "\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TandemIns", $rnTIns, $qnTIns; jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TandemInsSum", $rSumTIns, $qSumTIns; jpayne@69: printf $fho "%-15s %20.2f %20.2f\n", jpayne@69: "TandemInsAvg", jpayne@69: ($rnTIns ? $rSumTIns / $rnTIns : 0), jpayne@69: ($qnTIns ? $qSumTIns / $qnTIns : 0); jpayne@69: jpayne@69: print $fho "\n[SNPs]\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TotalSNPs", $rqnSNPs, $rqnSNPs; jpayne@69: foreach my $r (keys %rqSNPs) { jpayne@69: foreach my $q (keys %{$rqSNPs{$r}}) { jpayne@69: if ( $r ne "." && $q ne "." ) { jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "$r$q", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqSNPs{$r}{$q}, jpayne@69: ($rqnSNPs ? $rqSNPs{$r}{$q} / $rqnSNPs * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqSNPs{$q}{$r}, jpayne@69: ($rqnSNPs ? $rqSNPs{$q}{$r} / $rqnSNPs * 100.0 : 0) ); jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: print $fho "\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TotalGSNPs", $rqnGSNPs, $rqnGSNPs; jpayne@69: foreach my $r (keys %rqGSNPs) { jpayne@69: foreach my $q (keys %{$rqGSNPs{$r}}) { jpayne@69: if ( $r ne "." && $q ne "." ) { jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "$r$q", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqGSNPs{$r}{$q}, jpayne@69: ($rqnGSNPs ? $rqGSNPs{$r}{$q} / $rqnGSNPs * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqGSNPs{$q}{$r}, jpayne@69: ($rqnGSNPs ? $rqGSNPs{$q}{$r} / $rqnGSNPs * 100.0 : 0) ); jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: print $fho "\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TotalIndels", $rqnIndels, $rqnIndels; jpayne@69: foreach my $r (keys %rqSNPs) { jpayne@69: foreach my $q (keys %{$rqSNPs{$r}}) { jpayne@69: if ( $q eq "." ) { jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "$r$q", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqSNPs{$r}{$q}, jpayne@69: ($rqnIndels ? $rqSNPs{$r}{$q} / $rqnIndels * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqSNPs{$q}{$r}, jpayne@69: ($rqnIndels ? $rqSNPs{$q}{$r} / $rqnIndels * 100.0 : 0) ); jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: foreach my $r (keys %rqSNPs) { jpayne@69: foreach my $q (keys %{$rqSNPs{$r}}) { jpayne@69: if ( $r eq "." ) { jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "$r$q", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqSNPs{$r}{$q}, jpayne@69: ($rqnIndels ? $rqSNPs{$r}{$q} / $rqnIndels * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqSNPs{$q}{$r}, jpayne@69: ($rqnIndels ? $rqSNPs{$q}{$r} / $rqnIndels * 100.0 : 0) ); jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: print $fho "\n"; jpayne@69: jpayne@69: printf $fho "%-15s %20d %20d\n", jpayne@69: "TotalGIndels", $rqnGIndels, $rqnGIndels; jpayne@69: foreach my $r (keys %rqGSNPs) { jpayne@69: foreach my $q (keys %{$rqGSNPs{$r}}) { jpayne@69: if ( $q eq "." ) { jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "$r$q", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqGSNPs{$r}{$q}, jpayne@69: ($rqnGIndels ? $rqGSNPs{$r}{$q} / $rqnGIndels * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqGSNPs{$q}{$r}, jpayne@69: ($rqnGIndels ? $rqGSNPs{$q}{$r} / $rqnGIndels * 100.0 : 0) ); jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: foreach my $r (keys %rqGSNPs) { jpayne@69: foreach my $q (keys %{$rqGSNPs{$r}}) { jpayne@69: if ( $r eq "." ) { jpayne@69: printf $fho "%-15s %20s %20s\n", jpayne@69: "$r$q", jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqGSNPs{$r}{$q}, jpayne@69: ($rqnGIndels ? $rqGSNPs{$r}{$q} / $rqnGIndels * 100.0 : 0) ), jpayne@69: ( sprintf "%10d(%.2f%%)", jpayne@69: $rqGSNPs{$q}{$r}, jpayne@69: ($rqnGIndels ? $rqGSNPs{$q}{$r} / $rqnGIndels * 100.0 : 0) ); jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: FileClose($fho, $OPT_ReportFile); jpayne@69: jpayne@69: jpayne@69: #-- Output unaligned reference and query IDs, if applicable jpayne@69: if ( $rnSeqs != $rnASeqs ) { jpayne@69: $fho = FileOpen(">", $OPT_UnRefFile); jpayne@69: while ( my ($key, $val) = each(%refs) ) { jpayne@69: print $fho "$key\tUNI\t1\t$val\t$val\n" unless $val < 0; jpayne@69: } jpayne@69: FileClose($fho, $OPT_UnRefFile); jpayne@69: } jpayne@69: if ( $qnSeqs != $qnASeqs ) { jpayne@69: $fho = FileOpen(">", $OPT_UnQryFile); jpayne@69: while ( my ($key, $val) = each(%qrys) ) { jpayne@69: print $fho "$key\tUNI\t1\t$val\t$val\n" unless $val < 0; jpayne@69: } jpayne@69: FileClose($fho, $OPT_UnQryFile); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #--------------------------------------------------------------- FastaSizes ---- jpayne@69: # Compute lengths for a multi-fasta file and store in hash reference jpayne@69: sub FastaSizes($$) jpayne@69: { jpayne@69: jpayne@69: my $file = shift; jpayne@69: my $href = shift; jpayne@69: my ($tag, $len); jpayne@69: jpayne@69: my $fhi = FileOpen("<", $file); jpayne@69: while (<$fhi>) { jpayne@69: chomp; jpayne@69: jpayne@69: if ( /^>/ ) { jpayne@69: $href->{$tag} = $len if defined($tag); jpayne@69: ($tag) = /^>(\S+)/; jpayne@69: $len = 0; jpayne@69: } else { jpayne@69: if ( /\s/ ) { jpayne@69: die "ERROR: Whitespace found in FastA $file, aborting.\n"; jpayne@69: } jpayne@69: $len += length; jpayne@69: } jpayne@69: } jpayne@69: $href->{$tag} = $len if defined($tag); jpayne@69: FileClose($fhi, $file); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #----------------------------------------------------------------- FileOpen ---- jpayne@69: # Open file, return filehandle, or die jpayne@69: sub FileOpen($$) jpayne@69: { jpayne@69: my ($mode, $name) = @_; jpayne@69: my $fhi; jpayne@69: open($fhi, $mode, $name) jpayne@69: or die "ERROR: Could not open $name, aborting. $!\n"; jpayne@69: return $fhi; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #---------------------------------------------------------------- FileClose ---- jpayne@69: # Close file, or die jpayne@69: sub FileClose($$) jpayne@69: { jpayne@69: my ($fho, $name) = @_; jpayne@69: close($fho) or die "ERROR: Could not close $name, aborting. $!\n" jpayne@69: } jpayne@69: jpayne@69: jpayne@69: #------------------------------------------------------------------- GetOpt ---- jpayne@69: # Get command options and check file permissions jpayne@69: sub GetOpt() jpayne@69: { jpayne@69: #-- Initialize TIGR::Foundation jpayne@69: $TIGR = new TIGR::Foundation; jpayne@69: if ( !defined($TIGR) ) { jpayne@69: print STDERR "ERROR: TIGR::Foundation could not be initialized"; jpayne@69: exit(1); jpayne@69: } jpayne@69: jpayne@69: #-- Set help and usage information jpayne@69: $TIGR->setHelpInfo($HELP_INFO); jpayne@69: $TIGR->setUsageInfo($USAGE_INFO); jpayne@69: $TIGR->setVersionInfo($VERSION_INFO); jpayne@69: $TIGR->addDependInfo(@DEPEND_INFO); jpayne@69: jpayne@69: #-- Get options jpayne@69: my $err = !$TIGR->TIGR_GetOptions jpayne@69: ( jpayne@69: "d|delta=s" => \$OPT_DeltaFile, jpayne@69: "p|prefix=s" => \$OPT_Prefix, jpayne@69: ); jpayne@69: jpayne@69: #-- Check if the parsing was successful jpayne@69: if ( $err jpayne@69: || (defined($OPT_DeltaFile) && scalar(@ARGV) != 0) jpayne@69: || (!defined($OPT_DeltaFile) && scalar(@ARGV) != 2) ) { jpayne@69: $TIGR->printUsageInfo(); jpayne@69: print STDERR "Try '$0 -h' for more information.\n"; jpayne@69: exit(1); jpayne@69: } jpayne@69: jpayne@69: my @errs; jpayne@69: jpayne@69: $TIGR->isExecutableFile($DELTA_FILTER) jpayne@69: or push(@errs, $DELTA_FILTER); jpayne@69: jpayne@69: $TIGR->isExecutableFile($SHOW_DIFF) jpayne@69: or push(@errs, $SHOW_DIFF); jpayne@69: jpayne@69: $TIGR->isExecutableFile($SHOW_SNPS) jpayne@69: or push(@errs, $SHOW_SNPS); jpayne@69: jpayne@69: $TIGR->isExecutableFile($SHOW_COORDS) jpayne@69: or push(@errs, $SHOW_COORDS); jpayne@69: jpayne@69: $TIGR->isExecutableFile($NUCMER) jpayne@69: or push(@errs, $NUCMER); jpayne@69: jpayne@69: if ( defined($OPT_DeltaFile) ) { jpayne@69: $TIGR->isReadableFile($OPT_DeltaFile) jpayne@69: or push(@errs, $OPT_DeltaFile); jpayne@69: jpayne@69: my $fhi = FileOpen("<", $OPT_DeltaFile); jpayne@69: $_ = <$fhi>; jpayne@69: FileClose($fhi, $OPT_DeltaFile); jpayne@69: jpayne@69: ($OPT_RefFile, $OPT_QryFile) = /^(.+) (.+)$/; jpayne@69: } jpayne@69: else { jpayne@69: $OPT_RefFile = File::Spec->rel2abs($ARGV[0]); jpayne@69: $OPT_QryFile = File::Spec->rel2abs($ARGV[1]); jpayne@69: } jpayne@69: jpayne@69: $TIGR->isReadableFile($OPT_RefFile) jpayne@69: or push(@errs, $OPT_RefFile); jpayne@69: jpayne@69: $TIGR->isReadableFile($OPT_QryFile) jpayne@69: or push(@errs, $OPT_QryFile); jpayne@69: jpayne@69: $OPT_ReportFile = $OPT_Prefix . $OPT_ReportFile; jpayne@69: $TIGR->isCreatableFile("$OPT_ReportFile") jpayne@69: or $TIGR->isWritableFile("$OPT_ReportFile") jpayne@69: or push(@errs, "$OPT_ReportFile"); jpayne@69: jpayne@69: $OPT_DeltaFile1 = $OPT_Prefix . $OPT_DeltaFile1; jpayne@69: $TIGR->isCreatableFile("$OPT_DeltaFile1") jpayne@69: or $TIGR->isWritableFile("$OPT_DeltaFile1") jpayne@69: or push(@errs, "$OPT_DeltaFile1"); jpayne@69: jpayne@69: $OPT_DeltaFileM = $OPT_Prefix . $OPT_DeltaFileM; jpayne@69: $TIGR->isCreatableFile("$OPT_DeltaFileM") jpayne@69: or $TIGR->isWritableFile("$OPT_DeltaFileM") jpayne@69: or push(@errs, "$OPT_DeltaFileM"); jpayne@69: jpayne@69: $OPT_CoordsFile1 = $OPT_Prefix . $OPT_CoordsFile1; jpayne@69: $TIGR->isCreatableFile("$OPT_CoordsFile1") jpayne@69: or $TIGR->isWritableFile("$OPT_CoordsFile1") jpayne@69: or push(@errs, "$OPT_CoordsFile1"); jpayne@69: jpayne@69: $OPT_CoordsFileM = $OPT_Prefix . $OPT_CoordsFileM; jpayne@69: $TIGR->isCreatableFile("$OPT_CoordsFileM") jpayne@69: or $TIGR->isWritableFile("$OPT_CoordsFileM") jpayne@69: or push(@errs, "$OPT_CoordsFileM"); jpayne@69: jpayne@69: $OPT_SnpsFile = $OPT_Prefix . $OPT_SnpsFile; jpayne@69: $TIGR->isCreatableFile("$OPT_SnpsFile") jpayne@69: or $TIGR->isWritableFile("$OPT_SnpsFile") jpayne@69: or push(@errs, "$OPT_SnpsFile"); jpayne@69: jpayne@69: $OPT_DiffRFile = $OPT_Prefix . $OPT_DiffRFile; jpayne@69: $TIGR->isCreatableFile("$OPT_DiffRFile") jpayne@69: or $TIGR->isWritableFile("$OPT_DiffRFile") jpayne@69: or push(@errs, "$OPT_DiffRFile"); jpayne@69: jpayne@69: $OPT_DiffQFile = $OPT_Prefix . $OPT_DiffQFile; jpayne@69: $TIGR->isCreatableFile("$OPT_DiffQFile") jpayne@69: or $TIGR->isWritableFile("$OPT_DiffQFile") jpayne@69: or push(@errs, "$OPT_DiffQFile"); jpayne@69: jpayne@69: $OPT_UnRefFile = $OPT_Prefix . $OPT_UnRefFile; jpayne@69: $TIGR->isCreatableFile("$OPT_UnRefFile") jpayne@69: or $TIGR->isWritableFile("$OPT_UnRefFile") jpayne@69: or push(@errs, "$OPT_UnRefFile"); jpayne@69: jpayne@69: $OPT_UnQryFile = $OPT_Prefix . $OPT_UnQryFile; jpayne@69: $TIGR->isCreatableFile("$OPT_UnQryFile") jpayne@69: or $TIGR->isWritableFile("$OPT_UnQryFile") jpayne@69: or push(@errs, "$OPT_UnQryFile"); jpayne@69: jpayne@69: if ( scalar(@errs) ) { jpayne@69: print STDERR "ERROR: The following critical files could not be used\n"; jpayne@69: while ( scalar(@errs) ) { print(STDERR pop(@errs),"\n"); } jpayne@69: print STDERR "Check your paths and file permissions and try again\n"; jpayne@69: exit(1); jpayne@69: } jpayne@69: }