cstrittmatter@0: #!/usr/bin/env perl cstrittmatter@0: ## A wrapper script to call spades.py and collect its output cstrittmatter@0: use strict; cstrittmatter@0: use warnings; cstrittmatter@0: use File::Temp qw/ tempfile tempdir /; cstrittmatter@0: use File::Copy; cstrittmatter@0: use Getopt::Long; cstrittmatter@0: cstrittmatter@0: # Parse arguments cstrittmatter@0: my ($out_contigs_file, cstrittmatter@0: $out_contigs_stats, cstrittmatter@0: $out_scaffolds_file, cstrittmatter@0: $out_scaffolds_stats, cstrittmatter@0: $out_log_file, cstrittmatter@0: $new_name, cstrittmatter@0: @sysargs) = @ARGV; cstrittmatter@0: cstrittmatter@0: ## GetOptions not compatible with parsing the rest of the arguments in an array. cstrittmatter@0: ## Keeping the not-so-nice parse-in-one-go method, without named arguments. cstrittmatter@0: # GetOptions( cstrittmatter@0: # 'contigs-file=s' => \$out_contigs_file, cstrittmatter@0: # 'contigs-stats=s' => \$out_contigs_stats, cstrittmatter@0: # 'scaffolds-file=s' => \$out_scaffolds_file, cstrittmatter@0: # 'scaffolds-stats=s' => \$out_scaffolds_stats, cstrittmatter@0: # 'out_log_file=s' => \$out_log_file, cstrittmatter@0: # ); cstrittmatter@0: cstrittmatter@0: # my @sysargs = @ARGV; cstrittmatter@0: cstrittmatter@0: # Create temporary folder to store files, delete after use cstrittmatter@0: #my $output_dir = tempdir( CLEANUP => 0 ); cstrittmatter@0: my $output_dir = 'output_dir'; cstrittmatter@0: # Link "dat" files as fastq, otherwise spades complains about file format cstrittmatter@0: cstrittmatter@0: # Create log handle cstrittmatter@0: open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n"; cstrittmatter@0: cstrittmatter@0: # Run program cstrittmatter@0: # To do: record time cstrittmatter@0: runSpades(@sysargs); cstrittmatter@0: collectOutput($new_name); cstrittmatter@0: extractCoverageLength($out_contigs_file, $out_contigs_stats); cstrittmatter@0: extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats); cstrittmatter@0: print $log "Done\n"; cstrittmatter@0: close $log; cstrittmatter@0: exit 0; cstrittmatter@0: cstrittmatter@0: # Run spades cstrittmatter@0: sub runSpades { cstrittmatter@0: my $cmd = join(" ", @_) . " -o $output_dir"; cstrittmatter@0: my $return_code = system($cmd); cstrittmatter@0: if ($return_code) { cstrittmatter@0: print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; cstrittmatter@0: die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; cstrittmatter@0: } cstrittmatter@0: return 0; cstrittmatter@0: } cstrittmatter@0: cstrittmatter@0: # Collect output cstrittmatter@0: sub collectOutput{ cstrittmatter@0: my ($new_name) = @_; cstrittmatter@0: cstrittmatter@0: # Collects output cstrittmatter@0: #if a new name is given for the contigs and scaffolds, change them before moving them cstrittmatter@0: if ( $new_name ne 'NODE') { cstrittmatter@0: renameContigs($new_name); cstrittmatter@0: } cstrittmatter@0: else { cstrittmatter@0: if ( -e "$output_dir/contigs.fasta") { cstrittmatter@0: move "$output_dir/contigs.fasta", $out_contigs_file; cstrittmatter@0: } cstrittmatter@0: if ( -e "$output_dir/scaffolds.fasta") { cstrittmatter@0: move "$output_dir/scaffolds.fasta", $out_scaffolds_file; cstrittmatter@0: } cstrittmatter@0: } cstrittmatter@0: cstrittmatter@0: open LOG, '<', "$output_dir/spades.log" cstrittmatter@0: or die "Cannot open log file $output_dir/spades.log: $?"; cstrittmatter@0: print $log $_ while (); cstrittmatter@0: return 0; cstrittmatter@0: } cstrittmatter@0: cstrittmatter@0: #Change name in contig and scaffolds file cstrittmatter@0: sub renameContigs{ cstrittmatter@0: my ($name) = @_; cstrittmatter@0: cstrittmatter@0: open my $in, '<',"$output_dir/contigs.fasta" or die $!; cstrittmatter@0: open my $out,'>', $out_contigs_file; cstrittmatter@0: cstrittmatter@0: while ( my $line = <$in>) { cstrittmatter@0: #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. cstrittmatter@0: #also move the remainder of the length cstrittmatter@0: if ( $line =~ />NODE_(\d+)_(.+)/) { cstrittmatter@0: $line = ">$name" . "_$1 $2\n"; cstrittmatter@0: } cstrittmatter@0: print $out $line; cstrittmatter@0: } cstrittmatter@0: close $in; cstrittmatter@0: close $out; cstrittmatter@0: cstrittmatter@0: cstrittmatter@0: open $in, '<',"$output_dir/scaffolds.fasta" or die $!; cstrittmatter@0: open $out,'>', $out_scaffolds_file; cstrittmatter@0: cstrittmatter@0: while ( my $line = <$in>) { cstrittmatter@0: #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. cstrittmatter@0: #also move the remainder of the length cstrittmatter@0: if ( $line =~ />NODE_(\d+)_(.+)/) { cstrittmatter@0: $line = ">$name" . "_$1 $2\n"; cstrittmatter@0: } cstrittmatter@0: print $out $line; cstrittmatter@0: } cstrittmatter@0: close $in; cstrittmatter@0: close $out; cstrittmatter@0: cstrittmatter@0: } cstrittmatter@0: cstrittmatter@0: cstrittmatter@0: # Extract cstrittmatter@0: sub extractCoverageLength{ cstrittmatter@0: my ($in, $out) = @_; cstrittmatter@0: open FASTA, '<', $in or die $!; cstrittmatter@0: open TAB, '>', $out or die $!; cstrittmatter@0: print TAB "#name\tlength\tcoverage\n"; cstrittmatter@0: while (){ cstrittmatter@0: next unless /^>/; cstrittmatter@0: chomp; cstrittmatter@0: die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/); cstrittmatter@0: my ($name,$n, $l, $cov) = ($1,$2, $3, $4); cstrittmatter@0: print TAB "$name" . "_$n\t$l\t$cov\n"; cstrittmatter@0: } cstrittmatter@0: close TAB; cstrittmatter@0: }