annotate scripts/spades.pl @ 1:5224b4d51065 tip

"planemo upload commit cb65588391944306ff3cb32a23e1c28f65122014-dirty"
author cstrittmatter
date Fri, 11 Mar 2022 15:52:43 -0500
parents 8be2feb96994
children
rev   line source
cstrittmatter@0 1 #!/usr/bin/env perl
cstrittmatter@0 2 ## A wrapper script to call spades.py and collect its output
cstrittmatter@0 3 use strict;
cstrittmatter@0 4 use warnings;
cstrittmatter@0 5 use File::Temp qw/ tempfile tempdir /;
cstrittmatter@0 6 use File::Copy;
cstrittmatter@0 7 use Getopt::Long;
cstrittmatter@0 8
cstrittmatter@0 9 # Parse arguments
cstrittmatter@0 10 my ($out_contigs_file,
cstrittmatter@0 11 $out_contigs_stats,
cstrittmatter@0 12 $out_scaffolds_file,
cstrittmatter@0 13 $out_scaffolds_stats,
cstrittmatter@0 14 $out_log_file,
cstrittmatter@0 15 $new_name,
cstrittmatter@0 16 @sysargs) = @ARGV;
cstrittmatter@0 17
cstrittmatter@0 18 ## GetOptions not compatible with parsing the rest of the arguments in an array.
cstrittmatter@0 19 ## Keeping the not-so-nice parse-in-one-go method, without named arguments.
cstrittmatter@0 20 # GetOptions(
cstrittmatter@0 21 # 'contigs-file=s' => \$out_contigs_file,
cstrittmatter@0 22 # 'contigs-stats=s' => \$out_contigs_stats,
cstrittmatter@0 23 # 'scaffolds-file=s' => \$out_scaffolds_file,
cstrittmatter@0 24 # 'scaffolds-stats=s' => \$out_scaffolds_stats,
cstrittmatter@0 25 # 'out_log_file=s' => \$out_log_file,
cstrittmatter@0 26 # );
cstrittmatter@0 27
cstrittmatter@0 28 # my @sysargs = @ARGV;
cstrittmatter@0 29
cstrittmatter@0 30 # Create temporary folder to store files, delete after use
cstrittmatter@0 31 #my $output_dir = tempdir( CLEANUP => 0 );
cstrittmatter@0 32 my $output_dir = 'output_dir';
cstrittmatter@0 33 # Link "dat" files as fastq, otherwise spades complains about file format
cstrittmatter@0 34
cstrittmatter@0 35 # Create log handle
cstrittmatter@0 36 open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n";
cstrittmatter@0 37
cstrittmatter@0 38 # Run program
cstrittmatter@0 39 # To do: record time
cstrittmatter@0 40 runSpades(@sysargs);
cstrittmatter@0 41 collectOutput($new_name);
cstrittmatter@0 42 extractCoverageLength($out_contigs_file, $out_contigs_stats);
cstrittmatter@0 43 extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
cstrittmatter@0 44 print $log "Done\n";
cstrittmatter@0 45 close $log;
cstrittmatter@0 46 exit 0;
cstrittmatter@0 47
cstrittmatter@0 48 # Run spades
cstrittmatter@0 49 sub runSpades {
cstrittmatter@0 50 my $cmd = join(" ", @_) . " -o $output_dir";
cstrittmatter@0 51 my $return_code = system($cmd);
cstrittmatter@0 52 if ($return_code) {
cstrittmatter@0 53 print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
cstrittmatter@0 54 die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
cstrittmatter@0 55 }
cstrittmatter@0 56 return 0;
cstrittmatter@0 57 }
cstrittmatter@0 58
cstrittmatter@0 59 # Collect output
cstrittmatter@0 60 sub collectOutput{
cstrittmatter@0 61 my ($new_name) = @_;
cstrittmatter@0 62
cstrittmatter@0 63 # Collects output
cstrittmatter@0 64 #if a new name is given for the contigs and scaffolds, change them before moving them
cstrittmatter@0 65 if ( $new_name ne 'NODE') {
cstrittmatter@0 66 renameContigs($new_name);
cstrittmatter@0 67 }
cstrittmatter@0 68 else {
cstrittmatter@0 69 if ( -e "$output_dir/contigs.fasta") {
cstrittmatter@0 70 move "$output_dir/contigs.fasta", $out_contigs_file;
cstrittmatter@0 71 }
cstrittmatter@0 72 if ( -e "$output_dir/scaffolds.fasta") {
cstrittmatter@0 73 move "$output_dir/scaffolds.fasta", $out_scaffolds_file;
cstrittmatter@0 74 }
cstrittmatter@0 75 }
cstrittmatter@0 76
cstrittmatter@0 77 open LOG, '<', "$output_dir/spades.log"
cstrittmatter@0 78 or die "Cannot open log file $output_dir/spades.log: $?";
cstrittmatter@0 79 print $log $_ while (<LOG>);
cstrittmatter@0 80 return 0;
cstrittmatter@0 81 }
cstrittmatter@0 82
cstrittmatter@0 83 #Change name in contig and scaffolds file
cstrittmatter@0 84 sub renameContigs{
cstrittmatter@0 85 my ($name) = @_;
cstrittmatter@0 86
cstrittmatter@0 87 open my $in, '<',"$output_dir/contigs.fasta" or die $!;
cstrittmatter@0 88 open my $out,'>', $out_contigs_file;
cstrittmatter@0 89
cstrittmatter@0 90 while ( my $line = <$in>) {
cstrittmatter@0 91 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
cstrittmatter@0 92 #also move the remainder of the length
cstrittmatter@0 93 if ( $line =~ />NODE_(\d+)_(.+)/) {
cstrittmatter@0 94 $line = ">$name" . "_$1 $2\n";
cstrittmatter@0 95 }
cstrittmatter@0 96 print $out $line;
cstrittmatter@0 97 }
cstrittmatter@0 98 close $in;
cstrittmatter@0 99 close $out;
cstrittmatter@0 100
cstrittmatter@0 101
cstrittmatter@0 102 open $in, '<',"$output_dir/scaffolds.fasta" or die $!;
cstrittmatter@0 103 open $out,'>', $out_scaffolds_file;
cstrittmatter@0 104
cstrittmatter@0 105 while ( my $line = <$in>) {
cstrittmatter@0 106 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
cstrittmatter@0 107 #also move the remainder of the length
cstrittmatter@0 108 if ( $line =~ />NODE_(\d+)_(.+)/) {
cstrittmatter@0 109 $line = ">$name" . "_$1 $2\n";
cstrittmatter@0 110 }
cstrittmatter@0 111 print $out $line;
cstrittmatter@0 112 }
cstrittmatter@0 113 close $in;
cstrittmatter@0 114 close $out;
cstrittmatter@0 115
cstrittmatter@0 116 }
cstrittmatter@0 117
cstrittmatter@0 118
cstrittmatter@0 119 # Extract
cstrittmatter@0 120 sub extractCoverageLength{
cstrittmatter@0 121 my ($in, $out) = @_;
cstrittmatter@0 122 open FASTA, '<', $in or die $!;
cstrittmatter@0 123 open TAB, '>', $out or die $!;
cstrittmatter@0 124 print TAB "#name\tlength\tcoverage\n";
cstrittmatter@0 125 while (<FASTA>){
cstrittmatter@0 126 next unless /^>/;
cstrittmatter@0 127 chomp;
cstrittmatter@0 128 die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/);
cstrittmatter@0 129 my ($name,$n, $l, $cov) = ($1,$2, $3, $4);
cstrittmatter@0 130 print TAB "$name" . "_$n\t$l\t$cov\n";
cstrittmatter@0 131 }
cstrittmatter@0 132 close TAB;
cstrittmatter@0 133 }