cstrittmatter@0
|
1 #!/usr/bin/env perl
|
cstrittmatter@0
|
2 ## A wrapper script to call spades.py and collect its output
|
cstrittmatter@0
|
3 use strict;
|
cstrittmatter@0
|
4 use warnings;
|
cstrittmatter@0
|
5 use File::Temp qw/ tempfile tempdir /;
|
cstrittmatter@0
|
6 use File::Copy;
|
cstrittmatter@0
|
7 use Getopt::Long;
|
cstrittmatter@0
|
8
|
cstrittmatter@0
|
9 # Parse arguments
|
cstrittmatter@0
|
10 my ($out_contigs_file,
|
cstrittmatter@0
|
11 $out_contigs_stats,
|
cstrittmatter@0
|
12 $out_scaffolds_file,
|
cstrittmatter@0
|
13 $out_scaffolds_stats,
|
cstrittmatter@0
|
14 $out_log_file,
|
cstrittmatter@0
|
15 $new_name,
|
cstrittmatter@0
|
16 @sysargs) = @ARGV;
|
cstrittmatter@0
|
17
|
cstrittmatter@0
|
18 ## GetOptions not compatible with parsing the rest of the arguments in an array.
|
cstrittmatter@0
|
19 ## Keeping the not-so-nice parse-in-one-go method, without named arguments.
|
cstrittmatter@0
|
20 # GetOptions(
|
cstrittmatter@0
|
21 # 'contigs-file=s' => \$out_contigs_file,
|
cstrittmatter@0
|
22 # 'contigs-stats=s' => \$out_contigs_stats,
|
cstrittmatter@0
|
23 # 'scaffolds-file=s' => \$out_scaffolds_file,
|
cstrittmatter@0
|
24 # 'scaffolds-stats=s' => \$out_scaffolds_stats,
|
cstrittmatter@0
|
25 # 'out_log_file=s' => \$out_log_file,
|
cstrittmatter@0
|
26 # );
|
cstrittmatter@0
|
27
|
cstrittmatter@0
|
28 # my @sysargs = @ARGV;
|
cstrittmatter@0
|
29
|
cstrittmatter@0
|
30 # Create temporary folder to store files, delete after use
|
cstrittmatter@0
|
31 #my $output_dir = tempdir( CLEANUP => 0 );
|
cstrittmatter@0
|
32 my $output_dir = 'output_dir';
|
cstrittmatter@0
|
33 # Link "dat" files as fastq, otherwise spades complains about file format
|
cstrittmatter@0
|
34
|
cstrittmatter@0
|
35 # Create log handle
|
cstrittmatter@0
|
36 open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n";
|
cstrittmatter@0
|
37
|
cstrittmatter@0
|
38 # Run program
|
cstrittmatter@0
|
39 # To do: record time
|
cstrittmatter@0
|
40 runSpades(@sysargs);
|
cstrittmatter@0
|
41 collectOutput($new_name);
|
cstrittmatter@0
|
42 extractCoverageLength($out_contigs_file, $out_contigs_stats);
|
cstrittmatter@0
|
43 extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
|
cstrittmatter@0
|
44 print $log "Done\n";
|
cstrittmatter@0
|
45 close $log;
|
cstrittmatter@0
|
46 exit 0;
|
cstrittmatter@0
|
47
|
cstrittmatter@0
|
48 # Run spades
|
cstrittmatter@0
|
49 sub runSpades {
|
cstrittmatter@0
|
50 my $cmd = join(" ", @_) . " -o $output_dir";
|
cstrittmatter@0
|
51 my $return_code = system($cmd);
|
cstrittmatter@0
|
52 if ($return_code) {
|
cstrittmatter@0
|
53 print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
|
cstrittmatter@0
|
54 die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
|
cstrittmatter@0
|
55 }
|
cstrittmatter@0
|
56 return 0;
|
cstrittmatter@0
|
57 }
|
cstrittmatter@0
|
58
|
cstrittmatter@0
|
59 # Collect output
|
cstrittmatter@0
|
60 sub collectOutput{
|
cstrittmatter@0
|
61 my ($new_name) = @_;
|
cstrittmatter@0
|
62
|
cstrittmatter@0
|
63 # Collects output
|
cstrittmatter@0
|
64 #if a new name is given for the contigs and scaffolds, change them before moving them
|
cstrittmatter@0
|
65 if ( $new_name ne 'NODE') {
|
cstrittmatter@0
|
66 renameContigs($new_name);
|
cstrittmatter@0
|
67 }
|
cstrittmatter@0
|
68 else {
|
cstrittmatter@0
|
69 if ( -e "$output_dir/contigs.fasta") {
|
cstrittmatter@0
|
70 move "$output_dir/contigs.fasta", $out_contigs_file;
|
cstrittmatter@0
|
71 }
|
cstrittmatter@0
|
72 if ( -e "$output_dir/scaffolds.fasta") {
|
cstrittmatter@0
|
73 move "$output_dir/scaffolds.fasta", $out_scaffolds_file;
|
cstrittmatter@0
|
74 }
|
cstrittmatter@0
|
75 }
|
cstrittmatter@0
|
76
|
cstrittmatter@0
|
77 open LOG, '<', "$output_dir/spades.log"
|
cstrittmatter@0
|
78 or die "Cannot open log file $output_dir/spades.log: $?";
|
cstrittmatter@0
|
79 print $log $_ while (<LOG>);
|
cstrittmatter@0
|
80 return 0;
|
cstrittmatter@0
|
81 }
|
cstrittmatter@0
|
82
|
cstrittmatter@0
|
83 #Change name in contig and scaffolds file
|
cstrittmatter@0
|
84 sub renameContigs{
|
cstrittmatter@0
|
85 my ($name) = @_;
|
cstrittmatter@0
|
86
|
cstrittmatter@0
|
87 open my $in, '<',"$output_dir/contigs.fasta" or die $!;
|
cstrittmatter@0
|
88 open my $out,'>', $out_contigs_file;
|
cstrittmatter@0
|
89
|
cstrittmatter@0
|
90 while ( my $line = <$in>) {
|
cstrittmatter@0
|
91 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
|
cstrittmatter@0
|
92 #also move the remainder of the length
|
cstrittmatter@0
|
93 if ( $line =~ />NODE_(\d+)_(.+)/) {
|
cstrittmatter@0
|
94 $line = ">$name" . "_$1 $2\n";
|
cstrittmatter@0
|
95 }
|
cstrittmatter@0
|
96 print $out $line;
|
cstrittmatter@0
|
97 }
|
cstrittmatter@0
|
98 close $in;
|
cstrittmatter@0
|
99 close $out;
|
cstrittmatter@0
|
100
|
cstrittmatter@0
|
101
|
cstrittmatter@0
|
102 open $in, '<',"$output_dir/scaffolds.fasta" or die $!;
|
cstrittmatter@0
|
103 open $out,'>', $out_scaffolds_file;
|
cstrittmatter@0
|
104
|
cstrittmatter@0
|
105 while ( my $line = <$in>) {
|
cstrittmatter@0
|
106 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number.
|
cstrittmatter@0
|
107 #also move the remainder of the length
|
cstrittmatter@0
|
108 if ( $line =~ />NODE_(\d+)_(.+)/) {
|
cstrittmatter@0
|
109 $line = ">$name" . "_$1 $2\n";
|
cstrittmatter@0
|
110 }
|
cstrittmatter@0
|
111 print $out $line;
|
cstrittmatter@0
|
112 }
|
cstrittmatter@0
|
113 close $in;
|
cstrittmatter@0
|
114 close $out;
|
cstrittmatter@0
|
115
|
cstrittmatter@0
|
116 }
|
cstrittmatter@0
|
117
|
cstrittmatter@0
|
118
|
cstrittmatter@0
|
119 # Extract
|
cstrittmatter@0
|
120 sub extractCoverageLength{
|
cstrittmatter@0
|
121 my ($in, $out) = @_;
|
cstrittmatter@0
|
122 open FASTA, '<', $in or die $!;
|
cstrittmatter@0
|
123 open TAB, '>', $out or die $!;
|
cstrittmatter@0
|
124 print TAB "#name\tlength\tcoverage\n";
|
cstrittmatter@0
|
125 while (<FASTA>){
|
cstrittmatter@0
|
126 next unless /^>/;
|
cstrittmatter@0
|
127 chomp;
|
cstrittmatter@0
|
128 die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/);
|
cstrittmatter@0
|
129 my ($name,$n, $l, $cov) = ($1,$2, $3, $4);
|
cstrittmatter@0
|
130 print TAB "$name" . "_$n\t$l\t$cov\n";
|
cstrittmatter@0
|
131 }
|
cstrittmatter@0
|
132 close TAB;
|
cstrittmatter@0
|
133 }
|