annotate 1.0.0/bin/waterfall_per_snp_cluster.pl @ 0:0a8dda29956e draft default tip

planemo upload
author galaxytrakr
date Thu, 28 May 2026 20:41:10 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
1 #!/usr/bin/env perl
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
2
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
3 # Kranti Konganti
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
4 # 01/02/2024
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
5
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
6 use strict;
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
7 use warnings;
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
8 use Getopt::Long;
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
9 use Data::Dumper;
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
10 use Pod::Usage;
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
11 use File::Basename;
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
12 use File::Spec::Functions;
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
13
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
# Lookup tables (hash references) shared by the main flow and the routines.
my $tbl               = {};    # declared but not referenced in the code shown
my $snp_2_serovar     = {};    # cluster id -> assembly level -> N50 -> entries
my $acc_2_serovar     = {};    # accession  -> serotype string
my $acc_2_target      = {};    # accession  -> value of the target acc column
my $snp_count         = {};    # cluster id -> starting count (0)
my $snp_2_acc         = {};    # cluster id -> list of accessions
my $acc_2_snp         = {};    # accession  -> cluster id
my $multi_cluster_acc = {};    # genome paths that were already printed

# Command line options.
my (
    $serovar_limit,          $serovar_or_type_col, $min_asm_size,
    $complete_serotype_name, $PDG_file,            $table_file,
    $not_null_pdg_serovar,   $snp_cluster,         $help,
    $out_prefix,             $acc_col,             $seronamecol,
    $target_acc_col
);
my @custom_serovars;

my @cli_spec = (
    'help'                         => \$help,
    'pdg=s'                        => \$PDG_file,
    'tbl=s'                        => \$table_file,
    'snp=s'                        => \$snp_cluster,
    'min_contig_size=i'            => \$min_asm_size,
    'complete_serotype_name'       => \$complete_serotype_name,
    'serocol:i'                    => \$serovar_or_type_col,
    'seronamecol:i'                => \$seronamecol,
    'target_acc_col:i'             => \$target_acc_col,
    'acc_col:i'                    => \$acc_col,
    'not_null_pdg_serovar'         => \$not_null_pdg_serovar,
    'num_serotypes_per_serotype:i' => \$serovar_limit,
    'include_serovar=s'            => \@custom_serovars,
    'op=s'                         => \$out_prefix,
);

GetOptions(@cli_spec) or pod2usage( -verbose => 2 );

pod2usage( -verbose => 2 ) if ( defined $help );

# Fall back to the built-in value for every option that was not supplied.
foreach my $fallback (
    [ \$serovar_limit,       1 ],
    [ \$serovar_or_type_col, 50 ],
    [ \$seronamecol,         34 ],
    [ \$target_acc_col,      43 ],
    [ \$acc_col,             10 ],
    [ \$min_asm_size,        0 ],
  )
{
    my ( $option_ref, $value ) = @{$fallback};
    ${$option_ref} = $value if ( !defined ${$option_ref} );
}

# A user supplied prefix is joined to the output file names with '_'.
$out_prefix = defined $out_prefix ? $out_prefix . '_' : '';

# All three input files are mandatory.
pod2usage( -verbose => 2 )
  if ( !$PDG_file || !$table_file || !$snp_cluster );
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
84
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
# Input files.
open( my $pdg_file, '<', $PDG_file )
  or die "\nCannot open PDG file $PDG_file: $!\n\n";
open( my $tbl_file, '<', $table_file )
  or die "\nCannot open tbl file $table_file: $!\n\n";
open( my $snp_cluster_file, '<', $snp_cluster )
  or die "\nCannot open $snp_cluster: $!\n\n";

# Accession to serotype listing, always written to the current directory.
open( my $acc_fh, '>', 'acc2serovar.txt' )
  or die "\nCannot open acc2serovar.txt: $!\n\n";

# Duplicates of the standard streams, used for the report lines.
open( my $Stdout, '>&', STDOUT ) or die "\nCannot pipe to STDOUT: $!\n\n";
open( my $Stderr, '>&', STDERR ) or die "\nCannot pipe to STDERR: $!\n\n";

# Output lists, named with the optional output prefix.
my $accs_snp_path = $out_prefix . 'accs_snp.txt';
open( my $accs_snp_fh, '>', $accs_snp_path )
  or die "\nCannnot open " . $accs_snp_path . " for writing: $!\n\n";

my $genome_list_path = $out_prefix . 'mash_snp_genome_list.txt';
open( my $genome_headers_fh, '>', $genome_list_path )
  or die "\nCannnot open " . $genome_list_path . " for writing: $!\n\n";

# The PDG file name without directory and ".metadata.tsv" suffix; it is the
# first component of every genome path built later.
my $pdg_release = basename( $PDG_file, ".metadata.tsv" );
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
103
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
# Pass 1: read the PDG metadata table. For every accepted row remember the
# accession's serotype string and target accession, and append the
# accession/serotype pair to acc2serovar.txt.
PDG_ROW:
while ( my $line = <$pdg_file> ) {
    chomp $line;
    next PDG_ROW if ( $line =~ m/^\#/ );

    # Relevant columns with the default options (Perl index):
    # 10 - 1 =  9: asm_acc
    # 34 - 1 = 33: serovar
    # 50 - 1 = 49: computed serotype
    # The fourth column read here is the one selected by --target_acc_col.
    my @cols            = split( /\t/, $line );
    my $serovar_or_type = $cols[ $serovar_or_type_col - 1 ];
    my $acc             = $cols[ $acc_col - 1 ];
    my $serovar         = $cols[ $seronamecol - 1 ];
    my $target_acc      = $cols[ $target_acc_col - 1 ];

    # split() drops trailing empty fields, so a short row leaves columns
    # undefined. A row without an accession cannot be keyed and is skipped;
    # missing serotype columns are treated as empty strings.
    next PDG_ROW if ( !defined $acc );
    $serovar_or_type = '' if ( !defined $serovar_or_type );
    $serovar         = '' if ( !defined $serovar );

    $serovar_or_type =~ s/\"//g;

    # Rows whose serotype string contains any --include_serovar value are
    # exempt from the two pattern filters below.
    my $skip = 1;
    foreach my $ser (@custom_serovars) {
        if ( $serovar_or_type =~ qr/\Q$ser\E/ ) {
            $skip = 0;
            last;
        }
    }

    if ($skip) {

        # With --complete_serotype_name, drop rows whose serotype value has a
        # hyphen before the ",antigen_formula" part.
        next PDG_ROW
          if ( defined $complete_serotype_name
            && $serovar_or_type =~ m/serotype=.*?\-.*?\,antigen_formula.+/ );

        # Drop rows whose serotype or antigen formula is all hyphens.
        next PDG_ROW
          if ( $serovar_or_type =~ m/serotype=\-\s+\-\:\-\:\-/
            || $serovar_or_type =~ m/antigen_formula=\-\:\-\:\-/ );
    }

    # NULL values disqualify a row; the serovar name column is only checked
    # when --not_null_pdg_serovar is given.
    next PDG_ROW if ( $acc =~ m/NULL/ || $serovar_or_type =~ m/NULL/ );
    next PDG_ROW if ( defined $not_null_pdg_serovar && $serovar =~ m/NULL/ );

    $acc_2_serovar->{$acc} = $serovar_or_type;
    $acc_2_target->{$acc}  = $target_acc;
    print $acc_fh "$acc\t$serovar_or_type\n";
}
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
165
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
#
# SNP to ACC
#

# Pass 2: map SNP cluster ids to the accessions kept by the metadata pass.
while ( my $row = <$snp_cluster_file> ) {
    chomp $row;

    # Relevant columns
    # 0: SNP Cluster ID
    # 3: Genome Accession belonging to the cluster (RefSeq or GenBank)
    my ( $snp_clus_id, $acc ) = ( split( /\t/, $row ) )[ 0, 3 ];

    # Skip NULL accessions, the header row and accessions without a serotype.
    next
      if ( $acc =~ m/^NULL/
        || $snp_clus_id =~ m/^PDS_acc/
        || !exists $acc_2_serovar->{$acc} );

    # An accession may be listed only once in the cluster file.
    if ( exists $acc_2_snp->{$acc} ) {
        print $Stderr
          "\nGot a duplicate assembly accession. Cannot proceed!\n\n$row\n\n";
        exit 1;
    }

    push @{ $snp_2_acc->{$snp_clus_id} }, $acc;
    $acc_2_snp->{$acc}         = $snp_clus_id;
    $snp_count->{$snp_clus_id} = 0;
}
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
191
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
# Pass 3: read the .tbl file and file every accession that belongs to a SNP
# cluster under cluster id -> sortable assembly level -> N50.
TBL_ROW:
while ( my $line = <$tbl_file> ) {
    chomp $line;

    # .tbl file columns (Perl index):
    #
    # 0: Accession
    # 1: AssemblyLevel
    # 2: ScaffoldN50
    # 3: ContigN50
    my ( $acc, $asm_lvl, $scaffold_n50, $contig_n50 ) =
      ( split( /\t/, $line ) )[ 0 .. 3 ];

    # split() drops trailing empty fields, so any column may be undefined.
    next TBL_ROW if ( !defined $acc || !not_empty($acc) );

    # Only accessions assigned to a SNP cluster are of interest. The lookup
    # is a plain assignment: a "my" declaration combined with a statement
    # modifier has undefined behaviour in Perl.
    next TBL_ROW if ( !exists $acc_2_snp->{$acc} );
    my $idx0 = $acc_2_snp->{$acc};
    next TBL_ROW if ( !defined $idx0 );

    # Scaffold N50 is used when present, otherwise contig N50.
    my $n50;
    if ( defined $scaffold_n50 && not_empty($scaffold_n50) ) {
        $n50 = $scaffold_n50;
    }
    elsif ( defined $contig_n50 && not_empty($contig_n50) ) {
        $n50 = $contig_n50;
    }
    next TBL_ROW if ( !defined $n50 );

    # Assemblies at or below the minimum size are dropped.
    next TBL_ROW if ( $n50 <= $min_asm_size );

    my $fna_rel_loc =
        "$pdg_release/ncbi_dataset/data/$acc/"
      . $acc
      . '_scaffolded_genomic.fna.gz';

    push @{ $snp_2_serovar->{$idx0}->{ sort_asm_level($asm_lvl) }->{$n50} },
      "$acc_2_serovar->{$acc}|$fna_rel_loc";
}
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
230
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
# Assembly level patterns, tried in this order for every level key.
my @asm_level_patterns =
  ( qr/Complete\s+Genome/i, qr/Chromosome/i, qr/Scaffold/i, qr/Contig/i );

# Pass 4: for every SNP cluster walk the assembly levels in sorted key order
# and print genomes until the per-cluster limit is reached.
foreach my $snp_cluster_id ( keys %$snp_2_acc ) {
    my $count = $snp_count->{$snp_cluster_id};

    foreach my $asm_lvl ( sort keys %{ $snp_2_serovar->{$snp_cluster_id} } ) {
        foreach my $level_re (@asm_level_patterns) {
            next if ( $asm_lvl !~ $level_re );
            $count = print_dl_metadata(
                $asm_lvl,
                \$snp_2_serovar->{$snp_cluster_id}->{$asm_lvl},
                $count,
                $snp_cluster_id
            );
        }

        # Report the running total for the cluster on the STDERR duplicate.
        if ( $count > 0 ) {
            printf $Stderr "%-17s | %s\n", $snp_cluster_id, $count;
        }
        last if ( $count >= $serovar_limit );
    }
}
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
267
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
# Input handles.
close $pdg_file;
close $tbl_file;
close $snp_cluster_file;

# Output handles: buffered write errors only surface at close, so check them.
# The genome list handle is closed here as well.
close $acc_fh
  or die "\nCannot close acc2serovar.txt: $!\n\n";
close $accs_snp_fh
  or die "\nCannot close " . $out_prefix . "accs_snp.txt: $!\n\n";
close $genome_headers_fh
  or die "\nCannot close " . $out_prefix . "mash_snp_genome_list.txt: $!\n\n";
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
273
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
274 #-------------------------------------------
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
275 # Main ends
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
276 #-------------------------------------------
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
277 # Routines begin
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
278 #-------------------------------------------
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
279
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
# Print the genomes of one assembly level of one SNP cluster, largest N50
# first, and return the updated number of genomes printed for the cluster.
#
# Arguments (positional):
#   1. assembly level key; everything up to the first underscore is removed
#      before printing
#   2. reference to a hash reference: N50 => list of "serotype|path" strings
#   3. number of genomes already printed for this cluster
#   4. SNP cluster id
#
# Every genome is written to the accession list, the genome list and the
# STDOUT duplicate. All genomes sharing an N50 are printed before the limit
# is checked. Reaching a path that was printed before ends the call at once.
sub print_dl_metadata {
    my ( $asm_lvl, $acc_sizes, $curr_count, $snp_cluster_id ) = @_;

    $asm_lvl =~ s/.+?\_(.+)/$1/;

    my @sizes_desc = sort { $b <=> $a } keys %{$$acc_sizes};

    foreach my $acc_size (@sizes_desc) {
        foreach my $serovar_url ( @{ $$acc_sizes->{$acc_size} } ) {
            my ( $serovar, $url ) = split( /\|/, $serovar_url );

            if ( exists $multi_cluster_acc->{$url} ) {
                return $curr_count;
            }
            $multi_cluster_acc->{$url} = 1;
            $curr_count++;

            # Path components 3 and 4 are the accession and the file name.
            my @url_parts = split( /\//, $url );
            my ( $final_acc, $genome_header ) = @url_parts[ 3, 4 ];

            print $accs_snp_fh "$final_acc\n";
            print $genome_headers_fh
              catfile( 'scaffold_genomes', $genome_header ) . "\n";

            if ( $curr_count > 0 ) {
                print $Stdout
                  "$serovar|$asm_lvl|$acc_size|$url|$snp_cluster_id\n";
            }
        }

        # The limit is only checked between N50 groups.
        return $curr_count if ( $curr_count >= $serovar_limit );
    }

    return $curr_count;
}
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
307
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
# Prefix an assembly level name with a letter and an underscore so that a
# plain string sort yields the priority order:
#
#   a_ Complete Genome, b_ Chromosome, c_ Scaffold, d_ Contig
#
# The level names are matched case-insensitively and their original spelling
# is kept. A level matching none of the names, or an undefined level, is
# returned unchanged.
sub sort_asm_level {
    my $level = shift;

    return $level if ( !defined $level );

    $level =~ s/(Complete\s+Genome)/a_$1/i;
    $level =~ s/(Chromosome)/b_$1/i;
    $level =~ s/(Scaffold)/c_$1/i;
    $level =~ s/(Contig)/d_$1/i;

    return $level;
}
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
319
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
# Return 1 when the given column value holds text and 0 otherwise.
#
# An undefined value counts as empty. Callers get undefined values for
# trailing empty columns because split() drops them.
sub not_empty {
    my $col = shift;

    return 0 if ( !defined $col );
    return ( $col =~ m/^$/ ) ? 0 : 1;
}
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
330
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
331 __END__
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
332
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
333 =head1 SYNOPSIS
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
334
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
This script will take in a PDG metadata file, a C<.tbl> file and a SNP
cluster file, and generate the final list by B<I<waterfall>> priority.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
337
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
338 See complete description:
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
339
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
340 perldoc waterfall_per_snp_cluster.pl
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
341
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
342 or
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
343
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
344 waterfall_per_snp_cluster.pl --help
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
345
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
346 Examples:
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
347
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
348 waterfall_per_snp_cluster.pl
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
349
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
350 =head1 DESCRIPTION
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
351
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
352 We will retain up to N number of genome accessions per SNP cluster.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
353 It prioritizes SNP Cluster participation over serotype coverage.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
354 Which N genomes are selected depends on (in order):
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
355
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
356 1. Genome assembly level, whose priority is
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
357
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
358 a: Complete Genome
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
359 b: Chromosome
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
360 c: Scaffold
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
361 d: Contig
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
362
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
363 2. If the genomes are of same assembly level, then
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
364 scaffold N50 followed by contig N50 is chosen.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
365
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
366 3. If the scaffold or contig N50 is the same, then all
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
367 of them are included.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
368
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
369 =head1 OPTIONS
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
370
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
371 =over 3
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
372
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
373 =item -p PDGXXXXX.XXXX.metadata.tsv
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
374
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
375 Absolute UNIX path pointing to the PDG metadata file.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
376 Example: PDG000000002.2505.metadata.tsv
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
377
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
378 =item -t asm.tbl
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
379
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
380 Absolute UNIX path pointing to the file from the result
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
381 of the C<dl_pdg_data.py> script, which is the C<asm.tbl>
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
382 file.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
383
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
384 =item -snp PDGXXXXXXX.XXXX.reference_target.cluster_list.tsv
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
385
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
386 Absolute UNIX path pointing to the SNP Cluster metadata file.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
387 Example: PDG000000002.2505.reference_target.cluster_list.tsv
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
388
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
389 =item --serocol <int> (Optional)
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
390
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
391 Column number (non 0-based index) of the PDG metadata file
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
392 by which the serotypes are collected
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
393 (column name: "computed_types"). Default: 50
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
394
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
395 =item --seronamecol <int> (Optional)
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
396
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
397 Column number (non 0-based index) of the PDG metadata file
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
398 whose column name is "serovar". Default: 34
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
399
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
400 =item --acc_col <int> (Optional)
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
401
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
402 Column number (non 0-based index) of the PDG metadata file
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
403 whose column name is "acc". Default: 10
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
404
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
405 =item --target_acc_col <int> (Optional)
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
406
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
407 Column number (non 0-based index) of the PDG metadata file
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
408 whose column name is "target_acc". Default: 43
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
409
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
410 =item --complete_serotype_name (Optional)
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
411
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
412 Skip indexing serotypes when the serotype name in the column
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
413 number 49 (non 0-based) of the PDG metadata file contains a "-". For example, if
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
414 an accession has a I<B<serotype=>> string such as the following in column
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
415 number 49 (non 0-based): C<"serotype=- 13:z4,z23:-","antigen_formula=13:z4,z23:-">
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
416 then, the indexing of that accession is skipped.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
417 Default: False
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
418
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
419 =item --not_null_pdg_serovar (Optional)
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
420
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
421 Only index the B<I<computed_types>> column, i.e. column number 49 (non 0-based),
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
422 if the B<I<serovar>> column is not C<NULL>.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
423
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
424 =item -i <serotype name> (Optional)
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
425
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
426 Make sure the following serotype is included. Mention C<-i> multiple
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
427 times to include multiple serotypes.
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
428
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
429 =item -num <int> (Optional)
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
430
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
431 Number of genome accessions per SNP Cluster. Default: 1
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
432
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
433 =back
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
434
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
435 =head1 AUTHOR
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
436
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
437 Kranti Konganti
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
438
0a8dda29956e planemo upload
galaxytrakr
parents:
diff changeset
439 =cut