|
0
|
1 #!/usr/bin/env perl
|
|
|
2
|
|
|
3 use strict;
|
|
|
4 use FindBin;
|
|
|
5 use Bio::SeqIO;
|
|
|
6 use Bio::Seq;
|
|
|
7 use Path::Tiny;
|
|
|
8 use File::Basename;
|
|
|
9 use File::Spec;
|
|
|
10 use File::Path qw(make_path remove_tree);
|
|
|
11 use List::Util qw(first);
|
|
|
12 use Cwd qw(abs_path);
|
|
|
13 use Data::Dumper;
|
|
|
14 use LWP::Simple;
|
|
|
15 use JSON;
|
|
|
16
|
|
|
17 #..............................................................................
|
|
|
18 # Globals
|
|
|
19
|
|
|
20 my $EXE = basename($0);
|
|
|
21 my $ABX_SEP = ';';
|
|
|
22
|
|
|
23 my %DATABASE = (
|
|
|
24 'resfinder' => \&get_resfinder,
|
|
|
25 'plasmidfinder' => \&get_plasmidfinder,
|
|
|
26 'megares' => \&get_megares,
|
|
|
27 'argannot' => \&get_argannot,
|
|
|
28 'card' => \&get_card,
|
|
|
29
|
|
|
30 # 'ncbibetalactamase' => \&get_ncbibetalactamase,
|
|
|
31 'ncbi' => \&get_ncbi,
|
|
|
32 'vfdb' => \&get_vfdb,
|
|
|
33 'ecoli_vf' => \&get_ecoli_vf, # https://github.com/phac-nml/ecoli_vf
|
|
|
34 'ecoh' => \&get_ecoh,
|
|
|
35 'bacmet2' => \&get_bacmet2,
|
|
|
36 'victors' => \&get_victors,
|
|
|
37
|
|
|
38 # 'serotypefinder' => \&get_serotypefinder,
|
|
|
39 );
|
|
|
40 my $DATABASES = join( ' ', sort keys %DATABASE );
|
|
|
41
|
|
|
42 #..............................................................................
|
|
|
43 # Command line options
|
|
|
44
|
|
|
45 my ( @Options, $debug, $outdir, $db, $force );
|
|
|
46 setOptions();
|
|
|
47
|
|
|
48 $db or err("Please choose a --db from: $DATABASES");
|
|
|
49 exists $DATABASE{$db} or err("Unknown --db '$db', choose from: $DATABASES ");
|
|
|
50 -d $outdir or err("--outdir '$outdir' does not exist");
|
|
|
51
|
|
|
52 my $dir = abs_path( File::Spec->catdir( $outdir, $db ) );
|
|
|
53 make_path($dir);
|
|
|
54 msg("Setting up '$db' in '$dir'");
|
|
|
55
|
|
|
56 #my $tmpdir = tempdir("$db-XXXXXXXX", DIR=>$dir, CLEANUP=>0);
|
|
|
57 #my $tmpdir = "/home/tseemann/git/abricate/db/resfinder/resfinder-6Kuphtvv";
|
|
|
58 my $tmpdir = "$dir/src";
|
|
|
59 make_path($tmpdir);
|
|
|
60
|
|
|
61 # run the specific function from --db
|
|
|
62 chdir $tmpdir;
|
|
|
63 my $seq = $DATABASE{$db}->();
|
|
|
64 map { is_full_gene($_) } @$seq; # doesn't do anything?
|
|
|
65 $seq = dedupe_seq($seq);
|
|
|
66
|
|
|
67 #print Dumper($seq);
|
|
|
68 msg("Sorting sequences by ID");
|
|
|
69 $seq = [ sort { $a->{ID} cmp $b->{ID} } @$seq ];
|
|
|
70 save_fasta( "$dir/sequences", $seq );
|
|
|
71
|
|
|
72 msg("Formatting BLASTN database: $dir/sequences");
|
|
|
73 my $logfile = "$tmpdir/makeblastdb.log";
|
|
|
74 my $ncbi_title = $db;
|
|
|
75 if ( "$db" eq "ncbi" ) {
|
|
|
76 $ncbi_title = "ncbiamrplus";
|
|
|
77 }
|
|
|
78 my $ec = system(
|
|
|
79 "makeblastdb -in '$dir/sequences' -title '$ncbi_title' -dbtype nucl -hash_index -logfile $logfile"
|
|
|
80 );
|
|
|
81 if ( $ec != 0 ) {
|
|
|
82 system("tail '$logfile'");
|
|
|
83 err("Error with makign BLAST database. See $logfile");
|
|
|
84 }
|
|
|
85
|
|
|
86 #msg("Run 'abricate --setupdb' to format the database");
|
|
|
87
|
|
|
88 msg("Done.");
|
|
|
89
|
|
|
90 #..............................................................................
|
|
|
91
|
|
|
92 sub download {
|
|
|
93 my ( $url, $dest ) = @_;
|
|
|
94 if ( -r $dest and not $force ) {
|
|
|
95 msg("Won't re-download existing $dest (use --force)");
|
|
|
96
|
|
|
97 #exit(1);
|
|
|
98 }
|
|
|
99 else {
|
|
|
100 msg("Downloading: $url");
|
|
|
101 my $ec = mirror( $url, $dest );
|
|
|
102 msg("HTTP Result: $ec");
|
|
|
103 ( $ec == 200 or $ec = 304 )
|
|
|
104 or err("HTTP $ec | failed to download $url"); # is HTTP OK ?
|
|
|
105 }
|
|
|
106 msg("Destination: $dest");
|
|
|
107 msg( "Filesize:", ( -s $dest ), "bytes" );
|
|
|
108 }
|
|
|
109
|
|
|
110 #..............................................................................
|
|
|
111 sub trim_spaces {
|
|
|
112 my ($s) = @_;
|
|
|
113 $s =~ s/^\s+//;
|
|
|
114 $s =~ s/\s+$//;
|
|
|
115 return $s;
|
|
|
116 }
|
|
|
117
|
|
|
118 #..............................................................................
|
|
|
119 sub get_resfinder {
|
|
|
120 my $name = "resfinder_db";
|
|
|
121
|
|
|
122 # FIXME - can we just get HEAD.zip like in plasmidfinder?
|
|
|
123 my $url = "https://bitbucket.org/genomicepidemiology/$name.git";
|
|
|
124
|
|
|
125 if (-r $name and not $force) {
|
|
|
126 msg("Won't overwrite existing $name (use --force)");
|
|
|
127 exit(1);
|
|
|
128 }
|
|
|
129 else {
|
|
|
130 msg("Nuking existing folder: $name");
|
|
|
131 remove_tree("./$name");
|
|
|
132 msg("Cloning $url to $name");
|
|
|
133 system("git clone --quiet $url $name");
|
|
|
134 }
|
|
|
135
|
|
|
136 #<*.fsa>
|
|
|
137 #>aac(6')-Ib_2_M23634
|
|
|
138 #>blaNDM-19_1_MF370080
|
|
|
139 #>mcr-1.1_1_KP347127
|
|
|
140 #>fosB1_1_CP001903
|
|
|
141 #>fusB_1_AY373761
|
|
|
142 #>VanHAX_1_FJ866609
|
|
|
143 #>ere(A)_6_DQ157752
|
|
|
144 #>nimA_1_X71444
|
|
|
145 #>cfr_1_AM408573
|
|
|
146 #>catB3_2_U13880
|
|
|
147 #>qnrA1_1_AY070235
|
|
|
148 #>ARR-2_1_HQ141279
|
|
|
149 #>sul1_2_U12338
|
|
|
150 #>tet_1_M74049
|
|
|
151 #>dfrA19_1_EU855687
|
|
|
152
|
|
|
153 #<notes.txt>
|
|
|
154 #aac(6')-Iv:Aminoglycoside resistance:
|
|
|
155 #aac(6')-Iw:Aminoglycoside resistance:Alternate name; aac(6')-Ix
|
|
|
156 #sul3:Sulphonamide resistance:
|
|
|
157 ##Tetracycline:
|
|
|
158 #ort(B):Tetracycline resistance:
|
|
|
159 #blaCMY-59:Beta-lactam resistance:
|
|
|
160
|
|
|
161 #<phenotypes.txt>
|
|
|
162 #Gene_accession no. Class Phenotype PMID Mechanism of resistance Notes Required_gene
|
|
|
163 #ant(2'')-Ia_1_X04555 Aminoglycoside Gentamicin, Tobramycin 3024112 Enzymatic modification Alternative name aadB
|
|
|
164 #ant(2'')-Ia_2_JF826500 Aminoglycoside Gentamicin, Tobramycin 22271862 Enzymat
|
|
|
165
|
|
|
166 # $name = "~/apps/bettercallsal/assets/abricate_dbs/$name";
|
|
|
167 my $metafn = "$name/phenotypes.txt";
|
|
|
168 my @meta = path($metafn)->lines( { chomp => 1 } );
|
|
|
169 my %anno;
|
|
|
170 foreach (@meta) {
|
|
|
171 next if m/^#/;
|
|
|
172 my @x = split m/\t/;
|
|
|
173
|
|
|
174 #msg("$metafn: @x");
|
|
|
175 my ($gene) = ( $x[0] =~ m/^(.*?)_\w+$/ );
|
|
|
176 $anno{$gene}{ABX} = [
|
|
|
177 map { trim_spaces($_) }
|
|
|
178 grep { !m/(unknown|notes|^none)/i }
|
|
|
179 split m/,\s*/,
|
|
|
180 $x[2]
|
|
|
181 ];
|
|
|
182
|
|
|
183 #msg("$metafn: $gene |", $anno{$gene}{ABX}->@*);
|
|
|
184 }
|
|
|
185 msg( "get_resfinder: $metafn", scalar( keys %anno ), "genes" );
|
|
|
186
|
|
|
187 #print Dumper(\%anno);
|
|
|
188
|
|
|
189 my @seq;
|
|
|
190 for my $fasta (<$name/*.fsa>) {
|
|
|
191
|
|
|
192 # Issue #62 - repair broken fasta files like this:
|
|
|
193 # GCTTTAAATTGGAAAAAAGATAGTCAAACTCTTTAA>cmr_1_U43535
|
|
|
194 # inline replacement
|
|
|
195 system( 'sed', '-i.bak', 's/\([A-Z]\)>/\1\n>/gi', $fasta );
|
|
|
196 my $args = load_fasta($fasta);
|
|
|
197
|
|
|
198 # use name of fasta file as antibiotic name
|
|
|
199 #my $abx = basename($fasta, '.fsa');
|
|
|
200 #msg("$fasta: Assigning '$abx' to all genes");
|
|
|
201 #push @{$_->{ABX}}, $abx for (@$args);
|
|
|
202 push @seq, @$args;
|
|
|
203 }
|
|
|
204
|
|
|
205 # https://github.com/tseemann/abricate/issues/92
|
|
|
206 # mcr-9_1_NZ_NAAN01000063.1
|
|
|
207 #>mcr-9_1_NZ_NAAN01000063.1
|
|
|
208 # mcr-9.1:Colistin resistance:
|
|
|
209
|
|
|
210 for my $seq (@seq) {
|
|
|
211 my ( $id, $copy, $acc ) = $seq->{ID} =~ m/^(.*?)_(\d+)_(\S+)$/;
|
|
|
212
|
|
|
213 #msg("resfinder: $1 $2 $3", $anno{$1});
|
|
|
214 $seq->{ID} = "${id}_${copy}";
|
|
|
215 $seq->{ACC} = $acc;
|
|
|
216 $seq->{DESC} = $anno{$id}{DESC} || $id;
|
|
|
217 push @{ $seq->{ABX} }, @{ $anno{$id}{ABX} } if $anno{$id}{ABX};
|
|
|
218 }
|
|
|
219
|
|
|
220 return \@seq;
|
|
|
221 }
|
|
|
222
|
|
|
223 #..............................................................................
|
|
|
224 sub get_serotypefinder {
|
|
|
225 my $name = "serotypefinder_db";
|
|
|
226 my $url = "https://bitbucket.org/genomicepidemiology/$name.git";
|
|
|
227
|
|
|
228 if ( -r $name and not $force ) {
|
|
|
229 msg("Won't overwrite existing $name (use --force)");
|
|
|
230
|
|
|
231 # exit(1);
|
|
|
232 }
|
|
|
233 else {
|
|
|
234 msg("Nuking existing folder: $name");
|
|
|
235 remove_tree("./$name");
|
|
|
236 msg("Cloning $url to $name");
|
|
|
237 system("git clone --quiet $url $name");
|
|
|
238 }
|
|
|
239
|
|
|
240 my @seq;
|
|
|
241 for my $fasta (<$name/*.fsa>) {
|
|
|
242 push @seq, @{ load_fasta($fasta) };
|
|
|
243 }
|
|
|
244
|
|
|
245 # >fliC_44444_AY250028_H52
|
|
|
246 # FIXME - this is already in EcOH database!
|
|
|
247 for my $seq (@seq) {
|
|
|
248 my ( $id, $copy, $acc ) = $seq->{ID} =~ m/^(.*)_(\d+)_(\w+)$/;
|
|
|
249
|
|
|
250 #msg("serotypefinder: $1 $2 $3", $anno{$1});
|
|
|
251 $seq->{ID} = "${id}_${copy}";
|
|
|
252 $seq->{ACC} = $acc;
|
|
|
253
|
|
|
254 #$seq->{DESC} = $anno{$id} || '';
|
|
|
255 }
|
|
|
256
|
|
|
257 return \@seq;
|
|
|
258 }
|
|
|
259
|
|
|
260 #..............................................................................
|
|
|
261 sub get_tag {
|
|
|
262 my ( $f, $tag ) = @_;
|
|
|
263 if ( $f->has_tag($tag) ) {
|
|
|
264 my ($val) = $f->get_tag_values($tag);
|
|
|
265 return $val;
|
|
|
266 }
|
|
|
267 return '';
|
|
|
268 }
|
|
|
269
|
|
|
270 #..............................................................................
|
|
|
271 sub get_ncbi {
|
|
|
272 my $AFP =
|
|
|
273 "https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest";
|
|
|
274
|
|
|
275 #my $src = "https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/data/latest/AMR_CDS";
|
|
|
276 my $src = "$AFP/AMR_CDS.fa";
|
|
|
277 my $name = "amr_cds.ffn";
|
|
|
278
|
|
|
279 #my $src2 = "https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/data/latest/ReferenceGeneCatalog.txt";
|
|
|
280 my $src2 = "$AFP/ReferenceGeneCatalog.txt";
|
|
|
281 my $name2 = "amr_cds.tsv";
|
|
|
282
|
|
|
283 if ( -r $name and -r $name2 and not $force ) {
|
|
|
284 msg("Won't overwrite existing $name/$name2 (use --force)");
|
|
|
285
|
|
|
286 # exit(1);
|
|
|
287 }
|
|
|
288 else {
|
|
|
289 download( $src, $name );
|
|
|
290 download( $src2, $name2 );
|
|
|
291 }
|
|
|
292
|
|
|
293 #1 allele
|
|
|
294 #2 gene_family ble
|
|
|
295 #3 whitelisted_taxa
|
|
|
296 #4 product_name BLMA family bleomycin binding protein
|
|
|
297 #5 scope core
|
|
|
298 #6 type AMR
|
|
|
299 #7 subtype AMR
|
|
|
300 #8 class BLEOMYCIN
|
|
|
301 #9 subclass BLEOMYCIN
|
|
|
302 #10 refseq_protein_accession WP_063842967.1
|
|
|
303 #11 refseq_nucleotide_accession NG_047554.1
|
|
|
304 #12 curated_refseq_start No
|
|
|
305 #13 genbank_protein_accession CAA02068.1
|
|
|
306 #14 genbank_nucleotide_accession A31900.1
|
|
|
307 #15 genbank_strand_orientation +
|
|
|
308 #16 genbank_cds_start 6
|
|
|
309 #17 genbank_cds_stop 374
|
|
|
310 #18 pubmed_reference
|
|
|
311 #19 blacklisted_taxa
|
|
|
312 #20 db_version 2019-08-27.1
|
|
|
313
|
|
|
314 my $tsv = load_tabular( $name2, 10 ); # refseq_nucleotide_accession
|
|
|
315 my $tsv2 = load_tabular( $name2, 12 ); # refseq_nucleotide_accession
|
|
|
316
|
|
|
317 msg( "[$name2] Loaded", scalar keys %$tsv, "records" );
|
|
|
318 msg( "[$name2] Loaded", scalar keys %$tsv2, "records" );
|
|
|
319 # print keys %$tsv;
|
|
|
320 # exit;
|
|
|
321
|
|
|
322 # https://github.com/ncbi/amr/wiki/AMRFinderPlus-database#amrprot
|
|
|
323 # 0 1 2 3 4 5 6 7
|
|
|
324 # >1000909371|WP_061158039.1|NG_050200|1|1|blaTEM-156|blaTEM|class_A_beta-lactamase_TEM-156 NG_050200:101-961
|
|
|
325 my @seq;
|
|
|
326 my $in = Bio::SeqIO->new( -file => $name, -format => "fasta" );
|
|
|
327 while ( my $rec = $in->next_seq ) {
|
|
|
328
|
|
|
329 # parse ID
|
|
|
330 my ( $gi, $pi, $acc, $fp, $fn, $gene, $fam, $prod ) = split m/\|/,
|
|
|
331 $rec->id;
|
|
|
332
|
|
|
333 # skip fusion genes
|
|
|
334 next unless $fp == 1 and $fn == 1;
|
|
|
335
|
|
|
336 # only keep true ARGs
|
|
|
337 $acc .= ".1" unless $acc =~ m/\.\d+$/;
|
|
|
338 #my $t = $tsv->{$acc} or next;
|
|
|
339 my $t;
|
|
|
340 if ($tsv->{$acc}) {
|
|
|
341 $t = $tsv->{$acc}
|
|
|
342 } elsif ($tsv2->{$pi}) {
|
|
|
343 # print $pi, ": pi\n";
|
|
|
344 # print $acc, ": acc\n";
|
|
|
345 # print "$pi | scope: ", $t->{scope}, "\n";
|
|
|
346 # exit;
|
|
|
347 $t = $tsv->{$pi}
|
|
|
348 } else {
|
|
|
349 next;
|
|
|
350 }
|
|
|
351 #next unless ( $t->{scope} eq 'plus' || $t->{scope} eq 'core' );
|
|
|
352
|
|
|
353 # next unless $t->{type} eq 'VIRULENCE';
|
|
|
354 # next unless $t->{subtype} eq 'VIRULENCE';
|
|
|
355 # construct sequence record
|
|
|
356 $prod =~ s/_/ /g;
|
|
|
357 err("$pi: gene is empty") unless $gene;
|
|
|
358 err("$pi: product is empty") unless $prod;
|
|
|
359 my $s = {
|
|
|
360 ID => $gene,
|
|
|
361 ACC => $acc,
|
|
|
362 DESC => $prod,
|
|
|
363 SEQ => $rec->seq,
|
|
|
364 ABX => [ split m'/', $t->{subclass} ]
|
|
|
365 };
|
|
|
366 push @seq, $s;
|
|
|
367 msg( "[$name]", 0 + @seq, "|", $s->{ID}, "|", $s->{ACC}, "|",
|
|
|
368 $s->{DESC} );
|
|
|
369
|
|
|
370 #msg(Dumper($s));
|
|
|
371 msg( $s->{ID}, " is fusion $fp/$fn" ) if "$fp$fn" ne '11';
|
|
|
372 }
|
|
|
373 return \@seq;
|
|
|
374 }
|
|
|
375
|
|
|
376 #..............................................................................
|
|
|
377 sub get_plasmidfinder {
|
|
|
378 my $name = "plasmidfinder";
|
|
|
379 my $zip = "$name.zip";
|
|
|
380
|
|
|
381 # download("https://cge.cbs.dtu.dk/cge/download_data.php?folder=$name&filename=$zip&submit=$zip", $zip);
|
|
|
382 download(
|
|
|
383 "https://bitbucket.org/genomicepidemiology/plasmidfinder_db/get/HEAD.zip",
|
|
|
384 $zip
|
|
|
385 );
|
|
|
386 system("unzip -j -u $zip");
|
|
|
387
|
|
|
388 my @seq;
|
|
|
389 for my $fasta (<*.fsa>) {
|
|
|
390 push @seq, @{ load_fasta($fasta) };
|
|
|
391 }
|
|
|
392
|
|
|
393 for my $seq (@seq) {
|
|
|
394 $seq->{DESC} = $seq->{ID}; # no desc, so use ORIGINAL ID as desc
|
|
|
395 my ( $id, $acc ) =
|
|
|
396 ( $seq->{ID} =~ m/^(.*)_(([A-Z]+|NC_)\d+(\.\d+)?)$/ );
|
|
|
397 $id =~ s/_+$//g;
|
|
|
398 $seq->{ID} = $id || $seq->{ID};
|
|
|
399 $seq->{ACC} = $acc || '';
|
|
|
400 wrn( "Parsed empty ID:",
|
|
|
401 $seq->{DESC},
|
|
|
402 "=> id='$id' acc='$acc' seq=" . substr( $seq->{SEQ}, 0, 10 ) )
|
|
|
403 if not $id;
|
|
|
404 }
|
|
|
405
|
|
|
406 return \@seq;
|
|
|
407 }
|
|
|
408
|
|
|
409 #..............................................................................
|
|
|
410 sub get_megares {
|
|
|
411 my $zip = "megares.zip";
|
|
|
412 download( 'https://www.meglab.org/downloads/megares_v3.00.zip', $zip );
|
|
|
413 system("unzip -j -u $zip");
|
|
|
414 my $seqs = load_fasta( glob("megares_drugs_*.fasta") );
|
|
|
415 my @okseq;
|
|
|
416
|
|
|
417 # >MEG_372|Multi-compound|Drug_and_biocide_resistance|Drug_and_biocide_MATE_efflux_pumps|ABEM
|
|
|
418 # >MEG_411|Multi-compound|Drug_and_biocide_resistance|Drug_and_biocide_RND_efflux_regulator|ACRR|RequiresSNPConfirmation
|
|
|
419 # >MEG_7860|Drugs|betalactams|Class_B_betalactamases|ZOG
|
|
|
420 # >MEG_7439|Drugs|Glycopeptides|VanI-type_resistance_protein|VANI
|
|
|
421 # >MEG_7245|Drugs|Tetracyclines|Tetracycline_resistance_MFS_efflux_pumps|TETY
|
|
|
422 # >MEG_9|Drugs|Aminoglycosides|Aminoglycoside-resistant_16S_ribosomal_subunit_protein|A16S|RequiresSNPConfirmation
|
|
|
423
|
|
|
424 for my $s (@$seqs) {
|
|
|
425 my ( $id, $type, $class, $mech, $group, $note ) = split m/\|/, $s->{ID};
|
|
|
426 if ($note) {
|
|
|
427
|
|
|
428 # "RequiresSNPConfirmation" is the common one; we can't do that
|
|
|
429 msg("Skipping $id due to: $note");
|
|
|
430 next;
|
|
|
431 }
|
|
|
432 $s->{ID} = $group;
|
|
|
433 $s->{ACC} = $id;
|
|
|
434 $s->{DESC} = join( ':', $type, $class, $mech, $group );
|
|
|
435 push @okseq, $s;
|
|
|
436 }
|
|
|
437 return [@okseq];
|
|
|
438
|
|
|
439 #return $seqs;
|
|
|
440 }
|
|
|
441
|
|
|
442 #..............................................................................
|
|
|
443 sub get_argannot {
|
|
|
444 my $fasta = 'arg-annot.fa';
|
|
|
445 download(
|
|
|
446 # 'http://www.mediterranee-infection.com/arkotheque/client/ihumed/_depot_arko/articles/1425/argannot-aa-v3-march2017_doc.fasta',
|
|
|
447 # 'http://www.mediterranee-infection.com/arkotheque/client/ihumed/_depot_arko/articles/691/argannot-nt_doc.fasta',
|
|
|
448 'https://www.mediterranee-infection.com/wp-content/uploads/2019/09/ARG-ANNOT_NT_V6_July2019.txt',
|
|
|
449 $fasta
|
|
|
450 );
|
|
|
451
|
|
|
452 # fix syntax errors in the FASTA file...
|
|
|
453 path($fasta)->edit( sub { s/\\//g; $_ } );
|
|
|
454
|
|
|
455 my $seqs = load_fasta($fasta);
|
|
|
456
|
|
|
457 # 0 1 2 3
|
|
|
458 # >(AGly)Aac2-Ie:NC_011896:3039059-3039607:549
|
|
|
459 for my $s (@$seqs) {
|
|
|
460 my @x = split m/:/, $s->{ID};
|
|
|
461 $s->{ID} = $x[0];
|
|
|
462 $s->{ACC} = $x[1] . ':' . $x[2];
|
|
|
463 $s->{DESC} = '';
|
|
|
464 }
|
|
|
465
|
|
|
466 return $seqs;
|
|
|
467 }
|
|
|
468
|
|
|
469 #..............................................................................
|
|
|
470 sub get_bacmet2 {
|
|
|
471 my $fasta = 'bacmet2.fa';
|
|
|
472 download(
|
|
|
473 'http://bacmet.biomedicine.gu.se/download/BacMet2_EXP_database.fasta',
|
|
|
474 $fasta );
|
|
|
475
|
|
|
476 # This is a PROTEIN file
|
|
|
477 my $seqs = load_fasta($fasta);
|
|
|
478
|
|
|
479 # 0 1 2 3 4 ^
|
|
|
480 # >BAC0098|ctpC|sp|P0A502|CTPC_MYCTU Probable manganese/zinc-exporting
|
|
|
481 for my $s (@$seqs) {
|
|
|
482 my @x = split m/\|/, $s->{ID};
|
|
|
483 $s->{ID} = $x[1] . '-' . $x[0];
|
|
|
484 $s->{ACC} = $x[2] . ':' . $x[3];
|
|
|
485 }
|
|
|
486
|
|
|
487 return $seqs;
|
|
|
488 }
|
|
|
489
|
|
|
490 #..............................................................................
|
|
|
491 sub get_card {
|
|
|
492
|
|
|
493 # https://github.com/tseemann/abricate/issues/25
|
|
|
494 my $tarball = 'card.tar.bz2';
|
|
|
495 download(
|
|
|
496 #'https://card.mcmaster.ca/download/0/broadstreet-v2.0.2.tar.gz',
|
|
|
497 'https://card.mcmaster.ca/latest/data',
|
|
|
498
|
|
|
499 #'https://card.mcmaster.ca/latest/data/card-data.tar.bz2',
|
|
|
500 $tarball # yes, it's really BZ2 not GZ ...
|
|
|
501 );
|
|
|
502
|
|
|
503 # my $fasta = "./nucleotide_fasta_protein_homolog_model.fasta";
|
|
|
504 my $jsonfile = "./card.json";
|
|
|
505 system( "tar", "xf", $tarball, $jsonfile ) == 0
|
|
|
506 or err("Problem with tar xf $tarball $jsonfile");
|
|
|
507 -r $jsonfile or err("Could not extract $jsonfile from $tarball");
|
|
|
508
|
|
|
509 # JSON
|
|
|
510 my $json = path($jsonfile)->slurp_utf8;
|
|
|
511 my $card = from_json( $json, { latin1 => 1 } );
|
|
|
512 my @seq;
|
|
|
513 for my $g ( values %$card ) {
|
|
|
514 next unless ref($g) eq 'HASH';
|
|
|
515
|
|
|
516 # msg(Dumper($g));
|
|
|
517 next
|
|
|
518 unless $g->{model_type} eq
|
|
|
519 "protein homolog model"; # only 'acquired' genes
|
|
|
520 my $id = $g->{model_name};
|
|
|
521 err("$id has {model_param}{snp}") if exists $g->{model_param}{snp};
|
|
|
522
|
|
|
523 # msg("CARD: $id");
|
|
|
524 # print STDERR Dumper($g);
|
|
|
525 my $dna = $g->{model_sequences}{sequence}
|
|
|
526 or err("$id: no {model_sequences}{sequence} found");
|
|
|
527 my ($key) = sort keys %$dna; # first key
|
|
|
528 $dna = $dna->{$key} or err("$id: invalid key '$key'");
|
|
|
529 $dna = $dna->{dna_sequence} or err("$id: no dna_sequence");
|
|
|
530
|
|
|
531 # msg(Dumper($dna)) if $id eq 'OXA-25';
|
|
|
532
|
|
|
533 # ARO_category => {
|
|
|
534 # 'category_aro_name' => 'cephalosporin',
|
|
|
535 # 'category_aro_class_name' => 'Drug Class',
|
|
|
536 my $is_amr_gene = 0;
|
|
|
537 my @abx;
|
|
|
538 for my $key ( keys $g->{ARO_category}->%* ) {
|
|
|
539 my $c = $g->{ARO_category}{$key};
|
|
|
540 if ( $c->{category_aro_class_name} eq 'Drug Class' ) {
|
|
|
541 my $abx = $c->{category_aro_name};
|
|
|
542 $abx =~ s/ antibiotic//;
|
|
|
543 $abx =~ s/\s/_/g;
|
|
|
544 push @abx, $abx;
|
|
|
545 }
|
|
|
546 if ( $c->{category_aro_class_name} eq 'AMR Gene Family' ) {
|
|
|
547 $is_amr_gene++;
|
|
|
548 }
|
|
|
549 }
|
|
|
550
|
|
|
551 #err("CARD | $id | ", Dumper($g->{ARO_category}) ) unless $is_amr_gene;
|
|
|
552 #msg("ABX=$_") for @abx;
|
|
|
553
|
|
|
554 # put coordinates into normal form
|
|
|
555 my ( $start, $stop ) =
|
|
|
556 $dna->{strand} eq '-'
|
|
|
557 ? ( $dna->{fmax}, $dna->{fmin} )
|
|
|
558 : ( $dna->{fmin}, $dna->{fmax} );
|
|
|
559
|
|
|
560 $id =~ s/\s+/_/g;
|
|
|
561 push @seq,
|
|
|
562 {
|
|
|
563 ID => $id,
|
|
|
564 ACC => $dna->{accession} . ":$start-$stop",
|
|
|
565 DESC => ( $g->{ARO_description} || $g->{ARO_accession} ),
|
|
|
566 SEQ => $dna->{sequence},
|
|
|
567 ABX => [@abx],
|
|
|
568 };
|
|
|
569
|
|
|
570 # msg(Dumper($seq[-1]));
|
|
|
571 }
|
|
|
572
|
|
|
573 return \@seq;
|
|
|
574 }
|
|
|
575
|
|
|
576 #..............................................................................
|
|
|
577 sub get_victors {
|
|
|
578
|
|
|
579 # the CDS data is in .ffn and has source GI and coords
|
|
|
580 # the PROT data is in .faa and has the protein ref and /product
|
|
|
581
|
|
|
582 #>gi|115534241:2616-3152 Campylobacter jejuni plasmid pCJ01, complete sequence
|
|
|
583 download( 'http://www.phidias.us/victors/downloads/gen_downloads.php',
|
|
|
584 'victors.ffn' );
|
|
|
585
|
|
|
586 #>gi|115534244|ref|YP_783826.1| hypothetical protein pCJ01p4 [Campylobacter jejuni]
|
|
|
587 download(
|
|
|
588 'http://www.phidias.us/victors/downloads/gen_downloads_protein.php',
|
|
|
589 'victors.faa' );
|
|
|
590
|
|
|
591 my %gi;
|
|
|
592 open my $FAA, '<', 'victors.faa';
|
|
|
593 while (<$FAA>) {
|
|
|
594
|
|
|
595 #>gi|115534244|ref|YP_783826.1| hypothetical protein pCJ01p4 [Campylobacter jejuni]
|
|
|
596 next unless m"^>gi.(\d+).ref.([^|]+). ([^[]+)";
|
|
|
597 $gi{$1}{ACC} = $2;
|
|
|
598 $gi{$1}{DESC} = $3;
|
|
|
599 }
|
|
|
600
|
|
|
601 my $seqs = load_fasta("victors.ffn");
|
|
|
602
|
|
|
603 for my $s (@$seqs) {
|
|
|
604
|
|
|
605 #>gi|115534241:2616-3152 Campylobacter jejuni plasmid pCJ01, complete sequence
|
|
|
606 $s->{ID} =~ m/gi.(\d+):(\d+)-(\d+)/;
|
|
|
607 $s->{ACC} = $gi{$1}{ACC} || "gi|$1:$2-$3";
|
|
|
608 $s->{DESC} =~ $gi{$1}{DESC} || 'hypothetical protein';
|
|
|
609 }
|
|
|
610
|
|
|
611 # print Dumper($seqs); exit;
|
|
|
612
|
|
|
613 return $seqs;
|
|
|
614 }
|
|
|
615
|
|
|
616 #..............................................................................
|
|
|
617 sub get_vfdb {
|
|
|
618 download( 'http://www.mgc.ac.cn/VFs/Down/VFDB_setA_nt.fas.gz',
|
|
|
619 'vfdb.fa.gz' );
|
|
|
620 system("gzip -f -d -c vfdb.fa.gz > vfdb.fa");
|
|
|
621 my $seqs = load_fasta("vfdb.fa");
|
|
|
622
|
|
|
623 # >VFG000676(gb|AAD32411) (lef) anthrax toxin lethal factor precursor [Anthrax toxin (VF0142)] [Bacillus anthracis str. Sterne]
|
|
|
624 for my $s (@$seqs) {
|
|
|
625
|
|
|
626 # https://github.com/tseemann/abricate/issues/64#issuecomment-421895159 by @VGalata
|
|
|
627 $s->{ID} =~ m/^(\w+)\(\w+\|(\w+)(\.\d+)?\)$/; #
|
|
|
628 #$s->{ID} =~ m/^(\w+)\(\w+\|(\w+)\)$/;
|
|
|
629 $s->{ACC} = $2 if $2;
|
|
|
630 $s->{DESC} =~ m/^\((.*?)\)/;
|
|
|
631 $s->{ID} = $1 if $1;
|
|
|
632
|
|
|
633 # print STDERR Dumper($s); exit;
|
|
|
634 }
|
|
|
635
|
|
|
636 return $seqs;
|
|
|
637 }
|
|
|
638
|
|
|
639 #..............................................................................
|
|
|
640 sub get_ncbibetalactamase {
|
|
|
641 my $fasta = "ncbi.fa";
|
|
|
642 download(
|
|
|
643 'ftp://ftp.ncbi.nlm.nih.gov/pathogen/betalactamases/Allele-dna.fa',
|
|
|
644 $fasta );
|
|
|
645 my $tab = "ncbi.tab";
|
|
|
646 download( 'ftp://ftp.ncbi.nlm.nih.gov/pathogen/betalactamases/Allele.tab',
|
|
|
647 $tab );
|
|
|
648
|
|
|
649 # >ACD12694.1 EU650653.1:1-1173
|
|
|
650 my $seqs = load_fasta($fasta);
|
|
|
651
|
|
|
652 # ACC-1 ACD12694.1 EU650653.1 blaACC-1 1 1173 + cephalosporin-hydrolyzing class C beta-lactamase ACC-1
|
|
|
653 my %anno;
|
|
|
654 my @anno = grep { !m/^#/ } path($tab)->lines( { chomp => 1 } );
|
|
|
655 msg( "Read", 0 + @anno, "annotations" );
|
|
|
656 foreach (@anno) {
|
|
|
657 my ( $name, $id, $acc, $gene, $begin, $end, undef, $product ) =
|
|
|
658 split m/\t/;
|
|
|
659 $anno{$id} = {
|
|
|
660 ID => $gene,
|
|
|
661 DESC => $product,
|
|
|
662 ACC => "$acc:$begin-$end",
|
|
|
663 };
|
|
|
664 }
|
|
|
665
|
|
|
666 # print Dumper(\%anno);
|
|
|
667
|
|
|
668 for my $s (@$seqs) {
|
|
|
669 my $id = $s->{ID};
|
|
|
670 next unless exists $anno{$id};
|
|
|
671 $s->{ID} = $anno{$id}{ID};
|
|
|
672 $s->{ACC} = $anno{$id}{ACC};
|
|
|
673 $s->{DESC} = $anno{$id}{DESC};
|
|
|
674 }
|
|
|
675
|
|
|
676 # print Dumper($seqs);
|
|
|
677
|
|
|
678 return $seqs;
|
|
|
679 }
|
|
|
680
|
|
|
681 #..............................................................................
|
|
|
682 sub get_ecoh {
|
|
|
683 my $fasta = "EcOH.fa";
|
|
|
684 download(
|
|
|
685 'https://raw.githubusercontent.com/katholt/srst2/master/data/EcOH.fasta',
|
|
|
686 $fasta
|
|
|
687 );
|
|
|
688
|
|
|
689 # https://github.com/katholt/srst2#generating-srst2-compatible-clustered-database-from-raw-sequences
|
|
|
690 # [clusterID]__[gene]__[allele]__[seqID] [other stuff]
|
|
|
691 # >1__fliC__fliC-H1__1 AB028471.1;flagellin;H1
|
|
|
692 # >8__wzx__wzx-O41__246 AB811617.1;O antigen flippase;O41
|
|
|
693 # >9__wzy__wzy-OgN31__597 LC125932.1;O antigen polyermase;OgN31
|
|
|
694 my $seqs = load_fasta($fasta);
|
|
|
695
|
|
|
696 for my $s (@$seqs) {
|
|
|
697 my @id = split m/__/, $s->{ID};
|
|
|
698 my @desc = split m';', $s->{DESC};
|
|
|
699 $s->{ID} = $id[2];
|
|
|
700 $s->{ACC} = shift(@desc);
|
|
|
701 $s->{DESC} = join( ' ', @desc );
|
|
|
702 }
|
|
|
703
|
|
|
704 # print Dumper($seqs);
|
|
|
705 return $seqs;
|
|
|
706 }
|
|
|
707
|
|
|
708 #..............................................................................
|
|
|
709 sub get_ecoli_vf {
|
|
|
710 my $fasta = "ecoli_vf.ffn";
|
|
|
711 download(
|
|
|
712 'https://github.com/phac-nml/ecoli_vf/raw/master/data/repaired_ecoli_vfs_shortnames.ffn',
|
|
|
713 $fasta
|
|
|
714 );
|
|
|
715 my $seqs = load_fasta($fasta);
|
|
|
716
|
|
|
717 # >VFG000748(gi:2865308) (espF) EspF [EspF (VF0182)] [Escherichia coli O127:H6 str. E2348/69]
|
|
|
718 # >VFG000749(gi:6009379) (bfpA) Bundlin [BFP (VF0174)] [Escherichia coli B171]
|
|
|
719 # >SPG000142 (cvac) Escherichia coli cvi cvaC operon. [X57525 434-745]
|
|
|
720 # >SPG000143 (iss2) Escherichia coli Iss (iss) gene, complete cds. [AF042279 292-600]
|
|
|
721
|
|
|
722 for my $s (@$seqs) {
|
|
|
723
|
|
|
724 #print STDERR Dumper("IN", $s);
|
|
|
725 $s->{ID} =~ m/ ^ (\w+) (?: \( (.*?) \) )? $ /x
|
|
|
726 or die "Can't parse $fasta at " . Dumper($s);
|
|
|
727 $s->{ID} = $1 if $1;
|
|
|
728 $s->{ACC} = $2 || $1;
|
|
|
729 $s->{DESC} =~ s/\s\[.*?\]$//g; # remove strain name
|
|
|
730 $s->{DESC} =~ m/^(?:\((.*?)\)\s+)?(.*?)$/;
|
|
|
731 $s->{ID} = $1 if $1;
|
|
|
732 $s->{DESC} = $2;
|
|
|
733
|
|
|
734 #print STDERR Dumper("OUT", $s);
|
|
|
735 #print STDERR "="x60, "\n";
|
|
|
736 }
|
|
|
737
|
|
|
738 # print Dumper($seqs);
|
|
|
739 return $seqs;
|
|
|
740 }
|
|
|
741
|
|
|
742 #..............................................................................
|
|
|
743 sub is_full_gene {
|
|
|
744 my ($s) = @_;
|
|
|
745 my $has_ambig = 0;
|
|
|
746
|
|
|
747 my $id = $s->{ID};
|
|
|
748 my $L = length( $s->{SEQ} );
|
|
|
749 if ( $L % 3 != 0 ) {
|
|
|
750 wrn("$id - length $L bp is not multiple of 3");
|
|
|
751 return;
|
|
|
752 }
|
|
|
753 if ( $s->{SEQ} !~ m/^[AGCT]+$/ ) {
|
|
|
754 wrn("$id - has non-AGTC bases");
|
|
|
755 return;
|
|
|
756 }
|
|
|
757
|
|
|
758 my $seq = Bio::Seq->new( -id => $s->{ID}, -seq => $s->{SEQ} );
|
|
|
759
|
|
|
760 my $aa = $seq->translate->seq;
|
|
|
761
|
|
|
762 if ( $aa =~ m/\*./ ) {
|
|
|
763 wrn("$id - has internal stop codons, trying revcom");
|
|
|
764 $aa = $seq->revcom->translate->seq;
|
|
|
765 if ( $aa =~ m/\*./ ) {
|
|
|
766 wrn("$id - revcom has internal stop codons too");
|
|
|
767 return;
|
|
|
768 }
|
|
|
769 else {
|
|
|
770 msg("$id - revcom resolves problem, hooray!");
|
|
|
771 $s->{SEQ} = $seq->revcom->seq;
|
|
|
772 }
|
|
|
773 }
|
|
|
774
|
|
|
775 return $L;
|
|
|
776 }
|
|
|
777
|
|
|
778 #..............................................................................
|
|
|
779 sub dedupe_seq {
|
|
|
780 my ($seq) = @_;
|
|
|
781 my %seen;
|
|
|
782 my $good = [];
|
|
|
783 for my $s (@$seq) {
|
|
|
784 if ( $seen{ $s->{SEQ} } ) {
|
|
|
785 wrn( "duplicate", length( $s->{SEQ} ),
|
|
|
786 "bp sequence:", $s->{ID}, '~', $seen{ $s->{SEQ} } );
|
|
|
787 }
|
|
|
788 else {
|
|
|
789 push @$good, $s;
|
|
|
790 }
|
|
|
791 $seen{ $s->{SEQ} } .= ' ' . $s->{ID};
|
|
|
792 }
|
|
|
793 msg( "dedupe_seq: read", scalar(@$seq), "/ kept", scalar(@$good) );
|
|
|
794 return $good;
|
|
|
795 }
|
|
|
796
|
|
|
797 #..............................................................................
|
|
|
798 sub load_tabular {
|
|
|
799 my ( $fname, $keycol, $sep ) = @_;
|
|
|
800 $keycol //= 0;
|
|
|
801 $sep //= "\t";
|
|
|
802 my $hash = {};
|
|
|
803 my @hdr;
|
|
|
804 my $row = 0;
|
|
|
805 open my $TSV, '<', $fname or err("Can't read TSV file: $fname");
|
|
|
806 while (<$TSV>) {
|
|
|
807 chomp;
|
|
|
808 my @col = split m/$sep/;
|
|
|
809 $row++;
|
|
|
810 if (@hdr) {
|
|
|
811 @hdr == @col or err("Header and row $row differ in column count");
|
|
|
812
|
|
|
813 #my $key = $col[$keycol] or wrn("Empty key column $keycol: $_");
|
|
|
814 #exists{$hash->{$col[$key}} and wrn("WARNING: dupe key $key at row: $_");
|
|
|
815 my $key = $col[$keycol];
|
|
|
816 $hash->{$key} ||=
|
|
|
817 { map { ( $hdr[$_] => $col[$_] ) } ( 0 .. $#hdr ) };
|
|
|
818 }
|
|
|
819 else {
|
|
|
820 @hdr = @col;
|
|
|
821 }
|
|
|
822 }
|
|
|
823 close $TSV;
|
|
|
824 return $hash;
|
|
|
825 }
|
|
|
826
|
|
|
827 #..............................................................................
|
|
|
828 sub load_fasta {
|
|
|
829 my ($fasta) = @_;
|
|
|
830 my %seen;
|
|
|
831 my $list;
|
|
|
832 my $dbtype = 'unknown';
|
|
|
833 msg("load_fasta: $fasta");
|
|
|
834 my $in = Bio::SeqIO->new( -file => $fasta, -format => 'fasta' );
|
|
|
835 while ( my $seq = $in->next_seq ) {
|
|
|
836 my $id = $seq->id or err("Empty ID in $fasta");
|
|
|
837 if ( $seen{$id} ) {
|
|
|
838 wrn("Duplicate ID '$id' in $fasta");
|
|
|
839 $id = $id . '_dupe';
|
|
|
840 }
|
|
|
841 $seen{$id}++;
|
|
|
842 my $s = uc( $seq->seq );
|
|
|
843 $dbtype = $seq->alphabet eq 'dna' ? 'nucl' : 'prot';
|
|
|
844 $dbtype eq 'nucl' ? $s =~ s/[^AGTC]/N/g : $s =~ s/[^A-Z]/X/g;
|
|
|
845 push @$list,
|
|
|
846 {
|
|
|
847 ID => $id,
|
|
|
848 ACC => '',
|
|
|
849 DESC => $seq->desc,
|
|
|
850 SEQ => $s,
|
|
|
851 TYPE => $dbtype,
|
|
|
852 };
|
|
|
853 }
|
|
|
854 msg( "load_fasta: read", scalar(@$list), "$dbtype sequences" );
|
|
|
855 return $list;
|
|
|
856 }
|
|
|
857
|
|
|
858 #..............................................................................
|
|
|
859 sub save_fasta {
|
|
|
860 my ( $fasta, $seq ) = @_;
|
|
|
861 msg("save_fasta: $fasta");
|
|
|
862 my %seen;
|
|
|
863 my $out = Bio::SeqIO->new( -file => ">$fasta", -format => 'fasta' );
|
|
|
864 for my $s (@$seq) {
|
|
|
865 $seen{ $s->{ID} }++;
|
|
|
866 my $freq = $seen{ $s->{ID} };
|
|
|
867
|
|
|
868 #wrn("seen $s->{ID} now $freq times") if $freq > 1;
|
|
|
869 # print Dumper($s);
|
|
|
870 my $ABX =
|
|
|
871 defined( $s->{ABX} ) ? join( $ABX_SEP, sort @{ $s->{ABX} } ) : '';
|
|
|
872 $ABX =~ s/\s+/_/g; # remove spaces!
|
|
|
873 my $obj = Bio::Seq->new(
|
|
|
874 -id => join( '~~~', $db, $s->{ID}, $s->{ACC}, $ABX ),
|
|
|
875 -desc => ( $s->{DESC} || $s->{ID} ),
|
|
|
876 -seq => $s->{SEQ},
|
|
|
877 );
|
|
|
878
|
|
|
879 # $obj->desc( hash_encode($s) );
|
|
|
880 $out->write_seq($obj);
|
|
|
881
|
|
|
882 # $seen{ $s->{ID} }++;
|
|
|
883 }
|
|
|
884 msg( "save_fasta: wrote", scalar(@$seq), "sequences" );
|
|
|
885 }
|
|
|
886
|
|
|
887 #----------------------------------------------------------------------
|
|
|
888 sub msg {
|
|
|
889 print STDERR "@_\n";
|
|
|
890 }
|
|
|
891
|
|
|
892 #----------------------------------------------------------------------
|
|
|
893 sub wrn {
|
|
|
894 msg( "WARNING:", @_ ) if $debug;
|
|
|
895 }
|
|
|
896
|
|
|
897 #----------------------------------------------------------------------
|
|
|
898 sub err {
|
|
|
899 msg( "ERROR:", @_ );
|
|
|
900 exit(1);
|
|
|
901 }
|
|
|
902
|
|
|
903 #----------------------------------------------------------------------
|
|
|
904 # Option setting routines
|
|
|
905
|
|
|
906 sub setOptions {
|
|
|
907 use Getopt::Long;
|
|
|
908
|
|
|
909 @Options = (
|
|
|
910 { OPT => "help", VAR => \&usage, DESC => "This help" },
|
|
|
911 {
|
|
|
912 OPT => "debug!",
|
|
|
913 VAR => \$debug,
|
|
|
914 DEFAULT => 0,
|
|
|
915 DESC => "Verbose debug output"
|
|
|
916 },
|
|
|
917 {
|
|
|
918 OPT => "dbdir=s",
|
|
|
919 VAR => \$outdir,
|
|
|
920 DEFAULT => abs_path("$FindBin::RealBin/../db"),
|
|
|
921 DESC => "Parent folder"
|
|
|
922 },
|
|
|
923 {
|
|
|
924 OPT => "db=s",
|
|
|
925 VAR => \$db,
|
|
|
926 DEFAULT => "",
|
|
|
927 DESC => "Choices: $DATABASES"
|
|
|
928 },
|
|
|
929 {
|
|
|
930 OPT => "force!",
|
|
|
931 VAR => \$force,
|
|
|
932 DEFAULT => 0,
|
|
|
933 DESC => "Force download even if exists"
|
|
|
934 },
|
|
|
935 );
|
|
|
936
|
|
|
937 &GetOptions( map { $_->{OPT}, $_->{VAR} } @Options ) || usage(1);
|
|
|
938
|
|
|
939 # Now setup default values.
|
|
|
940 foreach (@Options) {
|
|
|
941 if ( defined( $_->{DEFAULT} ) && !defined( ${ $_->{VAR} } ) ) {
|
|
|
942 ${ $_->{VAR} } = $_->{DEFAULT};
|
|
|
943 }
|
|
|
944 }
|
|
|
945 }
|
|
|
946
|
|
|
947 sub usage {
|
|
|
948 my ($exitcode) = @_;
|
|
|
949 $exitcode = 0 if $exitcode eq 'help'; # what gets passed by getopt func ref
|
|
|
950 $exitcode ||= 0;
|
|
|
951 select STDERR if $exitcode; # write to STDERR if exitcode is error
|
|
|
952
|
|
|
953 print "SYNOPIS\n Download databases for abricate to use\n";
|
|
|
954 print "USAGE\n $EXE [options] --db DATABASE\n";
|
|
|
955 print "OPTIONS\n";
|
|
|
956 foreach (@Options) {
|
|
|
957 printf " --%-13s %s%s.\n", $_->{OPT}, $_->{DESC},
|
|
|
958 defined( $_->{DEFAULT} ) ? " (default '$_->{DEFAULT}')" : "";
|
|
|
959 }
|
|
|
960 exit($exitcode);
|
|
|
961 }
|
|
|
962
|
|
|
963 #----------------------------------------------------------------------
|
|
|
964
|