jpayne@1
|
1 #!/usr/bin/env python3
|
jpayne@1
|
2
|
jpayne@1
|
3 import argparse
|
jpayne@1
|
4 import os,subprocess
|
jpayne@1
|
5 import pickle
|
jpayne@1
|
6
|
jpayne@1
|
7 ### SeqSero Kmer
|
jpayne@1
|
8 def parse_args():
|
jpayne@1
|
9 "Parse the input arguments, use '-h' for help."
|
jpayne@1
|
10 parser = argparse.ArgumentParser(usage='Just type "SeqSero2_update_kmer_database.py", it will update kmer database automatically')
|
jpayne@1
|
11 return parser.parse_args()
|
jpayne@1
|
12
|
jpayne@1
|
13 def reverse_complement(sequence):
|
jpayne@1
|
14 complement = {
|
jpayne@1
|
15 'A': 'T',
|
jpayne@1
|
16 'C': 'G',
|
jpayne@1
|
17 'G': 'C',
|
jpayne@1
|
18 'T': 'A',
|
jpayne@1
|
19 'N': 'N',
|
jpayne@1
|
20 'M': 'K',
|
jpayne@1
|
21 'R': 'Y',
|
jpayne@1
|
22 'W': 'W',
|
jpayne@1
|
23 'S': 'S',
|
jpayne@1
|
24 'Y': 'R',
|
jpayne@1
|
25 'K': 'M',
|
jpayne@1
|
26 'V': 'B',
|
jpayne@1
|
27 'H': 'D',
|
jpayne@1
|
28 'D': 'H',
|
jpayne@1
|
29 'B': 'V'
|
jpayne@1
|
30 }
|
jpayne@1
|
31 return "".join(complement[base] for base in reversed(sequence))
|
jpayne@1
|
32
|
jpayne@1
|
33 def multifasta_dict(multifasta):
|
jpayne@1
|
34 multifasta_list = [
|
jpayne@1
|
35 line.strip() for line in open(multifasta, 'r') if len(line.strip()) > 0
|
jpayne@1
|
36 ]
|
jpayne@1
|
37 headers = [i for i in multifasta_list if i[0] == '>']
|
jpayne@1
|
38 multifasta_dict = {}
|
jpayne@1
|
39 for h in headers:
|
jpayne@1
|
40 start = multifasta_list.index(h)
|
jpayne@1
|
41 for element in multifasta_list[start + 1:]:
|
jpayne@1
|
42 if element[0] == '>':
|
jpayne@1
|
43 break
|
jpayne@1
|
44 else:
|
jpayne@1
|
45 if h[1:] in multifasta_dict:
|
jpayne@1
|
46 multifasta_dict[h[1:]] += element
|
jpayne@1
|
47 else:
|
jpayne@1
|
48 multifasta_dict[h[1:]] = element
|
jpayne@1
|
49 return multifasta_dict
|
jpayne@1
|
50
|
jpayne@1
|
51 def createKmerDict_reads(list_of_strings, kmer):
|
jpayne@1
|
52 kmer_table = {}
|
jpayne@1
|
53 for string in list_of_strings:
|
jpayne@1
|
54 sequence = string.strip('\n')
|
jpayne@1
|
55 for i in range(len(sequence) - kmer + 1):
|
jpayne@1
|
56 new_mer = sequence[i:i + kmer].upper()
|
jpayne@1
|
57 new_mer_rc = reverse_complement(new_mer)
|
jpayne@1
|
58 if new_mer in kmer_table:
|
jpayne@1
|
59 kmer_table[new_mer.upper()] += 1
|
jpayne@1
|
60 else:
|
jpayne@1
|
61 kmer_table[new_mer.upper()] = 1
|
jpayne@1
|
62 if new_mer_rc in kmer_table:
|
jpayne@1
|
63 kmer_table[new_mer_rc.upper()] += 1
|
jpayne@1
|
64 else:
|
jpayne@1
|
65 kmer_table[new_mer_rc.upper()] = 1
|
jpayne@1
|
66 return kmer_table
|
jpayne@1
|
67
|
jpayne@1
|
68 def multifasta_to_kmers_dict(multifasta):
|
jpayne@1
|
69 multi_seq_dict = multifasta_dict(multifasta)
|
jpayne@1
|
70 lib_dict = {}
|
jpayne@1
|
71 for h in multi_seq_dict:
|
jpayne@1
|
72 lib_dict[h] = set(
|
jpayne@1
|
73 [k for k in createKmerDict_reads([multi_seq_dict[h]], 27)])
|
jpayne@1
|
74 return lib_dict
|
jpayne@1
|
75
|
jpayne@1
|
76 def get_salmid_invA_database(ex_dir):
|
jpayne@1
|
77 # read invA kmer and return it
|
jpayne@1
|
78 a = open(ex_dir + '/invA_mers_dict', 'rb')
|
jpayne@1
|
79 invA_dict = pickle.load(a)
|
jpayne@1
|
80 try:
|
jpayne@1
|
81 del invA_dict['version']
|
jpayne@1
|
82 except:
|
jpayne@1
|
83 pass
|
jpayne@1
|
84 return invA_dict
|
jpayne@1
|
85
|
jpayne@1
|
86 def get_salmid_rpoB_database(ex_dir):
|
jpayne@1
|
87 # read invA kmer and return it
|
jpayne@1
|
88 a = open(ex_dir + '/rpoB_mers_dict', 'rb')
|
jpayne@1
|
89 rpoB_dict = pickle.load(a)
|
jpayne@1
|
90 try:
|
jpayne@1
|
91 del rpoB_dict['version']
|
jpayne@1
|
92 except:
|
jpayne@1
|
93 pass
|
jpayne@1
|
94 return rpoB_dict
|
jpayne@1
|
95
|
jpayne@1
|
96 def main():
|
jpayne@1
|
97 args = parse_args()
|
jpayne@1
|
98 ex_dir = os.path.dirname(os.path.realpath(__file__))
|
jpayne@1
|
99 lib_dict = multifasta_to_kmers_dict(ex_dir + '/H_and_O_and_specific_genes.fasta')
|
jpayne@1
|
100 invA_dict=get_salmid_invA_database(ex_dir)
|
jpayne@1
|
101 #rpoB_dict=get_salmid_rpoB_database(ex_dir)
|
jpayne@1
|
102 lib_dict_new = lib_dict.copy()
|
jpayne@1
|
103 #print(len(lib_dict_new))
|
jpayne@1
|
104 lib_dict_new.update(invA_dict)
|
jpayne@1
|
105 #print(len(lib_dict_new))
|
jpayne@1
|
106 #lib_dict_new.update(rpoB_dict)
|
jpayne@1
|
107 #print(len(lib_dict_new))
|
jpayne@1
|
108 f = open(ex_dir + '/antigens.pickle', "wb")
|
jpayne@1
|
109 pickle.dump(lib_dict_new, f)
|
jpayne@1
|
110 f.close()
|
jpayne@1
|
111
|
jpayne@1
|
112 if __name__ == '__main__':
|
jpayne@1
|
113 main()
|