jpayne@17: #!/usr/bin/env python3
jpayne@17: 
jpayne@17: import argparse
jpayne@17: import os,subprocess
jpayne@17: import pickle
jpayne@17: 
jpayne@17: ### SeqSero Kmer
jpayne@17: def parse_args():
jpayne@17:     "Parse the input arguments, use '-h' for help."
jpayne@17:     parser = argparse.ArgumentParser(usage='Just type "SeqSero2_update_kmer_database.py", it will update kmer database automatically')
jpayne@17:     return parser.parse_args()
jpayne@17: 
jpayne@17: def reverse_complement(sequence):
jpayne@17:     complement = {
jpayne@17:         'A': 'T',
jpayne@17:         'C': 'G',
jpayne@17:         'G': 'C',
jpayne@17:         'T': 'A',
jpayne@17:         'N': 'N',
jpayne@17:         'M': 'K',
jpayne@17:         'R': 'Y',
jpayne@17:         'W': 'W',
jpayne@17:         'S': 'S',
jpayne@17:         'Y': 'R',
jpayne@17:         'K': 'M',
jpayne@17:         'V': 'B',
jpayne@17:         'H': 'D',
jpayne@17:         'D': 'H',
jpayne@17:         'B': 'V'
jpayne@17:     }
jpayne@17:     return "".join(complement[base] for base in reversed(sequence))
jpayne@17: 
jpayne@17: def multifasta_dict(multifasta):
jpayne@17:     multifasta_list = [
jpayne@17:         line.strip() for line in open(multifasta, 'r') if len(line.strip()) > 0
jpayne@17:     ]
jpayne@17:     headers = [i for i in multifasta_list if i[0] == '>']
jpayne@17:     multifasta_dict = {}
jpayne@17:     for h in headers:
jpayne@17:         start = multifasta_list.index(h)
jpayne@17:         for element in multifasta_list[start + 1:]:
jpayne@17:             if element[0] == '>':
jpayne@17:                 break
jpayne@17:             else:
jpayne@17:                 if h[1:] in multifasta_dict:
jpayne@17:                     multifasta_dict[h[1:]] += element
jpayne@17:                 else:
jpayne@17:                     multifasta_dict[h[1:]] = element
jpayne@17:     return multifasta_dict
jpayne@17: 
jpayne@17: def createKmerDict_reads(list_of_strings, kmer):
jpayne@17:     kmer_table = {}
jpayne@17:     for string in list_of_strings:
jpayne@17:         sequence = string.strip('\n')
jpayne@17:         for i in range(len(sequence) - kmer + 1):
jpayne@17:             new_mer = sequence[i:i + kmer].upper()
jpayne@17:             new_mer_rc = reverse_complement(new_mer)
jpayne@17:             if new_mer in kmer_table:
jpayne@17:                 kmer_table[new_mer.upper()] += 1
jpayne@17:             else:
jpayne@17:                 kmer_table[new_mer.upper()] = 1
jpayne@17:             if new_mer_rc in kmer_table:
jpayne@17:                 kmer_table[new_mer_rc.upper()] += 1
jpayne@17:             else:
jpayne@17:                 kmer_table[new_mer_rc.upper()] = 1
jpayne@17:     return kmer_table
jpayne@17: 
jpayne@17: def multifasta_to_kmers_dict(multifasta):
jpayne@17:     multi_seq_dict = multifasta_dict(multifasta)
jpayne@17:     lib_dict = {}
jpayne@17:     for h in multi_seq_dict:
jpayne@17:         lib_dict[h] = set(
jpayne@17:             [k for k in createKmerDict_reads([multi_seq_dict[h]], 27)])
jpayne@17:     return lib_dict
jpayne@17: 
jpayne@17: def get_salmid_invA_database(ex_dir):
jpayne@17:   # read invA kmer and return it
jpayne@17:   a = open(ex_dir + '/invA_mers_dict', 'rb')
jpayne@17:   invA_dict = pickle.load(a)
jpayne@17:   try:
jpayne@17:     del invA_dict['version']
jpayne@17:   except:
jpayne@17:     pass
jpayne@17:   return invA_dict
jpayne@17: 
jpayne@17: def get_salmid_rpoB_database(ex_dir):
jpayne@17:   # read invA kmer and return it
jpayne@17:   a = open(ex_dir + '/rpoB_mers_dict', 'rb')
jpayne@17:   rpoB_dict = pickle.load(a)
jpayne@17:   try:
jpayne@17:     del rpoB_dict['version']
jpayne@17:   except:
jpayne@17:     pass
jpayne@17:   return rpoB_dict
jpayne@17: 
jpayne@17: def main():
jpayne@17:   args = parse_args()
jpayne@17:   ex_dir = os.path.dirname(os.path.realpath(__file__))
jpayne@17:   lib_dict = multifasta_to_kmers_dict(ex_dir + '/H_and_O_and_specific_genes.fasta')
jpayne@17:   invA_dict=get_salmid_invA_database(ex_dir)
jpayne@17:   #rpoB_dict=get_salmid_rpoB_database(ex_dir)
jpayne@17:   lib_dict_new = lib_dict.copy()
jpayne@17:   #print(len(lib_dict_new))
jpayne@17:   lib_dict_new.update(invA_dict)
jpayne@17:   #print(len(lib_dict_new))
jpayne@17:   #lib_dict_new.update(rpoB_dict)
jpayne@17:   #print(len(lib_dict_new))
jpayne@17:   f = open(ex_dir + '/antigens.pickle', "wb")
jpayne@17:   pickle.dump(lib_dict_new, f)
jpayne@17:   f.close()
jpayne@17: 
jpayne@17: if __name__ == '__main__':
jpayne@17:   main()