jpayne@17: #!/usr/bin/env python3 jpayne@17: jpayne@17: import argparse jpayne@17: import os,subprocess jpayne@17: import pickle jpayne@17: jpayne@17: ### SeqSero Kmer jpayne@17: def parse_args(): jpayne@17: "Parse the input arguments, use '-h' for help." jpayne@17: parser = argparse.ArgumentParser(usage='Just type "SeqSero2_update_kmer_database.py", it will update kmer database automatically') jpayne@17: return parser.parse_args() jpayne@17: jpayne@17: def reverse_complement(sequence): jpayne@17: complement = { jpayne@17: 'A': 'T', jpayne@17: 'C': 'G', jpayne@17: 'G': 'C', jpayne@17: 'T': 'A', jpayne@17: 'N': 'N', jpayne@17: 'M': 'K', jpayne@17: 'R': 'Y', jpayne@17: 'W': 'W', jpayne@17: 'S': 'S', jpayne@17: 'Y': 'R', jpayne@17: 'K': 'M', jpayne@17: 'V': 'B', jpayne@17: 'H': 'D', jpayne@17: 'D': 'H', jpayne@17: 'B': 'V' jpayne@17: } jpayne@17: return "".join(complement[base] for base in reversed(sequence)) jpayne@17: jpayne@17: def multifasta_dict(multifasta): jpayne@17: multifasta_list = [ jpayne@17: line.strip() for line in open(multifasta, 'r') if len(line.strip()) > 0 jpayne@17: ] jpayne@17: headers = [i for i in multifasta_list if i[0] == '>'] jpayne@17: multifasta_dict = {} jpayne@17: for h in headers: jpayne@17: start = multifasta_list.index(h) jpayne@17: for element in multifasta_list[start + 1:]: jpayne@17: if element[0] == '>': jpayne@17: break jpayne@17: else: jpayne@17: if h[1:] in multifasta_dict: jpayne@17: multifasta_dict[h[1:]] += element jpayne@17: else: jpayne@17: multifasta_dict[h[1:]] = element jpayne@17: return multifasta_dict jpayne@17: jpayne@17: def createKmerDict_reads(list_of_strings, kmer): jpayne@17: kmer_table = {} jpayne@17: for string in list_of_strings: jpayne@17: sequence = string.strip('\n') jpayne@17: for i in range(len(sequence) - kmer + 1): jpayne@17: new_mer = sequence[i:i + kmer].upper() jpayne@17: new_mer_rc = reverse_complement(new_mer) jpayne@17: if new_mer in kmer_table: jpayne@17: kmer_table[new_mer.upper()] += 1 jpayne@17: else: jpayne@17: kmer_table[new_mer.upper()] = 1 jpayne@17: if new_mer_rc in kmer_table: jpayne@17: kmer_table[new_mer_rc.upper()] += 1 jpayne@17: else: jpayne@17: kmer_table[new_mer_rc.upper()] = 1 jpayne@17: return kmer_table jpayne@17: jpayne@17: def multifasta_to_kmers_dict(multifasta): jpayne@17: multi_seq_dict = multifasta_dict(multifasta) jpayne@17: lib_dict = {} jpayne@17: for h in multi_seq_dict: jpayne@17: lib_dict[h] = set( jpayne@17: [k for k in createKmerDict_reads([multi_seq_dict[h]], 27)]) jpayne@17: return lib_dict jpayne@17: jpayne@17: def get_salmid_invA_database(ex_dir): jpayne@17: # read invA kmer and return it jpayne@17: a = open(ex_dir + '/invA_mers_dict', 'rb') jpayne@17: invA_dict = pickle.load(a) jpayne@17: try: jpayne@17: del invA_dict['version'] jpayne@17: except: jpayne@17: pass jpayne@17: return invA_dict jpayne@17: jpayne@17: def get_salmid_rpoB_database(ex_dir): jpayne@17: # read invA kmer and return it jpayne@17: a = open(ex_dir + '/rpoB_mers_dict', 'rb') jpayne@17: rpoB_dict = pickle.load(a) jpayne@17: try: jpayne@17: del rpoB_dict['version'] jpayne@17: except: jpayne@17: pass jpayne@17: return rpoB_dict jpayne@17: jpayne@17: def main(): jpayne@17: args = parse_args() jpayne@17: ex_dir = os.path.dirname(os.path.realpath(__file__)) jpayne@17: lib_dict = multifasta_to_kmers_dict(ex_dir + '/H_and_O_and_specific_genes.fasta') jpayne@17: invA_dict=get_salmid_invA_database(ex_dir) jpayne@17: #rpoB_dict=get_salmid_rpoB_database(ex_dir) jpayne@17: lib_dict_new = lib_dict.copy() jpayne@17: #print(len(lib_dict_new)) jpayne@17: lib_dict_new.update(invA_dict) jpayne@17: #print(len(lib_dict_new)) jpayne@17: #lib_dict_new.update(rpoB_dict) jpayne@17: #print(len(lib_dict_new)) jpayne@17: f = open(ex_dir + '/antigens.pickle', "wb") jpayne@17: pickle.dump(lib_dict_new, f) jpayne@17: f.close() jpayne@17: jpayne@17: if __name__ == '__main__': jpayne@17: main()