annotate SeqSero2S/bin/SeqSero2S.py @ 19:cfc91e1d2c9b draft

planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
author jpayne
date Fri, 15 May 2026 17:50:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1 #!/usr/bin/env python3
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
2
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
3 import sys
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
4 import time
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
5 import random
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
6 import os
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
7 import subprocess
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
8 import gzip
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
9 import io
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
10 import pickle
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
11 import argparse
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
12 import itertools
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
13 import json
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
14 from distutils.version import LooseVersion
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
15 from distutils.spawn import find_executable
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
16 sys.path.insert(1,sys.path[0]+'/..')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
17
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
18 __version__ = "1.1.4"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
19
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
20 ### SeqSero Kmer
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def parse_args():
    """Build the SeqSero2S command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace: parsed options, exposed as the attributes
        ``i``, ``t``, ``b``, ``p``, ``m``, ``n``, ``d``, ``c``, ``s``,
        ``phred_offset`` and ``check``.  ``-h`` and ``-v/--version`` print
        their message and exit, as usual for argparse.
    """
    usage_text = 'SeqSero2S.py -t <data_type> -m <mode> -i <input_data> [-d <output_directory>] [-p <number of threads>] [-b <BWA_algorithm>]\n\nDevelopper: Shaokang Zhang (zskzsk@uga.edu), Hendrik C Den-Bakker (Hendrik.DenBakker@uga.edu) and Xiangyu Deng (xdeng@uga.edu)\n\nContact email:seqsero@gmail.com\n\n'
    parser = argparse.ArgumentParser(usage=usage_text)

    # Input data; every given path is converted to an absolute path.
    parser.add_argument(
        "-i",
        nargs="+",
        type=os.path.abspath,
        help="<string>: path/to/input_data",
    )
    # Kind of input data.
    parser.add_argument(
        "-t",
        choices=['1', '2', '3', '4', '5'],
        help="<int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore reads (fasta/fastq)",
    )
    # bwa algorithm used by the allele workflow.
    parser.add_argument(
        "-b",
        choices=['sam', 'mem'],
        default="mem",
        help="<string>: algorithms for bwa mapping for allele mode; 'mem' for mem, 'sam' for samse/sampe; default=mem; optional; for now we only optimized for default 'mem' mode",
    )
    # Thread count; note that the value is kept as a string.
    parser.add_argument(
        "-p",
        default="1",
        help="<int>: number of threads for allele mode, if p >4, only 4 threads will be used for assembly since the amount of extracted reads is small, default=1",
    )
    # Workflow selection.
    parser.add_argument(
        "-m",
        choices=['k', 'a'],
        default="a",
        help="<string>: which workflow to apply, 'a'(raw reads allele micro-assembly), 'k'(raw reads and genome assembly k-mer), default=a",
    )
    # Reporting / output options.
    parser.add_argument(
        "-n",
        help="<string>: optional, to specify a sample name in the report output",
    )
    parser.add_argument(
        "-d",
        help="<string>: optional, to specify an output directory name, if not set, the output directory would be 'SeqSero_result_'+time stamp+one random number",
    )
    parser.add_argument(
        "-c",
        action="store_true",
        help="<flag>: if '-c' was flagged, SeqSero2S will only output serotype prediction without the directory containing log files",
    )
    parser.add_argument(
        "-s",
        action="store_true",
        help="<flag>: if '-s' was flagged, SeqSero2S will not output header in SeqSero_result.tsv",
    )
    parser.add_argument(
        "--phred_offset",
        choices=['33', '64', 'auto'],
        default='auto',
        help="<33|64|auto>: offset for FASTQ file quality scores, default=auto",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="<flag>: use '--check' flag to check the required dependencies",
    )
    parser.add_argument(
        '-v', '--version',
        action='version',
        version=f"%(prog)s {__version__}",
    )

    return parser.parse_args()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
37
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
### check paths of dependencies
# NOTE: this runs at module import time and parses sys.argv via parse_args().
check_dependencies = parse_args().check
dependencies = ['bwa','samtools','blastn','fastq-dump','spades.py','bedtools','SalmID.py','mlst','stringMLST.py']
if check_dependencies:
    missing_dependencies = []
    for item in dependencies:
        ext_path = find_executable(item)
        if ext_path is not None:
            print ("Using "+item+" - "+ext_path)
        else:
            print ("ERROR: can not find "+item+" in PATH")
            missing_dependencies.append(item)
    # Report failure through the exit status so that calling scripts can
    # detect a missing dependency; status stays 0 when everything was found.
    sys.exit(1 if missing_dependencies else 0)
### end of --check
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
50
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
# Upper-case IUPAC nucleotide code -> complementary code.  Built once at
# import time instead of on every call, because reverse_complement() is
# invoked once per k-mer while k-mer tables are built.
_COMPLEMENT = {
    'A': 'T',
    'C': 'G',
    'G': 'C',
    'T': 'A',
    'N': 'N',
    'M': 'K',
    'R': 'Y',
    'W': 'W',
    'S': 'S',
    'Y': 'R',
    'K': 'M',
    'V': 'B',
    'H': 'D',
    'D': 'H',
    'B': 'V',
}


def reverse_complement(sequence):
    """Return the reverse complement of an upper-case nucleotide string.

    Args:
        sequence: string made of the upper-case codes
            A, C, G, T, N, M, R, W, S, Y, K, V, H, D, B.

    Returns:
        str: the complemented bases of ``sequence`` in reverse order
        (an empty string for empty input).

    Raises:
        KeyError: if ``sequence`` contains any other character
            (lower-case letters included).
    """
    return "".join([_COMPLEMENT[base] for base in reversed(sequence)])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
70
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def mlst(assembly):
    """Run the external ``mlst`` program on an assembly and return its call.

    The command writes its JSON report to ``mlst.json`` in the current
    working directory and appends its stdout/stderr to ``data_log.txt``.

    Args:
        assembly: path of the assembly file handed to ``mlst``.

    Returns:
        tuple: ``(st, alleles)`` — the ``'sequence_type'`` and ``'alleles'``
        entries of the first element of the JSON report.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    import shlex  # local import: only needed to build the shell command

    # Quote the path so spaces or shell metacharacters in it cannot break
    # (or inject into) the command line.
    subprocess.check_call("mlst -q --json mlst.json --scheme senterica_achtman_2 "+shlex.quote(assembly)+" >> data_log.txt 2>&1",shell=True)
    with open("mlst.json",'r') as f:
        mlst_result = json.load(f)
    st = mlst_result[0]['sequence_type']
    alleles = mlst_result[0]['alleles']
    return(st,alleles)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
79
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def stringmlst(f1,f2,t,d):
    """Run ``stringMLST.py --predict`` on reads and return its call.

    The command writes ``stringMLST.txt`` in the current working directory
    and appends its stdout/stderr to ``data_log.txt``.

    Args:
        f1: path of the first (or only) read file.
        f2: path of the second read file; only used when ``t`` is '1' or '2'.
        t: data type code; '1'/'2' run the paired command, '3' the single
            one.  For any other value no command is run and an existing
            ``stringMLST.txt`` is read as is.
        d: directory that contains the ``kmer/salmonella`` database prefix.

    Returns:
        tuple: ``(st, alleles)`` where ``st`` is the last tab-separated field
        of the second line of ``stringMLST.txt`` ('0' is reported as '-') and
        ``alleles`` maps the seven locus names to fields 1-7 of that line.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
        IndexError: if the result file has fewer than two lines or fewer
            than eight fields on its second line.
    """
    import shlex  # local import: only needed to build the shell commands

    # Paths are quoted so spaces or shell metacharacters cannot break
    # (or inject into) the command line.
    if t in ['1','2']:
        subprocess.check_call("stringMLST.py --predict -P "+shlex.quote(d+"/kmer/salmonella")+" -1 "+shlex.quote(f1)+" -2 "+shlex.quote(f2)+" -a /dev/stdout -o stringMLST.txt >> data_log.txt 2>&1",shell=True)
    elif t=='3':
        subprocess.check_call("stringMLST.py --predict -P "+shlex.quote(d+"/kmer/salmonella")+" -1 "+shlex.quote(f1)+" -s -a /dev/stdout -o stringMLST.txt >> data_log.txt 2>&1",shell=True)
    # Line 0 is skipped; line 1 holds the tab-separated result row.
    with open("stringMLST.txt") as result_file:
        mlst_result = result_file.readlines()[1].strip().split('\t')
    st = mlst_result[-1]
    if st == '0':
        st = '-'
    k = ['aroC','dnaN','hemD','hisD','purE','sucA','thrA']
    # Explicit indexing keeps the IndexError on a short result row.
    v = [mlst_result[index] for index in range(1, 8)]
    alleles = dict(zip(k,v))
    return(st,alleles)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
94
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def createKmerDict_reads(list_of_strings, kmer):
    """Count every k-mer, and its reverse complement, in a list of sequences.

    Args:
        list_of_strings: iterable of sequence strings; newline characters at
            either end of each string are removed before k-merizing.
        kmer: k-mer length.

    Returns:
        dict: upper-cased k-mer -> number of times it was seen, where each
        window contributes one count for the k-mer itself and one for its
        reverse complement (so a k-mer equal to its own reverse complement
        is counted twice per window).
    """
    counts = {}
    for raw in list_of_strings:
        seq = raw.strip('\n')
        window_count = len(seq) - kmer + 1
        for pos in range(window_count):
            forward = seq[pos:pos + kmer].upper()
            # Forward strand first, then the opposite strand.
            for mer in (forward, reverse_complement(forward)):
                counts[mer] = counts.get(mer, 0) + 1
    return counts
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
111
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
112
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def multifasta_dict(multifasta):
    """Read a multi-FASTA file into a ``header -> sequence`` dictionary.

    The file is read in a single pass and closed afterwards.  Lines are
    stripped of surrounding whitespace and blank lines are ignored.

    Args:
        multifasta: path of the FASTA file.

    Returns:
        dict: header text (without the leading '>') -> concatenated sequence
        lines of that record.  Headers that have no sequence line get no
        entry, and sequence lines that appear before the first header are
        ignored.  If a header occurs more than once, the sequences of all
        its records are concatenated in file order.
    """
    pieces = {}  # header -> list of sequence lines, joined once at the end
    current = None
    with open(multifasta, 'r') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            if line[0] == '>':
                current = line[1:]
            elif current is not None:
                pieces.setdefault(current, []).append(line)
    return {header: ''.join(lines) for header, lines in pieces.items()}
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
130
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
131
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def multifasta_single_string(multifasta):
    """Concatenate all sequence lines of a FASTA file into one string.

    Args:
        multifasta: path of the FASTA file.

    Returns:
        str: every non-blank line that does not start with '>' (after
        stripping surrounding whitespace), joined in file order with no
        separator between records.
    """
    # The context manager closes the file handle once the lines are read.
    with open(multifasta, 'r') as handle:
        stripped_lines = (line.strip() for line in handle)
        return ''.join(
            [text for text in stripped_lines if text and text[0] != '>']
        )
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
138
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
139
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def chunk_a_long_sequence(long_sequence, chunk_size=60):
    """Cut a sequence into consecutive pieces of ``chunk_size`` characters.

    Args:
        long_sequence: the string to cut.
        chunk_size: length of every full piece (default 60).

    Returns:
        list: the full-size pieces in order, always followed by one final
        piece holding the remainder.  That final piece is an empty string
        when the length is an exact multiple of ``chunk_size`` (including
        empty input), so the list is never empty.

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    # Number of full chunks; derived from chunk_size rather than a fixed 60
    # so that a non-default chunk_size is honoured.
    steps = len(long_sequence) // chunk_size
    chunk_list = [
        long_sequence[i * chunk_size:(i + 1) * chunk_size] for i in range(steps)
    ]
    chunk_list.append(long_sequence[steps * chunk_size:])
    return chunk_list
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
147
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
148
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def target_multifasta_kmerizer(multifasta, k, kmerDict):
    """Collect k-mers from the regions of an assembly that hit ``kmerDict``.

    The sequence lines of ``multifasta`` are concatenated and cut into
    60-character chunks.  Each chunk is probed with its middle ``k``
    characters; when that probe is found in ``kmerDict``, a window of
    neighbouring chunks is joined and k-merized with createKmerDict_reads
    (which yields each k-mer together with its reverse complement).

    Args:
        multifasta: path of the FASTA file.
        k: k-mer length.
        kmerDict: container of target k-mers, tested with ``in``.

    Returns:
        set: all k-mers collected from the windows around the hits.
    """
    forward_length = 300  #if find the target, put forward 300 bases
    # NOTE(review): reverse_length is declared but never used below; the
    # trailing side of the window is also derived from forward_length.
    # Confirm whether that is intended before changing it.
    reverse_length = 2200  #if find the target, put backward 2200 bases
    chunk_size = 60  #size of the pieces the single long sequence is cut into

    chunks = chunk_a_long_sequence(
        multifasta_single_string(multifasta), chunk_size)
    last_index = len(chunks) - 1
    unit_length = len(chunks[0])
    forward_lines = int(forward_length / unit_length) + 1
    reverse_lines = int(forward_length / unit_length) + 1

    collected = set()
    window_start = 0
    window_end = 0
    for index, chunk in enumerate(chunks):
        # Chunks inside the most recent window were already k-merized.
        if window_start <= index < window_end:
            continue
        probe_start = int((len(chunk) - k) // 2)
        probe = chunk[probe_start:k + probe_start]
        if probe not in kmerDict:  #use the middle part to detect a potential target
            continue
        window_start = max(index - forward_lines, 0)
        window_end = min(index + reverse_lines, last_index)
        # The slice end is exclusive, so the chunk at window_end is not included.
        window = "".join(
            piece.strip() for piece in chunks[window_start:window_end])
        collected.update(createKmerDict_reads([str(window)], k))
    return collected
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
185
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
186
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def target_read_kmerizer(file, k, kmerDict):
    """Collect k-mers from the reads of a file that hit ``kmerDict``.

    Only the second line of every group of four lines is examined (the
    sequence line when the input has four lines per record).  A read is kept
    when the ``k`` characters around its middle are found in ``kmerDict``;
    kept reads are k-merized with createKmerDict_reads.  Reading stops once
    the kept reads add up to at least 4,000,000 characters.

    The file is streamed line by line and closed on exit, so the whole
    input is never held in memory.

    Args:
        file: path of the read file; a name ending in ".gz" is read through
            gzip and each examined line is decoded to text.
        k: k-mer length.
        kmerDict: container of target k-mers, tested with ``in``.

    Returns:
        set: all k-mers collected from the kept reads.
    """
    is_gzipped = file.endswith(".gz")
    total_coverage = 0
    target_mers = set()
    if is_gzipped:
        file_content = io.BufferedReader(gzip.open(file))  # yields bytes lines
    else:
        file_content = open(file, "r")
    with file_content:
        for i, line in enumerate(file_content, start=1):
            if i % 4 == 2:
                # The line still carries its line terminator here, so the
                # probe position is computed from the unstripped length.
                start = int((len(line) - k) // 2)
                if is_gzipped:
                    s1 = line[start:k + start].decode()
                    line = line.decode()
                else:
                    s1 = line[start:k + start]
                if s1 in kmerDict:  #detect it is a potential read or not (use the middle part)
                    total_coverage += len(line)
                    target_mers.update(createKmerDict_reads([str(line)], k))
            if total_coverage >= 4000000:
                break
    return target_mers
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
214
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
215
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def minion_fasta_kmerizer(file, k, kmerDict):
    """Collect the k-mers of a FASTA file that hit a reference k-mer set.

    Every even-numbered line of ``file`` (counting from 1) is treated as a
    sequence line; odd-numbered lines are ignored.
    NOTE(review): this reads the file as strictly alternating
    header/sequence lines, so wrapped (multi-line) FASTA records would be
    misread -- confirm inputs are unwrapped.

    Args:
        file: Path of the FASTA file to read.
        k: k-mer length, passed on to ``kmers``.
        kmerDict: Container of reference k-mers; only membership tests are
            performed on it.

    Returns:
        A set with both members of every (k-mer, reverse-complement k-mer)
        pair produced by ``kmers`` for which at least one member is in
        ``kmerDict``.
    """
    target_mers = set()
    # The context manager closes the handle deterministically; the bare
    # open() used previously left that to the garbage collector.
    with open(file) as handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 2 != 0:
                continue  # odd lines are headers
            for kmer, rc_kmer in kmers(line.strip().upper(), k):
                if kmer in kmerDict or rc_kmer in kmerDict:
                    # Occurrence counts were never used by the caller-visible
                    # result, so a plain set is sufficient.
                    target_mers.add(kmer)
                    target_mers.add(rc_kmer)
    return target_mers
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
235
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
236
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def minion_fastq_kmerizer(file, k, kmerDict):
    """Collect the k-mers of a FASTQ file that hit a reference k-mer set.

    Only the second line of every group of four lines (the line numbers
    ``n`` with ``n % 4 == 2``, counting from 1) is treated as a sequence
    line; all other lines are ignored.

    Args:
        file: Path of the FASTQ file to read.
        k: k-mer length, passed on to ``kmers``.
        kmerDict: Container of reference k-mers; only membership tests are
            performed on it.

    Returns:
        A set with both members of every (k-mer, reverse-complement k-mer)
        pair produced by ``kmers`` for which at least one member is in
        ``kmerDict``.
    """
    target_mers = set()
    # The context manager closes the handle deterministically; the bare
    # open() used previously left that to the garbage collector.
    with open(file) as handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 4 != 2:
                continue  # header, separator or quality line
            for kmer, rc_kmer in kmers(line.strip().upper(), k):
                if kmer in kmerDict or rc_kmer in kmerDict:
                    # Occurrence counts were never used by the caller-visible
                    # result, so a plain set is sufficient.
                    target_mers.add(kmer)
                    target_mers.add(rc_kmer)
    return target_mers
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
256
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
257
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def multifasta_single_string2(multifasta):
    """Concatenate all sequence lines of a (multi-)FASTA file.

    Lines whose stripped content starts with ``>`` are skipped; every other
    line is stripped of surrounding whitespace and appended, in file order.
    Blank lines contribute nothing.

    Args:
        multifasta: Path of the FASTA file to read.

    Returns:
        One string holding all sequence lines joined without separators
        (the empty string for an empty file).
    """
    pieces = []
    with open(multifasta, 'r') as f:
        for line in f:
            stripped = line.strip()
            # startswith() is safe on blank lines, where indexing [0]
            # raised IndexError.
            if stripped.startswith('>'):
                continue
            pieces.append(stripped)
    # Join once instead of growing a string with += per line.
    return ''.join(pieces)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
267
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
268
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def kmers(seq, k):
    """Yield each k-mer of ``seq`` with the matching reverse-complement slice.

    Args:
        seq: Sequence string to slide the window over.
        k: Window (k-mer) length.

    Yields:
        Tuples ``(seq[start:start + k], rc_window)`` for every start
        position ``0 .. len(seq) - k``, where ``rc_window`` is the window of
        ``reverse_complement(seq)`` mirrored to the same position. Nothing
        is yielded when ``k`` exceeds ``len(seq)``.
    """
    rev_comp = reverse_complement(seq)
    seq_len = len(seq)
    # Start at 0 so the first k-mer is included; the earlier range(1, ...)
    # silently dropped it. The mirrored window is addressed with
    # non-negative indices because the negative form [-(start + k):-start]
    # collapses to an empty slice at start == 0.
    # NOTE(review): assumes reverse_complement() preserves length -- confirm.
    for start in range(seq_len - k + 1):
        rc_end = seq_len - start
        yield seq[start:start + k], rev_comp[rc_end - k:rc_end]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
273
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
274
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def multifasta_to_kmers_dict(multifasta, k_size):
    """Build the per-record k-mer sets used as the database k-mer library.

    Args:
        multifasta: Passed unchanged to ``multifasta_dict``
            (presumably a multi-FASTA path -- verify against that helper).
        k_size: k-mer length handed to ``createKmerDict_reads``.

    Returns:
        A dict with one entry per key of ``multifasta_dict(multifasta)``;
        each value is the set of items obtained by iterating
        ``createKmerDict_reads([record], k_size)`` for that key's record.
    """
    records = multifasta_dict(multifasta)
    return {
        name: set(createKmerDict_reads([records[name]], k_size))
        for name in records
    }
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
282
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
283
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def Combine(b, c):
    """Expand an antigen formula with optional factors into all variants.

    For example the formula ``f,[g],t`` (``b == ["g"]``, ``c == ["f", "t"]``)
    expands to ``["f,t", "f,g,t"]``.

    Args:
        b: Optional factors (the bracketed parts, brackets removed).
        c: Mandatory factors.

    Returns:
        A list of comma-joined formulas. The first entry is the mandatory
        factors alone, in the order given. It is followed by one entry per
        non-empty subset of ``b`` (smaller subsets first), each merged with
        the mandatory factors and sorted alphabetically.
    """
    # The mandatory-only variant keeps the caller's ordering (not sorted).
    formulas = [",".join(c)]
    optional_subsets = (
        ",".join(subset)
        for size in range(1, len(b) + 1)
        for subset in itertools.combinations(b, size)
    )
    for optional_part in optional_subsets:
        # Join and re-split so that entries which themselves contain commas
        # are broken into individual factors before sorting.
        factors = ",".join([*c, optional_part]).split(",")
        formulas.append(",".join(sorted(factors)))
    return formulas
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
302
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
303
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
304 def seqsero_from_formula_to_serotypes(Otype, fliC, fljB, special_gene_list,subspecies,ss):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
305 #like test_output_06012017.txt
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
306 #can add more varialbles like sdf-type, sub-species-type in future (we can conclude it into a special-gene-list)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
307 #from Initial_Conditions import phase1,phase2,phaseO,sero,subs,remove_list,rename_dict
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
308 if ss == 'ss2':
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
309 from Initial_Conditions_SS2 import phase1,phase2,phaseO,sero,subs,remove_list,rename_dict
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
310 elif ss == 'ss2s':
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
311 from Initial_Conditions_SS2S import phase1,phase2,phaseO,sero,subs,remove_list,rename_dict
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
312 rename_dict_not_anymore=[rename_dict[x] for x in rename_dict]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
313 rename_dict_all=rename_dict_not_anymore+list(rename_dict) #used for decide whether to
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
314 seronames = []
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
315 seronames_none_subspecies=[]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
316 for i in range(len(phase1)):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
317 fliC_combine = []
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
318 fljB_combine = []
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
319 if phaseO[i] == Otype: # no VII in KW, but it's there
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
320 ### for fliC, detect every possible combinations to avoid the effect of "["
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
321 if phase1[i].count("[") == 0:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
322 fliC_combine.append(phase1[i])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
323 elif phase1[i].count("[") >= 1:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
324 c = []
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
325 b = []
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
326 if phase1[i][0] == "[" and phase1[i][-1] == "]" and phase1[i].count(
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
327 "[") == 1:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
328 content = phase1[i].replace("[", "").replace("]", "")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
329 fliC_combine.append(content)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
330 fliC_combine.append("-")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
331 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
332 for x in phase1[i].split(","):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
333 if "[" in x:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
334 b.append(x.replace("[", "").replace("]", ""))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
335 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
336 c.append(x)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
337 fliC_combine = Combine(
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
338 b, c
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
339 ) #Combine will offer every possible combinations of the formula, like f,[g],t: f,t f,g,t
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
340 ### end of fliC "[" detect
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
341 ### for fljB, detect every possible combinations to avoid the effect of "["
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
342 if phase2[i].count("[") == 0:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
343 fljB_combine.append(phase2[i])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
344 elif phase2[i].count("[") >= 1:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
345 d = []
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
346 e = []
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
347 if phase2[i][0] == "[" and phase2[i][-1] == "]" and phase2[i].count(
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
348 "[") == 1:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
349 content = phase2[i].replace("[", "").replace("]", "")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
350 fljB_combine.append(content)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
351 fljB_combine.append("-")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
352 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
353 for x in phase2[i].split(","):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
354 if "[" in x:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
355 d.append(x.replace("[", "").replace("]", ""))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
356 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
357 e.append(x)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
358 fljB_combine = Combine(d, e)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
359 ### end of fljB "[" detect
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
360 new_fliC = fliC.split(
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
361 ","
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
362 ) #because some antigen like r,[i] not follow alphabetical order, so use this one to judge and can avoid missings
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
363 new_fliC.sort()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
364 new_fliC = ",".join(new_fliC)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
365 new_fljB = fljB.split(",")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
366 new_fljB.sort()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
367 new_fljB = ",".join(new_fljB)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
368 if (new_fliC in fliC_combine
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
369 or fliC in fliC_combine) and (new_fljB in fljB_combine
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
370 or fljB in fljB_combine):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
371 ######start, remove_list,rename_dict, added on 11/11/2018
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
372 if sero[i] not in remove_list:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
373 temp_sero=sero[i]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
374 if temp_sero in rename_dict:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
375 temp_sero=rename_dict[temp_sero] #rename if in the rename list
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
376 if temp_sero not in seronames:#the new sero may already included, if yes, then not consider
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
377 if subs[i] == subspecies:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
378 seronames.append(temp_sero)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
379 seronames_none_subspecies.append(temp_sero)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
380 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
381 pass
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
382 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
383 pass
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
384 ######end, added on 11/11/2018
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
385 #analyze seronames
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
386 subspecies_pointer=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
387 if len(seronames) == 0 and len(seronames_none_subspecies)!=0:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
388 ## ed_SL_06062020: for the subspecies mismatch between KW and SalmID
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
389 seronames=seronames_none_subspecies
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
390 #seronames=["N/A"]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
391 subspecies_pointer="1"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
392 #subspecies_pointer="0"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
393 ##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
394 if len(seronames) == 0:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
395 seronames = [
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
396 "N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
397 ]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
398 star = ""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
399 star_line = ""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
400 if len(seronames) > 1: #there are two possible predictions for serotypes
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
401 star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
402 #changed 04072019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
403 #star_line = "The predicted serotypes share the same general formula:\t" + Otype + ":" + fliC + ":" + fljB + "\n"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
404 if subspecies_pointer=="1" and len(seronames_none_subspecies)!=0:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
405 star="*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
406 star_line = "This antigenic profile has been associated with serotype '"+(" or ").join(seronames)+"' in the Kauffman-White scheme. The existence of the same antigenic formula in multiple species or subspecies is well documented in the Kauffman-White Scheme. " + star_line ## ed_SL_03202021: changed for new output format
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
407 #star_line="The predicted O and H antigens correspond to serotype '"+(" or ").join(seronames)+"' in the Kauffmann-White scheme. The predicted subspecies by SalmID (github.com/hcdenbakker/SalmID) may not be consistent with subspecies designation in the Kauffmann-White scheme. " + star_line
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
408 #star_line="The formula with this subspieces prediction can't get a serotype in KW manual, and the serotyping prediction was made without considering it."+star_line
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
409 seronames=["N/A"] ## ed_SL_06062020
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
410 if Otype=="":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
411 Otype="-"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
412 predict_form = Otype + ":" + fliC + ":" + fljB
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
413 predict_sero = (" or ").join(seronames)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
414 ###special test for Enteritidis
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
415 if predict_form == "9:g,m:-":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
416 sdf = "-"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
417 for x in special_gene_list:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
418 if x.startswith("sdf"):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
419 sdf = "+"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
420 #star_line="Detected sdf gene, a marker to differentiate Gallinarum and Enteritidis"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
421 #star_line="sdf gene detected. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
422 star_line = "Detected Sdf I that is characteristic of commonly circulating strains of serotype Enteritidis. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
423 #predict_form = predict_form + " Sdf prediction:" + sdf
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
424 predict_form = predict_form #changed 04072019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
425 if sdf == "-":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
426 star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
427 #star_line="Didn't detected sdf gene, a marker to differentiate Gallinarum and Enteritidis"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
428 #star_line="sdf gene not detected. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
429 star_line = "Sdf I that is characteristic of commonly circulating strains of serotype Enteritidis was not detected. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
430 #changed in 04072019, for new output
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
431 #star_line = "Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
432 #predict_sero = "Gallinarum/Enteritidis" #04132019, for new output requirement
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
433 predict_sero = "Gallinarum or Enteritidis"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
434 ###end of special test for Enteritidis
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
435 elif predict_form == "4:i:-":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
436 predict_sero = "I 4,[5],12:i:-" # change serotype name
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
437 elif predict_form == "4:r:-":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
438 predict_sero = "N/A (4:r:-)"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
439 elif predict_form == "4:b:-":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
440 predict_sero = "N/A (4:b:-)"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
441 #elif predict_form == "8:e,h:1,2": #removed after official merge of newport and bardo
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
442 #predict_sero = "Newport"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
443 #star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
444 #star_line = "Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
445 claim = "The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes.\n"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
446 if "N/A" in predict_sero:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
447 claim = ""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
448 #special test for Typhimurium
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
449 if "Typhimurium" in predict_sero or predict_form == "4:i:-":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
450 normal = 0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
451 mutation = 0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
452 for x in special_gene_list:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
453 if "oafA-O-4_full" in x:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
454 normal = float(special_gene_list[x])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
455 elif "oafA-O-4_5-" in x:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
456 mutation = float(special_gene_list[x])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
457 if normal > mutation:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
458 pass
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
459 elif normal < mutation:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
460 #predict_sero = predict_sero.strip() + "(O5-)"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
461 predict_sero = predict_sero.strip() #diable special sero for new output requirement, 04132019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
462 star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
463 #star_line = "Detected the deletion of O5-."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
464 star_line = "Detected a deletion in gene oafA that causes O5- variant of Typhimurium. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
465 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
466 pass
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
467 #special test for Paratyphi B
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
468 if "Paratyphi B" in predict_sero or predict_form == "4:b:-":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
469 normal = 0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
470 mutation = 0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
471 for x in special_gene_list:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
472 if "gntR-family-regulatory-protein_dt-positive" in x:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
473 normal = float(special_gene_list[x])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
474 elif "gntR-family-regulatory-protein_dt-negative" in x:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
475 mutation = float(special_gene_list[x])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
476 #print(normal,mutation)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
477 if normal > mutation:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
478 #predict_sero = predict_sero.strip() + "(dt+)" #diable special sero for new output requirement, 04132019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
479 predict_sero = predict_sero.strip()+' var. L(+) tartrate+' if "Paratyphi B" in predict_sero else predict_sero.strip()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
480 star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
481 #star_line = "Didn't detect the SNP for dt- which means this isolate is a Paratyphi B variant L(+) tartrate(+)."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
482 star_line = "The SNP in gene STM3356 that is associated with the d-Tartrate nonfermenting phenotype characteristic of the typhoidal pathotype was not detected. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
483 elif normal < mutation:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
484 #predict_sero = predict_sero.strip() + "(dt-)" #diable special sero for new output requirement, 04132019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
485 predict_sero = predict_sero.strip()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
486 star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
487 #star_line = "Detected the SNP for d-Tartrate nonfermenting phenotype of Paratyphi B. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
488 star_line = "Detected the SNP in gene STM3356 that is associated with the d-Tartrate nonfermenting phenotype characteristic of the typhoidal pathotype. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
489 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
490 star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
491 #star_line = " Failed to detect the SNP for dt-, can't decide it's a Paratyphi B variant L(+) tartrate(+) or not."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
492 star_line = " " ## ed_SL_05152019: do not report this situation.
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
493 #special test for O13,22 and O13,23
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
494 ### add comment for any O2 call. 06052024
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
495 if Otype=='2' and predict_sero not in ['Nitra','Kiel','Koessen']:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
496 star_line = 'O2 is typically a O9 rfb serotype with a mutation that results in a different sugar being placed in O antigen. SS2S detects only one group O2 serotype, Paratyphi A. This genome may be a variant of a group O9 serotype.'
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
497 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
498 if Otype=="13":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
499 #ex_dir = os.path.dirname(os.path.realpath(__file__))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
500 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2s_db')) # ed_SL_09152019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
501 f = open(ex_dir + '/special.pickle', 'rb')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
502 special = pickle.load(f)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
503 O22_O23=special['O22_O23']
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
504 if predict_sero.split(" or ")[0] in O22_O23[-1] and predict_sero.split(" or ")[0] not in rename_dict_all:#if in rename_dict_all, then it means already merged, no need to analyze
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
505 #if predict_sero.split(" or ")[0] in O22_O23[-1]: # Report O22 vs O23 result for O13 serotypes. 12232024
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
506 O22_score=0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
507 O23_score=0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
508 for x in special_gene_list:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
509 if "O:22" in x:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
510 O22_score = O22_score+float(special_gene_list[x])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
511 elif "O:23" in x:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
512 O23_score = O23_score+float(special_gene_list[x])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
513 #print(O22_score,O23_score)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
514 for z in O22_O23[0]:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
515 if predict_sero.split(" or ")[0] in z:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
516 if O22_score > O23_score:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
517 star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
518 star_line = "Detected a genetic marker (galE allele) for ancillary O22."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
519 #star_line = "Detected O22 specific genes to further differenciate '"+predict_sero+"'." #diabled for new output requirement, 04132019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
520 predict_sero = z[0]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
521 elif O22_score < O23_score:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
522 star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
523 star_line = "Detected a genetic marker (galE allele) for ancillary O23."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
524 #star_line = "Detected O23 specific genes to further differenciate '"+predict_sero+"'." #diabled for new output requirement, 04132019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
525 predict_sero = z[1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
526 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
527 star = "*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
528 star_line = "Fail to detect genetic markers (galE alleles) for ancillary O22 and O23."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
529 #star_line = "Fail to detect O22/O23 specific genes." #diabled for new output requirement, 04132019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
530 if " or " in predict_sero:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
531 star_line = star_line + "The predicted serotypes share the same general formula: " + Otype + ":" + fliC + ":" + fljB + " and can be differentiated by additional analysis. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
532 #special test for O6,8
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
533 #merge_O68_list=["Blockley","Bovismorbificans","Hadar","Litchfield","Manhattan","Muenchen"] #remove 11/11/2018, because already in merge list
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
534 #for x in merge_O68_list:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
535 # if x in predict_sero:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
536 # predict_sero=x
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
537 # star=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
538 # star_line=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
539 #special test for Montevideo; most of them are monophasic
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
540 #if "Montevideo" in predict_sero and "1,2,7" in predict_form: #remove 11/11/2018, because already in merge list
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
541 #star="*"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
542 #star_line="Montevideo is almost always monophasic, having an antigen called for the fljB position may be a result of Salmonella-Salmonella contamination."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
543 return predict_form, predict_sero, star, star_line, claim
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
544 ### End of SeqSero Kmer part
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
545
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
546 ### Begin of SeqSero2 allele prediction and output
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def xml_parse_score_comparision_seqsero(xmlfile):
    """Parse a BLAST XML report and rank every query/hit pair by bit score.

    For each alignment of each query record, the HSPs are accumulated into a
    single score. An HSP that overlaps query positions already covered by an
    earlier HSP of the same alignment is down-weighted by the overlapping
    fraction, so repeated coverage cannot push the identity ratio above 1.

    Args:
        xmlfile: path of the BLAST XML output file to read.

    Returns:
        A list of tuples ``(name, score, ids, cover_region)`` sorted by
        ``score`` in descending order, where ``name`` is
        ``"<query>___<hit_def>"``, ``score`` is the weighted sum of HSP bit
        scores, ``ids`` is the weighted sum of ``identities / query_length``
        and ``cover_region`` is the set of query positions covered.
    """
    # Imported here, as in the original, so Biopython is only needed when
    # this function is actually called.
    from Bio.Blast import NCBIXML

    # The records are materialised inside the ``with`` block so the file is
    # closed deterministically (the original never closed it).
    with open(xmlfile) as xml_handle:
        records = list(NCBIXML.parse(xml_handle))

    results = []
    for record in records:
        for alignment in record.alignments:
            score = 0
            ids = 0
            cover_region = set()  # query positions already counted for this hit
            for hsp in alignment.hsps:
                # range() excludes query_end itself; kept so scores stay
                # identical to previous releases.
                span = set(range(hsp.query_start, hsp.query_end))
                if not cover_region or not span:
                    # Nothing covered yet, or an empty span (start == end):
                    # no overlap can be measured, and dividing by len(span)
                    # would raise ZeroDivisionError.
                    fraction = 1
                else:
                    fraction = 1 - len(cover_region & span) / float(len(span))
                cover_region |= span
                # The original had separate branches for "first"/"last"
                # queries, but both performed exactly this computation.
                score += hsp.bits * fraction
                ids += float(hsp.identities) / record.query_length * fraction
            name = record.query.strip() + "___" + alignment.hit_def
            results.append((name, score, ids, cover_region))
    return sorted(results, key=lambda d: d[1], reverse=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
585
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
586
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def Uniq(L, sort_on_fre="none"):
    """Return the distinct values of ``L`` and how often each one occurs.

    Args:
        L: list of mutually comparable items. It is sorted in place, exactly
            as the original implementation did, so callers that rely on the
            list being sorted afterwards keep working.
        sort_on_fre: when left at ``"none"`` the unique values are returned in
            sorted order; any other value orders them by ascending frequency
            (ties broken by the value itself).

    Returns:
        A tuple ``(values, counts)`` of two parallel lists.
    """
    L.sort()
    values = []
    counts = []
    # After sorting, equal items are adjacent, so one groupby pass yields both
    # the unique values and their counts (the original was quadratic).
    for value, run in itertools.groupby(L):
        values.append(value)
        counts.append(sum(1 for _ in run))
    if sort_on_fre != "none" and values:
        # zip() returns an iterator on Python 3 and cannot be indexed, which
        # made the original ``d[1]`` raise TypeError; build lists instead.
        ranked = sorted(zip(counts, values))
        counts = [pair[0] for pair in ranked]
        values = [pair[1] for pair in ranked]
    return (values, counts)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
603
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def judge_fliC_or_fljB_from_head_tail_for_one_contig(nodes_vs_score_list):
    """Decide whether one contig looks like fliC or fljB from head/tail hits.

    Each entry of ``nodes_vs_score_list`` is indexed as ``entry[0]`` (a name)
    and ``entry[1]`` (a score). Scores are summed separately for names that
    contain "fliC" and for names that contain "fljB"; a name containing both
    is counted as fliC only. Entries matching neither are ignored.

    Returns:
        ``(role, difference)`` where ``role`` is "fliC" when its total is at
        least the fljB total, otherwise "fljB", and ``difference`` is the
        absolute gap between the two totals. A small gap means the call is
        not reliable and the caller should fall back to another criterion.
    """
    totals = {"fliC": 0, "fljB": 0}
    for entry in nodes_vs_score_list:
        label = entry[0]
        # First matching gene wins, so "fliC" takes precedence over "fljB".
        gene = next((g for g in ("fliC", "fljB") if g in label), None)
        if gene is not None:
            totals[gene] += entry[1]
    winner = "fliC" if totals["fliC"] >= totals["fljB"] else "fljB"
    return (winner, abs(totals["fliC"] - totals["fljB"]))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
620
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node_name,Final_list,Final_list_passed):
    """Pick fliC/fljB for a contig from the ranked whole-contig BLAST hits.

    Used when the head/tail score difference is too small to trust, or when
    no head/tail hit was found for the contig. The first entry of
    ``Final_list_passed`` whose name (``entry[0]``) contains ``node_name``
    decides the role: the text before the first "_" of that name is returned.

    ``Final_list`` is accepted for interface compatibility but is not used.

    Returns:
        The role prefix of the first matching entry, or "" when no entry
        mentions ``node_name``.
    """
    first_hit = next(
        (entry[0] for entry in Final_list_passed if node_name in entry[0]),
        None,
    )
    if first_hit is None:
        return ""
    return first_hit.split("_")[0]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
630
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def fliC_or_fljB_judge_from_head_tail_sequence(nodes_list,tail_head_list,Final_list,Final_list_passed):
    """Assign a role (fliC or fljB) to every contig in ``nodes_list``.

    nodes_list: contig names (the unique list built by the caller with Uniq).
    tail_head_list: hits whose name (element 0) contains "last" or "first".
    Final_list, Final_list_passed: hit lists, forwarded unchanged to
        judge_fliC_or_fljB_from_whole_contig_blast_score_ranking.

    For each contig, the head/tail hits whose name contains the contig name
    are collected:
      * 1 to 4 hits: judge_fliC_or_fljB_from_head_tail_for_one_contig decides
        the role; when the "diff" value it returns is below 20 the role is
        re-decided from the whole-contig blast score ranking.
      * any other count (no hits, or more than four): the role is the text
        before the first "_" in the name of the first Final_list_passed entry
        that contains the contig name ("" when there is none).

    Returns a list of (role, contig) tuples in ``nodes_list`` order.
    """
    role_list=[]
    for contig in nodes_list:
        hits=[entry for entry in tail_head_list if contig in entry[0]]
        role=""
        if 1<=len(hits)<=4:
            # The original code had one copy of this logic per hit count (4, 3, 2, 1).
            role,diff=judge_fliC_or_fljB_from_head_tail_for_one_contig(hits)
            if diff<20:
                # head/tail evidence is too close to call, fall back to whole-contig ranking
                role=judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(contig,Final_list,Final_list_passed)
        else:
            # no usable head/tail evidence: use the Final_list_passed best match
            for entry in Final_list_passed:
                if contig in entry[0]:
                    role=entry[0].split("_")[0]
                    break
        role_list.append((role,contig))
    # Most common error: two antigens assigned to the same phase.
    # In that case decide both contigs from the whole-contig score ranking instead.
    if len(role_list)==2 and role_list[0][0]==role_list[1][0]:
        role_list=[
            (judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(contig,Final_list,Final_list_passed),contig)
            for contig in nodes_list
        ]
    return role_list
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
687
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def decide_contig_roles_for_H_antigen(Final_list,Final_list_passed):
    """Decide which H-antigen contig is fliC and which one is fljB.

    Contig names are taken from the Final_list_passed hits whose name starts
    with "fl" and contains neither "last" nor "first" (the part after "___",
    stripped), de-duplicated with Uniq. The head/tail hits are the Final_list
    entries whose name contains "last" or "first". Both are handed to
    fliC_or_fljB_judge_from_head_tail_sequence, whose result is returned.
    """
    h_contigs=[
        hit[0].split("___")[1].strip()
        for hit in Final_list_passed
        if hit[0].startswith("fl") and not ("last" in hit[0] or "first" in hit[0])
    ]
    node_list,_=Uniq(h_contigs)
    tail_head_list=[]
    for hit in Final_list:
        name=hit[0]
        if "last" in name or "first" in name:
            tail_head_list.append(hit)
    return fliC_or_fljB_judge_from_head_tail_sequence(node_list,tail_head_list,Final_list,Final_list_passed)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
700
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def decide_O_type_and_get_special_genes(Final_list,Final_list_passed):
    """Decide the O antigen call and collect the special (non-O, non-"fl") genes.

    Hits are indexable records: element 0 is the hit name, element 2 is the
    "coverage identity" (cover_length * identity, per the original comments)
    and element 1 is a numeric value compared against the gene length
    (presumably a blast score -- confirm against the producer of these lists).
    Hit names are parsed as ``<gene>__<gene_length>___<contig>``; contig names
    are expected to contain ``length_<int>_`` and ``_cov_<float>``.

    Returns a tuple
        (O_choice, O_nodes_list, special_genes, final_O, contamination_O, Otypes_uniq)
    where O_choice is "-" when there is no O hit other than the short
    "O-1,3,19_not_in_3,10" marker, and "?" when no rule selected a type.

    Changes from the previous implementation:
      * a hit list made only of the "O-1,3,19_not_in_3,10" marker (two or more
        hits) now yields "-" instead of raising ValueError from max() on an
        empty list;
      * an empty coverage-filtered list no longer raises IndexError;
      * the bare ``except`` is narrowed to the lookup/parsing errors.
    """
    small_marker="O-1,3,19_not_in_3,10"#too short compared with the other genes

    def gene_length(name):
        #reference gene length embedded in the hit name
        return int(name.split("__")[1].split("___")[0])

    def contig_coverage(name):
        #coverage value embedded in the contig part of the hit name
        return float(name.split("_cov_")[-1].split("_")[0])

    O_choice="?"
    O_list=[]
    special_genes={}
    nodes=[]
    for x in Final_list_passed:
        if x[0].startswith("O-"):
            nodes.append(x[0].split("___")[1].strip())
        elif not x[0].startswith("fl"):
            special_genes[x[0]]=x[2]#08172018, x[2] changed from x[-1]
    c,_=Uniq(nodes)
    final_O=[]
    O_nodes_list=[]
    for x in c:#c is the list of contigs; keep the first O hit of each contig
        for y in Final_list_passed:
            if x in y[0] and y[0].startswith("O-"):
                final_O.append(y)
                break
    ### O contig has the problem of two genes on same contig, so do additional test
    potenial_new_gene=""
    for x in final_O:
        #gene length << contig length; 850 is the allowance for the flank region
        if small_marker not in x[0] and gene_length(x[0])*x[2]+850 <= int(x[0].split("length_")[1].split("_")[0]):
            pointer=x[0].split("___")[1].strip()#the contig name, it has a potential merge event
            print(pointer)
            for y in Final_list:
                #relatively strict filter; if passed, it is a merge event and one more hit is added to final_O
                if pointer in y[0] and y not in final_O and (y[1]>=gene_length(y[0])*1.5 or (y[1]>=gene_length(y[0])*y[2] and y[1]>=400)):
                    potenial_new_gene=y
                    break
    if potenial_new_gene!="":
        print("two differnt genes in same contig, fix it for O antigen")
        print(potenial_new_gene[:3])
        new_gene_contig=potenial_new_gene[0].split("___")[-1]
        #only added when its contig is already represented in final_O
        if any(y[0].split("___")[-1]==new_gene_contig for y in final_O):
            final_O.append(potenial_new_gene)
    ### end of the two genes on same contig test
    final_O=sorted(final_O,key=lambda x: x[2], reverse=True)
    informative_hits=[x for x in final_O if small_marker not in x[0]]
    if not informative_hits:
        #no O hit at all, or nothing but the short marker: no O type
        O_choice="-"
    else:
        highest_O_coverage=max(contig_coverage(x[0]) for x in informative_hits)
        O_list=[]
        O_list_less_contamination=[]
        for x in final_O:
            if "O-1,3,19_not_in_3,10__130" not in x[0]:#too small, may affect further analysis
                gene=x[0].split("__")[0]
                O_list.append(gene)
                O_nodes_list.append(x[0].split("___")[1])
                #to limit the effect of contamination, use 0.15 of highest coverage as cut-off
                if contig_coverage(x[0])>highest_O_coverage*0.15:
                    O_list_less_contamination.append(gene)
        #best-scoring gene that passed the coverage cut-off ("" when none passed)
        top_clean_gene=O_list_less_contamination[0] if O_list_less_contamination else ""
        ### special test for O9,46 and O3,10 family
        if ("O-9,46_wbaV" in O_list or "O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254" in O_list) and top_clean_gene.startswith("O-9,"):
            if "O-9,46_wzy" in O_list or "O-9,46_wzy_partial" in O_list:
                O_choice="O-9,46"
            elif "O-9,46,27_partial_wzy" in O_list:
                O_choice="O-9,46,27"
            else:
                O_choice="O-9"#next, detect O9 vs O2
                O2=0
                O9=0
                for z in special_genes:
                    #20240322: keep the highest value per gene to avoid misidentification
                    #of O9 as O2 caused by multiple tyr-O-9 contigs
                    if "tyr-O-9" in z and special_genes[z] > O9:
                        O9=special_genes[z]
                    elif "tyr-O-2" in z and special_genes[z] > O2:
                        O2=special_genes[z]
                if O2>O9:
                    O_choice="O-2"
                #otherwise (O9 higher, or a tie) stay with O-9
        elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and (O_list[0].startswith("O-3,10") or top_clean_gene.startswith("O-9,46_wzy")):
            if "O-3,10_not_in_1,3,19" in O_list:
                O_choice="O-3,10"
            else:
                O_choice="O-1,3,19"
        ### end of special test for O9,46 and O3,10 family
        else:
            try:
                max_score=0
                for x in final_O:
                    #use x[2] (coverage identity); the hit must also meet the coverage threshold
                    if x[2]>=max_score and contig_coverage(x[0])>highest_O_coverage*0.15:
                        max_score=x[2]
                        O_choice=x[0].split("_")[0]
                if O_choice=="O-1,3,19":
                    O_choice=final_O[1][0].split("_")[0]
            except (IndexError,ValueError,TypeError):
                #best effort: keep whatever O_choice was reached before the failure
                pass
    #special for the very low chance situation between O4 and O9,27,46, for serotypes like
    #Bredeney and Schwarzengrund (normally O-4 has the higher score, but sequencing quality
    #may affect the prediction)
    if O_choice=="O-9,46,27" and len(O_list)==2 and "O-4_wzx" in O_list:
        O_choice="O-4"
    Otypes=[]
    for x in O_list:
        if x!=small_marker:
            if "O-9,46_" not in x:
                Otypes.append(x.split("_")[0])
            else:
                Otypes.append(x.split("-from")[0])#e.g. O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254
    Otypes_uniq,_=Uniq(Otypes)
    contamination_O=""
    if O_choice=="O-9,46,27" or O_choice=="O-3,10" or O_choice=="O-1,3,19":
        if len(Otypes_uniq)>2:
            contamination_O="potential contamination from O antigen signals"
    elif len(Otypes_uniq)>1:
        if O_choice=="O-4" and len(Otypes_uniq)==2 and "O-9,46,27" in Otypes_uniq:
            #special 4,12,27 case such as Bredeney and Schwarzengrund
            contamination_O=""
        elif O_choice=="O-9,46" and len(Otypes_uniq)==2 and "O-9,46_wbaV" in Otypes_uniq and "O-9,46_wzy" in Otypes_uniq:
            #the two O-9,46 genes (wbaV and wzy) together are not treated as contamination
            contamination_O=""
        else:
            contamination_O="potential contamination from O antigen signals"
    return O_choice,O_nodes_list,special_genes,final_O,contamination_O,Otypes_uniq
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
831 ### End of SeqSero2 allele prediction and output
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
832
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
833 def get_input_files(make_dir,input_file,data_type,dirpath):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
834 #tell input files from datatype
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
835 #"<int>: '1'(pair-end reads, interleaved),'2'(pair-end reads, seperated),'3'(single-end reads), '4'(assembly),'5'(nanopore fasta),'6'(nanopore fastq)"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
836 for_fq=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
837 rev_fq=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
838 os.chdir(make_dir)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
839 if data_type=="1":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
840 input_file=input_file[0].split("/")[-1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
841 if input_file.endswith(".sra"):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
842 subprocess.check_call("fastq-dump --split-files "+input_file,shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
843 for_fq=input_file.replace(".sra","_1.fastq")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
844 rev_fq=input_file.replace(".sra","_2.fastq")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
845 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
846 core_id=input_file.split(".fastq")[0].split(".fq")[0]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
847 for_fq=core_id+"_1.fastq"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
848 rev_fq=core_id+"_2.fastq"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
849 if input_file.endswith(".gz"):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
850 subprocess.check_call("gzip -dc "+input_file+" | "+dirpath+"/deinterleave_fastq.sh "+for_fq+" "+rev_fq,shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
851 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
852 subprocess.check_call("cat "+input_file+" | "+dirpath+"/deinterleave_fastq.sh "+for_fq+" "+rev_fq,shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
853 elif data_type=="2":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
854 for_fq=input_file[0].split("/")[-1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
855 rev_fq=input_file[1].split("/")[-1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
856 elif data_type=="3":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
857 input_file=input_file[0].split("/")[-1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
858 if input_file.endswith(".sra"):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
859 subprocess.check_call("fastq-dump --split-files "+input_file,shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
860 for_fq=input_file.replace(".sra","_1.fastq")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
861 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
862 for_fq=input_file
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
863 elif data_type in ["4","5","6"]:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
864 for_fq=input_file[0].split("/")[-1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
865 os.chdir("..")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
866 return for_fq,rev_fq
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
867
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
868 def predict_O_and_H_types(Final_list,Final_list_passed,new_fasta):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
869 #get O and H types from Final_list from blast parsing; allele mode
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
870 from Bio import SeqIO
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
871 fliC_choice="-"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
872 fljB_choice="-"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
873 fliC_contig="NA"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
874 fljB_contig="NA"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
875 fliC_region=set([0])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
876 fljB_region=set([0,])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
877 fliC_length=0 #can be changed to coverage in future; in 03292019, changed to ailgned length
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
878 fljB_length=0 #can be changed to coverage in future; in 03292019, changed to ailgned length
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
879 O_choice="-"#no need to decide O contig for now, should be only one
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
880 O_choice,O_nodes,special_gene_list,O_nodes_roles,contamination_O,Otypes_uniq=decide_O_type_and_get_special_genes(Final_list,Final_list_passed)#decide the O antigen type and also return special-gene-list for further identification
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
881 O_choice=O_choice.split("-")[-1].strip()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
882 if (O_choice=="1,3,19" and len(O_nodes_roles)==1 and "1,3,19" in O_nodes_roles[0][0]) or O_choice=="":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
883 O_choice="-"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
884 H_contig_roles=decide_contig_roles_for_H_antigen(Final_list,Final_list_passed)#decide the H antigen contig is fliC or fljB
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
885 #add alignment locations, used for further selection, 03312019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
886 for i in range(len(H_contig_roles)):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
887 x=H_contig_roles[i]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
888 for y in Final_list_passed:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
889 if x[1] in y[0] and y[0].startswith(x[0]):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
890 H_contig_roles[i]+=H_contig_roles[i]+(y[-1],)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
891 break
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
892 log_file=open("SeqSero_log.txt","a")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
893 extract_file=open("Extracted_antigen_alleles.fasta","a")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
894 handle_fasta=list(SeqIO.parse(new_fasta,"fasta"))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
895
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
896 #print("O_contigs:")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
897 log_file.write("O_contigs:\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
898 extract_file.write("#Sequences with antigen signals (if the micro-assembled contig only covers the flanking region, it will not be used for contamination analysis)\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
899 extract_file.write("#O_contigs:\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
900 for x in O_nodes_roles:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
901 if "O-1,3,19_not_in_3,10" not in x[0]:#O-1,3,19_not_in_3,10 is just a small size marker
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
902 #print(x[0].split("___")[-1],x[0].split("__")[0],"blast score:",x[1],"identity%:",str(round(x[2]*100,2))+"%",str(min(x[-1]))+" to "+str(max(x[-1])))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
903 log_file.write(x[0].split("___")[-1]+" "+x[0].split("__")[0]+"; "+"blast score: "+str(x[1])+" identity%: "+str(round(x[2]*100,2))+"%; alignment from "+str(min(x[-1]))+" to "+str(max(x[-1]))+" of antigen\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
904 title=">"+x[0].split("___")[-1]+" "+x[0].split("__")[0]+"; "+"blast score: "+str(x[1])+" identity%: "+str(round(x[2]*100,2))+"%; alignment from "+str(min(x[-1]))+" to "+str(max(x[-1]))+" of antigen\n"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
905 seqs=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
906 for z in handle_fasta:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
907 if x[0].split("___")[-1]==z.description:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
908 seqs=str(z.seq)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
909 extract_file.write(title+seqs+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
910 if len(H_contig_roles)!=0:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
911 highest_H_coverage=max([float(x[1].split("_cov_")[-1].split("_")[0]) for x in H_contig_roles]) #less than highest*0.1 would be regarded as contamination and noises, they will still be considered in contamination detection and logs, but not used as final serotype output
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
912 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
913 highest_H_coverage=0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
914 for x in H_contig_roles:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
915 #if multiple choices, temporarily select the one with longest length for now, will revise in further change
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
916 if "fliC" == x[0] and len(x[-1])>=fliC_length and x[1] not in O_nodes and float(x[1].split("_cov_")[-1].split("_")[0])>highest_H_coverage*0.13:#remember to avoid the effect of O-type contig, so should not in O_node list
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
917 fliC_contig=x[1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
918 fliC_length=len(x[-1])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
919 elif "fljB" == x[0] and len(x[-1])>=fljB_length and x[1] not in O_nodes and float(x[1].split("_cov_")[-1].split("_")[0])>highest_H_coverage*0.13:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
920 fljB_contig=x[1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
921 fljB_length=len(x[-1])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
922 for x in Final_list_passed:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
923 if fliC_choice=="-" and "fliC_" in x[0] and fliC_contig in x[0]:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
924 fliC_choice=x[0].split("_")[1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
925 elif fljB_choice=="-" and "fljB_" in x[0] and fljB_contig in x[0]:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
926 fljB_choice=x[0].split("_")[1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
927 elif fliC_choice!="-" and fljB_choice!="-":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
928 break
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
929 #now remove contigs not in middle core part
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
930 first_allele="NA"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
931 first_allele_percentage=0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
932 for x in Final_list:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
933 if x[0].startswith("fliC") or x[0].startswith("fljB"):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
934 first_allele=x[0].split("__")[0] #used to filter those un-middle contigs
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
935 first_allele_percentage=x[2]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
936 break
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
937 additional_contigs=[]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
938 for x in Final_list:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
939 if first_allele in x[0]:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
940 if (fliC_contig == x[0].split("___")[-1]):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
941 fliC_region=x[3]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
942 elif fljB_contig!="NA" and (fljB_contig == x[0].split("___")[-1]):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
943 fljB_region=x[3]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
944 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
945 if x[1]*1.1>int(x[0].split("___")[1].split("_")[3]):#loose threshold by multiplying 1.1
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
946 additional_contigs.append(x)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
947 #else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
948 #print x[:3]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
949 #we can just use the fljB region (or fliC depends on size), no matter set() or contain a large locations (without middle part); however, if none of them is fully assembled, use 500 and 1200 as conservative cut-off
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
950 if first_allele_percentage>0.9:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
951 if len(fliC_region)>len(fljB_region) and (max(fljB_region)-min(fljB_region))>1000:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
952 target_region=fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) #fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region))))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
953 elif len(fliC_region)<len(fljB_region) and (max(fliC_region)-min(fliC_region))>1000:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
954 target_region=fliC_region|(fljB_region-set(range(min(fliC_region),max(fliC_region)))) #fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region))))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
955 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
956 target_region=set()#doesn't do anything
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
957 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
958 target_region=set()#doesn't do anything
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
959 #print(target_region)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
960 #print(additional_contigs)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
961 target_region2=set(list(range(0,525))+list(range(1200,1700)))#I found to use 500 to 1200 as special region would be best
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
962 target_region=target_region2|target_region
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
963 for x in additional_contigs:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
964 removal=0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
965 contig_length=int(x[0].split("___")[1].split("length_")[-1].split("_")[0])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
966 if fljB_contig not in x[0] and fliC_contig not in x[0] and len(target_region&x[3])/float(len(x[3]))>0.65 and contig_length*0.5<len(x[3])<contig_length*1.5: #consider length and alignment length for now, but very loose,0.5 and 1.5 as cut-off
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
967 removal=1
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
968 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
969 if first_allele_percentage > 0.9 and float(x[0].split("__")[1].split("___")[0])*x[2]/len(x[-1])>0.96:#if high similiarity with middle part of first allele (first allele >0.9, already cover middle part)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
970 removal=1
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
971 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
972 pass
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
973 if removal==1:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
974 for y in H_contig_roles:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
975 if y[1] in x[0]:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
976 H_contig_roles.remove(y)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
977 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
978 pass
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
979 #print(x[:3],contig_length,len(target_region&x[3])/float(len(x[3])),contig_length*0.5,len(x[3]),contig_length*1.5)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
980 #end of removing non-middle contigs
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
981 #print("H_contigs:")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
982 log_file.write("H_contigs:\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
983 extract_file.write("#H_contigs:\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
984 H_contig_stat=[]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
985 H1_cont_stat={}
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
986 H2_cont_stat={}
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
987 for i in range(len(H_contig_roles)):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
988 x=H_contig_roles[i]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
989 a=0
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
990 for y in Final_list_passed:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
991 if x[1] in y[0] and y[0].startswith(x[0]):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
992 if "first" in y[0] or "last" in y[0]: #this is the final filter to decide it's fliC or fljB, if can't pass, then can't decide
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
993 for y in Final_list_passed: #it's impossible to has the "first" and "last" allele as prediction, so re-do it
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
994 if x[1] in y[0]:#it's very possible to be third phase allele, so no need to make it must be fliC or fljB
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
995 #print(x[1],"can't_decide_fliC_or_fljB",y[0].split("_")[1],"blast_score:",y[1],"identity%:",str(round(y[2]*100,2))+"%",str(min(y[-1]))+" to "+str(max(y[-1])))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
996 log_file.write(x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
997 H_contig_roles[i]="can't decide fliC or fljB, may be third phase"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
998 title=">"+x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antiten\n"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
999 seqs=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1000 for z in handle_fasta:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1001 if x[1]==z.description:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1002 seqs=str(z.seq)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1003 extract_file.write(title+seqs+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1004 break
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1005 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1006 #print(x[1],x[0],y[0].split("_")[1],"blast_score:",y[1],"identity%:",str(round(y[2]*100,2))+"%",str(min(y[-1]))+" to "+str(max(y[-1])))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1007 log_file.write(x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1008 title=">"+x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1009 seqs=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1010 for z in handle_fasta:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1011 if x[1]==z.description:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1012 seqs=str(z.seq)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1013 extract_file.write(title+seqs+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1014 if x[0]=="fliC":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1015 if y[0].split("_")[1] not in H1_cont_stat:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1016 H1_cont_stat[y[0].split("_")[1]]=y[2]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1017 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1018 H1_cont_stat[y[0].split("_")[1]]+=y[2]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1019 if x[0]=="fljB":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1020 if y[0].split("_")[1] not in H2_cont_stat:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1021 H2_cont_stat[y[0].split("_")[1]]=y[2]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1022 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1023 H2_cont_stat[y[0].split("_")[1]]+=y[2]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1024 break
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1025 #detect contaminations
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1026 #print(H1_cont_stat)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1027 #print(H2_cont_stat)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1028 H1_cont_stat_list=[x for x in H1_cont_stat if H1_cont_stat[x]>0.2]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1029 H2_cont_stat_list=[x for x in H2_cont_stat if H2_cont_stat[x]>0.2]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1030 contamination_H=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1031 if len(H1_cont_stat_list)>1 or len(H2_cont_stat_list)>1:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1032 contamination_H="potential contamination from H antigen signals"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1033 elif len(H2_cont_stat_list)==1 and fljB_contig=="NA":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1034 contamination_H="potential contamination from H antigen signals, uncommon weak fljB signals detected"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1035 #get additional antigens
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1036 """
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1037 if ("O-9,46_wbaV" in O_list or "O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254" in O_list) and O_list_less_contamination[0].startswith("O-9,"):#not sure should use and float(O9_wbaV)/float(num_1) > 0.1
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1038 if "O-9,46_wzy" in O_list:#and float(O946_wzy)/float(num_1) > 0.1
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1039 O_choice="O-9,46"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1040 #print "$$$Most possilble Otype: O-9,46"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1041 elif "O-9,46,27_partial_wzy" in O_list:#and float(O94627)/float(num_1) > 0.1
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1042 O_choice="O-9,46,27"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1043 #print "$$$Most possilble Otype: O-9,46,27"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1044 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and (O_list[0].startswith("O-3,10") or O_list_less_contamination[0].startswith("O-9,46_wzy")):#and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1045 if "O-3,10_not_in_1,3,19" in O_list:#and float(O310_no_1319)/float(num_1) > 0.1
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1046 O_choice="O-3,10"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1047 #print "$$$Most possilble Otype: O-3,10 (contain O-3,10_not_in_1,3,19)"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1048 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1049 O_choice="O-1,3,19"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1050 #print "$$$Most possilble Otype: O-1,3,19 (not contain O-3,10_not_in_1,3,19)"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1051 ### end of special test for O9,46 and O3,10 family
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1052
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1053 if O_choice=="O-9,46,27" or O_choice=="O-3,10" or O_choice=="O-1,3,19":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1054 if len(Otypes_uniq)>2:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1055 contamination_O="potential contamination from O antigen signals"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1056 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1057 if len(Otypes_uniq)>1:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1058 if O_choice=="O-4" and len(Otypes_uniq)==2 and "O-9,46,27" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1059 contamination_O=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1060 elif O_choice=="O-9,46" and len(Otypes_uniq)==2 and "O-9,46_wbaV" in Otypes_uniq and "O-9,46_wzy" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1061 contamination_O=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1062 """
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1063 additonal_antigents=[]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1064 #print(contamination_O)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1065 #print(contamination_H)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1066 log_file.write(contamination_O+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1067 log_file.write(contamination_H+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1068 log_file.close()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1069 return O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1070
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def get_input_K(input_file,lib_dict,data_type,k_size):
    """kmer mode; collect the k-mers of ``input_file`` that match the library.

    Args:
        input_file: path handed unchanged to the selected kmerizer.
        lib_dict: mapping whose values are iterables of reference k-mers;
            all of them are merged into one target set.
        data_type: one-character string selecting the kmerizer:
            '1', '2', '3' -> target_read_kmerizer,
            '4', '5'      -> target_multifasta_kmerizer
                             ('5' is the minion_2d_fasta / nanopore workflow),
            '6'           -> minion_fastq_kmerizer (minion_2d_fastq).
        k_size: k-mer length handed unchanged to the selected kmerizer.

    Returns:
        Whatever the selected kmerizer returns.

    Raises:
        ValueError: if ``data_type`` is not one of '1'..'6'. Previously this
            case surfaced as an UnboundLocalError on the return statement.
    """
    # Merge every reference k-mer into a single set, built once.
    target_kmers = set()
    for allele in lib_dict:
        target_kmers.update(lib_dict[allele])

    if data_type in ('4', '5'):
        # '5' used minion_fasta_kmerizer before ed_SL_08172020 (nanopore workflow change).
        return target_multifasta_kmerizer(input_file, k_size, target_kmers)
    if data_type in ('1', '2', '3'):  # set it for now, will change later
        return target_read_kmerizer(input_file, k_size, target_kmers)
    if data_type == '6':
        return minion_fastq_kmerizer(input_file, k_size, target_kmers)
    raise ValueError("unsupported data_type for k-mer mode: %r" % (data_type,))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1086
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def get_kmer_dict(lib_dict,input_Ks):
    """kmer mode; score every library entry against the input k-mers.

    The score of an entry is the percentage of its reference k-mers that are
    also present in ``input_Ks``.

    Args:
        lib_dict: mapping of entry name -> set of reference k-mers.
        input_Ks: set of k-mers found in the input.

    Returns:
        A tuple ``(O_dict, H_dict, Special_dict)`` of name -> score mappings:
        names starting with 'O-' are kept when the score is above 25, names
        starting with 'fl' when it is above 40, and every other name when it
        is above 1.
    """
    O_dict = {}
    H_dict = {}
    Special_dict = {}
    for name, ref_kmers in lib_dict.items():
        if not ref_kmers:
            # An empty reference set has no defined score; skipping it avoids
            # a ZeroDivisionError in the percentage below.
            continue
        score = (len(ref_kmers & input_Ks) / len(ref_kmers)) * 100
        # Arbitrary cut-off for similarity score; very low but seems
        # necessary to detect O-3,10 in some cases.
        if score <= 1:
            continue
        if name.startswith('O-'):
            if score > 25:
                O_dict[name] = score
        elif name.startswith('fl'):
            if score > 40:
                H_dict[name] = score
        else:
            Special_dict[name] = score
    return O_dict,H_dict,Special_dict
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1102
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def _best_scoring_O(O_dict, skip_key=None):
    """Return the O antigen named by the best-scoring entry of O_dict ('-' if none)."""
    wbaV_keys = ('O-9,46_wbaV__1002',
                 'O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254__1002')
    wzy_missing = ('O-9,46_wzy__1191' not in O_dict
                   and 'O-9,46_wzy_partial__216' not in O_dict)
    best = '-'
    max_score = 0
    for name in O_dict:
        if name == skip_key:
            continue
        score = float(O_dict[name])
        if score >= max_score:  # ">=": on a tie the later entry wins
            max_score = score
            # ed_SL_12182019: a wbaV hit without any wzy evidence is O-9,
            # not O-9,46 (fix for the O-9,46 error example1).
            if name in wbaV_keys and wzy_missing:
                best = "O-9"
            else:
                best = name.split("_")[0]
    return best


def _call_O_antigen(O_dict, Special_dict):
    """Apply the O-9,46 / O-3,10 special rules, else take the best-scoring O entry."""
    if not O_dict:
        return '-'

    wbaV_hit = (O_dict.get('O-9,46_wbaV__1002', 0) > 70
                or O_dict.get('O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254__1002', 0) > 70)
    if wbaV_hit:
        # wzy must score above 40 (modified to fix miscall of O-9,46).
        if (O_dict.get('O-9,46_wzy__1191', 0) > 40
                or O_dict.get("O-9,46_wzy_partial__216", 0) > 40):
            return "O-9,46"
        if "O-9,46,27_partial_wzy__1019" in O_dict:
            return "O-9,46,27"
        # Plain O-9 family: compare the tyr markers to tell O-9 from O-2.
        O2 = 0
        O9 = 0
        for z in Special_dict:
            if "tyr-O-9" in z:
                O9 = float(Special_dict[z])
            if "tyr-O-2" in z:
                O2 = float(Special_dict[z])
        return "O-2" if O2 > O9 else "O-9"

    if ("O-3,10_wzx__1539" in O_dict) and ("O-9,46_wzy__1191" in O_dict):
        if "O-3,10_not_in_1,3,19__1519" in O_dict:
            return "O-3,10"
        return "O-1,3,19"
    ### end of special test for O9,46 and O3,10 family

    try:
        highest_O = _best_scoring_O(O_dict)
        if highest_O == "O-1,3,19":
            # Repeat the search without the O-1,3,19-specific marker.
            highest_O = _best_scoring_O(O_dict, skip_key='O-1,3,19_not_in_3,10__130')
    except (TypeError, ValueError):
        # Best effort, as before: a non-numeric score yields "no call"
        # instead of aborting the whole prediction.
        highest_O = '-'
    return highest_O


def _call_H_antigens(H_dict):
    """Return (fliC, fljB) antigen names picked from H_dict, '-' where not called."""
    # Highest score over fliC and fljB together; used to detect whether
    # fljB existed or not.
    highest_H_score_both_BC = max(H_dict.values(), default=0)

    highest_fliC = '-'
    highest_Score = 0
    for s in H_dict:
        if s.startswith('fliC') and float(H_dict[s]) > highest_Score:
            highest_fliC = s.split('_')[1]
            highest_Score = float(H_dict[s])

    highest_fljB = '-'
    highest_Score = 0
    for s in H_dict:
        if not s.startswith('fljB'):
            continue
        score = float(H_dict[s])
        # fljB is special, so use highest_H_score_both_BC to give a general
        # estimate of coverage; the high factor (0.65) is used because some
        # fliC and fljB alleles are shared with each other.
        if score > highest_Score and score > highest_H_score_both_BC * 0.65:
            antigen = s.split('_')[1]
            if antigen != highest_fliC:
                highest_fljB = antigen
                highest_Score = score
    return highest_fliC, highest_fljB


def call_O_and_H_type(O_dict,H_dict,Special_dict,make_dir):
    """kmer mode; call the O, fliC and fljB types and log all scores.

    Appends the O, H and special scores to "SeqSero_log.txt" in the current
    working directory; the file is closed even if the calling logic raises.

    Args:
        O_dict: name -> score for O antigen entries.
        H_dict: name -> score for 'fliC_*' / 'fljB_*' entries; the antigen is
            the second '_'-separated field of the name.
        Special_dict: name -> score for all other entries; only names
            containing "tyr-O-9" / "tyr-O-2" influence the call.
        make_dir: unused; kept so existing callers keep working.

    Returns:
        ``(highest_O, highest_fliC, highest_fljB)``; each is '-' when no call
        could be made.
    """
    with open("SeqSero_log.txt","a") as log_file:
        log_file.write("O_scores:\n")
        for x in O_dict:
            log_file.write(x+"\t"+str(O_dict[x])+"\n")
        highest_O = _call_O_antigen(O_dict, Special_dict)

        log_file.write("\nH_scores:\n")
        for s in H_dict:
            log_file.write(s+"\t"+str(H_dict[s])+"\n")
        highest_fliC, highest_fljB = _call_H_antigens(H_dict)

        log_file.write("\nSpecial_scores:\n")
        for s in Special_dict:
            log_file.write(s+"\t"+str(Special_dict[s])+"\n")
    return highest_O,highest_fliC,highest_fljB
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1203
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def get_temp_file_names(for_fq,rev_fq):
    """seqsero2 -a; derive the temporary file names from the two read file names.

    Returns the 8-tuple
    ``(sam, bam, sorted_bam, mapped_fq1, mapped_fq2, combined_fq, for_sai, rev_sai)``.
    Only ``mapped_fq2`` and ``rev_sai`` are named after ``rev_fq``; everything
    else is named after ``for_fq``.
    """
    # Names built on the forward read file, in the order they are returned.
    forward_suffixes = (".sam", ".bam", "_sorted.bam", "_mapped.fq")
    sam, bam, sorted_bam, mapped_fq1 = [for_fq + suffix for suffix in forward_suffixes]
    return (
        sam,
        bam,
        sorted_bam,
        mapped_fq1,
        rev_fq + "_mapped.fq",
        for_fq + "_combined.fq",
        for_fq + ".sai",
        rev_fq + ".sai",
    )
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1215
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def map_and_sort(threads,database,fnameA,fnameB,sam,bam,for_sai,rev_sai,sorted_bam,mapping_mode):
    """seqsero2 -a; index the database, map the reads with bwa and name-sort the hits.

    Every step is a shell command built by string concatenation, so all
    arguments (including ``threads``) must be strings. bwa's stderr is
    appended to "data_log.txt" in the current working directory.

    Args:
        threads: thread count passed to bwa / samtools.
        database: reference fasta to index and map against.
        fnameA: forward (or only) read file.
        fnameB: reverse read file, or "" for single-end input.
        sam, bam, sorted_bam: output names for the alignment, the mapped-only
            BAM and the name-sorted BAM.
        for_sai, rev_sai: intermediate files of ``bwa aln`` ("sam" mode only).
        mapping_mode: "mem" for ``bwa mem``; "sam" for ``bwa aln`` followed by
            ``sampe`` (paired) or ``samse`` (single-end).

    Raises:
        subprocess.CalledProcessError: if any external command fails.
        IndexError: if no version can be parsed from samtools' usage text.
    """
    print("building database...")
    subprocess.check_call("bwa index "+database+ " 2>> data_log.txt",shell=True)
    print("mapping...")
    if mapping_mode=="mem":
        subprocess.check_call("bwa mem -k 17 -t "+threads+" "+database+" "+fnameA+" "+fnameB+" > "+sam+ " 2>> data_log.txt",shell=True)
    elif mapping_mode=="sam":
        # The forward reads are aligned first in both the paired and single-end case.
        subprocess.check_call("bwa aln -t "+threads+" "+database+" "+fnameA+" > "+for_sai+ " 2>> data_log.txt",shell=True)
        if fnameB!="":
            subprocess.check_call("bwa aln -t "+threads+" "+database+" "+fnameB+" > "+rev_sai+ " 2>> data_log.txt",shell=True)
            subprocess.check_call("bwa sampe "+database+" "+for_sai+" "+ rev_sai+" "+fnameA+" "+fnameB+" > "+sam+ " 2>> data_log.txt",shell=True)
        else:
            # Fixed: this call used the undefined name `for_fq` and lacked
            # shell=True, so the "> sam" redirect could never have worked.
            subprocess.check_call("bwa samse "+database+" "+for_sai+" "+fnameA+" > "+sam+ " 2>> data_log.txt",shell=True)
    # Keep mapped reads only (-F 4).
    subprocess.check_call("samtools view -@ "+threads+" -F 4 -Sh "+sam+" > "+bam,shell=True)
    ### check the version of samtools then use different commands
    # The version is parsed from the stderr text produced by running samtools
    # with no arguments.
    samtools_version=subprocess.Popen(["samtools"],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    out, err = samtools_version.communicate()
    # Decode before parsing: str(bytes) keeps literal "\n" sequences that could
    # end up glued to the version token.
    version = err.decode(errors="replace").split("ersion:")[1].split()[0]
    print("check samtools version:",version)
    ### end of samtools version check and its analysis
    if LooseVersion(version)<=LooseVersion("1.2"):
        # Here `samtools sort` is given an output prefix rather than a file name.
        subprocess.check_call("samtools sort -@ "+threads+" -n "+bam+" "+fnameA+"_sorted",shell=True)
    else:
        subprocess.check_call("samtools sort -@ "+threads+" -n "+bam+" >"+sorted_bam,shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1242
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def extract_mapped_reads_and_do_assembly_and_blast(current_time,sorted_bam,combined_fq,mapped_fq1,mapped_fq2,threads,fnameA,fnameB,database,mapping_mode,phred_offset):
    """Extract mapped reads, micro-assemble them with SPAdes and BLAST the database against the contigs.

    Part of the "seqsero2 -a" workflow. Every external tool is launched through
    the shell with ``subprocess.check_call``; most of their output is appended
    to ``data_log.txt`` in the current working directory.

    Steps, in order:
      1. ``samtools bam2fq`` dumps ``sorted_bam`` into ``combined_fq``; when
         ``fnameB`` is non-empty a second call also writes ``mapped_fq1`` and
         ``mapped_fq2``.
      2. If ``combined_fq`` (and, when ``fnameB`` is non-empty, ``mapped_fq1``)
         is larger than 100 bytes, ``spades.py --careful`` assembles the reads
         into ``<current_time>_temp``; its ``contigs.fasta`` is moved to
         ``<fnameA>_<database>_<mapping_mode>.fasta`` and the temp directory
         is removed.
      3. ``makeblastdb`` indexes that FASTA and ``blastn`` runs with
         ``database`` as the query, writing ``blasted_output.xml``
         (``-outfmt 5``).

    Args:
        current_time: string used as the prefix of the SPAdes output directory.
        sorted_bam: path of the BAM file to extract reads from.
        combined_fq: path that receives all extracted reads.
        mapped_fq1, mapped_fq2: paths that receive the two mates; only written
            when ``fnameB`` is non-empty.
        threads: thread count as a string (it is both passed to ``int()`` and
            concatenated into the command line); SPAdes gets at most "4".
        fnameA: forward-read file name, used to name the contig FASTA.
        fnameB: reverse-read file name, or "" when there is no second read file.
        database: file name of the reference FASTA, used as the blastn query.
        mapping_mode: string that is only used in the contig FASTA name here.
        phred_offset: 'auto' to pass no offset to SPAdes, otherwise the value
            placed after ``--phred-offset``.

    Returns:
        Tuple ``(xmlfile, new_fasta)``: the BLAST XML file name and the contig
        FASTA file name.

    Raises:
        subprocess.CalledProcessError: if any external command exits non-zero.
        UnboundLocalError: when the size check in step 2 fails. ``xmlfile`` is
            set to "NA" but ``new_fasta`` is never assigned on that path, so
            the final ``return`` raises. The visible caller in ``main()``
            catches this exception (together with ``CalledProcessError``) and
            uses it to initialise its "no result" state, so this behaviour must
            not be changed without updating that caller.
    """
    #seqsero2 -a; extract, assembly and blast
    #subprocess.check_call("bamToFastq -i "+sorted_bam+" -fq "+combined_fq,shell=True)
    # All reads of the sorted BAM go to combined_fq (stdout); stderr is appended to the log.
    subprocess.check_call("samtools bam2fq "+sorted_bam+" > "+combined_fq+" 2>> data_log.txt",shell=True) ## change to samtools bam2fq. 202509
    #print("fnameA:",fnameA)
    #print("fnameB:",fnameB)
    if fnameB!="":
        #subprocess.check_call("bamToFastq -i "+sorted_bam+" -fq "+mapped_fq1+" -fq2 "+mapped_fq2 + " 2>> data_log.txt",shell=True)#2> /dev/null if want no output
        # Second read file given: also split the reads into the two mate files.
        subprocess.check_call("samtools bam2fq -1 "+mapped_fq1+" -2 "+mapped_fq2+" -0 /dev/null -s /dev/null -n "+sorted_bam+" 2>> data_log.txt",shell=True) ## change to samtools bam2fq. 202509
    else:
        pass
    outdir=current_time+"_temp"
    print("assembling...")
    # SPAdes is never given more than 4 threads; t stays a string for concatenation.
    if int(threads)>4:
        t="4"
    else:
        t=threads
    # Files of 100 bytes or less are treated as "no mapped reads".
    if os.path.getsize(combined_fq)>100 and (fnameB=="" or os.path.getsize(mapped_fq1)>100):#if not, then it's "-:-:-"
        # phred_offset is rebound to the command-line fragment handed to SPAdes
        # (an empty string when it was 'auto').
        if phred_offset == 'auto':
            phred_offset = ''
        else:
            phred_offset = '--phred-offset ' + phred_offset

        if fnameB!="":
            #print("spades.py --careful "+phred_offset+" --pe1-s "+combined_fq+" --pe1-1 "+mapped_fq1+" --pe1-2 "+mapped_fq2+" -t "+t+" -o "+outdir+ " >> data_log.txt 2>&1")
            subprocess.check_call("spades.py --careful "+phred_offset+" --pe1-s "+combined_fq+" --pe1-1 "+mapped_fq1+" --pe1-2 "+mapped_fq2+" -t "+t+" -o "+outdir+ " >> data_log.txt 2>&1",shell=True)
        else:
            subprocess.check_call("spades.py --careful "+phred_offset+" --pe1-s "+combined_fq+" -t "+t+" -o "+outdir+ " >> data_log.txt 2>&1",shell=True)
        new_fasta=fnameA+"_"+database+"_"+mapping_mode+".fasta"
        #new_fasta=fnameA+"_"+database.split('/')[-1]+"_"+mapping_mode+".fasta" # change path to database for packaging
        # Keep only the contigs; the rest of the SPAdes output directory is discarded.
        subprocess.check_call("mv "+outdir+"/contigs.fasta "+new_fasta+ " 2> /dev/null",shell=True)
        #os.system("mv "+outdir+"/scaffolds.fasta "+new_fasta+ " 2> /dev/null") contigs.fasta
        subprocess.check_call("rm -rf "+outdir+ " 2> /dev/null",shell=True)
        print("blasting...","\n")
        xmlfile="blasted_output.xml"#fnameA+"-extracted_vs_"+database+"_"+mapping_mode+".xml"
        # Tool output is redirected to the log so it does not interleave with this program's own output.
        subprocess.check_call('makeblastdb -in '+new_fasta+' -out '+new_fasta+'_db '+'-dbtype nucl >> data_log.txt 2>&1',shell=True) #temp.txt is to forbid the blast result interrupt the output of our program###1/27/2015
        # The reference database is the query; the assembled contigs are the BLAST database.
        subprocess.check_call("blastn -query "+database+" -db "+new_fasta+"_db -out "+xmlfile+" -outfmt 5 >> data_log.txt 2>&1",shell=True)###1/27/2015; 08272018, remove "-word_size 10"
    else:
        xmlfile="NA"
    # NOTE: on the "NA" path new_fasta is unassigned, so this line raises
    # UnboundLocalError; main() relies on catching it (see docstring).
    return xmlfile,new_fasta
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1283
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def judge_subspecies(fnameA):
    """Predict the species/subspecies label of the forward reads with SalmID.

    Part of the "seqsero2 -a" workflow. ``SalmID.py -i <fnameA>`` is run
    through the shell and its standard output is appended to ``data_log.txt``
    in the current working directory. The report is read as two tab-separated
    rows: row 0 holds column headers, row 1 holds the scores.

    Decision order (later rules override earlier ones):
      1. Take the header of the highest-scoring column from index 6 onwards
         and keep the first word after its first ".".
      2. If column 4 is above 10 and above column 5, the call is "bongori".
      3. If the best score from step 1 is below 60, the call is "-", unless
         column 5 is above 10 and above column 4, in which case it is
         "enterica".

    Args:
        fnameA: path of the forward-read FASTQ; concatenated into the shell
            command as-is.

    Returns:
        The predicted label as a string ("-" when nothing scores high enough).

    Raises:
        IndexError: if the SalmID output has fewer rows or columns than
            expected (for example when SalmID printed nothing).
        ValueError: if a score field is not a number.
    """
    salmid = subprocess.Popen(
        "SalmID.py -i "+fnameA,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    raw_stdout, _ = salmid.communicate()
    report = raw_stdout.decode("utf-8")
    with open("data_log.txt", "a") as log:
        log.write(report)

    report_rows = report.split("\n")
    score_fields = report_rows[1].split("\t")
    header_fields = report_rows[0].split("\t")
    subspecies_scores = score_fields[6:]
    subspecies_headers = header_fields[6:]

    best_score = 0
    best_index = 1  # column used when no score is above zero
    for index, raw_score in enumerate(subspecies_scores):
        score = float(raw_score)
        if score > best_score:  # strict: the first of equal scores wins
            best_score = score
            best_index = index
    prediction = subspecies_headers[best_index].split(".")[1].strip().split(" ")[0]

    # Column 4 / column 5 are the bongori / enterica scores. Without this
    # check bongori would never be reported, because only the columns from
    # index 6 onwards take part in the search above.
    bongori_score = float(score_fields[4])
    if bongori_score > 10 and bongori_score > float(score_fields[5]):
        prediction = "bongori"

    if best_score < 60:
        # Subspecies marker too weak: report nothing, or fall back to the
        # species-level "enterica" call when that score is convincing.
        prediction = "-"
        enterica_score = float(score_fields[5])
        if enterica_score > 10 and enterica_score > bongori_score:
            prediction = "enterica"
    return prediction
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1312
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def judge_subspecies_Kmer(Special_dict):
    """Pick the subspecies label from k-mer marker scores ("seqsero2 -k" workflow).

    Only keys containing "mer" whose value is above 60 are considered. The
    label of a key is the text after its last underscore. The highest-scoring
    key wins (the first one seen on ties). A "bongori" key scoring above 95
    ends the search at once with "bongori".

    Args:
        Special_dict: mapping of marker name to score; scores may be numbers
            or numeric strings (they are passed through ``float``).

    Returns:
        The winning label, or "-" when no key qualifies.
    """
    best_score = 0
    prediction = "-"
    for marker in Special_dict:
        if "mer" not in marker:
            continue
        score = float(Special_dict[marker])
        if not score > 60:
            continue
        label = marker.split("_")[-1].strip()
        if score > best_score:
            best_score = score
            prediction = label
        # A confident bongori hit settles the call; no need to look at enterica markers.
        if label == "bongori" and score > 95:
            prediction = "bongori"
            break
    return prediction
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1327
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1328 ## ed_SL_11232019: add notes for missing antigen
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def check_antigens(ssp,O_antigen,H1_antigen,H2_antigen,NA_note):
    """Build the explanatory note for a missing antigen or an unresolved subspecies.

    An antigen counts as missing when its value is the string '-'.

    Args:
        ssp: subspecies call; '-' means no call, 'enterica' means only the
            species could be called.
        O_antigen, H1_antigen, H2_antigen: predicted antigens, '-' if absent.
        NA_note: note text supplied by the caller; it is returned unchanged
            only when no antigen note is produced.

    Returns:
        Tuple ``(antigen_note, NA_note)``. Whenever ``antigen_note`` is
        non-empty, ``NA_note`` is returned as ''.
    """
    if ssp == '-':
        antigen_note = 'The input genome cannot be identified as Salmonella. Check the input for taxonomic ID, contamination, or sequencing quality. '
        NA_note = ''
    else:
        antigen_note = ''
        has_O = O_antigen != '-'
        has_H1 = H1_antigen != '-'
        has_H2 = H2_antigen != '-'
        if has_O:
            if not has_H1:
                if has_H2:  # O:-:H2
                    antigen_note = 'fliC was not detected. This is an atypical result that should be further investigated. Most Salmonella strains have fliC, encoding the Phase 1 H antigen, even if it is not expressed. '
                else:  # O:-:-
                    antigen_note = 'H antigens were not detected. This is an atypical result that should be further investigated. Most Salmonella strains have at least fliC, encoding the Phase 1 H antigen, even if it is not expressed. '
                NA_note = ''
        elif has_H1:  # -:H1:X
            antigen_note = 'O antigen was not detected. This result may be due to a rough strain that has deleted the rfb region. For raw reads input, the k-mer workflow is sometimes more sensitive than the microassembly workflow in detecting O antigen. Caution should be used with this approach because the k-mer result may be due to low levels of contamination. '
            NA_note = ''
        elif not has_H2:  # -:-:-
            antigen_note = 'No serotype antigens were detected. This is an atypical result that should be further investigated. '
            NA_note = ''
        # The remaining pattern (-:-:H2) gets no antigen note and keeps NA_note.

    if ssp == 'enterica':
        # Appended after any antigen note produced above.
        antigen_note += 'Subspecies identification of the input genome cannot be definitively determined. '
        NA_note = ''
    return (antigen_note,NA_note)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1355
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1356 ## ed_SL_06062020: rename subspecies ID
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
# Maps the short subspecies/species label used internally to the full name
# used when reporting.  '-' (no call) maps to itself.
subspecies_ID_dir = {
    'I': 'Salmonella enterica subspecies enterica (subspecies I)',
    'II': 'Salmonella enterica subspecies salamae (subspecies II)',
    'IIIa': 'Salmonella enterica subspecies arizonae (subspecies IIIa)',
    'IIIb': 'Salmonella enterica subspecies diarizonae (subspecies IIIb)',
    'IV': 'Salmonella enterica subspecies houtenae (subspecies IV)',
    'VI': 'Salmonella enterica subspecies indica (subspecies VI)',
    'VII': 'Salmonella enterica subspecies VII (subspecies VII)',
    'bongori': 'Salmonella bongori',
    'enterica': 'Salmonella enterica',
    '-': '-',
}
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1367 ##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1368
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1369 ## ed_SL_08172020: format check for fasta or fastq in nanopore workflow, convert raw reads fastq to fasta
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
def format_check(input_file):
    """Return the path of a FASTA version of ``input_file``.

    The format is decided from the first character of the first line:
    '>' means the file is already FASTA and its own path is returned;
    '@' means FASTQ, which is converted with ``seqtk seq -A`` into
    ``<input_file>.fasta`` and that new path is returned.

    Args:
        input_file: path of a plain-text FASTA or FASTQ file.

    Returns:
        Path of the FASTA file to use downstream.

    Raises:
        UnboundLocalError: if the first line starts with neither '>' nor '@'
            (including an empty file). The original code failed with this
            exception type by accident, by returning an unassigned variable.
            The type is kept so existing handlers keep working, but it is now
            raised on purpose with a message naming the offending file.
        subprocess.CalledProcessError: if ``seqtk`` exits with a non-zero status.
    """
    # Read only the first line and close the handle straight away
    # (the original left the file object open).
    with open(input_file,'r') as handle:
        first_line = handle.readline()

    if first_line.startswith('>'):
        return input_file

    if first_line.startswith('@'):
        input_file_fa = input_file + '.fasta'
        subprocess.check_call("seqtk seq -A "+input_file+" > "+input_file_fa,shell=True)
        return input_file_fa

    print ('please check the format of input files')
    raise UnboundLocalError(
        "cannot determine the format of " + repr(input_file)
        + ": first line starts with neither '>' (fasta) nor '@' (fastq)"
    )
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1381 ##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1382
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1383 def main():
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1384 #combine SeqSeroK and SeqSero2, also with SalmID
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1385 args = parse_args()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1386 input_file = args.i
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1387 data_type = args.t
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1388 analysis_mode = args.m
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1389 mapping_mode=args.b
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1390 threads=args.p
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1391 make_dir=args.d
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1392 clean_mode=args.c
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1393 sample_name=args.n
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1394 ingore_header=args.s
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1395 phred_offset=args.phred_offset
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1396 k_size=27 #will change for bug fixing
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1397 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1398 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2s_db')) # ed_SL_09152019: add ex_dir for packaging
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1399 seqsero2_db=ex_dir+"/H_and_O_and_specific_genes.fasta" # ed_SL_11092019: change path to database for packaging
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1400 database="H_and_O_and_specific_genes.fasta"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1401 mlst_count=pickle.load(open(ex_dir+"/mlst.pickle", "rb"))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1402 note="Note: "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1403 NA_note="This predicted serotype is not in the Kauffman-White scheme. " # ed_SL_09272019: add for new output format
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1404 if len(sys.argv)==1:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1405 subprocess.check_call(dirpath+"/SeqSero2S.py -h",shell=True)#change name of python file
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1406 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1407 request_id = time.strftime("%m_%d_%Y_%H_%M_%S", time.localtime())
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1408 request_id += str(random.randint(1, 10000000))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1409 if make_dir is None:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1410 make_dir="SeqSero_result_"+request_id
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1411 make_dir=os.path.abspath(make_dir)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1412 if os.path.isdir(make_dir):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1413 pass
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1414 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1415 subprocess.check_call("mkdir -p "+make_dir,shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1416 subprocess.check_call("ln -f -s "+seqsero2_db+" "+" ".join(input_file)+" "+make_dir,shell=True) # ed_SL_11092019: change path to database for packaging
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1417 #subprocess.check_call("ln -f -s "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) ### use -f option to force the replacement of links, remove -r and use absolute path instead to avoid link issue (use 'type=os.path.abspath' in -i argument).
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1418 ############################begin the real analysis
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1419 if analysis_mode=="a":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1420 if data_type in ["1","2","3"]:#use allele mode
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1421 for_fq,rev_fq=get_input_files(make_dir,input_file,data_type,dirpath)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1422 os.chdir(make_dir)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1423 ###add a function to tell input files
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1424 fnameA=for_fq.split("/")[-1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1425 fnameB=rev_fq.split("/")[-1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1426 current_time=time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1427 sam,bam,sorted_bam,mapped_fq1,mapped_fq2,combined_fq,for_sai,rev_sai=get_temp_file_names(fnameA,fnameB) #get temp files id
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1428 map_and_sort(threads,database,fnameA,fnameB,sam,bam,for_sai,rev_sai,sorted_bam,mapping_mode) #do mapping and sort
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1429
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1430 ### avoid error out when micro assembly fails. ed_SL_03172020
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1431 try:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1432 xmlfile,new_fasta=extract_mapped_reads_and_do_assembly_and_blast(current_time,sorted_bam,combined_fq,mapped_fq1,mapped_fq2,threads,fnameA,fnameB,database,mapping_mode,phred_offset) #extract the mapped reads and do micro assembly and blast
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1433 except (UnboundLocalError, subprocess.CalledProcessError):
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1434 xmlfile="NA"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1435 H1_cont_stat_list=[]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1436 H2_cont_stat_list=[]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1437 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1438 if xmlfile=="NA":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1439 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H=("-","-","-",[],"","")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1440 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1441 Final_list=xml_parse_score_comparision_seqsero(xmlfile) #analyze xml and get parsed results
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1442 file=open("data_log.txt","a")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1443 for x in Final_list:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1444 file.write("\t".join(str(y) for y in x)+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1445 file.close()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1446 Final_list_passed=[x for x in Final_list if float(x[0].split("_cov_")[1].split("_")[0])>=0.9 and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]) or x[1]>1000)]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1447 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list=predict_O_and_H_types(Final_list,Final_list_passed,new_fasta) #predict O, fliC and fljB
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1448 subspecies=judge_subspecies(fnameA) #predict subspecies
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1449 ### ed_SL_06062020: correction VIII -> II
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1450 if subspecies == 'VIII':
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1451 subspecies = 'II'
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1452 ### ed_SL_08132020: correction VII -> IV, according to CDC's suggestion
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1453 if subspecies == 'VII':
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1454 subspecies = 'IV'
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1455 note+='SalmID reports this as ssp VII, which has not been formally recognized. '
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1456 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1457 ### ed_SL_08182020: change serotype output for genome without definitive subspecies ID
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1458 ssp_pointer = subspecies
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1459 if subspecies == 'enterica':
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1460 subspecies = '-'
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1461 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1462 ###MLST
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1463 #print("MLST using https://github.com/jordanlab/stringMLST")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1464 #print("7-gene MLST...")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1465 mlst_result = stringmlst(for_fq,rev_fq,data_type,ex_dir)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1466 st = mlst_result[0]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1467 alleles = mlst_result[1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1468 sorted_alleles = sorted(alleles.items())
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1469 try:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1470 st_count = str(mlst_count[st])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1471 except:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1472 st_count = '0'
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1473 subprocess.call("rm H_and_O_and_specific_genes.fasta* *.sra *.bam *.sam *.fastq *.gz *.fq temp.txt "+fnameA+"*_db* 2> /dev/null",shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1474 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1475 ###output
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1476 predict_form_ss2,predict_sero_ss2,star_ss2,star_line_ss2,claim_ss2=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies,'ss2')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1477 predict_form,predict_sero,star,star_line,claim=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies,'ss2s')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1478 claim="" #04132019, disable claim for new report requirement
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1479 contamination_report=""
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1480 H_list=["fliC_"+x for x in H1_cont_stat_list if len(x)>0]+["fljB_"+x for x in H2_cont_stat_list if len(x)>0]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1481 if contamination_O!="" and contamination_H=="":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1482 contamination_report="#Potential inter-serotype contamination detected from O antigen signals. All O-antigens detected:"+"\t".join(Otypes_uniq)+"."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1483 elif contamination_O=="" and contamination_H!="":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1484 contamination_report="#Potential inter-serotype contamination detected or potential thrid H phase from H antigen signals. All H-antigens detected:"+"\t".join(H_list)+"."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1485 elif contamination_O!="" and contamination_H!="":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1486 contamination_report="#Potential inter-serotype contamination detected from both O and H antigen signals.All O-antigens detected:"+"\t".join(Otypes_uniq)+". All H-antigens detected:"+"\t".join(H_list)+"."
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1487 if contamination_report!="":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1488 #contamination_report="potential inter-serotype contamination detected (please refer below antigen signal report for details)." #above contamination_reports are for back-up and bug fixing #web-based mode need to be re-used, 04132019
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1489 contamination_report="Co-existence of multiple serotypes detected, indicating potential inter-serotype contamination. See 'Extracted_antigen_alleles.fasta' for detected serotype determinant alleles. "
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1490
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1491 ### ed_SL_11232019: add notes for missing antigen
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1492 if O_choice=="":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1493 O_choice="-"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1494 antigen_note,NA_note=check_antigens(ssp_pointer,O_choice,fliC_choice,fljB_choice,NA_note)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1495 if sample_name:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1496 print ("Sample name:\t"+sample_name)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1497 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1498 if clean_mode:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1499 subprocess.check_call("rm -rf "+make_dir,shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1500 make_dir="none-output-directory due to '-c' flag"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1501 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1502 new_file=open("SeqSero_result.txt","w")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1503 ### ed_SL_01152020: add new output
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1504 conta_note="yes" if "inter-serotype contamination" in contamination_report else "no"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1505 tsv_file=open("SeqSero_result.tsv","w")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1506 if ingore_header:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1507 pass
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1508 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1509 tsv_file.write("Sample name\tOutput directory\tInput files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted identification\tPredicted antigenic profile\tPredicted serotype\tPredicted serotype (SeqSero2 v1.3.2)\tPotential inter-serotype contamination\tNote\tST\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1510 if sample_name:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1511 new_file.write("Sample name:\t"+sample_name+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1512 tsv_file.write(sample_name+'\t')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1513 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1514 tsv_file.write(input_file[0].split('/')[-1]+'\t')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1515 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1516 if "N/A" not in predict_sero:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1517 new_file.write("Output directory:\t"+make_dir+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1518 "Input files:\t"+"\t".join(input_file)+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1519 "O antigen prediction:\t"+O_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1520 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1521 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1522 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1523 "Predicted antigenic profile:\t"+predict_form+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1524 "Predicted serotype:\t"+predict_sero+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1525 "Predicted serotype (SeqSero2 v1.3.2):\t"+predict_sero_ss2+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1526 note+contamination_report+star_line+claim+antigen_note+"\n")#+##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1527 tsv_file.write(make_dir+"\t"+" ".join(input_file)+"\t"+O_choice+"\t"+fliC_choice+"\t"+fljB_choice+"\t"+subspecies_ID_dir[ssp_pointer]+"\t"+predict_form+"\t"+predict_sero+"\t"+predict_sero_ss2+"\t"+conta_note+"\t"+contamination_report+star_line+claim+antigen_note+"\t"+st+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1528 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1529 new_file.write("Output directory:\t"+make_dir+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1530 "Input files:\t"+"\t".join(input_file)+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1531 "O antigen prediction:\t"+O_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1532 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1533 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1534 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1535 "Predicted antigenic profile:\t"+predict_form+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1536 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, add subspecies
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1537 "Predicted serotype (SeqSero2 v1.3.2):\t"+subspecies+' '+predict_form_ss2+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1538 note+NA_note+contamination_report+star_line+claim+antigen_note+"\n")#+##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1539 tsv_file.write(make_dir+"\t"+" ".join(input_file)+"\t"+O_choice+"\t"+fliC_choice+"\t"+fljB_choice+"\t"+subspecies_ID_dir[ssp_pointer]+"\t"+predict_form+"\t"+subspecies+' '+predict_form+"\t"+subspecies+' '+predict_form_ss2+"\t"+conta_note+"\t"+NA_note+contamination_report+star_line+claim+antigen_note+"\t"+st+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1540 ##MLST
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1541 new_file.write("Sequence type:\t"+st+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1542 "Number of ST"+st+" strains in EnteroBase:\t"+st_count+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1543 "\n".join([k+":\t"+v for k,v in sorted_alleles]))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1544 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1545 new_file.close()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1546 tsv_file.close()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1547 if "N/A" not in predict_sero:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1548 print("Output directory:\t"+make_dir+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1549 "Input files:\t"+"\t".join(input_file)+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1550 "O antigen prediction:\t"+O_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1551 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1552 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1553 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1554 "Predicted antigenic profile:\t"+predict_form+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1555 "Predicted serotype:\t"+predict_sero+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1556 "Predicted serotype (SeqSero2 v1.3.2):\t"+predict_sero_ss2+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1557 note+contamination_report+star_line+claim+antigen_note+"\n")#+##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1558 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1559 print("Output directory:\t"+make_dir+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1560 "Input files:\t"+"\t".join(input_file)+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1561 "O antigen prediction:\t"+O_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1562 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1563 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1564 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1565 "Predicted antigenic profile:\t"+predict_form+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1566 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1567 "Predicted serotype (SeqSero2 v1.3.2):\t"+subspecies+' '+predict_form_ss2+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1568 note+NA_note+contamination_report+star_line+claim+antigen_note+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1569 ###MLST
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1570 print("Sequence type: "+st)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1571 print("Number of ST"+st+" strains in EnteroBase: "+st_count)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1572 #print("Allele profile...")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1573 for k,v in sorted_alleles:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1574 print(k+': '+v)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1575 print('\n')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1576 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1577 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1578 print("Allele modes only support raw reads datatype, i.e. '-t 1 or 2 or 3'; please use '-m k'")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1579 elif analysis_mode=="k":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1580 #ex_dir = os.path.dirname(os.path.realpath(__file__))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1581 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2s_db')) # ed_SL_09152019: change ex_dir for packaging
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1582 for_fq,rev_fq=get_input_files(make_dir,input_file,data_type,dirpath)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1583 input_file = for_fq #-k will just use forward because not all reads were used
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1584 os.chdir(make_dir)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1585 ### ed_SL_08182020: use assembly workflow for nanopore fastq, convert fastq to fasta
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1586 if data_type == "5":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1587 input_file = format_check(for_fq)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1588 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1589 f = open(ex_dir + '/antigens.pickle', 'rb')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1590 lib_dict = pickle.load(f)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1591 f.close
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1592 input_Ks=get_input_K(input_file,lib_dict,data_type,k_size)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1593 O_dict,H_dict,Special_dict=get_kmer_dict(lib_dict,input_Ks)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1594 highest_O,highest_fliC,highest_fljB=call_O_and_H_type(O_dict,H_dict,Special_dict,make_dir)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1595 subspecies=judge_subspecies_Kmer(Special_dict)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1596 if subspecies=="IIb" or subspecies=="IIa":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1597 subspecies="II"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1598 ### ed_SL_06062020: correction VIII -> II
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1599 if subspecies == 'VIII':
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1600 subspecies = 'II'
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1601 ### ed_SL_08132020: correction VII -> IV, according to CDC's suggestion
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1602 if subspecies == 'VII':
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1603 subspecies = 'IV'
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1604 note+='SalmID reports this as ssp VII, which has not been formally recognized. '
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1605 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1606 ### ed_SL_08182020: change serotype output for genome without definitive subspecies ID
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1607 ssp_pointer = subspecies
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1608 if subspecies == 'enterica':
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1609 subspecies = '-'
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1610 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1611 predict_form_ss2,predict_sero_ss2,star_ss2,star_line_ss2,claim_ss2 = seqsero_from_formula_to_serotypes(
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1612 highest_O.split('-')[1], highest_fliC, highest_fljB, Special_dict,subspecies, 'ss2')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1613 predict_form,predict_sero,star,star_line,claim = seqsero_from_formula_to_serotypes(
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1614 highest_O.split('-')[1], highest_fliC, highest_fljB, Special_dict,subspecies, 'ss2s')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1615 claim="" #no claim any more based on new output requirement
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1616
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1617 ### ed_SL_11232019: add notes for missing antigen
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1618 if highest_O.split('-')[-1]=="":
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1619 O_choice="-"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1620 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1621 O_choice=highest_O.split('-')[-1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1622 antigen_note,NA_note=check_antigens(ssp_pointer,O_choice,highest_fliC,highest_fljB,NA_note)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1623 if sample_name:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1624 print ("Sample name:\t"+sample_name)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1625 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1626 ###MLST
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1627 if data_type in ["4","5"]:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1628 #print("MLST using https://github.com/tseemann/mlst")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1629 #print("7-gene MLST...")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1630 mlst_result = mlst(args.i[0])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1631 if data_type in ["1","2","3"]:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1632 #print("MLST using https://github.com/jordanlab/stringMLST")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1633 #print("7-gene MLST...")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1634 mlst_result = stringmlst(for_fq,rev_fq,data_type,ex_dir)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1635 st = mlst_result[0]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1636 alleles = mlst_result[1]
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1637 sorted_alleles = sorted(alleles.items())
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1638 try:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1639 st_count = str(mlst_count[st])
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1640 except:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1641 st_count = '0'
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1642 subprocess.call("rm *.fasta* *.fastq *.gz *.fq temp.txt *.sra 2> /dev/null",shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1643 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1644 ###output
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1645 if clean_mode:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1646 subprocess.check_call("rm -rf "+make_dir,shell=True)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1647 make_dir="none-output-directory due to '-c' flag"
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1648 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1649 new_file=open("SeqSero_result.txt","w")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1650 tsv_file=open("SeqSero_result.tsv","w")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1651 if ingore_header:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1652 pass
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1653 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1654 tsv_file.write("Sample name\tOutput directory\tInput files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted identification\tPredicted antigenic profile\tPredicted serotype\tPredicted serotype (SeqSero2 v1.3.2)\tNote\tST\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1655 if sample_name:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1656 new_file.write("Sample name:\t"+sample_name+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1657 tsv_file.write(sample_name+'\t')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1658 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1659 tsv_file.write(input_file.split('/')[-1]+'\t')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1660 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1661 if "N/A" not in predict_sero:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1662 new_file.write("Output directory:\t"+make_dir+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1663 "Input files:\t"+input_file+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1664 "O antigen prediction:\t"+O_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1665 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1666 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1667 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1668 "Predicted antigenic profile:\t"+predict_form+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1669 "Predicted serotype:\t"+predict_sero+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1670 "Predicted serotype (SeqSero2 v1.3.2):\t"+predict_sero_ss2+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1671 note+star_line+claim+antigen_note+"\n")#+##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1672 tsv_file.write(make_dir+"\t"+input_file+"\t"+O_choice+"\t"+highest_fliC+"\t"+highest_fljB+"\t"+subspecies_ID_dir[ssp_pointer]+"\t"+predict_form+"\t"+predict_sero+"\t"+predict_sero_ss2+"\t"+star_line+claim+antigen_note+"\t"+st+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1673 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1674 new_file.write("Output directory:\t"+make_dir+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1675 "Input files:\t"+input_file+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1676 "O antigen prediction:\t"+O_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1677 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1678 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1679 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1680 "Predicted antigenic profile:\t"+predict_form+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1681 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1682 "Predicted serotype (SeqSero2 v1.3.2):\t"+subspecies+' '+predict_form_ss2+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1683 note+NA_note+star_line+claim+antigen_note+"\n")#+##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1684 tsv_file.write(make_dir+"\t"+input_file+"\t"+O_choice+"\t"+highest_fliC+"\t"+highest_fljB+"\t"+subspecies_ID_dir[ssp_pointer]+"\t"+predict_form+"\t"+subspecies+' '+predict_form+"\t"+subspecies+' '+predict_form_ss2+"\t"+NA_note+star_line+claim+antigen_note+"\t"+st+"\n")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1685 ###MLST
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1686 new_file.write("Sequence type:\t"+st+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1687 "Number of ST"+st+" strains in EnteroBase:\t"+st_count+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1688 "\n".join([k+":\t"+v for k,v in sorted_alleles]))
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1689 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1690 new_file.close()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1691 tsv_file.close()
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1692 if "N/A" not in predict_sero:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1693 print("Output directory:\t"+make_dir+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1694 "Input files:\t"+input_file+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1695 "O antigen prediction:\t"+O_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1696 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1697 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1698 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1699 "Predicted antigenic profile:\t"+predict_form+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1700 "Predicted serotype:\t"+predict_sero+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1701 "Predicted serotype (SeqSero2 v1.3.2):\t"+predict_sero_ss2+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1702 note+star_line+claim+antigen_note+"\n")#+##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1703 else:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1704 print("Output directory:\t"+make_dir+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1705 "Input files:\t"+input_file+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1706 "O antigen prediction:\t"+O_choice+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1707 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1708 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1709 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1710 "Predicted antigenic profile:\t"+predict_form+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1711 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1712 "Predicted serotype (SeqSero2 v1.3.2):\t"+subspecies+' '+predict_form_ss2+"\n"+
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1713 note+NA_note+star_line+claim+antigen_note+"\n")#+##
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1714 ###MLST
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1715 print("Sequence type: "+st)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1716 print("Number of ST"+st+" strains in EnteroBase: "+st_count)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1717 #print("Allele profile...")
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1718 for k,v in sorted_alleles:
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1719 print(k+': '+v)
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1720 print('\n')
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1721 ###
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1722 if __name__ == '__main__':  # true only when this file is executed as a script, not when it is imported
cfc91e1d2c9b planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff changeset
1723 main()  # script entry point: hand control to main()