Mercurial > repos > jpayne > seqsero2s
annotate SeqSero2S/bin/SeqSero2S.py @ 19:cfc91e1d2c9b draft
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
| author | jpayne |
|---|---|
| date | Fri, 15 May 2026 17:50:45 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
19
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
2 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
3 import sys |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
4 import time |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
5 import random |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
6 import os |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
7 import subprocess |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
8 import gzip |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
9 import io |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
10 import pickle |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
11 import argparse |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
12 import itertools |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
13 import json |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
14 from distutils.version import LooseVersion |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
15 from distutils.spawn import find_executable |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
16 sys.path.insert(1,sys.path[0]+'/..') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
17 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
18 __version__ = "1.1.4" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
19 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
20 ### SeqSero Kmer |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def parse_args():
    """Build the SeqSero2S command-line parser and parse the process arguments.

    Returns:
        argparse.Namespace: the parsed options. No numeric conversion is
        applied, so ``-t``, ``-p`` and ``--phred_offset`` stay strings;
        ``-i`` (when given) is a list of absolute paths; ``-c``, ``-s`` and
        ``--check`` are booleans.

    As with any argparse parser, ``-h``, ``-v`` and invalid options end the
    process from inside ``parse_args()``.
    """
    arg_parser = argparse.ArgumentParser(
        usage='SeqSero2S.py -t <data_type> -m <mode> -i <input_data> [-d <output_directory>] [-p <number of threads>] [-b <BWA_algorithm>]\n\nDevelopper: Shaokang Zhang (zskzsk@uga.edu), Hendrik C Den-Bakker (Hendrik.DenBakker@uga.edu) and Xiangyu Deng (xdeng@uga.edu)\n\nContact email:seqsero@gmail.com\n\n'
    )
    # One entry per option, registered in this order so the help listing
    # keeps its layout: (flag names, keyword arguments for add_argument).
    option_table = [
        # os.path.abspath turns every input path into an absolute path.
        (("-i",), dict(nargs="+", help="<string>: path/to/input_data", type=os.path.abspath)),
        (("-t",), dict(choices=['1', '2', '3', '4', '5'], help="<int>: '1' for interleaved paired-end reads, '2' for separated paired-end reads, '3' for single reads, '4' for genome assembly, '5' for nanopore reads (fasta/fastq)")),
        (("-b",), dict(choices=['sam', 'mem'], default="mem", help="<string>: algorithms for bwa mapping for allele mode; 'mem' for mem, 'sam' for samse/sampe; default=mem; optional; for now we only optimized for default 'mem' mode")),
        (("-p",), dict(default="1", help="<int>: number of threads for allele mode, if p >4, only 4 threads will be used for assembly since the amount of extracted reads is small, default=1")),
        (("-m",), dict(choices=['k', 'a'], default="a", help="<string>: which workflow to apply, 'a'(raw reads allele micro-assembly), 'k'(raw reads and genome assembly k-mer), default=a")),
        (("-n",), dict(help="<string>: optional, to specify a sample name in the report output")),
        (("-d",), dict(help="<string>: optional, to specify an output directory name, if not set, the output directory would be 'SeqSero_result_'+time stamp+one random number")),
        (("-c",), dict(action="store_true", help="<flag>: if '-c' was flagged, SeqSero2S will only output serotype prediction without the directory containing log files")),
        (("-s",), dict(action="store_true", help="<flag>: if '-s' was flagged, SeqSero2S will not output header in SeqSero_result.tsv")),
        (("--phred_offset",), dict(choices=['33', '64', 'auto'], default='auto', help="<33|64|auto>: offset for FASTQ file quality scores, default=auto")),
        (("--check",), dict(action="store_true", help="<flag>: use '--check' flag to check the required dependencies")),
        (("-v", "--version"), dict(action='version', version=f"%(prog)s {__version__}")),
    ]
    for flag_names, settings in option_table:
        arg_parser.add_argument(*flag_names, **settings)
    return arg_parser.parse_args()
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
37 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
### check paths of dependencies
# The command line is parsed here, at module level, only to read the
# --check flag; the parsed namespace itself is not kept.
check_dependencies = parse_args().check
# Names of the external programs that are looked up on PATH by --check.
dependencies = ['bwa','samtools','blastn','fastq-dump','spades.py','bedtools','SalmID.py','mlst','stringMLST.py']
if check_dependencies:
    for item in dependencies:
        # find_executable gives the resolved path, or None when nothing is found.
        ext_path = find_executable(item)
        if ext_path is not None:
            print ("Using "+item+" - "+ext_path)
        else:
            print ("ERROR: can not find "+item+" in PATH")
    # --check is report-only: the script stops here without running a workflow.
    # NOTE(review): this exit is assumed to sit after the loop (so every
    # dependency gets reported) rather than inside the else branch — confirm.
    sys.exit()
### end of --check
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
50 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def reverse_complement(sequence):
    """Return the reverse complement of an upper-case nucleotide sequence.

    The four bases, ``N`` and the IUPAC ambiguity codes
    (M, R, W, S, Y, K, V, H, D, B) are supported.

    Args:
        sequence: iterable of single-character symbols, normally a ``str``.

    Returns:
        str: the complemented symbols in reverse order.

    Raises:
        KeyError: if a symbol is not in the table (lower-case letters are
            not in the table).
    """
    # Symbol at position i of the first string pairs with position i of the second.
    partner = dict(zip("ACGTNMRWSYKVHDB", "TGCANKYWSRMBDHV"))
    return "".join(map(partner.__getitem__, reversed(sequence)))
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
70 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def mlst(assembly):
    """Run the ``mlst`` tool on an assembly and return its sequence type call.

    The tool is invoked with the ``senterica_achtman_2`` scheme, writes its
    JSON report to ``mlst.json`` in the current working directory, and has
    its stdout/stderr appended to ``data_log.txt``.

    Args:
        assembly: path of the assembly file handed to ``mlst``.

    Returns:
        tuple: ``(sequence_type, alleles)`` taken from the first entry of
        the JSON report.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    import shlex  # local import: only needed here to quote the path for the shell

    # Quote the path so spaces or shell metacharacters in it cannot split
    # or alter the command line.
    command = (
        "mlst -q --json mlst.json --scheme senterica_achtman_2 "
        + shlex.quote(assembly)
        + " >> data_log.txt 2>&1"
    )
    subprocess.check_call(command, shell=True)
    # The context manager closes the report even if JSON parsing fails.
    with open("mlst.json", 'r') as report:
        mlst_result = json.load(report)
    first_entry = mlst_result[0]
    st = first_entry['sequence_type']
    alleles = first_entry['alleles']
    return (st, alleles)
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
79 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def stringmlst(f1,f2,t,d):
    """Run ``stringMLST.py --predict`` on reads and return the typing result.

    The tool writes ``stringMLST.txt`` in the current working directory and
    has its stdout/stderr appended to ``data_log.txt``. The second line of
    ``stringMLST.txt`` is then parsed as a tab-separated record.

    Args:
        f1: path of the first (or only) read file.
        f2: path of the second read file; only used when ``t`` is '1' or '2'.
        t: data type code; '1'/'2' run the paired command, '3' the
            single-end command. For any other value no command is run and
            whatever ``stringMLST.txt`` already exists is parsed.
        d: directory containing ``kmer/salmonella`` (passed to ``-P``).

    Returns:
        tuple: ``(st, alleles)`` where ``st`` is the last field of the record
        ('-' when that field is '0') and ``alleles`` maps the seven locus
        names to fields 1-7 of the record.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    import shlex  # local import: only needed here to quote paths for the shell

    # Quote every path so spaces or shell metacharacters cannot split or
    # alter the command line.
    database = shlex.quote(d+"/kmer/salmonella")
    if t in ['1','2']:
        subprocess.check_call("stringMLST.py --predict -P "+database+" -1 "+shlex.quote(f1)+" -2 "+shlex.quote(f2)+" -a /dev/stdout -o stringMLST.txt >> data_log.txt 2>&1",shell=True)
    elif t=='3':
        subprocess.check_call("stringMLST.py --predict -P "+database+" -1 "+shlex.quote(f1)+" -s -a /dev/stdout -o stringMLST.txt >> data_log.txt 2>&1",shell=True)
    # Line 0 is skipped; the record of interest is on the second line.
    with open("stringMLST.txt") as report:
        mlst_result = report.readlines()[1].strip().split('\t')
    st = mlst_result[-1]
    if st == '0':
        st = '-'
    loci = ['aroC','dnaN','hemD','hisD','purE','sucA','thrA']
    # Explicit indexing (not a slice) so a short record still raises IndexError.
    calls = [mlst_result[position] for position in range(1, 8)]
    alleles = dict(zip(loci, calls))
    return (st, alleles)
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
94 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def createKmerDict_reads(list_of_strings, kmer):
    """Count every k-mer, and its reverse complement, in a list of sequences.

    Args:
        list_of_strings: sequences to scan; newline characters at either end
            of each one are removed first.
        kmer: k-mer length.

    Returns:
        dict: upper-cased k-mer -> number of times it was seen, where each
        window contributes one count for itself and one for its reverse
        complement. Sequences shorter than ``kmer`` contribute nothing.
    """
    counts = {}
    for entry in list_of_strings:
        bases = entry.strip('\n')
        window_count = len(bases) - kmer + 1
        for offset in range(window_count):
            forward = bases[offset:offset + kmer].upper()
            # Forward strand first, then its reverse complement.
            for mer in (forward, reverse_complement(forward)):
                counts[mer] = counts.get(mer, 0) + 1
    return counts
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
111 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
112 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def multifasta_dict(multifasta):
    """Parse a (multi-)FASTA file into a ``{header: sequence}`` dictionary.

    Blank lines are ignored and surrounding whitespace is stripped from
    every line. The header key is the header line without its leading '>'.
    Sequence lines that precede the first header are ignored, and a header
    with no sequence lines produces no entry. If the same header occurs more
    than once, the sequences of all its records are concatenated in file
    order.

    Args:
        multifasta: path of the FASTA file.

    Returns:
        dict: header text -> full sequence, in order of first appearance.
    """
    pieces_by_header = {}
    current_header = None
    # Single pass; the context manager guarantees the file is closed.
    with open(multifasta, 'r') as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if not text:
                continue
            if text[0] == '>':
                current_header = text[1:]
            elif current_header is not None:
                pieces_by_header.setdefault(current_header, []).append(text)
    # Join once per record instead of growing a string line by line.
    return {header: ''.join(pieces) for header, pieces in pieces_by_header.items()}
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
130 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
131 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def multifasta_single_string(multifasta):
    """Concatenate every sequence line of a FASTA file into one string.

    Header lines (first non-blank character '>') and blank lines are
    skipped; every other line is stripped of surrounding whitespace and
    appended in file order, so record boundaries are not preserved.

    Args:
        multifasta: path of the FASTA file.

    Returns:
        str: all sequence lines joined together ('' if there are none).
    """
    sequence_lines = []
    # The context manager guarantees the file is closed.
    with open(multifasta, 'r') as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if text and text[0] != '>':
                sequence_lines.append(text)
    return ''.join(sequence_lines)
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
138 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
139 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def chunk_a_long_sequence(long_sequence, chunk_size=60):
    """Split a sequence into consecutive pieces of ``chunk_size`` characters.

    Args:
        long_sequence: the sequence to split.
        chunk_size: length of each full piece (default 60).

    Returns:
        list: the full pieces followed by one final piece holding the
        remainder. The final piece is always present, so it is '' when the
        length is an exact multiple of ``chunk_size`` (an empty input gives
        ``['']``).

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    # Number of full pieces; derived from chunk_size, not a fixed 60.
    steps = len(long_sequence) // chunk_size
    chunk_list = [
        long_sequence[index * chunk_size:(index + 1) * chunk_size]
        for index in range(steps)
    ]
    # Remainder (possibly empty) is always appended as the last element.
    chunk_list.append(long_sequence[steps * chunk_size:])
    return chunk_list
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
147 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
148 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def target_multifasta_kmerizer(multifasta, k, kmerDict):
    """Collect k-mers from the regions of an assembly that hit ``kmerDict``.

    The FASTA file is flattened into one string and cut into 60-character
    chunks. For each chunk that is not inside the most recently harvested
    window, the k-mer in the middle of the chunk is looked up in
    ``kmerDict``; on a hit, a window of neighbouring chunks is joined and
    all of its k-mers (both strands) are collected.

    Args:
        multifasta: path of the FASTA file.
        k: k-mer length.
        kmerDict: container supporting ``in`` for k-mer strings.

    Returns:
        set: the distinct k-mers gathered from every harvested window.
    """
    forward_length = 300  # bases to extend before a hit
    # NOTE(review): reverse_length is never read; reverse_lines below is
    # derived from forward_length as well. Left as is to keep results
    # unchanged — confirm whether 2200 was meant to be used.
    reverse_length = 2200
    chunk_size = 60  # size of the pieces the flattened sequence is cut into
    chunks = chunk_a_long_sequence(multifasta_single_string(multifasta), chunk_size)
    unit_length = len(chunks[0])
    forward_lines = int(forward_length / unit_length) + 1
    reverse_lines = int(forward_length / unit_length) + 1
    last_index = len(chunks) - 1
    collected = set()
    window_start = 0
    window_end = 0
    for index, chunk in enumerate(chunks):
        # Chunks inside the last harvested window were already processed.
        if window_start <= index < window_end:
            continue
        middle = int((len(chunk) - k) // 2)
        probe = chunk[middle:middle + k]
        if probe not in kmerDict:  # only the middle k-mer of a chunk is tested
            continue
        # Clamp the window to the bounds of the chunk list.
        window_start = max(index - forward_lines, 0)
        window_end = min(index + reverse_lines, last_index)
        window_text = "".join(
            piece.strip() for piece in chunks[window_start:window_end]
        )
        collected.update(createKmerDict_reads([window_text], k))
    return collected
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
185 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
186 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def target_read_kmerizer(file, k, kmerDict):
    """Collect k-mers from FASTQ reads whose middle k-mer is in ``kmerDict``.

    Only the second line of every four-line group (the sequence line of a
    FASTQ record) is examined. When the k-mer in the middle of that line is
    found in ``kmerDict``, all k-mers of the read (both strands) are
    collected. Reading stops once the accumulated length of the accepted
    lines reaches 4,000,000 characters.

    Args:
        file: path of the FASTQ file; a name ending in ``.gz`` is read
            through gzip.
        k: k-mer length.
        kmerDict: container supporting ``in`` for k-mer strings.

    Returns:
        set: the distinct k-mers gathered from the accepted reads.
    """
    gzipped = file.endswith(".gz")
    if gzipped:
        handle = io.BufferedReader(gzip.open(file))  # yields bytes lines
    else:
        handle = open(file, "r")
    target_mers = set()
    total_coverage = 0
    # Stream the file and close it on every exit path, including the early
    # break at the coverage cap.
    with handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 4 == 2:
                start = (len(line) - k) // 2
                s1 = line[start:start + k]
                if gzipped:
                    # Only sequence lines are decoded; other lines stay raw.
                    s1 = s1.decode()
                    line = line.decode()
                if s1 in kmerDict:  # probe with the middle k-mer of the read
                    total_coverage += len(line)
                    target_mers.update(createKmerDict_reads([line], k))
            if total_coverage >= 4000000:
                break
    return target_mers
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
214 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
215 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def minion_fasta_kmerizer(file, k, kmerDict):
    """Collect the k-mers of a FASTA file that hit ``kmerDict``.

    The file is read as strict two-line records: every even-numbered line
    (1-based) is treated as a sequence line, every odd-numbered line is
    skipped.  Each sequence line is stripped, upper-cased and split into
    (k-mer, reverse-complement k-mer) pairs by ``kmers``.  When either member
    of a pair is present in ``kmerDict``, both members are kept.

    Args:
        file: path of the FASTA file to read (opened in text mode).
        k: k-mer length handed to ``kmers``.
        kmerDict: container supporting ``in``; the reference k-mers.

    Returns:
        A set holding every retained k-mer and reverse-complement k-mer.
    """
    target_mers = set()
    # The context manager closes the handle even if k-mer extraction raises.
    with open(file) as handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 2 != 0:
                continue  # odd lines are skipped (headers in 2-line FASTA)
            for kmer, rc_kmer in kmers(line.strip().upper(), k):
                if kmer in kmerDict or rc_kmer in kmerDict:
                    # Only membership is ever returned, so a set replaces
                    # the original occurrence-count dictionary.
                    target_mers.add(kmer)
                    target_mers.add(rc_kmer)
    return target_mers
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
235 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
236 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def minion_fastq_kmerizer(file, k, kmerDict):
    """Collect the k-mers of a FASTQ file that hit ``kmerDict``.

    The file is read as strict four-line records: only lines whose 1-based
    number satisfies ``n % 4 == 2`` are treated as sequence lines.  Each
    sequence line is stripped, upper-cased and split into
    (k-mer, reverse-complement k-mer) pairs by ``kmers``.  When either member
    of a pair is present in ``kmerDict``, both members are kept.

    Args:
        file: path of the FASTQ file to read (opened in text mode).
        k: k-mer length handed to ``kmers``.
        kmerDict: container supporting ``in``; the reference k-mers.

    Returns:
        A set holding every retained k-mer and reverse-complement k-mer.
    """
    target_mers = set()
    # The context manager closes the handle even if k-mer extraction raises.
    with open(file) as handle:
        for line_number, line in enumerate(handle, start=1):
            if line_number % 4 != 2:
                continue  # only the second line of each record is processed
            for kmer, rc_kmer in kmers(line.strip().upper(), k):
                if kmer in kmerDict or rc_kmer in kmerDict:
                    # Only membership is ever returned, so a set replaces
                    # the original occurrence-count dictionary.
                    target_mers.add(kmer)
                    target_mers.add(rc_kmer)
    return target_mers
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
256 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
257 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def multifasta_single_string2(multifasta):
    """Concatenate every sequence line of a FASTA file into one string.

    Lines whose first non-blank character is ``>`` are treated as headers and
    dropped; all other lines are stripped of surrounding whitespace and
    joined in file order.  Blank lines are ignored (previously they raised
    ``IndexError`` because the first character of an empty string was read).

    Args:
        multifasta: path of the FASTA file to read.

    Returns:
        The concatenated sequence text, or ``''`` when the file holds no
        sequence lines.
    """
    pieces = []
    with open(multifasta, 'r') as f:
        for line in f:
            stripped = line.strip()
            if not stripped or stripped[0] == '>':
                continue
            pieces.append(stripped)
    # A single join avoids the quadratic cost of repeated string +=.
    return ''.join(pieces)
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
267 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
268 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def kmers(seq, k):
    """Yield each k-mer of ``seq`` together with its reverse complement.

    The reverse complement of the whole sequence is computed once with
    ``reverse_complement``; the partner of ``seq[start:start + k]`` is then
    the slice of that string covering the same bases from the other end.

    The window now starts at position 0.  The earlier loop began at 1 because
    its negative-index slice ``rev_comp[-(start + k):-start]`` is empty for
    ``start == 0``; that silently dropped the first k-mer of every sequence.
    Positive indices avoid the ``-0`` problem.

    Args:
        seq: sequence string.
        k: k-mer length.

    Yields:
        ``(kmer, rc_kmer)`` tuples for every window of length ``k``; nothing
        when ``seq`` is shorter than ``k``.
    """
    rev_comp = reverse_complement(seq)
    seq_len = len(seq)
    for start in range(seq_len - k + 1):
        rc_end = seq_len - start
        yield seq[start:start + k], rev_comp[rc_end - k:rc_end]
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
273 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
274 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def multifasta_to_kmers_dict(multifasta, k_size):
    """Build the database k-mer sets, one per entry of a multi-FASTA file.

    ``multifasta_dict`` is called on ``multifasta`` and its result is
    iterated; for each key, the associated value is wrapped in a one-element
    list and passed to ``createKmerDict_reads`` with ``k_size``.  Whatever
    that call yields on iteration is stored as a set under the same key.
    NOTE(review): presumably keys are sequence headers and values sequence
    strings; confirm against ``multifasta_dict``.

    Args:
        multifasta: argument forwarded unchanged to ``multifasta_dict``.
        k_size: k-mer length forwarded to ``createKmerDict_reads``.

    Returns:
        A dict mapping each key of the parsed multi-FASTA to a set of k-mers.
    """
    sequences = multifasta_dict(multifasta)
    return {
        name: set(createKmerDict_reads([sequences[name]], k_size))
        for name in sequences
    }
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
282 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
283 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def Combine(b, c):
    """Expand an antigen formula with optional factors into all variants.

    Args:
        b: list of optional factor strings (those written in brackets).
        c: list of mandatory factor strings.

    Returns:
        A list of comma-joined formulas.  The first entry is ``c`` joined in
        its given order; it is followed by one entry per non-empty
        combination of ``b`` (by increasing size, in
        ``itertools.combinations`` order), each being the mandatory factors
        plus that combination, split on commas, sorted and re-joined.
    """
    mandatory = list(c)
    formulas = [",".join(mandatory)]
    for size in range(1, len(b) + 1):
        for chosen in itertools.combinations(b, size):
            # Join then split so factors that themselves contain commas are
            # broken apart before sorting.
            factors = ",".join(mandatory + list(chosen)).split(",")
            formulas.append(",".join(sorted(factors)))
    return formulas
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
302 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
303 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
304 def seqsero_from_formula_to_serotypes(Otype, fliC, fljB, special_gene_list,subspecies,ss): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
305 #like test_output_06012017.txt |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
306 #can add more varialbles like sdf-type, sub-species-type in future (we can conclude it into a special-gene-list) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
307 #from Initial_Conditions import phase1,phase2,phaseO,sero,subs,remove_list,rename_dict |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
308 if ss == 'ss2': |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
309 from Initial_Conditions_SS2 import phase1,phase2,phaseO,sero,subs,remove_list,rename_dict |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
310 elif ss == 'ss2s': |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
311 from Initial_Conditions_SS2S import phase1,phase2,phaseO,sero,subs,remove_list,rename_dict |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
312 rename_dict_not_anymore=[rename_dict[x] for x in rename_dict] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
313 rename_dict_all=rename_dict_not_anymore+list(rename_dict) #used for decide whether to |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
314 seronames = [] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
315 seronames_none_subspecies=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
316 for i in range(len(phase1)): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
317 fliC_combine = [] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
318 fljB_combine = [] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
319 if phaseO[i] == Otype: # no VII in KW, but it's there |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
320 ### for fliC, detect every possible combinations to avoid the effect of "[" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
321 if phase1[i].count("[") == 0: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
322 fliC_combine.append(phase1[i]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
323 elif phase1[i].count("[") >= 1: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
324 c = [] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
325 b = [] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
326 if phase1[i][0] == "[" and phase1[i][-1] == "]" and phase1[i].count( |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
327 "[") == 1: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
328 content = phase1[i].replace("[", "").replace("]", "") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
329 fliC_combine.append(content) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
330 fliC_combine.append("-") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
331 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
332 for x in phase1[i].split(","): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
333 if "[" in x: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
334 b.append(x.replace("[", "").replace("]", "")) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
335 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
336 c.append(x) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
337 fliC_combine = Combine( |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
338 b, c |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
339 ) #Combine will offer every possible combinations of the formula, like f,[g],t: f,t f,g,t |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
340 ### end of fliC "[" detect |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
341 ### for fljB, detect every possible combinations to avoid the effect of "[" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
342 if phase2[i].count("[") == 0: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
343 fljB_combine.append(phase2[i]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
344 elif phase2[i].count("[") >= 1: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
345 d = [] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
346 e = [] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
347 if phase2[i][0] == "[" and phase2[i][-1] == "]" and phase2[i].count( |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
348 "[") == 1: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
349 content = phase2[i].replace("[", "").replace("]", "") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
350 fljB_combine.append(content) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
351 fljB_combine.append("-") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
352 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
353 for x in phase2[i].split(","): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
354 if "[" in x: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
355 d.append(x.replace("[", "").replace("]", "")) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
356 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
357 e.append(x) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
358 fljB_combine = Combine(d, e) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
359 ### end of fljB "[" detect |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
360 new_fliC = fliC.split( |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
361 "," |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
362 ) #because some antigen like r,[i] not follow alphabetical order, so use this one to judge and can avoid missings |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
363 new_fliC.sort() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
364 new_fliC = ",".join(new_fliC) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
365 new_fljB = fljB.split(",") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
366 new_fljB.sort() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
367 new_fljB = ",".join(new_fljB) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
368 if (new_fliC in fliC_combine |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
369 or fliC in fliC_combine) and (new_fljB in fljB_combine |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
370 or fljB in fljB_combine): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
371 ######start, remove_list,rename_dict, added on 11/11/2018 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
372 if sero[i] not in remove_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
373 temp_sero=sero[i] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
374 if temp_sero in rename_dict: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
375 temp_sero=rename_dict[temp_sero] #rename if in the rename list |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
376 if temp_sero not in seronames:#the new sero may already included, if yes, then not consider |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
377 if subs[i] == subspecies: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
378 seronames.append(temp_sero) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
379 seronames_none_subspecies.append(temp_sero) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
380 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
381 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
382 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
383 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
384 ######end, added on 11/11/2018 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
385 #analyze seronames |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
386 subspecies_pointer="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
387 if len(seronames) == 0 and len(seronames_none_subspecies)!=0: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
388 ## ed_SL_06062020: for the subspecies mismatch between KW and SalmID |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
389 seronames=seronames_none_subspecies |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
390 #seronames=["N/A"] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
391 subspecies_pointer="1" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
392 #subspecies_pointer="0" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
393 ## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
394 if len(seronames) == 0: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
395 seronames = [ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
396 "N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
397 ] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
398 star = "" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
399 star_line = "" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
400 if len(seronames) > 1: #there are two possible predictions for serotypes |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
401 star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
402 #changed 04072019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
403 #star_line = "The predicted serotypes share the same general formula:\t" + Otype + ":" + fliC + ":" + fljB + "\n" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
404 if subspecies_pointer=="1" and len(seronames_none_subspecies)!=0: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
405 star="*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
406 star_line = "This antigenic profile has been associated with serotype '"+(" or ").join(seronames)+"' in the Kauffman-White scheme. The existence of the same antigenic formula in multiple species or subspecies is well documented in the Kauffman-White Scheme. " + star_line ## ed_SL_03202021: changed for new output format |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
407 #star_line="The predicted O and H antigens correspond to serotype '"+(" or ").join(seronames)+"' in the Kauffmann-White scheme. The predicted subspecies by SalmID (github.com/hcdenbakker/SalmID) may not be consistent with subspecies designation in the Kauffmann-White scheme. " + star_line |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
408 #star_line="The formula with this subspieces prediction can't get a serotype in KW manual, and the serotyping prediction was made without considering it."+star_line |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
409 seronames=["N/A"] ## ed_SL_06062020 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
410 if Otype=="": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
411 Otype="-" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
412 predict_form = Otype + ":" + fliC + ":" + fljB |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
413 predict_sero = (" or ").join(seronames) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
414 ###special test for Enteritidis |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
415 if predict_form == "9:g,m:-": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
416 sdf = "-" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
417 for x in special_gene_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
418 if x.startswith("sdf"): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
419 sdf = "+" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
420 #star_line="Detected sdf gene, a marker to differentiate Gallinarum and Enteritidis" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
421 #star_line="sdf gene detected. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
422 star_line = "Detected Sdf I that is characteristic of commonly circulating strains of serotype Enteritidis. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
423 #predict_form = predict_form + " Sdf prediction:" + sdf |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
424 predict_form = predict_form #changed 04072019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
425 if sdf == "-": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
426 star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
427 #star_line="Didn't detected sdf gene, a marker to differentiate Gallinarum and Enteritidis" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
428 #star_line="sdf gene not detected. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
429 star_line = "Sdf I that is characteristic of commonly circulating strains of serotype Enteritidis was not detected. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
430 #changed in 04072019, for new output |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
431 #star_line = "Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
432 #predict_sero = "Gallinarum/Enteritidis" #04132019, for new output requirement |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
433 predict_sero = "Gallinarum or Enteritidis" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
434 ###end of special test for Enteritidis |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
435 elif predict_form == "4:i:-": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
436 predict_sero = "I 4,[5],12:i:-" # change serotype name |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
437 elif predict_form == "4:r:-": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
438 predict_sero = "N/A (4:r:-)" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
439 elif predict_form == "4:b:-": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
440 predict_sero = "N/A (4:b:-)" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
441 #elif predict_form == "8:e,h:1,2": #removed after official merge of newport and bardo |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
442 #predict_sero = "Newport" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
443 #star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
444 #star_line = "Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
445 claim = "The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes.\n" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
446 if "N/A" in predict_sero: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
447 claim = "" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
448 #special test for Typhimurium |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
449 if "Typhimurium" in predict_sero or predict_form == "4:i:-": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
450 normal = 0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
451 mutation = 0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
452 for x in special_gene_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
453 if "oafA-O-4_full" in x: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
454 normal = float(special_gene_list[x]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
455 elif "oafA-O-4_5-" in x: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
456 mutation = float(special_gene_list[x]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
457 if normal > mutation: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
458 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
459 elif normal < mutation: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
460 #predict_sero = predict_sero.strip() + "(O5-)" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
461 predict_sero = predict_sero.strip() #diable special sero for new output requirement, 04132019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
462 star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
463 #star_line = "Detected the deletion of O5-." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
464 star_line = "Detected a deletion in gene oafA that causes O5- variant of Typhimurium. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
465 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
466 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
467 #special test for Paratyphi B |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
468 if "Paratyphi B" in predict_sero or predict_form == "4:b:-": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
469 normal = 0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
470 mutation = 0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
471 for x in special_gene_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
472 if "gntR-family-regulatory-protein_dt-positive" in x: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
473 normal = float(special_gene_list[x]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
474 elif "gntR-family-regulatory-protein_dt-negative" in x: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
475 mutation = float(special_gene_list[x]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
476 #print(normal,mutation) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
477 if normal > mutation: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
478 #predict_sero = predict_sero.strip() + "(dt+)" #diable special sero for new output requirement, 04132019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
479 predict_sero = predict_sero.strip()+' var. L(+) tartrate+' if "Paratyphi B" in predict_sero else predict_sero.strip() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
480 star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
481 #star_line = "Didn't detect the SNP for dt- which means this isolate is a Paratyphi B variant L(+) tartrate(+)." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
482 star_line = "The SNP in gene STM3356 that is associated with the d-Tartrate nonfermenting phenotype characteristic of the typhoidal pathotype was not detected. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
483 elif normal < mutation: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
484 #predict_sero = predict_sero.strip() + "(dt-)" #diable special sero for new output requirement, 04132019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
485 predict_sero = predict_sero.strip() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
486 star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
487 #star_line = "Detected the SNP for d-Tartrate nonfermenting phenotype of Paratyphi B. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
488 star_line = "Detected the SNP in gene STM3356 that is associated with the d-Tartrate nonfermenting phenotype characteristic of the typhoidal pathotype. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
489 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
490 star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
491 #star_line = " Failed to detect the SNP for dt-, can't decide it's a Paratyphi B variant L(+) tartrate(+) or not." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
492 star_line = " " ## ed_SL_05152019: do not report this situation. |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
493 #special test for O13,22 and O13,23 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
494 ### add comment for any O2Â call. 06052024 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
495 if Otype=='2' and predict_sero not in ['Nitra','Kiel','Koessen']: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
496 star_line = 'O2 is typically a O9 rfb serotype with a mutation that results in a different sugar being placed in O antigen. SS2S detects only one group O2 serotype, Paratyphi A. This genome may be a variant of a group O9 serotype.' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
497 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
498 if Otype=="13": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
499 #ex_dir = os.path.dirname(os.path.realpath(__file__)) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
500 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2s_db')) # ed_SL_09152019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
501 f = open(ex_dir + '/special.pickle', 'rb') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
502 special = pickle.load(f) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
503 O22_O23=special['O22_O23'] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
504 if predict_sero.split(" or ")[0] in O22_O23[-1] and predict_sero.split(" or ")[0] not in rename_dict_all:#if in rename_dict_all, then it means already merged, no need to analyze |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
505 #if predict_sero.split(" or ")[0] in O22_O23[-1]: # Report O22 vs O23 result for O13 serotypes. 12232024 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
506 O22_score=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
507 O23_score=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
508 for x in special_gene_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
509 if "O:22" in x: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
510 O22_score = O22_score+float(special_gene_list[x]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
511 elif "O:23" in x: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
512 O23_score = O23_score+float(special_gene_list[x]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
513 #print(O22_score,O23_score) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
514 for z in O22_O23[0]: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
515 if predict_sero.split(" or ")[0] in z: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
516 if O22_score > O23_score: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
517 star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
518 star_line = "Detected a genetic marker (galE allele) for ancillary O22." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
519 #star_line = "Detected O22 specific genes to further differenciate '"+predict_sero+"'." #diabled for new output requirement, 04132019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
520 predict_sero = z[0] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
521 elif O22_score < O23_score: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
522 star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
523 star_line = "Detected a genetic marker (galE allele) for ancillary O23." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
524 #star_line = "Detected O23 specific genes to further differenciate '"+predict_sero+"'." #diabled for new output requirement, 04132019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
525 predict_sero = z[1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
526 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
527 star = "*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
528 star_line = "Fail to detect genetic markers (galE alleles) for ancillary O22 and O23." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
529 #star_line = "Fail to detect O22/O23 specific genes." #diabled for new output requirement, 04132019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
530 if " or " in predict_sero: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
531 star_line = star_line + "The predicted serotypes share the same general formula: " + Otype + ":" + fliC + ":" + fljB + " and can be differentiated by additional analysis. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
532 #special test for O6,8 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
533 #merge_O68_list=["Blockley","Bovismorbificans","Hadar","Litchfield","Manhattan","Muenchen"] #remove 11/11/2018, because already in merge list |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
534 #for x in merge_O68_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
535 # if x in predict_sero: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
536 # predict_sero=x |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
537 # star="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
538 # star_line="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
539 #special test for Montevideo; most of them are monophasic |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
540 #if "Montevideo" in predict_sero and "1,2,7" in predict_form: #remove 11/11/2018, because already in merge list |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
541 #star="*" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
542 #star_line="Montevideo is almost always monophasic, having an antigen called for the fljB position may be a result of Salmonella-Salmonella contamination." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
543 return predict_form, predict_sero, star, star_line, claim |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
544 ### End of SeqSero Kmer part |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
545 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
546 ### Begin of SeqSero2 allele prediction and output |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def xml_parse_score_comparision_seqsero(xmlfile):
    """Score every query/hit pair found in a BLAST XML report.

    For each alignment of each query record, the bit scores and identity
    fractions of its HSPs are accumulated.  An HSP that overlaps query
    positions already covered by earlier HSPs of the same alignment is
    down-weighted by the overlapping fraction, so repeated coverage of the
    same region cannot push the identity fraction above 1.

    Args:
        xmlfile: path to a BLAST XML output file.

    Returns:
        A list of ``(name, score, ids, cover_region)`` tuples sorted by
        ``score`` in descending order, where ``name`` is
        ``"<query>___<hit_def>"``, ``score`` is the weighted bit-score sum,
        ``ids`` is the weighted sum of identities divided by the query
        length, and ``cover_region`` is the set of query positions covered
        by the alignment's HSPs.
    """
    # Imported here, as in the original, so Biopython is only needed when
    # this function is actually called.
    from Bio.Blast import NCBIXML

    # Read all records while the file is open; the context manager then
    # closes the file (the original never closed it).
    with open(xmlfile) as xml_handle:
        records = list(NCBIXML.parse(xml_handle))

    scored_hits = []
    for record in records:
        query_name = record.query.strip()
        for alignment in record.alignments:
            score = 0
            ids = 0
            # Query positions already counted for this alignment.
            cover_region = set()
            for hsp in alignment.hsps:
                hsp_region = set(range(hsp.query_start, hsp.query_end))
                if cover_region:
                    # Only the not-yet-covered share of this HSP counts.
                    overlap = len(cover_region & hsp_region)
                    fraction = 1 - overlap / float(len(hsp_region))
                else:
                    fraction = 1
                cover_region |= hsp_region
                # The original branched on "first"/"last" in the query name
                # but both branches were identical, so the test is dropped.
                score += hsp.bits * fraction
                ids += float(hsp.identities) / record.query_length * fraction
            scored_hits.append(
                (query_name + "___" + alignment.hit_def, score, ids, cover_region)
            )
    return sorted(scored_hits, key=lambda d: d[1], reverse=True)
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
585 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
586 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def Uniq(L, sort_on_fre="none"):
    """Return the unique items of ``L`` together with their occurrence counts.

    Note that ``L`` is sorted in place, exactly as the original
    implementation did, so the caller's list is reordered.

    Args:
        L: list of mutually comparable items.
        sort_on_fre: when left at ``"none"`` the unique items are returned
            in sorted order; any other value orders the result by ascending
            count (ties broken by item).

    Returns:
        ``(items, counts)`` where ``counts[i]`` is the number of times
        ``items[i]`` occurs in ``L``.  Both are lists by default and tuples
        when ordered by frequency.  Empty input yields two empty lists.
    """
    L.sort()
    uniq = []
    count = []
    # After sorting, equal items are adjacent, so one pass is enough.
    for item in L:
        if uniq and uniq[-1] == item:
            count[-1] += 1
        else:
            uniq.append(item)
            count.append(1)
    if sort_on_fre != "none" and uniq:
        # zip() is a one-shot iterator in Python 3 and cannot be indexed
        # (the original did d[1] / d[0]); unpack its two columns instead.
        count, uniq = zip(*sorted(zip(count, uniq)))
    return (uniq, count)
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
603 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_head_tail_for_one_contig(nodes_vs_score_list):
    """Decide whether one contig looks like fliC or fljB from its scored hits.

    Each entry of ``nodes_vs_score_list`` is indexed as ``entry[0]`` (a
    name string) and ``entry[1]`` (a numeric score).  Scores are summed
    separately for names containing "fliC" and, failing that, "fljB";
    other entries are ignored.

    Returns:
        ``(role, difference)`` where ``role`` is "fliC" when its total is
        at least the fljB total (ties go to fliC) and "fljB" otherwise, and
        ``difference`` is the absolute gap between the two totals.  A small
        gap means the call is unreliable and the caller should fall back on
        the whole-contig BLAST score.
    """
    totals = {"fliC": 0, "fljB": 0}
    for entry in nodes_vs_score_list:
        label = entry[0]
        # "fliC" is tested first, so a name containing both counts as fliC.
        if "fliC" in label:
            totals["fliC"] += entry[1]
        elif "fljB" in label:
            totals["fljB"] += entry[1]
    role = "fliC" if totals["fliC"] >= totals["fljB"] else "fljB"
    return (role, abs(totals["fliC"] - totals["fljB"]))
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
620 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(node_name,Final_list,Final_list_passed):
  #predict whether a contig is fliC or fljB from the whole-contig hits in Final_list_passed
  #used when the head/tail score difference is too small to trust,
  #and also when no head or tail hit was found for the contig
  #returns the text before the first "_" of the first hit name containing node_name, or "" if none matches
  #Final_list is accepted for signature compatibility but is not read here
  first_match=next((hit[0] for hit in Final_list_passed if node_name in hit[0]),None)
  if first_match is None:
    return ""
  return first_match.split("_")[0]
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
630 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def fliC_or_fljB_judge_from_head_tail_sequence(nodes_list,tail_head_list,Final_list,Final_list_passed):
  #assign a role (fliC or fljB) to every contig in nodes_list
  #nodes_list: contig names (the c created by c,d=Uniq(nodes) in decide_contig_roles_for_H_antigen)
  #tail_head_list: head/tail hit records, [0] is the hit name
  #Final_list, Final_list_passed: passed through to the whole-contig ranking helper
  #returns a list of (role, contig_name) tuples, one per entry of nodes_list, in the same order
  min_reliable_diff=20 #head/tail score gap below which the head/tail call is not trusted
  role_list=[]
  for x in nodes_list:
    a=[y for y in tail_head_list if x in y[0]]
    role=""
    if 1<=len(a)<=4:
      #one to four head/tail hits: trust them only if the two roles are clearly separated
      role,diff=judge_fliC_or_fljB_from_head_tail_for_one_contig(a)
      if diff<min_reliable_diff:
        role=judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(x,Final_list,Final_list_passed)
    else:
      #no head/tail hit (or an unexpected count above four): use the Final_list_passed best match
      role=judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(x,Final_list,Final_list_passed)
    role_list.append((role,x))
  #the most common error is two antigens assigned to the same phase;
  #in that case redo both calls from the whole-contig ranking as a final test
  if len(role_list)==2 and role_list[0][0]==role_list[1][0]:
    role_list=[(judge_fliC_or_fljB_from_whole_contig_blast_score_ranking(x,Final_list,Final_list_passed),x) for x in nodes_list]
  return role_list
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
687 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def decide_contig_roles_for_H_antigen(Final_list,Final_list_passed):
  #decide which contig is fliC and which one is fljB
  #returns whatever fliC_or_fljB_judge_from_head_tail_sequence returns for the H-antigen contigs
  #whole-gene "fl*" hits: names starting with "fl" that are neither head ("first") nor tail ("last") probes;
  #the contig name is the text after the first "___"
  nodes=[
    hit[0].split("___")[1].strip()
    for hit in Final_list_passed
    if hit[0].startswith("fl") and not ("last" in hit[0] or "first" in hit[0])
  ]
  node_names,_node_counts=Uniq(nodes)#node_names is the node list
  #head/tail probe hits are taken from the unfiltered Final_list
  tail_head_list=[hit for hit in Final_list if "last" in hit[0] or "first" in hit[0]]
  return fliC_or_fljB_judge_from_head_tail_sequence(node_names,tail_head_list,Final_list,Final_list_passed)
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
700 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
701 def decide_O_type_and_get_special_genes(Final_list,Final_list_passed): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
702 #decide O based on Final_list |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
703 O_choice="?" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
704 O_list=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
705 special_genes={} |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
706 nodes=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
707 for x in Final_list_passed: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
708 if x[0].startswith("O-"): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
709 nodes.append(x[0].split("___")[1].strip()) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
710 elif not x[0].startswith("fl"): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
711 special_genes[x[0]]=x[2]#08172018, x[2] changed from x[-1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
712 ##print("special_genes:",special_genes) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
713 c,d=Uniq(nodes) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
714 #print "potential O antigen contig",c |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
715 final_O=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
716 O_nodes_list=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
717 for x in c:#c is the list for contigs |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
718 temp=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
719 for y in Final_list_passed: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
720 if x in y[0] and y[0].startswith("O-"): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
721 final_O.append(y) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
722 break |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
723 ### O contig has the problem of two genes on same contig, so do additional test |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
724 potenial_new_gene="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
725 for x in final_O: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
726 pointer=0 #for genes merged or not |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
727 #not consider O-1,3,19_not_in_3,10, too short compared with others |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
728 if "O-1,3,19_not_in_3,10" not in x[0] and int(x[0].split("__")[1].split("___")[0])*x[2]+850 <= int(x[0].split("length_")[1].split("_")[0]):#gene length << contig length; for now give 300*2 (for secureity can use 400*2) as flank region |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
729 pointer=x[0].split("___")[1].strip()#store the contig name |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
730 print(pointer) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
731 if pointer!=0:#it has potential merge event |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
732 for y in Final_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
733 if pointer in y[0] and y not in final_O and (y[1]>=int(y[0].split("__")[1].split("___")[0])*1.5 or (y[1]>=int(y[0].split("__")[1].split("___")[0])*y[2] and y[1]>=400)):#that's a realtively strict filter now; if passed, it has merge event and add one more to final_O |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
734 potenial_new_gene=y |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
735 #print(potenial_new_gene) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
736 break |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
737 if potenial_new_gene!="": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
738 print("two differnt genes in same contig, fix it for O antigen") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
739 print(potenial_new_gene[:3]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
740 pointer=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
741 for y in final_O: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
742 if y[0].split("___")[-1]==potenial_new_gene[0].split("___")[-1]: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
743 pointer=1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
744 if pointer!=0: #changed to consider two genes in same contig |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
745 final_O.append(potenial_new_gene) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
746 ### end of the two genes on same contig test |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
747 final_O=sorted(final_O,key=lambda x: x[2], reverse=True)#sorted |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
748 if len(final_O)==0 or (len(final_O)==1 and "O-1,3,19_not_in_3,10" in final_O[0][0]): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
749 #print "$$$No Otype, due to no hit"#may need to be changed |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
750 O_choice="-" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
751 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
752 highest_O_coverage=max([float(x[0].split("_cov_")[-1].split("_")[0]) for x in final_O if "O-1,3,19_not_in_3,10" not in x[0]]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
753 O_list=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
754 O_list_less_contamination=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
755 for x in final_O: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
756 if not "O-1,3,19_not_in_3,10__130" in x[0]:#O-1,3,19_not_in_3,10 is too small, which may affect further analysis; to avoid contamination affect, use 0.15 of highest coverage as cut-off |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
757 O_list.append(x[0].split("__")[0]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
758 O_nodes_list.append(x[0].split("___")[1]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
759 if float(x[0].split("_cov_")[-1].split("_")[0])>highest_O_coverage*0.15: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
760 O_list_less_contamination.append(x[0].split("__")[0]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
761 ### special test for O9,46 and O3,10 family |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
762 if ("O-9,46_wbaV" in O_list or "O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254" in O_list) and O_list_less_contamination[0].startswith("O-9,"):#not sure should use and float(O9_wbaV)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
763 if "O-9,46_wzy" in O_list or "O-9,46_wzy_partial" in O_list:#and float(O946_wzy)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
764 O_choice="O-9,46" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
765 #print "$$$Most possilble Otype: O-9,46" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
766 elif "O-9,46,27_partial_wzy" in O_list:#and float(O94627)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
767 O_choice="O-9,46,27" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
768 #print "$$$Most possilble Otype: O-9,46,27" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
769 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
770 O_choice="O-9"#next, detect O9 vs O2? |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
771 O2=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
772 O9=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
773 for z in special_genes: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
774 if "tyr-O-9" in z and special_genes[z] > O9: ##20240322, add "special_genes[z] > O9" to avoid misidentification of O9 to O2 that caused by multiple tyr-O-9 contigs |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
775 O9=special_genes[z] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
776 elif "tyr-O-2" in z and special_genes[z] > O2: ##20240322, add "special_genes[z] > O2" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
777 O2=special_genes[z] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
778 if O2>O9: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
779 O_choice="O-2" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
780 elif O2<O9: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
781 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
782 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
783 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
784 #print "$$$No suitable one, because can't distinct it's O-9 or O-2, but O-9 has a more possibility." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
785 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and (O_list[0].startswith("O-3,10") or O_list_less_contamination[0].startswith("O-9,46_wzy")):#and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
786 if "O-3,10_not_in_1,3,19" in O_list:#and float(O310_no_1319)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
787 O_choice="O-3,10" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
788 #print "$$$Most possilble Otype: O-3,10 (contain O-3,10_not_in_1,3,19)" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
789 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
790 O_choice="O-1,3,19" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
791 #print "$$$Most possilble Otype: O-1,3,19 (not contain O-3,10_not_in_1,3,19)" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
792 ### end of special test for O9,46 and O3,10 family |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
793 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
794 try: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
795 max_score=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
796 for x in final_O: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
797 if x[2]>=max_score and float(x[0].split("_cov_")[-1].split("_")[0])>highest_O_coverage*0.15:#use x[2],08172018, the "coverage identity = cover_length * identity"; also meet coverage threshold |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
798 max_score=x[2]#change from x[-1] to x[2],08172018 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
799 O_choice=x[0].split("_")[0] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
800 if O_choice=="O-1,3,19": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
801 O_choice=final_O[1][0].split("_")[0] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
802 #print "$$$Most possilble Otype: ",O_choice |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
803 except: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
804 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
805 #print "$$$No suitable Otype, or failure of mapping (please check the quality of raw reads)" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
806 if O_choice=="O-9,46,27" and len(O_list)==2 and "O-4_wzx" in O_list: #special for very low chance sitatuion between O4 and O9,27,46, this is for serotypes like Bredeney and Schwarzengrund (normallly O-4 will have higher score, but sometimes sequencing quality may affect the prediction) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
807 O_choice="O-4" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
808 #print "O:",O_choice,O_nodes_list |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
809 Otypes=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
810 for x in O_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
811 if x!="O-1,3,19_not_in_3,10": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
812 if "O-9,46_" not in x: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
813 Otypes.append(x.split("_")[0]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
814 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
815 Otypes.append(x.split("-from")[0])#O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
816 #Otypes=[x.split("_")[0] for x in O_list if x!="O-1,3,19_not_in_3,10"] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
817 Otypes_uniq,Otypes_fre=Uniq(Otypes) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
818 contamination_O="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
819 if O_choice=="O-9,46,27" or O_choice=="O-3,10" or O_choice=="O-1,3,19": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
820 if len(Otypes_uniq)>2: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
821 contamination_O="potential contamination from O antigen signals" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
822 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
823 if len(Otypes_uniq)>1: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
824 if O_choice=="O-4" and len(Otypes_uniq)==2 and "O-9,46,27" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
825 contamination_O="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
826 elif O_choice=="O-9,46" and len(Otypes_uniq)==2 and "O-9,46_wbaV" in Otypes_uniq and "O-9,46_wzy" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
827 contamination_O="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
828 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
829 contamination_O="potential contamination from O antigen signals" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
830 return O_choice,O_nodes_list,special_genes,final_O,contamination_O,Otypes_uniq |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
831 ### End of SeqSero2 allele prediction and output |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
832 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def get_input_files(make_dir,input_file,data_type,dirpath):
    """Work out the forward/reverse read file names for the given data type.

    Data types (as strings):
      '1' paired-end reads, interleaved   '2' paired-end reads, separated
      '3' single-end reads                '4' assembly
      '5' nanopore fasta                  '6' nanopore fastq

    All external commands run with ``make_dir`` as the working directory and
    refer to the inputs by base name only, so the inputs must be reachable
    from inside ``make_dir``.

    Args:
      make_dir: working directory to change into while preparing the files.
      input_file: sequence of input paths; element 0 is always used, element 1
        only for data type '2'.
      data_type: one of the string codes listed above.
      dirpath: directory that contains ``deinterleave_fastq.sh``.

    Returns:
      ``(for_fq, rev_fq)`` as base names. ``rev_fq`` is ``""`` unless the data
      type is '1' or '2'; both are ``""`` for an unrecognised data type.

    Raises:
      subprocess.CalledProcessError: if ``fastq-dump`` or the de-interleaving
        pipeline exits with a non-zero status.
      OSError: if ``make_dir`` cannot be entered.
    """
    # Local import: this is the only place that assembles shell command lines.
    from shlex import quote

    for_fq=""
    rev_fq=""
    os.chdir(make_dir)
    try:
        if data_type=="1":
            input_file=input_file[0].split("/")[-1]
            if input_file.endswith(".sra"):
                # Quote the name so spaces/metacharacters cannot split or alter the command.
                subprocess.check_call("fastq-dump --split-files "+quote(input_file),shell=True)
                for_fq=input_file.replace(".sra","_1.fastq")
                rev_fq=input_file.replace(".sra","_2.fastq")
            else:
                core_id=input_file.split(".fastq")[0].split(".fq")[0]
                for_fq=core_id+"_1.fastq"
                rev_fq=core_id+"_2.fastq"
                # Gzipped input is decompressed on the fly; plain input is streamed as-is.
                if input_file.endswith(".gz"):
                    reader="gzip -dc "
                else:
                    reader="cat "
                deinterleave=quote(dirpath+"/deinterleave_fastq.sh")
                subprocess.check_call(reader+quote(input_file)+" | "+deinterleave+" "+quote(for_fq)+" "+quote(rev_fq),shell=True)
        elif data_type=="2":
            for_fq=input_file[0].split("/")[-1]
            rev_fq=input_file[1].split("/")[-1]
        elif data_type=="3":
            input_file=input_file[0].split("/")[-1]
            if input_file.endswith(".sra"):
                subprocess.check_call("fastq-dump --split-files "+quote(input_file),shell=True)
                for_fq=input_file.replace(".sra","_1.fastq")
            else:
                for_fq=input_file
        elif data_type in ["4","5","6"]:
            for_fq=input_file[0].split("/")[-1]
    finally:
        # Always step back out of make_dir, even when an external command failed.
        # NOTE: this moves to the parent of make_dir (as before), which is not
        # necessarily the directory the process was in when the function was called.
        os.chdir("..")
    return for_fq,rev_fq
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
867 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
868 def predict_O_and_H_types(Final_list,Final_list_passed,new_fasta): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
869 #get O and H types from Final_list from blast parsing; allele mode |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
870 from Bio import SeqIO |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
871 fliC_choice="-" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
872 fljB_choice="-" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
873 fliC_contig="NA" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
874 fljB_contig="NA" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
875 fliC_region=set([0]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
876 fljB_region=set([0,]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
877 fliC_length=0 #can be changed to coverage in future; in 03292019, changed to ailgned length |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
878 fljB_length=0 #can be changed to coverage in future; in 03292019, changed to ailgned length |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
879 O_choice="-"#no need to decide O contig for now, should be only one |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
880 O_choice,O_nodes,special_gene_list,O_nodes_roles,contamination_O,Otypes_uniq=decide_O_type_and_get_special_genes(Final_list,Final_list_passed)#decide the O antigen type and also return special-gene-list for further identification |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
881 O_choice=O_choice.split("-")[-1].strip() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
882 if (O_choice=="1,3,19" and len(O_nodes_roles)==1 and "1,3,19" in O_nodes_roles[0][0]) or O_choice=="": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
883 O_choice="-" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
884 H_contig_roles=decide_contig_roles_for_H_antigen(Final_list,Final_list_passed)#decide the H antigen contig is fliC or fljB |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
885 #add alignment locations, used for further selection, 03312019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
886 for i in range(len(H_contig_roles)): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
887 x=H_contig_roles[i] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
888 for y in Final_list_passed: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
889 if x[1] in y[0] and y[0].startswith(x[0]): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
890 H_contig_roles[i]+=H_contig_roles[i]+(y[-1],) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
891 break |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
892 log_file=open("SeqSero_log.txt","a") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
893 extract_file=open("Extracted_antigen_alleles.fasta","a") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
894 handle_fasta=list(SeqIO.parse(new_fasta,"fasta")) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
895 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
896 #print("O_contigs:") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
897 log_file.write("O_contigs:\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
898 extract_file.write("#Sequences with antigen signals (if the micro-assembled contig only covers the flanking region, it will not be used for contamination analysis)\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
899 extract_file.write("#O_contigs:\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
900 for x in O_nodes_roles: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
901 if "O-1,3,19_not_in_3,10" not in x[0]:#O-1,3,19_not_in_3,10 is just a small size marker |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
902 #print(x[0].split("___")[-1],x[0].split("__")[0],"blast score:",x[1],"identity%:",str(round(x[2]*100,2))+"%",str(min(x[-1]))+" to "+str(max(x[-1]))) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
903 log_file.write(x[0].split("___")[-1]+" "+x[0].split("__")[0]+"; "+"blast score: "+str(x[1])+" identity%: "+str(round(x[2]*100,2))+"%; alignment from "+str(min(x[-1]))+" to "+str(max(x[-1]))+" of antigen\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
904 title=">"+x[0].split("___")[-1]+" "+x[0].split("__")[0]+"; "+"blast score: "+str(x[1])+" identity%: "+str(round(x[2]*100,2))+"%; alignment from "+str(min(x[-1]))+" to "+str(max(x[-1]))+" of antigen\n" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
905 seqs="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
906 for z in handle_fasta: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
907 if x[0].split("___")[-1]==z.description: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
908 seqs=str(z.seq) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
909 extract_file.write(title+seqs+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
910 if len(H_contig_roles)!=0: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
911 highest_H_coverage=max([float(x[1].split("_cov_")[-1].split("_")[0]) for x in H_contig_roles]) #less than highest*0.1 would be regarded as contamination and noises, they will still be considered in contamination detection and logs, but not used as final serotype output |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
912 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
913 highest_H_coverage=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
914 for x in H_contig_roles: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
915 #if multiple choices, temporately select the one with longest length for now, will revise in further change |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
916 if "fliC" == x[0] and len(x[-1])>=fliC_length and x[1] not in O_nodes and float(x[1].split("_cov_")[-1].split("_")[0])>highest_H_coverage*0.13:#remember to avoid the effect of O-type contig, so should not in O_node list |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
917 fliC_contig=x[1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
918 fliC_length=len(x[-1]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
919 elif "fljB" == x[0] and len(x[-1])>=fljB_length and x[1] not in O_nodes and float(x[1].split("_cov_")[-1].split("_")[0])>highest_H_coverage*0.13: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
920 fljB_contig=x[1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
921 fljB_length=len(x[-1]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
922 for x in Final_list_passed: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
923 if fliC_choice=="-" and "fliC_" in x[0] and fliC_contig in x[0]: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
924 fliC_choice=x[0].split("_")[1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
925 elif fljB_choice=="-" and "fljB_" in x[0] and fljB_contig in x[0]: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
926 fljB_choice=x[0].split("_")[1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
927 elif fliC_choice!="-" and fljB_choice!="-": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
928 break |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
929 #now remove contigs not in middle core part |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
930 first_allele="NA" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
931 first_allele_percentage=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
932 for x in Final_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
933 if x[0].startswith("fliC") or x[0].startswith("fljB"): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
934 first_allele=x[0].split("__")[0] #used to filter those un-middle contigs |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
935 first_allele_percentage=x[2] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
936 break |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
937 additional_contigs=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
938 for x in Final_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
939 if first_allele in x[0]: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
940 if (fliC_contig == x[0].split("___")[-1]): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
941 fliC_region=x[3] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
942 elif fljB_contig!="NA" and (fljB_contig == x[0].split("___")[-1]): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
943 fljB_region=x[3] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
944 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
945 if x[1]*1.1>int(x[0].split("___")[1].split("_")[3]):#loose threshold by multiplying 1.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
946 additional_contigs.append(x) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
947 #else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
948 #print x[:3] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
949 #we can just use the fljB region (or fliC depends on size), no matter set() or contain a large locations (without middle part); however, if none of them is fully assembled, use 500 and 1200 as conservative cut-off |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
950 if first_allele_percentage>0.9: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
951 if len(fliC_region)>len(fljB_region) and (max(fljB_region)-min(fljB_region))>1000: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
952 target_region=fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) #fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
953 elif len(fliC_region)<len(fljB_region) and (max(fliC_region)-min(fliC_region))>1000: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
954 target_region=fliC_region|(fljB_region-set(range(min(fliC_region),max(fliC_region)))) #fljB_region|(fliC_region-set(range(min(fljB_region),max(fljB_region)))) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
955 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
956 target_region=set()#doesn't do anything |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
957 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
958 target_region=set()#doesn't do anything |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
959 #print(target_region) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
960 #print(additional_contigs) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
961 target_region2=set(list(range(0,525))+list(range(1200,1700)))#I found to use 500 to 1200 as special region would be best |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
962 target_region=target_region2|target_region |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
963 for x in additional_contigs: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
964 removal=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
965 contig_length=int(x[0].split("___")[1].split("length_")[-1].split("_")[0]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
966 if fljB_contig not in x[0] and fliC_contig not in x[0] and len(target_region&x[3])/float(len(x[3]))>0.65 and contig_length*0.5<len(x[3])<contig_length*1.5: #consider length and alignment length for now, but very loose,0.5 and 1.5 as cut-off |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
967 removal=1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
968 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
969 if first_allele_percentage > 0.9 and float(x[0].split("__")[1].split("___")[0])*x[2]/len(x[-1])>0.96:#if high similiarity with middle part of first allele (first allele >0.9, already cover middle part) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
970 removal=1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
971 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
972 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
973 if removal==1: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
974 for y in H_contig_roles: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
975 if y[1] in x[0]: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
976 H_contig_roles.remove(y) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
977 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
978 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
979 #print(x[:3],contig_length,len(target_region&x[3])/float(len(x[3])),contig_length*0.5,len(x[3]),contig_length*1.5) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
980 #end of removing none-middle contigs |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
981 #print("H_contigs:") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
982 log_file.write("H_contigs:\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
983 extract_file.write("#H_contigs:\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
984 H_contig_stat=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
985 H1_cont_stat={} |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
986 H2_cont_stat={} |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
987 for i in range(len(H_contig_roles)): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
988 x=H_contig_roles[i] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
989 a=0 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
990 for y in Final_list_passed: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
991 if x[1] in y[0] and y[0].startswith(x[0]): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
992 if "first" in y[0] or "last" in y[0]: #this is the final filter to decide it's fliC or fljB, if can't pass, then can't decide |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
993 for y in Final_list_passed: #it's impossible to has the "first" and "last" allele as prediction, so re-do it |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
994 if x[1] in y[0]:#it's very possible to be third phase allele, so no need to make it must be fliC or fljB |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
995 #print(x[1],"can't_decide_fliC_or_fljB",y[0].split("_")[1],"blast_score:",y[1],"identity%:",str(round(y[2]*100,2))+"%",str(min(y[-1]))+" to "+str(max(y[-1]))) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
996 log_file.write(x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
997 H_contig_roles[i]="can't decide fliC or fljB, may be third phase" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
998 title=">"+x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antiten\n" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
999 seqs="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1000 for z in handle_fasta: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1001 if x[1]==z.description: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1002 seqs=str(z.seq) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1003 extract_file.write(title+seqs+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1004 break |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1005 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1006 #print(x[1],x[0],y[0].split("_")[1],"blast_score:",y[1],"identity%:",str(round(y[2]*100,2))+"%",str(min(y[-1]))+" to "+str(max(y[-1]))) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1007 log_file.write(x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1008 title=">"+x[1]+" "+x[0]+" "+y[0].split("_")[1]+"; "+"blast score: "+str(y[1])+" identity%: "+str(round(y[2]*100,2))+"%; alignment from "+str(min(y[-1]))+" to "+str(max(y[-1]))+" of antigen\n" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1009 seqs="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1010 for z in handle_fasta: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1011 if x[1]==z.description: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1012 seqs=str(z.seq) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1013 extract_file.write(title+seqs+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1014 if x[0]=="fliC": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1015 if y[0].split("_")[1] not in H1_cont_stat: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1016 H1_cont_stat[y[0].split("_")[1]]=y[2] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1017 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1018 H1_cont_stat[y[0].split("_")[1]]+=y[2] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1019 if x[0]=="fljB": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1020 if y[0].split("_")[1] not in H2_cont_stat: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1021 H2_cont_stat[y[0].split("_")[1]]=y[2] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1022 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1023 H2_cont_stat[y[0].split("_")[1]]+=y[2] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1024 break |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1025 #detect contaminations |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1026 #print(H1_cont_stat) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1027 #print(H2_cont_stat) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1028 H1_cont_stat_list=[x for x in H1_cont_stat if H1_cont_stat[x]>0.2] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1029 H2_cont_stat_list=[x for x in H2_cont_stat if H2_cont_stat[x]>0.2] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1030 contamination_H="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1031 if len(H1_cont_stat_list)>1 or len(H2_cont_stat_list)>1: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1032 contamination_H="potential contamination from H antigen signals" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1033 elif len(H2_cont_stat_list)==1 and fljB_contig=="NA": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1034 contamination_H="potential contamination from H antigen signals, uncommon weak fljB signals detected" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1035 #get additional antigens |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1036 """ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1037 if ("O-9,46_wbaV" in O_list or "O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254" in O_list) and O_list_less_contamination[0].startswith("O-9,"):#not sure should use and float(O9_wbaV)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1038 if "O-9,46_wzy" in O_list:#and float(O946_wzy)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1039 O_choice="O-9,46" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1040 #print "$$$Most possilble Otype: O-9,46" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1041 elif "O-9,46,27_partial_wzy" in O_list:#and float(O94627)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1042 O_choice="O-9,46,27" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1043 #print "$$$Most possilble Otype: O-9,46,27" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1044 elif ("O-3,10_wzx" in O_list) and ("O-9,46_wzy" in O_list) and (O_list[0].startswith("O-3,10") or O_list_less_contamination[0].startswith("O-9,46_wzy")):#and float(O310_wzx)/float(num_1) > 0.1 and float(O946_wzy)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1045 if "O-3,10_not_in_1,3,19" in O_list:#and float(O310_no_1319)/float(num_1) > 0.1 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1046 O_choice="O-3,10" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1047 #print "$$$Most possilble Otype: O-3,10 (contain O-3,10_not_in_1,3,19)" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1048 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1049 O_choice="O-1,3,19" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1050 #print "$$$Most possilble Otype: O-1,3,19 (not contain O-3,10_not_in_1,3,19)" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1051 ### end of special test for O9,46 and O3,10 family |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1052 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1053 if O_choice=="O-9,46,27" or O_choice=="O-3,10" or O_choice=="O-1,3,19": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1054 if len(Otypes_uniq)>2: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1055 contamination_O="potential contamination from O antigen signals" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1056 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1057 if len(Otypes_uniq)>1: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1058 if O_choice=="O-4" and len(Otypes_uniq)==2 and "O-9,46,27" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1059 contamination_O="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1060 elif O_choice=="O-9,46" and len(Otypes_uniq)==2 and "O-9,46_wbaV" in Otypes_uniq and "O-9,46_wzy" in Otypes_uniq: #for special 4,12,27 case such as Bredeney and Schwarzengrund |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1061 contamination_O="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1062 """ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1063 additonal_antigents=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1064 #print(contamination_O) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1065 #print(contamination_H) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1066 log_file.write(contamination_O+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1067 log_file.write(contamination_H+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1068 log_file.close() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1069 return O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1070 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def get_input_K(input_file,lib_dict,data_type,k_size):
    """Kmer mode: run the kmerizer that matches ``data_type`` on the input.

    Args:
        input_file: input path, handed unchanged to the selected kmerizer.
        lib_dict: mapping of marker name -> iterable of k-mers; all values
            are merged into one target set.
        data_type: workflow code as a string. '4' and '5' use
            ``target_multifasta_kmerizer`` ('5' is the minion_2d_fasta /
            nanopore workflow), '1', '2' and '3' use ``target_read_kmerizer``,
            and '6' (minion_2d_fastq) uses ``minion_fastq_kmerizer``.
        k_size: k-mer length, handed unchanged to the selected kmerizer.

    Returns:
        Whatever the selected kmerizer returns.

    Raises:
        ValueError: if ``data_type`` is not one of '1'-'6'. The previous
            implementation failed here with an UnboundLocalError.
    """
    # Build the target set once; every kmerizer receives it as a set.
    target_kmers = set()
    for marker in lib_dict:
        target_kmers.update(lib_dict[marker])

    if data_type == '4':
        return target_multifasta_kmerizer(input_file, k_size, target_kmers)
    if data_type in ('1', '2', '3'):  # set it for now, will change later
        return target_read_kmerizer(input_file, k_size, target_kmers)
    if data_type == '5':  # minion_2d_fasta
        # ed_SL_08172020: change for nanopore workflow
        return target_multifasta_kmerizer(input_file, k_size, target_kmers)
    if data_type == '6':  # minion_2d_fastq
        return minion_fastq_kmerizer(input_file, k_size, target_kmers)
    raise ValueError("unsupported data_type for kmer mode: " + repr(data_type))
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1086 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def get_kmer_dict(lib_dict,input_Ks):
    """Kmer mode: score every library marker against the input k-mers.

    The score of a marker is the percentage of its k-mers that also occur
    in ``input_Ks``.

    Args:
        lib_dict: mapping of marker name -> set of k-mers for that marker.
        input_Ks: set of k-mers found in the input.

    Returns:
        ``(O_dict, H_dict, Special_dict)``, each mapping marker name -> score:
        names starting with 'O-' are kept when the score is > 25, names
        starting with 'fl' when the score is > 40, and every other name
        when the score is > 1.
    """
    O_dict = {}
    H_dict = {}
    Special_dict = {}
    for marker, marker_kmers in lib_dict.items():
        if not marker_kmers:
            # A marker without k-mers cannot be scored; skipping it avoids
            # a ZeroDivisionError below.
            continue
        score = (len(marker_kmers & input_Ks) / len(marker_kmers)) * 100
        # Arbitrary cut-off for similarity score very low but seems
        # necessary to detect O-3,10 in some cases
        if score <= 1:
            continue
        if marker.startswith('O-'):
            if score > 25:
                O_dict[marker] = score
        elif marker.startswith('fl'):
            if score > 40:
                H_dict[marker] = score
        else:
            Special_dict[marker] = score
    return O_dict,H_dict,Special_dict
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1102 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
# The two wbaV marker names that are treated alike in the O-9 family tests.
_O9_46_WBAV_MARKERS = (
    'O-9,46_wbaV__1002',
    'O-9,46_wbaV-from-II-9,12:z29:1,5-SRR1346254__1002',
)


def _top_scoring_O(O_dict, excluded=None):
    """Return the O call of the best-scoring marker in O_dict ('-' if none).

    Ties go to the marker seen last. The marker named ``excluded`` is
    ignored. A wbaV marker without any O-9,46 wzy marker in O_dict is
    reported as "O-9" (ed_SL_12182019: fix for the O-9,46 error example1).
    """
    wzy_absent = ('O-9,46_wzy__1191' not in O_dict
                  and 'O-9,46_wzy_partial__216' not in O_dict)
    best_call = '-'
    best_score = 0
    for marker in O_dict:
        if marker == excluded:
            continue
        score = float(O_dict[marker])
        if score >= best_score:
            best_score = score
            if marker in _O9_46_WBAV_MARKERS and wzy_absent:
                best_call = "O-9"
            else:
                best_call = marker.split("_")[0]
    return best_call


def call_O_and_H_type(O_dict,H_dict,Special_dict,make_dir):
    """Kmer mode: call the O, fliC and fljB types from the marker scores.

    All scores are appended to "SeqSero_log.txt" in the current working
    directory (sections "O_scores:", "H_scores:" and "Special_scores:").

    Args:
        O_dict: marker name -> score for O markers.
        H_dict: marker name -> score for fliC/fljB markers; the allele is
            taken from the second '_'-separated field of the name.
        Special_dict: marker name -> score for the remaining markers; only
            names containing "tyr-O-9" / "tyr-O-2" influence the O call.
        make_dir: not used by this function; kept so existing callers
            keep working.

    Returns:
        ``(highest_O, highest_fliC, highest_fljB)``; each is '-' when no
        call could be made.
    """
    highest_O = '-'
    highest_fliC = '-'
    highest_fljB = '-'
    # The context manager closes the log even if a lookup below raises.
    with open("SeqSero_log.txt","a") as log_file:
        log_file.write("O_scores:\n")
        #call O:
        if O_dict:
            for marker in O_dict:
                log_file.write(marker+"\t"+str(O_dict[marker])+"\n")
            wbaV_strong = any(O_dict.get(m, 0) > 70 for m in _O9_46_WBAV_MARKERS)
            if wbaV_strong:
                # modified to fix miscall of O-9,46
                wzy_strong = (O_dict.get('O-9,46_wzy__1191', 0) > 40
                              or O_dict.get("O-9,46_wzy_partial__216", 0) > 40)
                if wzy_strong:
                    highest_O = "O-9,46"
                elif "O-9,46,27_partial_wzy__1019" in O_dict:
                    highest_O = "O-9,46,27"
                else:
                    # O9 vs O2 is decided by the tyr markers; when several
                    # names match, the last one seen wins.
                    highest_O = "O-9"
                    O2 = 0
                    O9 = 0
                    for special in Special_dict:
                        if "tyr-O-9" in special:
                            O9 = float(Special_dict[special])
                        if "tyr-O-2" in special:
                            O2 = float(Special_dict[special])
                    if O2 > O9:
                        highest_O = "O-2"
            elif ("O-3,10_wzx__1539" in O_dict) and ("O-9,46_wzy__1191" in O_dict):
                if "O-3,10_not_in_1,3,19__1519" in O_dict:
                    highest_O = "O-3,10"
                else:
                    highest_O = "O-1,3,19"
            ### end of special test for O9,46 and O3,10 family
            else:
                try:
                    highest_O = _top_scoring_O(O_dict)
                    if highest_O == "O-1,3,19":
                        # Re-run the scan without this marker.
                        highest_O = '-'
                        highest_O = _top_scoring_O(
                            O_dict, excluded='O-1,3,19_not_in_3,10__130')
                except (TypeError, ValueError, AttributeError) as err:
                    # Best effort, as before: keep the current call, but
                    # leave a trace instead of swallowing the error silently.
                    log_file.write("O call by highest score failed: "+str(err)+"\n")
        #call_fliC:
        # Best score over fliC and fljB together; used to detect whether
        # fljB existed or not.
        highest_H_score_both_BC = max(H_dict.values()) if H_dict else 0
        highest_Score = 0
        log_file.write("\nH_scores:\n")
        for marker in H_dict:
            log_file.write(marker+"\t"+str(H_dict[marker])+"\n")
            if marker.startswith('fliC') and float(H_dict[marker]) > highest_Score:
                highest_fliC = marker.split('_')[1]
                highest_Score = float(H_dict[marker])
        #call_fljB
        # fljB is special, so use highest_H_score_both_BC to give a general
        # estimate of coverage, currently 0.65 seems pretty good; the reason
        # use a high (0.65) is some fliC and fljB shared with each other
        fljB_floor = highest_H_score_both_BC * 0.65
        highest_Score = 0
        for marker in H_dict:
            if not marker.startswith('fljB'):
                continue
            score = float(H_dict[marker])
            if score > highest_Score and score > fljB_floor:
                allele = marker.split('_')[1]
                # An fljB allele equal to the fliC call is not reported.
                if allele != highest_fliC:
                    highest_fljB = allele
                    highest_Score = score
        log_file.write("\nSpecial_scores:\n")
        for marker in Special_dict:
            log_file.write(marker+"\t"+str(Special_dict[marker])+"\n")
    return highest_O,highest_fliC,highest_fljB
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1203 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def get_temp_file_names(for_fq,rev_fq):
    """seqsero2 -a: derive the temporary file names from the read file names.

    Each name is the forward (``for_fq``) or reverse (``rev_fq``) read file
    name with a fixed suffix appended; nothing is created on disk here.

    Returns:
        ``(sam, bam, sorted_bam, mapped_fq1, mapped_fq2, combined_fq,
        for_sai, rev_sai)``. Only ``mapped_fq2`` and ``rev_sai`` are based
        on ``rev_fq``; all others are based on ``for_fq``.
    """
    sam = for_fq + ".sam"
    bam = for_fq + ".bam"
    sorted_bam = for_fq + "_sorted.bam"
    mapped_fq1 = for_fq + "_mapped.fq"
    mapped_fq2 = rev_fq + "_mapped.fq"
    combined_fq = for_fq + "_combined.fq"
    for_sai = for_fq + ".sai"
    rev_sai = rev_fq + ".sai"
    return sam,bam,sorted_bam,mapped_fq1,mapped_fq2,combined_fq,for_sai,rev_sai
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1215 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def map_and_sort(threads,database,fnameA,fnameB,sam,bam,for_sai,rev_sai,sorted_bam,mapping_mode):
    """Index the database, map the reads with bwa and name-sort the mapped reads.

    Part of the "seqsero2 -a" workflow. All work is done by shelling out to
    ``bwa`` and ``samtools``; bwa's stderr is appended to ``data_log.txt`` in
    the current working directory.

    Args:
        threads: thread count as a string (it is concatenated into commands).
        database: reference FASTA handed to ``bwa index`` and the mappers.
        fnameA: forward (or single-end) reads file.
        fnameB: reverse reads file, or "" for single-end input.
        sam: path the alignment is written to.
        bam: path the mapped-only (``-F 4``) alignment is written to.
        for_sai: ``bwa aln`` output for fnameA ("sam" mapping mode only).
        rev_sai: ``bwa aln`` output for fnameB ("sam" mapping mode only).
        sorted_bam: path of the name-sorted BAM (samtools newer than 1.2).
        mapping_mode: "mem" for ``bwa mem``; "sam" for ``bwa aln`` followed by
            ``sampe``/``samse``. Any other value runs no mapper at all.

    Raises:
        subprocess.CalledProcessError: if an external command exits non-zero.
    """
    #seqsero2 -a; do mapping and sort
    log_redirect = " 2>> data_log.txt"
    print("building database...")
    subprocess.check_call("bwa index "+database+log_redirect,shell=True)
    print("mapping...")
    if mapping_mode=="mem":
        subprocess.check_call("bwa mem -k 17 -t "+threads+" "+database+" "+fnameA+" "+fnameB+" > "+sam+log_redirect,shell=True)
    elif mapping_mode=="sam":
        if fnameB!="":
            subprocess.check_call("bwa aln -t "+threads+" "+database+" "+fnameA+" > "+for_sai+log_redirect,shell=True)
            subprocess.check_call("bwa aln -t "+threads+" "+database+" "+fnameB+" > "+rev_sai+log_redirect,shell=True)
            subprocess.check_call("bwa sampe "+database+" "+for_sai+" "+ rev_sai+" "+fnameA+" "+fnameB+" > "+sam+log_redirect,shell=True)
        else:
            subprocess.check_call("bwa aln -t "+threads+" "+database+" "+fnameA+" > "+for_sai+log_redirect,shell=True)
            # Single-end: samse takes the reads file itself. The previous code
            # referenced an undefined name (for_fq) and omitted shell=True,
            # so the ">" redirection could never have worked.
            subprocess.check_call("bwa samse "+database+" "+for_sai+" "+fnameA+" > "+sam+log_redirect,shell=True)
    # keep mapped reads only (-F 4 drops unmapped records)
    subprocess.check_call("samtools view -@ "+threads+" -F 4 -Sh "+sam+" > "+bam,shell=True)
    ### check the version of samtools, then use different commands
    # Running bare "samtools" prints its usage text, including a "Version:"
    # line, on stderr.
    samtools_version=subprocess.Popen(["samtools"],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    out, err = samtools_version.communicate()
    # Decode first: str() of a bytes object keeps escape sequences such as
    # "\n" as literal characters, which could end up glued to the version.
    usage_text = err.decode("utf-8", errors="replace")
    version = usage_text.split("ersion:")[1].split()[0]
    print("check samtools version:",version)
    ### end of samtools version check and its analysis
    # NOTE(review): LooseVersion is not defined in this block; it is presumably
    # imported at file level (distutils.version) -- confirm.
    if LooseVersion(version)<=LooseVersion("1.2"):
        # legacy syntax: the last argument is an output prefix, not a file
        subprocess.check_call("samtools sort -@ "+threads+" -n "+bam+" "+fnameA+"_sorted",shell=True)
    else:
        subprocess.check_call("samtools sort -@ "+threads+" -n "+bam+" >"+sorted_bam,shell=True)
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1242 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def extract_mapped_reads_and_do_assembly_and_blast(current_time,sorted_bam,combined_fq,mapped_fq1,mapped_fq2,threads,fnameA,fnameB,database,mapping_mode,phred_offset):
    """Extract mapped reads, assemble them with SPAdes and BLAST the database against the contigs.

    Part of the "seqsero2 -a" workflow. External tools (samtools, spades.py,
    makeblastdb, blastn, mv, rm) are run through the shell; most of their
    output is appended to ``data_log.txt`` in the current working directory.

    Args:
        current_time: prefix for the temporary SPAdes output directory
            (``<current_time>_temp``).
        sorted_bam: name-sorted BAM holding the mapped reads.
        combined_fq: FASTQ that receives all mapped reads.
        mapped_fq1: FASTQ for mapped read 1 (paired input only).
        mapped_fq2: FASTQ for mapped read 2 (paired input only).
        threads: thread count as a string; SPAdes is capped at 4 threads.
        fnameA: forward (or single-end) reads file; used to name the contigs.
        fnameB: reverse reads file, or "" for single-end input.
        database: reference FASTA, used as the BLAST query.
        mapping_mode: mapping mode string; only used to name the contigs file.
        phred_offset: 'auto', or a value passed to SPAdes as
            ``--phred-offset <value>``.

    Returns:
        ``(xmlfile, new_fasta)``. ``xmlfile`` is "blasted_output.xml" after a
        successful run, or "NA" when too few reads mapped to attempt an
        assembly. ``new_fasta`` is the contigs file name; in the "NA" case the
        file has not been created.

    Raises:
        subprocess.CalledProcessError: if an external command exits non-zero.
    """
    #seqsero2 -a; extract, assembly and blast
    # Computed up front: previously this name was only bound inside the
    # assembly branch, so the "NA" path died with UnboundLocalError at return.
    new_fasta=fnameA+"_"+database+"_"+mapping_mode+".fasta"
    paired = fnameB!=""
    subprocess.check_call("samtools bam2fq "+sorted_bam+" > "+combined_fq+" 2>> data_log.txt",shell=True) ## change to samtools bam2fq. 202509
    if paired:
        # split the pairs into two files; other and singleton reads are discarded
        subprocess.check_call("samtools bam2fq -1 "+mapped_fq1+" -2 "+mapped_fq2+" -0 /dev/null -s /dev/null -n "+sorted_bam+" 2>> data_log.txt",shell=True) ## change to samtools bam2fq. 202509
    outdir=current_time+"_temp"
    print("assembling...")
    # never hand SPAdes more than 4 threads
    t = "4" if int(threads)>4 else threads
    # Files of 100 bytes or less are treated as "no reads mapped"; the result
    # is then reported as "-:-:-" by the caller.
    enough_reads = os.path.getsize(combined_fq)>100 and (not paired or os.path.getsize(mapped_fq1)>100)
    if not enough_reads:
        return "NA",new_fasta

    if phred_offset == 'auto':
        phred_arg = ''
    else:
        phred_arg = '--phred-offset ' + phred_offset

    if paired:
        subprocess.check_call("spades.py --careful "+phred_arg+" --pe1-s "+combined_fq+" --pe1-1 "+mapped_fq1+" --pe1-2 "+mapped_fq2+" -t "+t+" -o "+outdir+ " >> data_log.txt 2>&1",shell=True)
    else:
        subprocess.check_call("spades.py --careful "+phred_arg+" --pe1-s "+combined_fq+" -t "+t+" -o "+outdir+ " >> data_log.txt 2>&1",shell=True)
    # keep only the contigs, then drop the SPAdes working directory
    subprocess.check_call("mv "+outdir+"/contigs.fasta "+new_fasta+ " 2> /dev/null",shell=True)
    subprocess.check_call("rm -rf "+outdir+ " 2> /dev/null",shell=True)
    print("blasting...","\n")
    xmlfile="blasted_output.xml"
    # tool output goes to data_log.txt so it does not interleave with ours
    subprocess.check_call('makeblastdb -in '+new_fasta+' -out '+new_fasta+'_db '+'-dbtype nucl >> data_log.txt 2>&1',shell=True)
    # the database sequences are the query; the assembled contigs are the subject (-outfmt 5 = XML)
    subprocess.check_call("blastn -query "+database+" -db "+new_fasta+"_db -out "+xmlfile+" -outfmt 5 >> data_log.txt 2>&1",shell=True)
    return xmlfile,new_fasta
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1283 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def judge_subspecies(fnameA):
    """Predict the species/subspecies of the forward raw reads with SalmID.

    Runs ``SalmID.py -i <fnameA>`` through the shell, appends its stdout to
    ``data_log.txt``, and interprets the first two tab-separated lines of that
    output (labels on the first line, scores on the second).

    Args:
        fnameA: forward raw reads FASTQ.

    Returns:
        The label of the best-scoring subspecies column, "bongori",
        "enterica" (species-level call only), or "-" when nothing is
        convincing.

    Raises:
        IndexError: if SalmID produced fewer than two lines of output.
    """
    #seqsero2 -a; judge subspecies on just forward raw reads fastq
    salmid = subprocess.Popen("SalmID.py -i "+fnameA,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    stdout_bytes, _stderr_bytes = salmid.communicate()
    out = stdout_bytes.decode("utf-8")
    with open("data_log.txt","a") as log_handle:
        log_handle.write(out)

    rows = out.split("\n")
    score_row = rows[1].split("\t")
    label_row = rows[0].split("\t")
    # columns from index 6 onwards hold the per-subspecies labels and scores
    subspecies_scores = score_row[6:]
    subspecies_labels = label_row[6:]

    def species_score(column):
        # columns 4 and 5 are compared as the bongori / enterica species scores
        return float(score_row[column])

    best_score = 0
    best_index = 1 #default is 1, means "I"
    for index, raw_score in enumerate(subspecies_scores):
        if best_score < float(raw_score):
            best_score = float(raw_score)
            best_index = index
    prediction = subspecies_labels[best_index].split(".")[1].strip().split(" ")[0]

    # Without this the call would always land on an enterica subspecies,
    # since those columns come later in the table.
    ## ed_SL_0318: change SalmID_ssp_threshold
    if species_score(4) > 10 and species_score(4) > species_score(5):
        prediction = "bongori"
    if best_score < 60:
        prediction = "-"
        ## ed_SL_0818: add for enterica
        # NOTE(review): nesting reconstructed from an indentation-stripped
        # listing; this fallback is taken to apply only when no subspecies
        # reached the threshold -- confirm against the repository.
        if species_score(5) > 10 and species_score(5) > species_score(4):
            prediction = "enterica"
    return prediction
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1312 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def judge_subspecies_Kmer(Special_dict):
    """Predict the subspecies from the k-mer workflow's special-marker scores.

    Only keys containing "mer" with a score above 60 are considered; the
    prediction is the text after the last "_" of the best-scoring such key.

    Args:
        Special_dict: mapping of marker name to score (anything ``float()``
            accepts).

    Returns:
        The predicted subspecies label, or "-" when no marker qualifies.
    """
    #seqsero2 -k;
    best_score = 0
    prediction = "-" #default should be I
    for marker, raw_score in Special_dict.items():
        if "mer" not in marker:
            continue
        score = float(raw_score)
        ## ed_SL_0318: change ssp_threshold
        if score <= 60:
            continue
        label = marker.split("_")[-1].strip()
        if best_score < score:
            best_score = score
            prediction = label
        # NOTE(review): nesting reconstructed from an indentation-stripped
        # listing; the bongori short-circuit is taken to apply to "mer"
        # markers only -- confirm against the repository.
        if label == "bongori" and score > 95: #if bongori already, then no need to test enterica
            prediction = "bongori"
            break
    return prediction
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1327 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1328 ## ed_SL_11232019: add notes for missing antigen |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def check_antigens(ssp,O_antigen,H1_antigen,H2_antigen,NA_note):
    """Build the explanatory note for missing antigens or an unclear subspecies.

    An antigen counts as missing when its value is '-'.

    Args:
        ssp: subspecies prediction; '-' means none, 'enterica' means only the
            species could be called.
        O_antigen: O antigen call.
        H1_antigen: phase 1 H antigen (fliC) call.
        H2_antigen: phase 2 H antigen call.
        NA_note: note passed in by the caller; returned unchanged unless one
            of the notes below applies, in which case it is blanked.

    Returns:
        ``(antigen_note, NA_note)``.
    """
    O_missing = O_antigen == '-'
    H1_missing = H1_antigen == '-'
    H2_missing = H2_antigen == '-'

    if ssp == '-':
        antigen_note = 'The input genome cannot be identified as Salmonella. Check the input for taxonomic ID, contamination, or sequencing quality. '
    elif not O_missing and H1_missing:
        if H2_missing: # O:-:-
            antigen_note = 'H antigens were not detected. This is an atypical result that should be further investigated. Most Salmonella strains have at least fliC, encoding the Phase 1 H antigen, even if it is not expressed. '
        else: # O:-:H2
            antigen_note = 'fliC was not detected. This is an atypical result that should be further investigated. Most Salmonella strains have fliC, encoding the Phase 1 H antigen, even if it is not expressed. '
    elif O_missing and not H1_missing: # -:H1:X
        antigen_note = 'O antigen was not detected. This result may be due to a rough strain that has deleted the rfb region. For raw reads input, the k-mer workflow is sometimes more sensitive than the microassembly workflow in detecting O antigen. Caution should be used with this approach because the k-mer result may be due to low levels of contamination. '
    elif O_missing and H1_missing and H2_missing: # -:-:-
        antigen_note = 'No serotype antigens were detected. This is an atypical result that should be further investigated. '
    else:
        # O and H1 both present, or only H2 present: nothing to report
        antigen_note = ''

    # any note set above replaces the caller's NA_note
    if antigen_note:
        NA_note = ''

    if ssp == 'enterica':
        antigen_note += 'Subspecies identification of the input genome cannot be definitively determined. '
        NA_note = ''
    return (antigen_note,NA_note)
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1355 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1356 ## ed_SL_06062020: rename subspecies ID |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
# Maps the short subspecies/species code used internally to the full name
# shown in reports. '-' (no prediction) maps to itself.
subspecies_ID_dir = {
    # Salmonella enterica subspecies, keyed by Roman-numeral code
    'I':    'Salmonella enterica subspecies enterica (subspecies I)',
    'II':   'Salmonella enterica subspecies salamae (subspecies II)',
    'IIIa': 'Salmonella enterica subspecies arizonae (subspecies IIIa)',
    'IIIb': 'Salmonella enterica subspecies diarizonae (subspecies IIIb)',
    'IV':   'Salmonella enterica subspecies houtenae (subspecies IV)',
    'VI':   'Salmonella enterica subspecies indica (subspecies VI)',
    'VII':  'Salmonella enterica subspecies VII (subspecies VII)',
    # species-level calls
    'bongori':  'Salmonella bongori',
    'enterica': 'Salmonella enterica',
    # no prediction
    '-': '-',
}
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1367 ## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1368 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1369 ## ed_SL_08172020: format check for fasta or fastq in nanopore workflow, convert raw reads fastq to fasta |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
def format_check(input_file):
    """Return a FASTA path for *input_file*, converting FASTQ with seqtk if needed.

    The format is decided from the first character of the first line:
    '>' means FASTA and the input path is returned as is; '@' means FASTQ and
    the reads are converted with ``seqtk seq -A`` into ``<input_file>.fasta``.

    Args:
        input_file: path of the FASTA or FASTQ file to inspect.

    Returns:
        The path of a FASTA file: ``input_file`` itself or the converted copy.

    Raises:
        ValueError: if the first line starts with neither '>' nor '@'.
            Previously this case printed the message below and then failed
            with UnboundLocalError at the return statement.
        subprocess.CalledProcessError: if seqtk exits non-zero.
    """
    # Only the first line is needed; the context manager closes the handle
    # that the previous one-liner leaked.
    with open(input_file,'r') as handle:
        line = handle.readline()
    if line.startswith('>'):
        return input_file
    if line.startswith('@'):
        input_file_fa = input_file + '.fasta'
        # An argument list plus an explicit stdout file avoids building a
        # shell command from the path (spaces and shell metacharacters are safe).
        with open(input_file_fa,'w') as fasta_out:
            subprocess.check_call(["seqtk","seq","-A",input_file],stdout=fasta_out)
        return input_file_fa
    print ('please check the format of input files')
    raise ValueError("unrecognized input format (expected FASTA or FASTQ): "+str(input_file))
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1381 ## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1382 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1383 def main(): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1384 #combine SeqSeroK and SeqSero2, also with SalmID |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1385 args = parse_args() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1386 input_file = args.i |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1387 data_type = args.t |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1388 analysis_mode = args.m |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1389 mapping_mode=args.b |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1390 threads=args.p |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1391 make_dir=args.d |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1392 clean_mode=args.c |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1393 sample_name=args.n |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1394 ingore_header=args.s |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1395 phred_offset=args.phred_offset |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1396 k_size=27 #will change for bug fixing |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1397 dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1398 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2s_db')) # ed_SL_09152019: add ex_dir for packaging |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1399 seqsero2_db=ex_dir+"/H_and_O_and_specific_genes.fasta" # ed_SL_11092019: change path to database for packaging |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1400 database="H_and_O_and_specific_genes.fasta" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1401 mlst_count=pickle.load(open(ex_dir+"/mlst.pickle", "rb")) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1402 note="Note: " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1403 NA_note="This predicted serotype is not in the Kauffman-White scheme. " # ed_SL_09272019: add for new output format |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1404 if len(sys.argv)==1: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1405 subprocess.check_call(dirpath+"/SeqSero2S.py -h",shell=True)#change name of python file |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1406 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1407 request_id = time.strftime("%m_%d_%Y_%H_%M_%S", time.localtime()) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1408 request_id += str(random.randint(1, 10000000)) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1409 if make_dir is None: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1410 make_dir="SeqSero_result_"+request_id |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1411 make_dir=os.path.abspath(make_dir) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1412 if os.path.isdir(make_dir): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1413 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1414 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1415 subprocess.check_call("mkdir -p "+make_dir,shell=True) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1416 subprocess.check_call("ln -f -s "+seqsero2_db+" "+" ".join(input_file)+" "+make_dir,shell=True) # ed_SL_11092019: change path to database for packaging |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1417 #subprocess.check_call("ln -f -s "+dirpath+"/"+database+" "+" ".join(input_file)+" "+make_dir,shell=True) ### use -f option to force the replacement of links, remove -r and use absolute path instead to avoid link issue (use 'type=os.path.abspath' in -i argument). |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1418 ############################begin the real analysis |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1419 if analysis_mode=="a": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1420 if data_type in ["1","2","3"]:#use allele mode |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1421 for_fq,rev_fq=get_input_files(make_dir,input_file,data_type,dirpath) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1422 os.chdir(make_dir) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1423 ###add a function to tell input files |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1424 fnameA=for_fq.split("/")[-1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1425 fnameB=rev_fq.split("/")[-1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1426 current_time=time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1427 sam,bam,sorted_bam,mapped_fq1,mapped_fq2,combined_fq,for_sai,rev_sai=get_temp_file_names(fnameA,fnameB) #get temp files id |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1428 map_and_sort(threads,database,fnameA,fnameB,sam,bam,for_sai,rev_sai,sorted_bam,mapping_mode) #do mapping and sort |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1429 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1430 ### avoid error out when micro assembly fails. ed_SL_03172020 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1431 try: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1432 xmlfile,new_fasta=extract_mapped_reads_and_do_assembly_and_blast(current_time,sorted_bam,combined_fq,mapped_fq1,mapped_fq2,threads,fnameA,fnameB,database,mapping_mode,phred_offset) #extract the mapped reads and do micro assembly and blast |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1433 except (UnboundLocalError, subprocess.CalledProcessError): |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1434 xmlfile="NA" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1435 H1_cont_stat_list=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1436 H2_cont_stat_list=[] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1437 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1438 if xmlfile=="NA": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1439 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H=("-","-","-",[],"","") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1440 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1441 Final_list=xml_parse_score_comparision_seqsero(xmlfile) #analyze xml and get parsed results |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1442 file=open("data_log.txt","a") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1443 for x in Final_list: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1444 file.write("\t".join(str(y) for y in x)+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1445 file.close() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1446 Final_list_passed=[x for x in Final_list if float(x[0].split("_cov_")[1].split("_")[0])>=0.9 and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]) or x[1]>1000)] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1447 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list=predict_O_and_H_types(Final_list,Final_list_passed,new_fasta) #predict O, fliC and fljB |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1448 subspecies=judge_subspecies(fnameA) #predict subspecies |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1449 ### ed_SL_06062020: correction VIII -> II |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1450 if subspecies == 'VIII': |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1451 subspecies = 'II' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1452 ### ed_SL_08132020: correction VII -> IV, according to CDC's suggestion |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1453 if subspecies == 'VII': |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1454 subspecies = 'IV' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1455 note+='SalmID reports this as ssp VII, which has not been formally recognized. ' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1456 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1457 ### ed_SL_08182020: change serotype output for genome without definitive subspecies ID |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1458 ssp_pointer = subspecies |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1459 if subspecies == 'enterica': |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1460 subspecies = '-' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1461 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1462 ###MLST |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1463 #print("MLST using https://github.com/jordanlab/stringMLST") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1464 #print("7-gene MLST...") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1465 mlst_result = stringmlst(for_fq,rev_fq,data_type,ex_dir) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1466 st = mlst_result[0] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1467 alleles = mlst_result[1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1468 sorted_alleles = sorted(alleles.items()) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1469 try: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1470 st_count = str(mlst_count[st]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1471 except: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1472 st_count = '0' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1473 subprocess.call("rm H_and_O_and_specific_genes.fasta* *.sra *.bam *.sam *.fastq *.gz *.fq temp.txt "+fnameA+"*_db* 2> /dev/null",shell=True) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1474 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1475 ###output |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1476 predict_form_ss2,predict_sero_ss2,star_ss2,star_line_ss2,claim_ss2=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies,'ss2') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1477 predict_form,predict_sero,star,star_line,claim=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies,'ss2s') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1478 claim="" #04132019, disable claim for new report requirement |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1479 contamination_report="" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1480 H_list=["fliC_"+x for x in H1_cont_stat_list if len(x)>0]+["fljB_"+x for x in H2_cont_stat_list if len(x)>0] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1481 if contamination_O!="" and contamination_H=="": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1482 contamination_report="#Potential inter-serotype contamination detected from O antigen signals. All O-antigens detected:"+"\t".join(Otypes_uniq)+"." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1483 elif contamination_O=="" and contamination_H!="": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1484 contamination_report="#Potential inter-serotype contamination detected or potential thrid H phase from H antigen signals. All H-antigens detected:"+"\t".join(H_list)+"." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1485 elif contamination_O!="" and contamination_H!="": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1486 contamination_report="#Potential inter-serotype contamination detected from both O and H antigen signals.All O-antigens detected:"+"\t".join(Otypes_uniq)+". All H-antigens detected:"+"\t".join(H_list)+"." |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1487 if contamination_report!="": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1488 #contamination_report="potential inter-serotype contamination detected (please refer below antigen signal report for details)." #above contamination_reports are for back-up and bug fixing #web-based mode need to be re-used, 04132019 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1489 contamination_report="Co-existence of multiple serotypes detected, indicating potential inter-serotype contamination. See 'Extracted_antigen_alleles.fasta' for detected serotype determinant alleles. " |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1490 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1491 ### ed_SL_11232019: add notes for missing antigen |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1492 if O_choice=="": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1493 O_choice="-" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1494 antigen_note,NA_note=check_antigens(ssp_pointer,O_choice,fliC_choice,fljB_choice,NA_note) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1495 if sample_name: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1496 print ("Sample name:\t"+sample_name) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1497 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1498 if clean_mode: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1499 subprocess.check_call("rm -rf "+make_dir,shell=True) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1500 make_dir="none-output-directory due to '-c' flag" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1501 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1502 new_file=open("SeqSero_result.txt","w") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1503 ### ed_SL_01152020: add new output |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1504 conta_note="yes" if "inter-serotype contamination" in contamination_report else "no" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1505 tsv_file=open("SeqSero_result.tsv","w") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1506 if ingore_header: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1507 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1508 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1509 tsv_file.write("Sample name\tOutput directory\tInput files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted identification\tPredicted antigenic profile\tPredicted serotype\tPredicted serotype (SeqSero2 v1.3.2)\tPotential inter-serotype contamination\tNote\tST\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1510 if sample_name: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1511 new_file.write("Sample name:\t"+sample_name+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1512 tsv_file.write(sample_name+'\t') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1513 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1514 tsv_file.write(input_file[0].split('/')[-1]+'\t') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1515 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1516 if "N/A" not in predict_sero: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1517 new_file.write("Output directory:\t"+make_dir+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1518 "Input files:\t"+"\t".join(input_file)+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1519 "O antigen prediction:\t"+O_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1520 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1521 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1522 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1523 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1524 "Predicted serotype:\t"+predict_sero+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1525 "Predicted serotype (SeqSero2 v1.3.2):\t"+predict_sero_ss2+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1526 note+contamination_report+star_line+claim+antigen_note+"\n")#+## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1527 tsv_file.write(make_dir+"\t"+" ".join(input_file)+"\t"+O_choice+"\t"+fliC_choice+"\t"+fljB_choice+"\t"+subspecies_ID_dir[ssp_pointer]+"\t"+predict_form+"\t"+predict_sero+"\t"+predict_sero_ss2+"\t"+conta_note+"\t"+contamination_report+star_line+claim+antigen_note+"\t"+st+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1528 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1529 new_file.write("Output directory:\t"+make_dir+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1530 "Input files:\t"+"\t".join(input_file)+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1531 "O antigen prediction:\t"+O_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1532 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1533 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1534 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1535 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1536 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, add subspecies |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1537 "Predicted serotype (SeqSero2 v1.3.2):\t"+subspecies+' '+predict_form_ss2+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1538 note+NA_note+contamination_report+star_line+claim+antigen_note+"\n")#+## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1539 tsv_file.write(make_dir+"\t"+" ".join(input_file)+"\t"+O_choice+"\t"+fliC_choice+"\t"+fljB_choice+"\t"+subspecies_ID_dir[ssp_pointer]+"\t"+predict_form+"\t"+subspecies+' '+predict_form+"\t"+subspecies+' '+predict_form_ss2+"\t"+conta_note+"\t"+NA_note+contamination_report+star_line+claim+antigen_note+"\t"+st+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1540 ##MLST |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1541 new_file.write("Sequence type:\t"+st+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1542 "Number of ST"+st+" strains in EnteroBase:\t"+st_count+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1543 "\n".join([k+":\t"+v for k,v in sorted_alleles])) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1544 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1545 new_file.close() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1546 tsv_file.close() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1547 if "N/A" not in predict_sero: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1548 print("Output directory:\t"+make_dir+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1549 "Input files:\t"+"\t".join(input_file)+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1550 "O antigen prediction:\t"+O_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1551 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1552 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1553 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1554 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1555 "Predicted serotype:\t"+predict_sero+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1556 "Predicted serotype (SeqSero2 v1.3.2):\t"+predict_sero_ss2+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1557 note+contamination_report+star_line+claim+antigen_note+"\n")#+## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1558 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1559 print("Output directory:\t"+make_dir+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1560 "Input files:\t"+"\t".join(input_file)+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1561 "O antigen prediction:\t"+O_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1562 "H1 antigen prediction(fliC):\t"+fliC_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1563 "H2 antigen prediction(fljB):\t"+fljB_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1564 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1565 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1566 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1567 "Predicted serotype (SeqSero2 v1.3.2):\t"+subspecies+' '+predict_form_ss2+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1568 note+NA_note+contamination_report+star_line+claim+antigen_note+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1569 ###MLST |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1570 print("Sequence type: "+st) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1571 print("Number of ST"+st+" strains in EnteroBase: "+st_count) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1572 #print("Allele profile...") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1573 for k,v in sorted_alleles: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1574 print(k+': '+v) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1575 print('\n') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1576 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1577 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1578 print("Allele modes only support raw reads datatype, i.e. '-t 1 or 2 or 3'; please use '-m k'") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1579 elif analysis_mode=="k": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1580 #ex_dir = os.path.dirname(os.path.realpath(__file__)) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1581 ex_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(__file__)),'seqsero2s_db')) # ed_SL_09152019: change ex_dir for packaging |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1582 for_fq,rev_fq=get_input_files(make_dir,input_file,data_type,dirpath) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1583 input_file = for_fq #-k will just use forward because not all reads were used |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1584 os.chdir(make_dir) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1585 ### ed_SL_08182020: use assembly workflow for nanopore fastq, convert fastq to fasta |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1586 if data_type == "5": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1587 input_file = format_check(for_fq) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1588 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1589 f = open(ex_dir + '/antigens.pickle', 'rb') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1590 lib_dict = pickle.load(f) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1591 f.close |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1592 input_Ks=get_input_K(input_file,lib_dict,data_type,k_size) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1593 O_dict,H_dict,Special_dict=get_kmer_dict(lib_dict,input_Ks) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1594 highest_O,highest_fliC,highest_fljB=call_O_and_H_type(O_dict,H_dict,Special_dict,make_dir) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1595 subspecies=judge_subspecies_Kmer(Special_dict) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1596 if subspecies=="IIb" or subspecies=="IIa": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1597 subspecies="II" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1598 ### ed_SL_06062020: correction VIII -> II |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1599 if subspecies == 'VIII': |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1600 subspecies = 'II' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1601 ### ed_SL_08132020: correction VII -> IV, according to CDC's suggestion |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1602 if subspecies == 'VII': |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1603 subspecies = 'IV' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1604 note+='SalmID reports this as ssp VII, which has not been formally recognized. ' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1605 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1606 ### ed_SL_08182020: change serotype output for genome without definitive subspecies ID |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1607 ssp_pointer = subspecies |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1608 if subspecies == 'enterica': |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1609 subspecies = '-' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1610 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1611 predict_form_ss2,predict_sero_ss2,star_ss2,star_line_ss2,claim_ss2 = seqsero_from_formula_to_serotypes( |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1612 highest_O.split('-')[1], highest_fliC, highest_fljB, Special_dict,subspecies, 'ss2') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1613 predict_form,predict_sero,star,star_line,claim = seqsero_from_formula_to_serotypes( |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1614 highest_O.split('-')[1], highest_fliC, highest_fljB, Special_dict,subspecies, 'ss2s') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1615 claim="" #no claim any more based on new output requirement |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1616 |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1617 ### ed_SL_11232019: add notes for missing antigen |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1618 if highest_O.split('-')[-1]=="": |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1619 O_choice="-" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1620 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1621 O_choice=highest_O.split('-')[-1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1622 antigen_note,NA_note=check_antigens(ssp_pointer,O_choice,highest_fliC,highest_fljB,NA_note) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1623 if sample_name: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1624 print ("Sample name:\t"+sample_name) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1625 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1626 ###MLST |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1627 if data_type in ["4","5"]: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1628 #print("MLST using https://github.com/tseemann/mlst") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1629 #print("7-gene MLST...") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1630 mlst_result = mlst(args.i[0]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1631 if data_type in ["1","2","3"]: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1632 #print("MLST using https://github.com/jordanlab/stringMLST") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1633 #print("7-gene MLST...") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1634 mlst_result = stringmlst(for_fq,rev_fq,data_type,ex_dir) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1635 st = mlst_result[0] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1636 alleles = mlst_result[1] |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1637 sorted_alleles = sorted(alleles.items()) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1638 try: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1639 st_count = str(mlst_count[st]) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1640 except: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1641 st_count = '0' |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1642 subprocess.call("rm *.fasta* *.fastq *.gz *.fq temp.txt *.sra 2> /dev/null",shell=True) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1643 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1644 ###output |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1645 if clean_mode: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1646 subprocess.check_call("rm -rf "+make_dir,shell=True) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1647 make_dir="none-output-directory due to '-c' flag" |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1648 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1649 new_file=open("SeqSero_result.txt","w") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1650 tsv_file=open("SeqSero_result.tsv","w") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1651 if ingore_header: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1652 pass |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1653 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1654 tsv_file.write("Sample name\tOutput directory\tInput files\tO antigen prediction\tH1 antigen prediction(fliC)\tH2 antigen prediction(fljB)\tPredicted identification\tPredicted antigenic profile\tPredicted serotype\tPredicted serotype (SeqSero2 v1.3.2)\tNote\tST\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1655 if sample_name: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1656 new_file.write("Sample name:\t"+sample_name+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1657 tsv_file.write(sample_name+'\t') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1658 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1659 tsv_file.write(input_file.split('/')[-1]+'\t') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1660 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1661 if "N/A" not in predict_sero: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1662 new_file.write("Output directory:\t"+make_dir+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1663 "Input files:\t"+input_file+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1664 "O antigen prediction:\t"+O_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1665 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1666 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1667 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1668 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1669 "Predicted serotype:\t"+predict_sero+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1670 "Predicted serotype (SeqSero2 v1.3.2):\t"+predict_sero_ss2+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1671 note+star_line+claim+antigen_note+"\n")#+## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1672 tsv_file.write(make_dir+"\t"+input_file+"\t"+O_choice+"\t"+highest_fliC+"\t"+highest_fljB+"\t"+subspecies_ID_dir[ssp_pointer]+"\t"+predict_form+"\t"+predict_sero+"\t"+predict_sero_ss2+"\t"+star_line+claim+antigen_note+"\t"+st+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1673 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1674 new_file.write("Output directory:\t"+make_dir+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1675 "Input files:\t"+input_file+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1676 "O antigen prediction:\t"+O_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1677 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1678 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1679 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1680 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1681 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1682 "Predicted serotype (SeqSero2 v1.3.2):\t"+subspecies+' '+predict_form_ss2+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1683 note+NA_note+star_line+claim+antigen_note+"\n")#+## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1684 tsv_file.write(make_dir+"\t"+input_file+"\t"+O_choice+"\t"+highest_fliC+"\t"+highest_fljB+"\t"+subspecies_ID_dir[ssp_pointer]+"\t"+predict_form+"\t"+subspecies+' '+predict_form+"\t"+subspecies+' '+predict_form_ss2+"\t"+NA_note+star_line+claim+antigen_note+"\t"+st+"\n") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1685 ###MLST |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1686 new_file.write("Sequence type:\t"+st+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1687 "Number of ST"+st+" strains in EnteroBase:\t"+st_count+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1688 "\n".join([k+":\t"+v for k,v in sorted_alleles])) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1689 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1690 new_file.close() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1691 tsv_file.close() |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1692 if "N/A" not in predict_sero: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1693 print("Output directory:\t"+make_dir+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1694 "Input files:\t"+input_file+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1695 "O antigen prediction:\t"+O_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1696 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1697 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1698 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1699 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1700 "Predicted serotype:\t"+predict_sero+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1701 "Predicted serotype (SeqSero2 v1.3.2):\t"+predict_sero_ss2+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1702 note+star_line+claim+antigen_note+"\n")#+## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1703 else: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1704 print("Output directory:\t"+make_dir+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1705 "Input files:\t"+input_file+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1706 "O antigen prediction:\t"+O_choice+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1707 "H1 antigen prediction(fliC):\t"+highest_fliC+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1708 "H2 antigen prediction(fljB):\t"+highest_fljB+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1709 "Predicted identification:\t"+subspecies_ID_dir[ssp_pointer]+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1710 "Predicted antigenic profile:\t"+predict_form+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1711 "Predicted serotype:\t"+subspecies+' '+predict_form+"\n"+ # add serotype output for "N/A" prediction, subspecies |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1712 "Predicted serotype (SeqSero2 v1.3.2):\t"+subspecies+' '+predict_form_ss2+"\n"+ |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1713 note+NA_note+star_line+claim+antigen_note+"\n")#+## |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1714 ###MLST |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1715 print("Sequence type: "+st) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1716 print("Number of ST"+st+" strains in EnteroBase: "+st_count) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1717 #print("Allele profile...") |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1718 for k,v in sorted_alleles: |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1719 print(k+': '+v) |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1720 print('\n') |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
1721 ### |
|
cfc91e1d2c9b
planemo upload commit 936a627c4fc706080f07ec678f89e8256a7e7895
jpayne
parents:
diff
changeset
|
# Script entry point: run the command-line workflow only when this file is
# executed directly, so that importing it as a module does not call main().
if __name__ == "__main__":
    main()
