cstrittmatter@0
|
1 import pickle
|
cstrittmatter@0
|
2 import traceback
|
cstrittmatter@0
|
3 import shlex
|
cstrittmatter@0
|
4 import subprocess
|
cstrittmatter@0
|
5 from threading import Timer
|
cstrittmatter@0
|
6 import shutil
|
cstrittmatter@0
|
7 import time
|
cstrittmatter@0
|
8 import functools
|
cstrittmatter@0
|
9 import os.path
|
cstrittmatter@0
|
10 import sys
|
cstrittmatter@0
|
11 import argparse
|
cstrittmatter@0
|
12
|
cstrittmatter@0
|
13
|
cstrittmatter@0
|
def start_logger(workdir):
    """
    Route ``sys.stdout`` through a Logger so printed text is also saved to a run log

    Parameters
    ----------
    workdir : str
        Directory in which the log file is created

    Returns
    -------
    logfile : str
        Path to the log file being written
    time_str : str
        Timestamp (``YYYYmmdd-HHMMSS``) that is embedded in the log file name
    """
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    tee = Logger(workdir, timestamp)
    # From here on every print() goes through the Logger instance
    sys.stdout = tee
    return tee.getLogFile(), timestamp
|
cstrittmatter@0
|
19
|
cstrittmatter@0
|
20
|
cstrittmatter@0
|
class Logger(object):
    """
    Tee-like stream that duplicates everything written to it into a log file

    The stream that is ``sys.stdout`` at construction time is kept as ``terminal``; the log file is
    ``<out_directory>/run.<time_str>.log`` and is opened for writing (truncating any existing file).
    The log file handle stays open for the lifetime of the instance.
    """

    def __init__(self, out_directory, time_str):
        """
        Parameters
        ----------
        out_directory : str
            Directory in which the log file is created
        time_str : str
            Text placed between ``run.`` and ``.log`` in the log file name
        """
        self.logfile = os.path.join(out_directory, 'run.' + time_str + '.log')
        self.terminal = sys.stdout
        self.log = open(self.logfile, "w")

    def write(self, message):
        """Write message to the original stream and to the log file (the log is flushed on every write)"""
        self.terminal.write(message)
        self.log.write(message)
        self.log.flush()

    def flush(self):
        """Forward flush requests to both underlying streams"""
        # Previously a no-op, which meant explicit sys.stdout.flush() calls never reached the terminal stream
        self.terminal.flush()
        self.log.flush()

    def getLogFile(self):
        """Return the path to the log file"""
        return self.logfile
|
cstrittmatter@0
|
37
|
cstrittmatter@0
|
38
|
cstrittmatter@0
|
def checkPrograms(programs_version_dictionary):
    """
    Check that the required programs are in PATH and that their versions are acceptable

    Parameters
    ----------
    programs_version_dictionary : dict
        Program names as keys and ``[version_flag, comparison, required_version]`` lists as values.
        ``version_flag`` set to None skips the version check. ``comparison`` equal to ``'>='`` requests a
        minimum version, any other value requires the reported version to be exactly ``required_version``.
        For programs ending with ``.jar`` the path found is appended to the list (the dictionary is modified
        in place).

    Returns
    -------
    listMissings : list
        One message per program that was not found or whose version does not satisfy the requirement
    """
    print('\n' + 'Checking dependencies...')
    programs = programs_version_dictionary
    which_program = ['which', '']
    listMissings = []
    for program in programs:
        which_program[1] = program
        run_successfully, stdout, stderr = runCommandPopenCommunicate(which_program, False, None, False)
        if not run_successfully:
            listMissings.append(program + ' not found in PATH.')
            continue

        program_path = stdout.splitlines()[0]
        print(program_path)
        if programs[program][0] is None:
            print(program + ' (impossible to determine programme version) found at: ' + program_path)
            continue

        if program.endswith('.jar'):
            check_version = ['java', '-jar', program_path, programs[program][0]]
            programs[program].append(program_path)
        else:
            check_version = [program_path, programs[program][0]]
        run_successfully, stdout, stderr = runCommandPopenCommunicate(check_version, False, None, False)
        # Some programs report their version through stderr
        if stdout == '':
            stdout = stderr

        # The position of the version string in the output depends on the program
        if program in ['wget', 'awk']:
            version_line = stdout.splitlines()[0].split(' ', 3)[2]
        elif program in ['prefetch', 'fastq-dump']:
            version_line = stdout.splitlines()[1].split(' ')[-1]
        else:
            version_line = stdout.splitlines()[0].split(' ')[-1]
        for character in ['"', 'v', 'V', '+', ',']:
            version_line = version_line.replace(character, '')
        print(program + ' (' + version_line + ') found')

        if programs[program][1] == '>=':
            program_found_version = version_line.split('.')
            program_version_required = programs[program][2].split('.')
            if len(program_version_required) == 3 and len(program_found_version) >= 3:
                # Drop suffixes such as "_1" from the third component
                program_found_version[2] = program_found_version[2].split('_')[0]
            # Components missing from the found version count as 0 (e.g. "1.3" is compared as "1.3.0")
            while len(program_found_version) < len(program_version_required):
                program_found_version.append('0')

            for i in range(len(program_version_required)):
                found_component = int(program_found_version[i])
                required_component = int(program_version_required[i])
                if found_component > required_component:
                    break
                if found_component < required_component:
                    listMissings.append('It is required ' + program + ' with version ' +
                                        programs[program][1] + ' ' + programs[program][2])
                    # The first differing component decides: stop here so the problem is reported only once
                    break
        else:
            if version_line != programs[program][2]:
                listMissings.append('It is required ' + program + ' with version ' + programs[program][1] +
                                    ' ' + programs[program][2])
    return listMissings
|
cstrittmatter@0
|
93
|
cstrittmatter@0
|
94
|
cstrittmatter@0
|
def requiredPrograms():
    """
    Verify the external dependencies and stop the program if any requirement is not met

    Currently ``rematch.py`` with version >= 4.0 (reported through ``--version``) is required.
    Exits through ``sys.exit`` with the list of problems when checkPrograms reports any.
    """
    requirements = {'rematch.py': ['--version', '>=', '4.0']}
    problems = checkPrograms(requirements)
    if problems:
        sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(problems))
|
cstrittmatter@0
|
101
|
cstrittmatter@0
|
102
|
cstrittmatter@0
|
def general_information(logfile, version, outdir, time_str):
    """
    Print the run header (start time, log file, command, working directory, version) and check dependencies

    Parameters
    ----------
    logfile : str
        Path to the log file, only printed here
    version : str
        Version of the script
    outdir : str
        Not used inside this function
    time_str : str
        Not used inside this function

    Returns
    -------
    script_path : str
        Path to ``patho_typing.py`` located one directory above the folder containing this file
    """

    def announce(title):
        # Each section title is preceded by an empty line
        print('\n' + title)

    announce('==========> patho_typing <==========')
    announce('Program start: ' + time.ctime())

    # Tell where the log file will be stored
    announce('LOGFILE:')
    print(logfile)

    # Print the command line
    announce('COMMAND:')
    package_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    script_path = os.path.join(package_root, 'patho_typing.py')
    print(sys.executable + ' ' + ' '.join(sys.argv))

    # Print the directory from which the program was launched
    announce('PRESENT DIRECTORY:')
    present_directory = os.path.abspath(os.getcwd())
    print(present_directory)

    # Print the program version
    announce('VERSION:')
    script_version_git(version, present_directory, script_path)

    # Check the required programs
    requiredPrograms()

    return script_path
|
cstrittmatter@0
|
131
|
cstrittmatter@0
|
132
|
cstrittmatter@0
|
def setPATHvariable(doNotUseProvidedSoftware, script_path):
    """
    Put the software shipped with the script in front of PATH (unless disabled) and print the PATH in use

    Parameters
    ----------
    doNotUseProvidedSoftware : bool
        True to leave PATH untouched
    script_path : str
        Path to the main script; the provided software is looked for under ``src`` in the same folder

    Returns
    -------
    None
    """
    # A missing PATH is treated as empty instead of raising KeyError
    path_variable = os.environ.get('PATH', '')
    script_folder = os.path.dirname(script_path)
    # Set path to use the provided software
    if not doNotUseProvidedSoftware:
        bowtie2 = os.path.join(script_folder, 'src', 'bowtie2-2.2.9')
        samtools = os.path.join(script_folder, 'src', 'samtools-1.3.1', 'bin')
        bcftools = os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin')

        entries = [bowtie2, samtools, bcftools]
        # An empty entry would be interpreted as the current directory, so an empty PATH is not appended
        if path_variable:
            entries.append(path_variable)
        # os.pathsep instead of a hard-coded ':' keeps the separator correct on every platform
        os.environ['PATH'] = os.pathsep.join(entries)

    # Print the PATH variable
    print('\n' + 'PATH variable:')
    print(os.environ.get('PATH', ''))
|
cstrittmatter@0
|
147
|
cstrittmatter@0
|
148
|
cstrittmatter@0
|
def script_version_git(version, current_directory, script_path, no_git_info=False):
    """
    Print the script version and, optionally, the git commit and remote information

    Parameters
    ----------
    version : str
        Version of the script, e.g. "4.0"
    current_directory : str
        Path to the directory from which the script was started; the working directory is restored to it
    script_path : str
        Path to the script running
    no_git_info : bool, default False
        True if it is not necessary to retrieve the git commit information

    Returns
    -------
    None
    """
    print('Version {}'.format(version))

    if not no_git_info:
        try:
            # NOTE(review): this changes into the parent of the folder that contains script_path - confirm
            # that this is the intended directory for the git queries
            os.chdir(os.path.dirname(os.path.dirname(script_path)))
            command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"']
            run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False)
            print(stdout)
            command = ['git', 'remote', 'show', 'origin']
            run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False)
            print(stdout)
        except Exception:
            # Best effort only. KeyboardInterrupt and SystemExit are no longer swallowed here.
            print('HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be'
                  ' obtained.')
        finally:
            os.chdir(current_directory)
|
cstrittmatter@0
|
184
|
cstrittmatter@0
|
185
|
cstrittmatter@0
|
def runTime(start_time):
    """
    Print the time elapsed since start_time and return it

    Parameters
    ----------
    start_time : float
        Starting point as returned by ``time.time()``

    Returns
    -------
    time_taken : float
        Elapsed seconds rounded to two decimal places
    """
    time_taken = time.time() - start_time
    hours, remainder = divmod(time_taken, 3600)
    minutes, seconds = divmod(remainder, 60)
    report = 'Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's'
    print(report)
    return round(time_taken, 2)
|
cstrittmatter@0
|
193
|
cstrittmatter@0
|
194
|
cstrittmatter@0
|
def timer(function, name):
    """
    Wrap function so that its start, end and run time are printed

    Parameters
    ----------
    function : callable
        Function to be timed; it must return an iterable
    name : str
        Label used in the RUNNING/END messages

    Returns
    -------
    wrapper : callable
        Wrapped function returning a list with the run time (seconds) followed by the items returned by function
    """
    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        print('\n' + 'RUNNING {0}\n'.format(name))
        started_at = time.time()

        # Materialised before the clock is stopped, so lazily produced results are included in the timing
        outcome = list(function(*args, **kwargs))

        elapsed = runTime(started_at)
        print('END {0}'.format(name))

        return [elapsed] + outcome
    return wrapper
|
cstrittmatter@0
|
209
|
cstrittmatter@0
|
210
|
cstrittmatter@0
|
def removeDirectory(directory):
    """
    Delete directory and everything inside it; does nothing when it is not an existing directory

    Parameters
    ----------
    directory : str
        Path to the directory to remove
    """
    if not os.path.isdir(directory):
        return
    shutil.rmtree(directory)
|
cstrittmatter@0
|
214
|
cstrittmatter@0
|
215
|
cstrittmatter@0
|
def saveVariableToPickle(variableToStore, pickleFile):
    """
    Serialise variableToStore with pickle into pickleFile (overwriting it)

    Parameters
    ----------
    variableToStore : object
        Any picklable object
    pickleFile : str
        Path to the file to write
    """
    with open(pickleFile, 'wb') as pickle_handle:
        pickle.dump(variableToStore, pickle_handle)
|
cstrittmatter@0
|
219
|
cstrittmatter@0
|
220
|
cstrittmatter@0
|
def extractVariableFromPickle(pickleFile):
    """
    Load and return the object stored in pickleFile

    Parameters
    ----------
    pickleFile : str
        Path to a file written with pickle

    Returns
    -------
    variable : object
        The unpickled object
    """
    # NOTE: unpickling can execute arbitrary code, only use this on files produced by a trusted run
    with open(pickleFile, 'rb') as pickle_handle:
        return pickle.load(pickle_handle)
|
cstrittmatter@0
|
225
|
cstrittmatter@0
|
226
|
cstrittmatter@0
|
def trace_unhandled_exceptions(func):
    """
    Decorator that prints the traceback of anything raised by func instead of letting it propagate

    Parameters
    ----------
    func : callable
        Function to wrap

    Returns
    -------
    wrapped_func : callable
        Function returning whatever func returns, or None when func raised
    """
    @functools.wraps(func)
    def wrapped_func(*args, **kwargs):
        try:
            # The result is handed back to the caller (it used to be silently discarded)
            return func(*args, **kwargs)
        except BaseException:
            # Deliberately catches everything, including SystemExit raised through sys.exit(), as the
            # original bare except did: the failure is reported and the caller carries on
            print('Exception in ' + func.__name__)
            traceback.print_exc()
            return None
    return wrapped_func
|
cstrittmatter@0
|
236
|
cstrittmatter@0
|
237
|
cstrittmatter@0
|
def kill_subprocess_Popen(subprocess_Popen, command):
    """
    Report that command ran out of time and kill its process

    Parameters
    ----------
    subprocess_Popen : subprocess.Popen
        Process to kill
    command : str or list
        Command being run, only used in the printed message
    """
    message = 'Command run out of time: ' + str(command)
    print(message)
    subprocess_Popen.kill()
|
cstrittmatter@0
|
241
|
cstrittmatter@0
|
242
|
cstrittmatter@0
|
def runCommandPopenCommunicate(command, shell_True, timeout_sec_None, print_comand_True):
    """
    Run a command, optionally with a time limit, and capture its output

    Parameters
    ----------
    command : str or list
        Command to run. Lists are joined with spaces and the result is tokenised with ``shlex.split``, so
        quotes inside an item are interpreted and items containing unquoted spaces are split
    shell_True : bool
        True to run the (re-joined) command through the shell
    timeout_sec_None : int, float or None
        Seconds after which the process is killed; None for no time limit
    print_comand_True : bool
        True to print the command before running it

    Returns
    -------
    run_successfully : bool
        True when the process exit code is 0
    stdout : str
        Decoded standard output
    stderr : str
        Decoded standard error
    """
    run_successfully = False
    if not isinstance(command, str):
        command = ' '.join(command)
    command = shlex.split(command)

    if print_comand_True:
        print('Running: ' + ' '.join(command))

    if shell_True:
        command = ' '.join(command)
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    else:
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Filled in by the timer callback. Replaces the Timer.isAlive() check, which no longer exists in
    # Python 3.9+ and could not reliably tell a cancelled timer from one that had fired
    killed_by_timer = []
    if timeout_sec_None is None:
        stdout, stderr = proc.communicate()
    else:
        def kill_on_timeout():
            killed_by_timer.append(True)
            kill_subprocess_Popen(proc, command)

        time_counter = Timer(timeout_sec_None, kill_on_timeout)
        time_counter.start()
        try:
            stdout, stderr = proc.communicate()
        finally:
            time_counter.cancel()
    not_killed_by_timer = not killed_by_timer

    # Undecodable bytes are replaced instead of raising UnicodeDecodeError
    stdout = stdout.decode("utf-8", "replace")
    stderr = stderr.decode("utf-8", "replace")

    if proc.returncode == 0:
        run_successfully = True
    else:
        # When the timer killed the process the command was already reported by the kill message
        if not print_comand_True and not_killed_by_timer:
            print('Running: ' + str(command))
        if len(stdout) > 0:
            print('STDOUT')
            print(stdout)
        if len(stderr) > 0:
            print('STDERR')
            print(stderr)
    return run_successfully, stdout, stderr
|
cstrittmatter@0
|
283
|
cstrittmatter@0
|
284
|
cstrittmatter@0
|
def required_length(tuple_length_options, argument_name):
    """
    Build an argparse action that only accepts specific numbers of values for an option

    Parameters
    ----------
    tuple_length_options : tuple
        Allowed numbers of values, e.g. (1, 2)
    argument_name : str
        Option name used in the error message

    Returns
    -------
    RequiredLength : class
        ``argparse.Action`` subclass to be given as ``action`` to ``add_argument``; it raises
        ``argparse.ArgumentTypeError`` when the number of values is not allowed
    """
    class RequiredLength(argparse.Action):
        def __call__(self, parser, args, values, option_string=None):
            if len(values) not in tuple_length_options:
                # argument_name comes from the enclosing function; the Action has no such attribute
                msg = 'Option {argument_name} requires one of the following number of' \
                      ' arguments: {tuple_length_options}'.format(argument_name=argument_name,
                                                                  tuple_length_options=tuple_length_options)
                raise argparse.ArgumentTypeError(msg)
            setattr(args, self.dest, values)
    return RequiredLength
|
cstrittmatter@0
|
295
|
cstrittmatter@0
|
296
|
cstrittmatter@0
|
def get_sequence_information(fasta_file, length_extra_seq):
    """
    Read a fasta file into a dictionary of sequences

    Parameters
    ----------
    fasta_file : str
        Path to the fasta file
    length_extra_seq : int
        Number of extra bases on each end of the sequences; sequences whose length minus twice this value is
        not positive are ignored (a message is printed)

    Returns
    -------
    sequence_dict : dict
        Sequence counter (1-based position in the file) as key and a dictionary with 'header' (lower case,
        without '>'), 'sequence' (upper case) and 'length' as value
    headers : dict
        Lower case headers as keys and the corresponding sequence counter as values

    Notes
    -----
    The program exits (``sys.exit``) when a header is duplicated, when a blank line is followed by more
    content, or when sequence data appears before the first header.
    """
    sequence_dict = {}
    headers = {}

    def store_record(counter, record):
        # Keep the record only if something remains after discarding the extra bases of both ends
        if record['length'] - 2 * length_extra_seq > 0:
            sequence_dict[counter] = record
            headers[record['header'].lower()] = counter
        else:
            print(record['header'] + ' sequence ignored due to length <= 0')

    # Plain text mode: Python 3 already translates newlines and the old 'U' flag is rejected by Python 3.11+
    with open(fasta_file, 'rt') as reader:
        blank_line_found = False
        sequence_counter = 0
        current_record = None
        for line in reader:
            line = line.splitlines()[0]
            if len(line) == 0:
                blank_line_found = True
                continue
            if blank_line_found:
                # Blank lines are only tolerated at the end of the file
                sys.exit('It was found a blank line between the fasta file above line ' + line)

            if line.startswith('>'):
                if current_record is not None:
                    store_record(sequence_counter, current_record)

                header = line[1:].lower()
                if header in headers:
                    sys.exit('Found duplicated sequence headers')

                sequence_counter += 1
                current_record = {'header': header, 'sequence': '', 'length': 0}
            else:
                if current_record is None:
                    sys.exit('Found sequence data before the first fasta header in line ' + line)
                current_record['sequence'] += line.upper()
                current_record['length'] += len(line)

        # The last record is only complete once the file ends
        if current_record is not None:
            store_record(sequence_counter, current_record)

    return sequence_dict, headers
|
cstrittmatter@0
|
340
|
cstrittmatter@0
|
341
|
cstrittmatter@0
|
def simplify_sequence_dict(sequence_dict):
    """
    Re-key the sequences by their header

    Parameters
    ----------
    sequence_dict : dict
        Dictionary whose values are dictionaries containing a 'header' entry. The inner dictionaries are
        modified in place: their 'header' entry is removed

    Returns
    -------
    simple_sequence_dict : dict
        Headers as keys and the same inner dictionaries (now without 'header') as values
    """
    simple_sequence_dict = {}
    for record in list(sequence_dict.values()):
        header = record.pop('header')
        simple_sequence_dict[header] = record
    return simple_sequence_dict
|
cstrittmatter@0
|
348
|
cstrittmatter@0
|
349
|
cstrittmatter@0
|
def chunkstring(string, length):
    """
    Split string into consecutive pieces of at most length characters

    Parameters
    ----------
    string : str
        Text to split
    length : int
        Size of each piece (the last one may be shorter)

    Returns
    -------
    generator
        Yields the pieces in order
    """
    starts = range(0, len(string), length)
    return (string[start:start + length] for start in starts)
|
cstrittmatter@0
|
352
|
cstrittmatter@0
|
353
|
cstrittmatter@0
|
def clean_headers_sequences(sequence_dict):
    """
    Replace problematic characters in the sequence headers with underscores

    Parameters
    ----------
    sequence_dict : dict
        Dictionary whose values are dictionaries containing a 'header' entry; headers are updated in place

    Returns
    -------
    sequence_dict : dict
        The same dictionary received, with the cleaned headers
    new_headers : dict
        Lower case (cleaned) headers as keys and the corresponding sequence_dict key as values
    """
    problematic_characters = ["|", " ", ",", ".", "(", ")", "'", "/", ":"]

    def sanitise(header):
        # Apply the replacements one character after the other
        return functools.reduce(lambda text, character: text.replace(character, '_'),
                                problematic_characters, header)

    headers_changed = False
    new_headers = {}
    for key, record in sequence_dict.items():
        cleaned = sanitise(record['header'])
        if cleaned != record['header']:
            record['header'] = cleaned
            headers_changed = True
        new_headers[record['header'].lower()] = key

    if headers_changed:
        print('At least one of the those characters was found. Replacing those with _' + '\n')

    return sequence_dict, new_headers
|