Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/Bio/cpairwise2module.c @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 /* Copyright 2002 by Jeffrey Chang. | |
2 * Copyright 2016, 2019 by Markus Piotrowski. | |
3 * All rights reserved. | |
4 * | |
5 * This file is part of the Biopython distribution and governed by your | |
6 * choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
7 * Please see the LICENSE file that should have been included as part of this | |
8 * package. | |
9 * | |
10 * cpairwise2module.c | |
11 * Created 30 Sep 2001 | |
12 * | |
13 * Optimized C routines that complement pairwise2.py. | |
14 */ | |
15 | |
16 #include "Python.h" | |
17 | |
18 | |
19 #define _PRECISION 1000 | |
20 #define rint(x) (int)((x)*_PRECISION+0.5) | |
21 | |
22 /* Functions in this module. */ | |
23 | |
24 static double calc_affine_penalty(int length, double open, double extend, | |
25 int penalize_extend_when_opening) | |
26 { | |
27 double penalty; | |
28 | |
29 if(length <= 0) | |
30 return 0.0; | |
31 penalty = open + extend * length; | |
32 if(!penalize_extend_when_opening) | |
33 penalty -= extend; | |
34 return penalty; | |
35 } | |
36 | |
37 static double _get_match_score(PyObject *py_sequenceA, PyObject *py_sequenceB, | |
38 PyObject *py_match_fn, int i, int j, | |
39 char *sequenceA, char *sequenceB, | |
40 int use_sequence_cstring, | |
41 double match, double mismatch, | |
42 int use_match_mismatch_scores) | |
43 { | |
44 PyObject *py_A=NULL, *py_B=NULL; | |
45 PyObject *py_arglist=NULL, *py_result=NULL; | |
46 double score = 0; | |
47 | |
48 if(use_sequence_cstring && use_match_mismatch_scores) { | |
49 score = (sequenceA[i] == sequenceB[j]) ? match : mismatch; | |
50 return score; | |
51 } | |
52 /* Calculate the match score. */ | |
53 if(!(py_A = PySequence_GetItem(py_sequenceA, i))) | |
54 goto _get_match_score_cleanup; | |
55 if(!(py_B = PySequence_GetItem(py_sequenceB, j))) | |
56 goto _get_match_score_cleanup; | |
57 if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B))) | |
58 goto _get_match_score_cleanup; | |
59 | |
60 if(!(py_result = PyEval_CallObject(py_match_fn, py_arglist))) | |
61 goto _get_match_score_cleanup; | |
62 score = PyFloat_AsDouble(py_result); | |
63 | |
64 _get_match_score_cleanup: | |
65 if(py_A) { | |
66 Py_DECREF(py_A); | |
67 } | |
68 if(py_B) { | |
69 Py_DECREF(py_B); | |
70 } | |
71 if(py_arglist) { | |
72 Py_DECREF(py_arglist); | |
73 } | |
74 if(py_result) { | |
75 Py_DECREF(py_result); | |
76 } | |
77 return score; | |
78 } | |
79 | |
80 #if PY_MAJOR_VERSION >= 3 | |
81 static PyObject* _create_bytes_object(PyObject* o) | |
82 { | |
83 PyObject* b; | |
84 if (PyBytes_Check(o)) { | |
85 return o; | |
86 } | |
87 if (!PyUnicode_Check(o)) { | |
88 return NULL; | |
89 } | |
90 b = PyUnicode_AsASCIIString(o); | |
91 if (!b) { | |
92 PyErr_Clear(); | |
93 return NULL; | |
94 } | |
95 return b; | |
96 } | |
97 #endif | |
98 | |
99 /* This function is a more-or-less straightforward port of the | |
100 * equivalent function in pairwise2. Please see there for algorithm | |
101 * documentation. | |
102 */ | |
103 static PyObject *cpairwise2__make_score_matrix_fast(PyObject *self, | |
104 PyObject *args) | |
105 { | |
106 int i; | |
107 int row, col; | |
108 PyObject *py_sequenceA, *py_sequenceB, *py_match_fn; | |
109 #if PY_MAJOR_VERSION >= 3 | |
110 PyObject *py_bytesA, *py_bytesB; | |
111 #endif | |
112 char *sequenceA=NULL, *sequenceB=NULL; | |
113 int use_sequence_cstring; | |
114 double open_A, extend_A, open_B, extend_B; | |
115 int penalize_extend_when_opening, penalize_end_gaps_A, penalize_end_gaps_B; | |
116 int align_globally, score_only; | |
117 | |
118 PyObject *py_match=NULL, *py_mismatch=NULL; | |
119 double first_A_gap, first_B_gap; | |
120 double match, mismatch; | |
121 double score; | |
122 double best_score = 0; | |
123 double local_max_score = 0; | |
124 int use_match_mismatch_scores; | |
125 int lenA, lenB; | |
126 double *score_matrix = NULL; | |
127 unsigned char *trace_matrix = NULL; | |
128 PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL; | |
129 | |
130 double *col_cache_score = NULL; | |
131 PyObject *py_retval = NULL; | |
132 | |
133 if(!PyArg_ParseTuple(args, "OOOddddi(ii)ii", &py_sequenceA, &py_sequenceB, | |
134 &py_match_fn, &open_A, &extend_A, &open_B, &extend_B, | |
135 &penalize_extend_when_opening, | |
136 &penalize_end_gaps_A, &penalize_end_gaps_B, | |
137 &align_globally, &score_only)) | |
138 return NULL; | |
139 if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) { | |
140 PyErr_SetString(PyExc_TypeError, | |
141 "py_sequenceA and py_sequenceB should be sequences."); | |
142 return NULL; | |
143 } | |
144 | |
145 /* Optimize for the common case. Check to see if py_sequenceA and | |
146 py_sequenceB are strings. If they are, use the c string | |
147 representation. */ | |
148 #if PY_MAJOR_VERSION < 3 | |
149 use_sequence_cstring = 0; | |
150 if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) { | |
151 sequenceA = PyString_AS_STRING(py_sequenceA); | |
152 sequenceB = PyString_AS_STRING(py_sequenceB); | |
153 use_sequence_cstring = 1; | |
154 } | |
155 #else | |
156 py_bytesA = _create_bytes_object(py_sequenceA); | |
157 py_bytesB = _create_bytes_object(py_sequenceB); | |
158 if (py_bytesA && py_bytesB) { | |
159 sequenceA = PyBytes_AS_STRING(py_bytesA); | |
160 sequenceB = PyBytes_AS_STRING(py_bytesB); | |
161 use_sequence_cstring = 1; | |
162 } | |
163 else { | |
164 Py_XDECREF(py_bytesA); | |
165 Py_XDECREF(py_bytesB); | |
166 use_sequence_cstring = 0; | |
167 } | |
168 #endif | |
169 | |
170 if(!PyCallable_Check(py_match_fn)) { | |
171 PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable."); | |
172 return NULL; | |
173 } | |
174 /* Optimize for the common case. Check to see if py_match_fn is | |
175 an identity_match. If so, pull out the match and mismatch | |
176 member variables and calculate the scores myself. */ | |
177 match = mismatch = 0; | |
178 use_match_mismatch_scores = 0; | |
179 if(!(py_match = PyObject_GetAttrString(py_match_fn, "match"))) | |
180 goto cleanup_after_py_match_fn; | |
181 match = PyFloat_AsDouble(py_match); | |
182 if(match==-1.0 && PyErr_Occurred()) | |
183 goto cleanup_after_py_match_fn; | |
184 if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch"))) | |
185 goto cleanup_after_py_match_fn; | |
186 mismatch = PyFloat_AsDouble(py_mismatch); | |
187 if(mismatch==-1.0 && PyErr_Occurred()) | |
188 goto cleanup_after_py_match_fn; | |
189 use_match_mismatch_scores = 1; | |
190 | |
191 cleanup_after_py_match_fn: | |
192 if(PyErr_Occurred()) | |
193 PyErr_Clear(); | |
194 if(py_match) { | |
195 Py_DECREF(py_match); | |
196 } | |
197 if(py_mismatch) { | |
198 Py_DECREF(py_mismatch); | |
199 } | |
200 /* Cache some commonly used gap penalties */ | |
201 first_A_gap = calc_affine_penalty(1, open_A, extend_A, | |
202 penalize_extend_when_opening); | |
203 first_B_gap = calc_affine_penalty(1, open_B, extend_B, | |
204 penalize_extend_when_opening); | |
205 | |
206 /* Allocate matrices for storing the results and initialize first row and col. */ | |
207 lenA = PySequence_Length(py_sequenceA); | |
208 lenB = PySequence_Length(py_sequenceB); | |
209 score_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*score_matrix)); | |
210 if(!score_matrix) { | |
211 PyErr_SetString(PyExc_MemoryError, "Out of memory"); | |
212 goto _cleanup_make_score_matrix_fast; | |
213 } | |
214 for(i=0; i<(lenB+1); i++) | |
215 score_matrix[i] = 0; | |
216 for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1)) | |
217 score_matrix[i] = 0; | |
218 /* If we only want the score, we don't need the trace matrix. */ | |
219 if (!score_only){ | |
220 trace_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*trace_matrix)); | |
221 if(!trace_matrix) { | |
222 PyErr_SetString(PyExc_MemoryError, "Out of memory"); | |
223 goto _cleanup_make_score_matrix_fast; | |
224 } | |
225 for(i=0; i<(lenB+1); i++) | |
226 trace_matrix[i] = 0; | |
227 for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1)) | |
228 trace_matrix[i] = 0; | |
229 } | |
230 else | |
231 trace_matrix = malloc(1); | |
232 | |
233 /* Initialize the first row and col of the score matrix. */ | |
234 for(i=0; i<=lenA; i++) { | |
235 if(penalize_end_gaps_B) | |
236 score = calc_affine_penalty(i, open_B, extend_B, | |
237 penalize_extend_when_opening); | |
238 else | |
239 score = 0; | |
240 score_matrix[i*(lenB+1)] = score; | |
241 } | |
242 for(i=0; i<=lenB; i++) { | |
243 if(penalize_end_gaps_A) | |
244 score = calc_affine_penalty(i, open_A, extend_A, | |
245 penalize_extend_when_opening); | |
246 else | |
247 score = 0; | |
248 score_matrix[i] = score; | |
249 } | |
250 | |
251 /* Now initialize the col cache. */ | |
252 col_cache_score = malloc((lenB+1)*sizeof(*col_cache_score)); | |
253 memset((void *)col_cache_score, 0, (lenB+1)*sizeof(*col_cache_score)); | |
254 for(i=0; i<=lenB; i++) { | |
255 col_cache_score[i] = calc_affine_penalty(i, (2*open_B), extend_B, | |
256 penalize_extend_when_opening); | |
257 } | |
258 | |
259 /* Fill in the score matrix. The row cache is calculated on the fly.*/ | |
260 for(row=1; row<=lenA; row++) { | |
261 double row_cache_score = calc_affine_penalty(row, (2*open_A), extend_A, | |
262 penalize_extend_when_opening); | |
263 for(col=1; col<=lenB; col++) { | |
264 double match_score, nogap_score; | |
265 double row_open, row_extend, col_open, col_extend; | |
266 int best_score_rint, row_score_rint, col_score_rint; | |
267 unsigned char row_trace_score, col_trace_score, trace_score; | |
268 | |
269 /* Calculate the best score. */ | |
270 match_score = _get_match_score(py_sequenceA, py_sequenceB, | |
271 py_match_fn, row-1, col-1, | |
272 sequenceA, sequenceB, | |
273 use_sequence_cstring, | |
274 match, mismatch, | |
275 use_match_mismatch_scores); | |
276 if(match_score==-1.0 && PyErr_Occurred()) | |
277 goto _cleanup_make_score_matrix_fast; | |
278 nogap_score = score_matrix[(row-1)*(lenB+1)+col-1] + match_score; | |
279 | |
280 if (!penalize_end_gaps_A && row==lenA) { | |
281 row_open = score_matrix[(row)*(lenB+1)+col-1]; | |
282 row_extend = row_cache_score; | |
283 } | |
284 else { | |
285 row_open = score_matrix[(row)*(lenB+1)+col-1] + first_A_gap; | |
286 row_extend = row_cache_score + extend_A; | |
287 } | |
288 row_cache_score = (row_open > row_extend) ? row_open : row_extend; | |
289 | |
290 if (!penalize_end_gaps_B && col==lenB){ | |
291 col_open = score_matrix[(row-1)*(lenB+1)+col]; | |
292 col_extend = col_cache_score[col]; | |
293 } | |
294 else { | |
295 col_open = score_matrix[(row-1)*(lenB+1)+col] + first_B_gap; | |
296 col_extend = col_cache_score[col] + extend_B; | |
297 } | |
298 col_cache_score[col] = (col_open > col_extend) ? col_open : col_extend; | |
299 | |
300 best_score = (row_cache_score > col_cache_score[col]) ? row_cache_score : col_cache_score[col]; | |
301 if(nogap_score > best_score) | |
302 best_score = nogap_score; | |
303 | |
304 if (best_score > local_max_score) | |
305 local_max_score = best_score; | |
306 | |
307 if(!align_globally && best_score < 0) | |
308 score_matrix[row*(lenB+1)+col] = 0; | |
309 else | |
310 score_matrix[row*(lenB+1)+col] = best_score; | |
311 | |
312 if (!score_only) { | |
313 row_score_rint = rint(row_cache_score); | |
314 col_score_rint = rint(col_cache_score[col]); | |
315 row_trace_score = 0; | |
316 col_trace_score = 0; | |
317 if (rint(row_open) == row_score_rint) | |
318 row_trace_score = row_trace_score|1; | |
319 if (rint(row_extend) == row_score_rint) | |
320 row_trace_score = row_trace_score|8; | |
321 if (rint(col_open) == col_score_rint) | |
322 col_trace_score = col_trace_score|4; | |
323 if (rint(col_extend) == col_score_rint) | |
324 col_trace_score = col_trace_score|16; | |
325 | |
326 trace_score = 0; | |
327 best_score_rint = rint(best_score); | |
328 if (rint(nogap_score) == best_score_rint) | |
329 trace_score = trace_score|2; | |
330 if (row_score_rint == best_score_rint) | |
331 trace_score += row_trace_score; | |
332 if (col_score_rint == best_score_rint) | |
333 trace_score += col_trace_score; | |
334 trace_matrix[row*(lenB+1)+col] = trace_score; | |
335 } | |
336 } | |
337 } | |
338 | |
339 if (!align_globally) | |
340 best_score = local_max_score; | |
341 | |
342 /* Save the score and traceback matrices into real python objects. */ | |
343 if(!score_only) { | |
344 if(!(py_score_matrix = PyList_New(lenA+1))) | |
345 goto _cleanup_make_score_matrix_fast; | |
346 if(!(py_trace_matrix = PyList_New(lenA+1))) | |
347 goto _cleanup_make_score_matrix_fast; | |
348 | |
349 for(row=0; row<=lenA; row++) { | |
350 PyObject *py_score_row, *py_trace_row; | |
351 if(!(py_score_row = PyList_New(lenB+1))) | |
352 goto _cleanup_make_score_matrix_fast; | |
353 PyList_SET_ITEM(py_score_matrix, row, py_score_row); | |
354 if(!(py_trace_row = PyList_New(lenB+1))) | |
355 goto _cleanup_make_score_matrix_fast; | |
356 PyList_SET_ITEM(py_trace_matrix, row, py_trace_row); | |
357 | |
358 for(col=0; col<=lenB; col++) { | |
359 PyObject *py_score, *py_trace; | |
360 int offset = row*(lenB+1) + col; | |
361 | |
362 /* Set py_score_matrix[row][col] to the score. */ | |
363 if(!(py_score = PyFloat_FromDouble(score_matrix[offset]))) | |
364 goto _cleanup_make_score_matrix_fast; | |
365 PyList_SET_ITEM(py_score_row, col, py_score); | |
366 | |
367 /* Set py_trace_matrix[row][col] to a list of indexes. On | |
368 the edges of the matrix (row or column is 0), the | |
369 matrix should be [None]. */ | |
370 if(!row || !col) { | |
371 if(!(py_trace = Py_BuildValue("B", 1))) | |
372 goto _cleanup_make_score_matrix_fast; | |
373 Py_INCREF(Py_None); | |
374 PyList_SET_ITEM(py_trace_row, col, Py_None); | |
375 } | |
376 else { | |
377 if(!(py_trace = Py_BuildValue("B", trace_matrix[offset]))) | |
378 goto _cleanup_make_score_matrix_fast; | |
379 PyList_SET_ITEM(py_trace_row, col, py_trace); | |
380 | |
381 } | |
382 } | |
383 } | |
384 } | |
385 else { | |
386 py_score_matrix = PyList_New(1); | |
387 py_trace_matrix = PyList_New(1); | |
388 } | |
389 py_retval = Py_BuildValue("(OOd)", py_score_matrix, py_trace_matrix, best_score); | |
390 | |
391 _cleanup_make_score_matrix_fast: | |
392 if(score_matrix) | |
393 free(score_matrix); | |
394 if(trace_matrix) | |
395 free(trace_matrix); | |
396 if(col_cache_score) | |
397 free(col_cache_score); | |
398 if(py_score_matrix){ | |
399 Py_DECREF(py_score_matrix); | |
400 } | |
401 if(py_trace_matrix){ | |
402 Py_DECREF(py_trace_matrix); | |
403 } | |
404 | |
405 #if PY_MAJOR_VERSION >= 3 | |
406 if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA); | |
407 if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB); | |
408 #endif | |
409 | |
410 return py_retval; | |
411 } | |
412 | |
413 static PyObject *cpairwise2_rint(PyObject *self, PyObject *args, | |
414 PyObject *keywds) | |
415 { | |
416 double x; | |
417 int precision = _PRECISION; | |
418 int rint_x; | |
419 | |
420 static char *kwlist[] = {"x", "precision", NULL}; | |
421 | |
422 if(!PyArg_ParseTupleAndKeywords(args, keywds, "d|l", kwlist, | |
423 &x, &precision)) | |
424 return NULL; | |
425 rint_x = (int)(x * precision + 0.5); | |
426 #if PY_MAJOR_VERSION >= 3 | |
427 return PyLong_FromLong((long)rint_x); | |
428 #else | |
429 return PyInt_FromLong((long)rint_x); | |
430 #endif | |
431 } | |
432 | |
433 /* Module definition stuff */ | |
434 | |
435 static PyMethodDef cpairwise2Methods[] = { | |
436 {"_make_score_matrix_fast", | |
437 (PyCFunction)cpairwise2__make_score_matrix_fast, METH_VARARGS, ""}, | |
438 {"rint", (PyCFunction)cpairwise2_rint, METH_VARARGS|METH_KEYWORDS, ""}, | |
439 {NULL, NULL, 0, NULL} | |
440 }; | |
441 | |
442 static char cpairwise2__doc__[] = | |
443 "Optimized C routines that complement pairwise2.py. These are called from within pairwise2.py.\n\ | |
444 \n\ | |
445 "; | |
446 | |
447 #if PY_MAJOR_VERSION >= 3 | |
448 | |
449 static struct PyModuleDef moduledef = { | |
450 PyModuleDef_HEAD_INIT, | |
451 "cpairwise2", | |
452 cpairwise2__doc__, | |
453 -1, | |
454 cpairwise2Methods, | |
455 NULL, | |
456 NULL, | |
457 NULL, | |
458 NULL | |
459 }; | |
460 | |
461 PyObject * | |
462 PyInit_cpairwise2(void) | |
463 | |
464 #else | |
465 | |
466 void | |
467 /* for Windows: _declspec(dllexport) initcpairwise2(void) */ | |
468 initcpairwise2(void) | |
469 #endif | |
470 | |
471 { | |
472 #if PY_MAJOR_VERSION >= 3 | |
473 PyObject* module = PyModule_Create(&moduledef); | |
474 if (module==NULL) return NULL; | |
475 return module; | |
476 #else | |
477 (void) Py_InitModule3("cpairwise2", cpairwise2Methods, cpairwise2__doc__); | |
478 #endif | |
479 } |