comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/Bio/cpairwise2module.c @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 /* Copyright 2002 by Jeffrey Chang.
2 * Copyright 2016, 2019 by Markus Piotrowski.
3 * All rights reserved.
4 *
5 * This file is part of the Biopython distribution and governed by your
6 * choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
7 * Please see the LICENSE file that should have been included as part of this
8 * package.
9 *
10 * cpairwise2module.c
11 * Created 30 Sep 2001
12 *
13 * Optimized C routines that complement pairwise2.py.
14 */
15
16 #include "Python.h"
17
18
19 #define _PRECISION 1000
20 #define rint(x) (int)((x)*_PRECISION+0.5)
21
22 /* Functions in this module. */
23
24 static double calc_affine_penalty(int length, double open, double extend,
25 int penalize_extend_when_opening)
26 {
27 double penalty;
28
29 if(length <= 0)
30 return 0.0;
31 penalty = open + extend * length;
32 if(!penalize_extend_when_opening)
33 penalty -= extend;
34 return penalty;
35 }
36
37 static double _get_match_score(PyObject *py_sequenceA, PyObject *py_sequenceB,
38 PyObject *py_match_fn, int i, int j,
39 char *sequenceA, char *sequenceB,
40 int use_sequence_cstring,
41 double match, double mismatch,
42 int use_match_mismatch_scores)
43 {
44 PyObject *py_A=NULL, *py_B=NULL;
45 PyObject *py_arglist=NULL, *py_result=NULL;
46 double score = 0;
47
48 if(use_sequence_cstring && use_match_mismatch_scores) {
49 score = (sequenceA[i] == sequenceB[j]) ? match : mismatch;
50 return score;
51 }
52 /* Calculate the match score. */
53 if(!(py_A = PySequence_GetItem(py_sequenceA, i)))
54 goto _get_match_score_cleanup;
55 if(!(py_B = PySequence_GetItem(py_sequenceB, j)))
56 goto _get_match_score_cleanup;
57 if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B)))
58 goto _get_match_score_cleanup;
59
60 if(!(py_result = PyEval_CallObject(py_match_fn, py_arglist)))
61 goto _get_match_score_cleanup;
62 score = PyFloat_AsDouble(py_result);
63
64 _get_match_score_cleanup:
65 if(py_A) {
66 Py_DECREF(py_A);
67 }
68 if(py_B) {
69 Py_DECREF(py_B);
70 }
71 if(py_arglist) {
72 Py_DECREF(py_arglist);
73 }
74 if(py_result) {
75 Py_DECREF(py_result);
76 }
77 return score;
78 }
79
80 #if PY_MAJOR_VERSION >= 3
81 static PyObject* _create_bytes_object(PyObject* o)
82 {
83 PyObject* b;
84 if (PyBytes_Check(o)) {
85 return o;
86 }
87 if (!PyUnicode_Check(o)) {
88 return NULL;
89 }
90 b = PyUnicode_AsASCIIString(o);
91 if (!b) {
92 PyErr_Clear();
93 return NULL;
94 }
95 return b;
96 }
97 #endif
98
99 /* This function is a more-or-less straightforward port of the
100 * equivalent function in pairwise2. Please see there for algorithm
101 * documentation.
102 */
103 static PyObject *cpairwise2__make_score_matrix_fast(PyObject *self,
104 PyObject *args)
105 {
106 int i;
107 int row, col;
108 PyObject *py_sequenceA, *py_sequenceB, *py_match_fn;
109 #if PY_MAJOR_VERSION >= 3
110 PyObject *py_bytesA, *py_bytesB;
111 #endif
112 char *sequenceA=NULL, *sequenceB=NULL;
113 int use_sequence_cstring;
114 double open_A, extend_A, open_B, extend_B;
115 int penalize_extend_when_opening, penalize_end_gaps_A, penalize_end_gaps_B;
116 int align_globally, score_only;
117
118 PyObject *py_match=NULL, *py_mismatch=NULL;
119 double first_A_gap, first_B_gap;
120 double match, mismatch;
121 double score;
122 double best_score = 0;
123 double local_max_score = 0;
124 int use_match_mismatch_scores;
125 int lenA, lenB;
126 double *score_matrix = NULL;
127 unsigned char *trace_matrix = NULL;
128 PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL;
129
130 double *col_cache_score = NULL;
131 PyObject *py_retval = NULL;
132
133 if(!PyArg_ParseTuple(args, "OOOddddi(ii)ii", &py_sequenceA, &py_sequenceB,
134 &py_match_fn, &open_A, &extend_A, &open_B, &extend_B,
135 &penalize_extend_when_opening,
136 &penalize_end_gaps_A, &penalize_end_gaps_B,
137 &align_globally, &score_only))
138 return NULL;
139 if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) {
140 PyErr_SetString(PyExc_TypeError,
141 "py_sequenceA and py_sequenceB should be sequences.");
142 return NULL;
143 }
144
145 /* Optimize for the common case. Check to see if py_sequenceA and
146 py_sequenceB are strings. If they are, use the c string
147 representation. */
148 #if PY_MAJOR_VERSION < 3
149 use_sequence_cstring = 0;
150 if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) {
151 sequenceA = PyString_AS_STRING(py_sequenceA);
152 sequenceB = PyString_AS_STRING(py_sequenceB);
153 use_sequence_cstring = 1;
154 }
155 #else
156 py_bytesA = _create_bytes_object(py_sequenceA);
157 py_bytesB = _create_bytes_object(py_sequenceB);
158 if (py_bytesA && py_bytesB) {
159 sequenceA = PyBytes_AS_STRING(py_bytesA);
160 sequenceB = PyBytes_AS_STRING(py_bytesB);
161 use_sequence_cstring = 1;
162 }
163 else {
164 Py_XDECREF(py_bytesA);
165 Py_XDECREF(py_bytesB);
166 use_sequence_cstring = 0;
167 }
168 #endif
169
170 if(!PyCallable_Check(py_match_fn)) {
171 PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable.");
172 return NULL;
173 }
174 /* Optimize for the common case. Check to see if py_match_fn is
175 an identity_match. If so, pull out the match and mismatch
176 member variables and calculate the scores myself. */
177 match = mismatch = 0;
178 use_match_mismatch_scores = 0;
179 if(!(py_match = PyObject_GetAttrString(py_match_fn, "match")))
180 goto cleanup_after_py_match_fn;
181 match = PyFloat_AsDouble(py_match);
182 if(match==-1.0 && PyErr_Occurred())
183 goto cleanup_after_py_match_fn;
184 if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch")))
185 goto cleanup_after_py_match_fn;
186 mismatch = PyFloat_AsDouble(py_mismatch);
187 if(mismatch==-1.0 && PyErr_Occurred())
188 goto cleanup_after_py_match_fn;
189 use_match_mismatch_scores = 1;
190
191 cleanup_after_py_match_fn:
192 if(PyErr_Occurred())
193 PyErr_Clear();
194 if(py_match) {
195 Py_DECREF(py_match);
196 }
197 if(py_mismatch) {
198 Py_DECREF(py_mismatch);
199 }
200 /* Cache some commonly used gap penalties */
201 first_A_gap = calc_affine_penalty(1, open_A, extend_A,
202 penalize_extend_when_opening);
203 first_B_gap = calc_affine_penalty(1, open_B, extend_B,
204 penalize_extend_when_opening);
205
206 /* Allocate matrices for storing the results and initialize first row and col. */
207 lenA = PySequence_Length(py_sequenceA);
208 lenB = PySequence_Length(py_sequenceB);
209 score_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*score_matrix));
210 if(!score_matrix) {
211 PyErr_SetString(PyExc_MemoryError, "Out of memory");
212 goto _cleanup_make_score_matrix_fast;
213 }
214 for(i=0; i<(lenB+1); i++)
215 score_matrix[i] = 0;
216 for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
217 score_matrix[i] = 0;
218 /* If we only want the score, we don't need the trace matrix. */
219 if (!score_only){
220 trace_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*trace_matrix));
221 if(!trace_matrix) {
222 PyErr_SetString(PyExc_MemoryError, "Out of memory");
223 goto _cleanup_make_score_matrix_fast;
224 }
225 for(i=0; i<(lenB+1); i++)
226 trace_matrix[i] = 0;
227 for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
228 trace_matrix[i] = 0;
229 }
230 else
231 trace_matrix = malloc(1);
232
233 /* Initialize the first row and col of the score matrix. */
234 for(i=0; i<=lenA; i++) {
235 if(penalize_end_gaps_B)
236 score = calc_affine_penalty(i, open_B, extend_B,
237 penalize_extend_when_opening);
238 else
239 score = 0;
240 score_matrix[i*(lenB+1)] = score;
241 }
242 for(i=0; i<=lenB; i++) {
243 if(penalize_end_gaps_A)
244 score = calc_affine_penalty(i, open_A, extend_A,
245 penalize_extend_when_opening);
246 else
247 score = 0;
248 score_matrix[i] = score;
249 }
250
251 /* Now initialize the col cache. */
252 col_cache_score = malloc((lenB+1)*sizeof(*col_cache_score));
253 memset((void *)col_cache_score, 0, (lenB+1)*sizeof(*col_cache_score));
254 for(i=0; i<=lenB; i++) {
255 col_cache_score[i] = calc_affine_penalty(i, (2*open_B), extend_B,
256 penalize_extend_when_opening);
257 }
258
259 /* Fill in the score matrix. The row cache is calculated on the fly.*/
260 for(row=1; row<=lenA; row++) {
261 double row_cache_score = calc_affine_penalty(row, (2*open_A), extend_A,
262 penalize_extend_when_opening);
263 for(col=1; col<=lenB; col++) {
264 double match_score, nogap_score;
265 double row_open, row_extend, col_open, col_extend;
266 int best_score_rint, row_score_rint, col_score_rint;
267 unsigned char row_trace_score, col_trace_score, trace_score;
268
269 /* Calculate the best score. */
270 match_score = _get_match_score(py_sequenceA, py_sequenceB,
271 py_match_fn, row-1, col-1,
272 sequenceA, sequenceB,
273 use_sequence_cstring,
274 match, mismatch,
275 use_match_mismatch_scores);
276 if(match_score==-1.0 && PyErr_Occurred())
277 goto _cleanup_make_score_matrix_fast;
278 nogap_score = score_matrix[(row-1)*(lenB+1)+col-1] + match_score;
279
280 if (!penalize_end_gaps_A && row==lenA) {
281 row_open = score_matrix[(row)*(lenB+1)+col-1];
282 row_extend = row_cache_score;
283 }
284 else {
285 row_open = score_matrix[(row)*(lenB+1)+col-1] + first_A_gap;
286 row_extend = row_cache_score + extend_A;
287 }
288 row_cache_score = (row_open > row_extend) ? row_open : row_extend;
289
290 if (!penalize_end_gaps_B && col==lenB){
291 col_open = score_matrix[(row-1)*(lenB+1)+col];
292 col_extend = col_cache_score[col];
293 }
294 else {
295 col_open = score_matrix[(row-1)*(lenB+1)+col] + first_B_gap;
296 col_extend = col_cache_score[col] + extend_B;
297 }
298 col_cache_score[col] = (col_open > col_extend) ? col_open : col_extend;
299
300 best_score = (row_cache_score > col_cache_score[col]) ? row_cache_score : col_cache_score[col];
301 if(nogap_score > best_score)
302 best_score = nogap_score;
303
304 if (best_score > local_max_score)
305 local_max_score = best_score;
306
307 if(!align_globally && best_score < 0)
308 score_matrix[row*(lenB+1)+col] = 0;
309 else
310 score_matrix[row*(lenB+1)+col] = best_score;
311
312 if (!score_only) {
313 row_score_rint = rint(row_cache_score);
314 col_score_rint = rint(col_cache_score[col]);
315 row_trace_score = 0;
316 col_trace_score = 0;
317 if (rint(row_open) == row_score_rint)
318 row_trace_score = row_trace_score|1;
319 if (rint(row_extend) == row_score_rint)
320 row_trace_score = row_trace_score|8;
321 if (rint(col_open) == col_score_rint)
322 col_trace_score = col_trace_score|4;
323 if (rint(col_extend) == col_score_rint)
324 col_trace_score = col_trace_score|16;
325
326 trace_score = 0;
327 best_score_rint = rint(best_score);
328 if (rint(nogap_score) == best_score_rint)
329 trace_score = trace_score|2;
330 if (row_score_rint == best_score_rint)
331 trace_score += row_trace_score;
332 if (col_score_rint == best_score_rint)
333 trace_score += col_trace_score;
334 trace_matrix[row*(lenB+1)+col] = trace_score;
335 }
336 }
337 }
338
339 if (!align_globally)
340 best_score = local_max_score;
341
342 /* Save the score and traceback matrices into real python objects. */
343 if(!score_only) {
344 if(!(py_score_matrix = PyList_New(lenA+1)))
345 goto _cleanup_make_score_matrix_fast;
346 if(!(py_trace_matrix = PyList_New(lenA+1)))
347 goto _cleanup_make_score_matrix_fast;
348
349 for(row=0; row<=lenA; row++) {
350 PyObject *py_score_row, *py_trace_row;
351 if(!(py_score_row = PyList_New(lenB+1)))
352 goto _cleanup_make_score_matrix_fast;
353 PyList_SET_ITEM(py_score_matrix, row, py_score_row);
354 if(!(py_trace_row = PyList_New(lenB+1)))
355 goto _cleanup_make_score_matrix_fast;
356 PyList_SET_ITEM(py_trace_matrix, row, py_trace_row);
357
358 for(col=0; col<=lenB; col++) {
359 PyObject *py_score, *py_trace;
360 int offset = row*(lenB+1) + col;
361
362 /* Set py_score_matrix[row][col] to the score. */
363 if(!(py_score = PyFloat_FromDouble(score_matrix[offset])))
364 goto _cleanup_make_score_matrix_fast;
365 PyList_SET_ITEM(py_score_row, col, py_score);
366
367 /* Set py_trace_matrix[row][col] to a list of indexes. On
368 the edges of the matrix (row or column is 0), the
369 matrix should be [None]. */
370 if(!row || !col) {
371 if(!(py_trace = Py_BuildValue("B", 1)))
372 goto _cleanup_make_score_matrix_fast;
373 Py_INCREF(Py_None);
374 PyList_SET_ITEM(py_trace_row, col, Py_None);
375 }
376 else {
377 if(!(py_trace = Py_BuildValue("B", trace_matrix[offset])))
378 goto _cleanup_make_score_matrix_fast;
379 PyList_SET_ITEM(py_trace_row, col, py_trace);
380
381 }
382 }
383 }
384 }
385 else {
386 py_score_matrix = PyList_New(1);
387 py_trace_matrix = PyList_New(1);
388 }
389 py_retval = Py_BuildValue("(OOd)", py_score_matrix, py_trace_matrix, best_score);
390
391 _cleanup_make_score_matrix_fast:
392 if(score_matrix)
393 free(score_matrix);
394 if(trace_matrix)
395 free(trace_matrix);
396 if(col_cache_score)
397 free(col_cache_score);
398 if(py_score_matrix){
399 Py_DECREF(py_score_matrix);
400 }
401 if(py_trace_matrix){
402 Py_DECREF(py_trace_matrix);
403 }
404
405 #if PY_MAJOR_VERSION >= 3
406 if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA);
407 if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB);
408 #endif
409
410 return py_retval;
411 }
412
413 static PyObject *cpairwise2_rint(PyObject *self, PyObject *args,
414 PyObject *keywds)
415 {
416 double x;
417 int precision = _PRECISION;
418 int rint_x;
419
420 static char *kwlist[] = {"x", "precision", NULL};
421
422 if(!PyArg_ParseTupleAndKeywords(args, keywds, "d|l", kwlist,
423 &x, &precision))
424 return NULL;
425 rint_x = (int)(x * precision + 0.5);
426 #if PY_MAJOR_VERSION >= 3
427 return PyLong_FromLong((long)rint_x);
428 #else
429 return PyInt_FromLong((long)rint_x);
430 #endif
431 }
432
433 /* Module definition stuff */
434
435 static PyMethodDef cpairwise2Methods[] = {
436 {"_make_score_matrix_fast",
437 (PyCFunction)cpairwise2__make_score_matrix_fast, METH_VARARGS, ""},
438 {"rint", (PyCFunction)cpairwise2_rint, METH_VARARGS|METH_KEYWORDS, ""},
439 {NULL, NULL, 0, NULL}
440 };
441
442 static char cpairwise2__doc__[] =
443 "Optimized C routines that complement pairwise2.py. These are called from within pairwise2.py.\n\
444 \n\
445 ";
446
447 #if PY_MAJOR_VERSION >= 3
448
449 static struct PyModuleDef moduledef = {
450 PyModuleDef_HEAD_INIT,
451 "cpairwise2",
452 cpairwise2__doc__,
453 -1,
454 cpairwise2Methods,
455 NULL,
456 NULL,
457 NULL,
458 NULL
459 };
460
461 PyObject *
462 PyInit_cpairwise2(void)
463
464 #else
465
466 void
467 /* for Windows: _declspec(dllexport) initcpairwise2(void) */
468 initcpairwise2(void)
469 #endif
470
471 {
472 #if PY_MAJOR_VERSION >= 3
473 PyObject* module = PyModule_Create(&moduledef);
474 if (module==NULL) return NULL;
475 return module;
476 #else
477 (void) Py_InitModule3("cpairwise2", cpairwise2Methods, cpairwise2__doc__);
478 #endif
479 }