jpayne@68: /* Copyright 2002 by Jeffrey Chang. jpayne@68: * Copyright 2016, 2019 by Markus Piotrowski. jpayne@68: * All rights reserved. jpayne@68: * jpayne@68: * This file is part of the Biopython distribution and governed by your jpayne@68: * choice of the "Biopython License Agreement" or the "BSD 3-Clause License". jpayne@68: * Please see the LICENSE file that should have been included as part of this jpayne@68: * package. jpayne@68: * jpayne@68: * cpairwise2module.c jpayne@68: * Created 30 Sep 2001 jpayne@68: * jpayne@68: * Optimized C routines that complement pairwise2.py. jpayne@68: */ jpayne@68: jpayne@68: #include "Python.h" jpayne@68: jpayne@68: jpayne@68: #define _PRECISION 1000 jpayne@68: #define rint(x) (int)((x)*_PRECISION+0.5) jpayne@68: jpayne@68: /* Functions in this module. */ jpayne@68: jpayne@68: static double calc_affine_penalty(int length, double open, double extend, jpayne@68: int penalize_extend_when_opening) jpayne@68: { jpayne@68: double penalty; jpayne@68: jpayne@68: if(length <= 0) jpayne@68: return 0.0; jpayne@68: penalty = open + extend * length; jpayne@68: if(!penalize_extend_when_opening) jpayne@68: penalty -= extend; jpayne@68: return penalty; jpayne@68: } jpayne@68: jpayne@68: static double _get_match_score(PyObject *py_sequenceA, PyObject *py_sequenceB, jpayne@68: PyObject *py_match_fn, int i, int j, jpayne@68: char *sequenceA, char *sequenceB, jpayne@68: int use_sequence_cstring, jpayne@68: double match, double mismatch, jpayne@68: int use_match_mismatch_scores) jpayne@68: { jpayne@68: PyObject *py_A=NULL, *py_B=NULL; jpayne@68: PyObject *py_arglist=NULL, *py_result=NULL; jpayne@68: double score = 0; jpayne@68: jpayne@68: if(use_sequence_cstring && use_match_mismatch_scores) { jpayne@68: score = (sequenceA[i] == sequenceB[j]) ? match : mismatch; jpayne@68: return score; jpayne@68: } jpayne@68: /* Calculate the match score. */ jpayne@68: if(!(py_A = PySequence_GetItem(py_sequenceA, i))) jpayne@68: goto _get_match_score_cleanup; jpayne@68: if(!(py_B = PySequence_GetItem(py_sequenceB, j))) jpayne@68: goto _get_match_score_cleanup; jpayne@68: if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B))) jpayne@68: goto _get_match_score_cleanup; jpayne@68: jpayne@68: if(!(py_result = PyEval_CallObject(py_match_fn, py_arglist))) jpayne@68: goto _get_match_score_cleanup; jpayne@68: score = PyFloat_AsDouble(py_result); jpayne@68: jpayne@68: _get_match_score_cleanup: jpayne@68: if(py_A) { jpayne@68: Py_DECREF(py_A); jpayne@68: } jpayne@68: if(py_B) { jpayne@68: Py_DECREF(py_B); jpayne@68: } jpayne@68: if(py_arglist) { jpayne@68: Py_DECREF(py_arglist); jpayne@68: } jpayne@68: if(py_result) { jpayne@68: Py_DECREF(py_result); jpayne@68: } jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: #if PY_MAJOR_VERSION >= 3 jpayne@68: static PyObject* _create_bytes_object(PyObject* o) jpayne@68: { jpayne@68: PyObject* b; jpayne@68: if (PyBytes_Check(o)) { jpayne@68: return o; jpayne@68: } jpayne@68: if (!PyUnicode_Check(o)) { jpayne@68: return NULL; jpayne@68: } jpayne@68: b = PyUnicode_AsASCIIString(o); jpayne@68: if (!b) { jpayne@68: PyErr_Clear(); jpayne@68: return NULL; jpayne@68: } jpayne@68: return b; jpayne@68: } jpayne@68: #endif jpayne@68: jpayne@68: /* This function is a more-or-less straightforward port of the jpayne@68: * equivalent function in pairwise2. Please see there for algorithm jpayne@68: * documentation. jpayne@68: */ jpayne@68: static PyObject *cpairwise2__make_score_matrix_fast(PyObject *self, jpayne@68: PyObject *args) jpayne@68: { jpayne@68: int i; jpayne@68: int row, col; jpayne@68: PyObject *py_sequenceA, *py_sequenceB, *py_match_fn; jpayne@68: #if PY_MAJOR_VERSION >= 3 jpayne@68: PyObject *py_bytesA, *py_bytesB; jpayne@68: #endif jpayne@68: char *sequenceA=NULL, *sequenceB=NULL; jpayne@68: int use_sequence_cstring; jpayne@68: double open_A, extend_A, open_B, extend_B; jpayne@68: int penalize_extend_when_opening, penalize_end_gaps_A, penalize_end_gaps_B; jpayne@68: int align_globally, score_only; jpayne@68: jpayne@68: PyObject *py_match=NULL, *py_mismatch=NULL; jpayne@68: double first_A_gap, first_B_gap; jpayne@68: double match, mismatch; jpayne@68: double score; jpayne@68: double best_score = 0; jpayne@68: double local_max_score = 0; jpayne@68: int use_match_mismatch_scores; jpayne@68: int lenA, lenB; jpayne@68: double *score_matrix = NULL; jpayne@68: unsigned char *trace_matrix = NULL; jpayne@68: PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL; jpayne@68: jpayne@68: double *col_cache_score = NULL; jpayne@68: PyObject *py_retval = NULL; jpayne@68: jpayne@68: if(!PyArg_ParseTuple(args, "OOOddddi(ii)ii", &py_sequenceA, &py_sequenceB, jpayne@68: &py_match_fn, &open_A, &extend_A, &open_B, &extend_B, jpayne@68: &penalize_extend_when_opening, jpayne@68: &penalize_end_gaps_A, &penalize_end_gaps_B, jpayne@68: &align_globally, &score_only)) jpayne@68: return NULL; jpayne@68: if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) { jpayne@68: PyErr_SetString(PyExc_TypeError, jpayne@68: "py_sequenceA and py_sequenceB should be sequences."); jpayne@68: return NULL; jpayne@68: } jpayne@68: jpayne@68: /* Optimize for the common case. Check to see if py_sequenceA and jpayne@68: py_sequenceB are strings. If they are, use the c string jpayne@68: representation. */ jpayne@68: #if PY_MAJOR_VERSION < 3 jpayne@68: use_sequence_cstring = 0; jpayne@68: if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) { jpayne@68: sequenceA = PyString_AS_STRING(py_sequenceA); jpayne@68: sequenceB = PyString_AS_STRING(py_sequenceB); jpayne@68: use_sequence_cstring = 1; jpayne@68: } jpayne@68: #else jpayne@68: py_bytesA = _create_bytes_object(py_sequenceA); jpayne@68: py_bytesB = _create_bytes_object(py_sequenceB); jpayne@68: if (py_bytesA && py_bytesB) { jpayne@68: sequenceA = PyBytes_AS_STRING(py_bytesA); jpayne@68: sequenceB = PyBytes_AS_STRING(py_bytesB); jpayne@68: use_sequence_cstring = 1; jpayne@68: } jpayne@68: else { jpayne@68: Py_XDECREF(py_bytesA); jpayne@68: Py_XDECREF(py_bytesB); jpayne@68: use_sequence_cstring = 0; jpayne@68: } jpayne@68: #endif jpayne@68: jpayne@68: if(!PyCallable_Check(py_match_fn)) { jpayne@68: PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable."); jpayne@68: return NULL; jpayne@68: } jpayne@68: /* Optimize for the common case. Check to see if py_match_fn is jpayne@68: an identity_match. If so, pull out the match and mismatch jpayne@68: member variables and calculate the scores myself. */ jpayne@68: match = mismatch = 0; jpayne@68: use_match_mismatch_scores = 0; jpayne@68: if(!(py_match = PyObject_GetAttrString(py_match_fn, "match"))) jpayne@68: goto cleanup_after_py_match_fn; jpayne@68: match = PyFloat_AsDouble(py_match); jpayne@68: if(match==-1.0 && PyErr_Occurred()) jpayne@68: goto cleanup_after_py_match_fn; jpayne@68: if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch"))) jpayne@68: goto cleanup_after_py_match_fn; jpayne@68: mismatch = PyFloat_AsDouble(py_mismatch); jpayne@68: if(mismatch==-1.0 && PyErr_Occurred()) jpayne@68: goto cleanup_after_py_match_fn; jpayne@68: use_match_mismatch_scores = 1; jpayne@68: jpayne@68: cleanup_after_py_match_fn: jpayne@68: if(PyErr_Occurred()) jpayne@68: PyErr_Clear(); jpayne@68: if(py_match) { jpayne@68: Py_DECREF(py_match); jpayne@68: } jpayne@68: if(py_mismatch) { jpayne@68: Py_DECREF(py_mismatch); jpayne@68: } jpayne@68: /* Cache some commonly used gap penalties */ jpayne@68: first_A_gap = calc_affine_penalty(1, open_A, extend_A, jpayne@68: penalize_extend_when_opening); jpayne@68: first_B_gap = calc_affine_penalty(1, open_B, extend_B, jpayne@68: penalize_extend_when_opening); jpayne@68: jpayne@68: /* Allocate matrices for storing the results and initialize first row and col. */ jpayne@68: lenA = PySequence_Length(py_sequenceA); jpayne@68: lenB = PySequence_Length(py_sequenceB); jpayne@68: score_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*score_matrix)); jpayne@68: if(!score_matrix) { jpayne@68: PyErr_SetString(PyExc_MemoryError, "Out of memory"); jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: } jpayne@68: for(i=0; i<(lenB+1); i++) jpayne@68: score_matrix[i] = 0; jpayne@68: for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1)) jpayne@68: score_matrix[i] = 0; jpayne@68: /* If we only want the score, we don't need the trace matrix. */ jpayne@68: if (!score_only){ jpayne@68: trace_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*trace_matrix)); jpayne@68: if(!trace_matrix) { jpayne@68: PyErr_SetString(PyExc_MemoryError, "Out of memory"); jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: } jpayne@68: for(i=0; i<(lenB+1); i++) jpayne@68: trace_matrix[i] = 0; jpayne@68: for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1)) jpayne@68: trace_matrix[i] = 0; jpayne@68: } jpayne@68: else jpayne@68: trace_matrix = malloc(1); jpayne@68: jpayne@68: /* Initialize the first row and col of the score matrix. */ jpayne@68: for(i=0; i<=lenA; i++) { jpayne@68: if(penalize_end_gaps_B) jpayne@68: score = calc_affine_penalty(i, open_B, extend_B, jpayne@68: penalize_extend_when_opening); jpayne@68: else jpayne@68: score = 0; jpayne@68: score_matrix[i*(lenB+1)] = score; jpayne@68: } jpayne@68: for(i=0; i<=lenB; i++) { jpayne@68: if(penalize_end_gaps_A) jpayne@68: score = calc_affine_penalty(i, open_A, extend_A, jpayne@68: penalize_extend_when_opening); jpayne@68: else jpayne@68: score = 0; jpayne@68: score_matrix[i] = score; jpayne@68: } jpayne@68: jpayne@68: /* Now initialize the col cache. */ jpayne@68: col_cache_score = malloc((lenB+1)*sizeof(*col_cache_score)); jpayne@68: memset((void *)col_cache_score, 0, (lenB+1)*sizeof(*col_cache_score)); jpayne@68: for(i=0; i<=lenB; i++) { jpayne@68: col_cache_score[i] = calc_affine_penalty(i, (2*open_B), extend_B, jpayne@68: penalize_extend_when_opening); jpayne@68: } jpayne@68: jpayne@68: /* Fill in the score matrix. The row cache is calculated on the fly.*/ jpayne@68: for(row=1; row<=lenA; row++) { jpayne@68: double row_cache_score = calc_affine_penalty(row, (2*open_A), extend_A, jpayne@68: penalize_extend_when_opening); jpayne@68: for(col=1; col<=lenB; col++) { jpayne@68: double match_score, nogap_score; jpayne@68: double row_open, row_extend, col_open, col_extend; jpayne@68: int best_score_rint, row_score_rint, col_score_rint; jpayne@68: unsigned char row_trace_score, col_trace_score, trace_score; jpayne@68: jpayne@68: /* Calculate the best score. */ jpayne@68: match_score = _get_match_score(py_sequenceA, py_sequenceB, jpayne@68: py_match_fn, row-1, col-1, jpayne@68: sequenceA, sequenceB, jpayne@68: use_sequence_cstring, jpayne@68: match, mismatch, jpayne@68: use_match_mismatch_scores); jpayne@68: if(match_score==-1.0 && PyErr_Occurred()) jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: nogap_score = score_matrix[(row-1)*(lenB+1)+col-1] + match_score; jpayne@68: jpayne@68: if (!penalize_end_gaps_A && row==lenA) { jpayne@68: row_open = score_matrix[(row)*(lenB+1)+col-1]; jpayne@68: row_extend = row_cache_score; jpayne@68: } jpayne@68: else { jpayne@68: row_open = score_matrix[(row)*(lenB+1)+col-1] + first_A_gap; jpayne@68: row_extend = row_cache_score + extend_A; jpayne@68: } jpayne@68: row_cache_score = (row_open > row_extend) ? row_open : row_extend; jpayne@68: jpayne@68: if (!penalize_end_gaps_B && col==lenB){ jpayne@68: col_open = score_matrix[(row-1)*(lenB+1)+col]; jpayne@68: col_extend = col_cache_score[col]; jpayne@68: } jpayne@68: else { jpayne@68: col_open = score_matrix[(row-1)*(lenB+1)+col] + first_B_gap; jpayne@68: col_extend = col_cache_score[col] + extend_B; jpayne@68: } jpayne@68: col_cache_score[col] = (col_open > col_extend) ? col_open : col_extend; jpayne@68: jpayne@68: best_score = (row_cache_score > col_cache_score[col]) ? row_cache_score : col_cache_score[col]; jpayne@68: if(nogap_score > best_score) jpayne@68: best_score = nogap_score; jpayne@68: jpayne@68: if (best_score > local_max_score) jpayne@68: local_max_score = best_score; jpayne@68: jpayne@68: if(!align_globally && best_score < 0) jpayne@68: score_matrix[row*(lenB+1)+col] = 0; jpayne@68: else jpayne@68: score_matrix[row*(lenB+1)+col] = best_score; jpayne@68: jpayne@68: if (!score_only) { jpayne@68: row_score_rint = rint(row_cache_score); jpayne@68: col_score_rint = rint(col_cache_score[col]); jpayne@68: row_trace_score = 0; jpayne@68: col_trace_score = 0; jpayne@68: if (rint(row_open) == row_score_rint) jpayne@68: row_trace_score = row_trace_score|1; jpayne@68: if (rint(row_extend) == row_score_rint) jpayne@68: row_trace_score = row_trace_score|8; jpayne@68: if (rint(col_open) == col_score_rint) jpayne@68: col_trace_score = col_trace_score|4; jpayne@68: if (rint(col_extend) == col_score_rint) jpayne@68: col_trace_score = col_trace_score|16; jpayne@68: jpayne@68: trace_score = 0; jpayne@68: best_score_rint = rint(best_score); jpayne@68: if (rint(nogap_score) == best_score_rint) jpayne@68: trace_score = trace_score|2; jpayne@68: if (row_score_rint == best_score_rint) jpayne@68: trace_score += row_trace_score; jpayne@68: if (col_score_rint == best_score_rint) jpayne@68: trace_score += col_trace_score; jpayne@68: trace_matrix[row*(lenB+1)+col] = trace_score; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if (!align_globally) jpayne@68: best_score = local_max_score; jpayne@68: jpayne@68: /* Save the score and traceback matrices into real python objects. */ jpayne@68: if(!score_only) { jpayne@68: if(!(py_score_matrix = PyList_New(lenA+1))) jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: if(!(py_trace_matrix = PyList_New(lenA+1))) jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: jpayne@68: for(row=0; row<=lenA; row++) { jpayne@68: PyObject *py_score_row, *py_trace_row; jpayne@68: if(!(py_score_row = PyList_New(lenB+1))) jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: PyList_SET_ITEM(py_score_matrix, row, py_score_row); jpayne@68: if(!(py_trace_row = PyList_New(lenB+1))) jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: PyList_SET_ITEM(py_trace_matrix, row, py_trace_row); jpayne@68: jpayne@68: for(col=0; col<=lenB; col++) { jpayne@68: PyObject *py_score, *py_trace; jpayne@68: int offset = row*(lenB+1) + col; jpayne@68: jpayne@68: /* Set py_score_matrix[row][col] to the score. */ jpayne@68: if(!(py_score = PyFloat_FromDouble(score_matrix[offset]))) jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: PyList_SET_ITEM(py_score_row, col, py_score); jpayne@68: jpayne@68: /* Set py_trace_matrix[row][col] to a list of indexes. On jpayne@68: the edges of the matrix (row or column is 0), the jpayne@68: matrix should be [None]. */ jpayne@68: if(!row || !col) { jpayne@68: if(!(py_trace = Py_BuildValue("B", 1))) jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: Py_INCREF(Py_None); jpayne@68: PyList_SET_ITEM(py_trace_row, col, Py_None); jpayne@68: } jpayne@68: else { jpayne@68: if(!(py_trace = Py_BuildValue("B", trace_matrix[offset]))) jpayne@68: goto _cleanup_make_score_matrix_fast; jpayne@68: PyList_SET_ITEM(py_trace_row, col, py_trace); jpayne@68: jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: else { jpayne@68: py_score_matrix = PyList_New(1); jpayne@68: py_trace_matrix = PyList_New(1); jpayne@68: } jpayne@68: py_retval = Py_BuildValue("(OOd)", py_score_matrix, py_trace_matrix, best_score); jpayne@68: jpayne@68: _cleanup_make_score_matrix_fast: jpayne@68: if(score_matrix) jpayne@68: free(score_matrix); jpayne@68: if(trace_matrix) jpayne@68: free(trace_matrix); jpayne@68: if(col_cache_score) jpayne@68: free(col_cache_score); jpayne@68: if(py_score_matrix){ jpayne@68: Py_DECREF(py_score_matrix); jpayne@68: } jpayne@68: if(py_trace_matrix){ jpayne@68: Py_DECREF(py_trace_matrix); jpayne@68: } jpayne@68: jpayne@68: #if PY_MAJOR_VERSION >= 3 jpayne@68: if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA); jpayne@68: if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB); jpayne@68: #endif jpayne@68: jpayne@68: return py_retval; jpayne@68: } jpayne@68: jpayne@68: static PyObject *cpairwise2_rint(PyObject *self, PyObject *args, jpayne@68: PyObject *keywds) jpayne@68: { jpayne@68: double x; jpayne@68: int precision = _PRECISION; jpayne@68: int rint_x; jpayne@68: jpayne@68: static char *kwlist[] = {"x", "precision", NULL}; jpayne@68: jpayne@68: if(!PyArg_ParseTupleAndKeywords(args, keywds, "d|l", kwlist, jpayne@68: &x, &precision)) jpayne@68: return NULL; jpayne@68: rint_x = (int)(x * precision + 0.5); jpayne@68: #if PY_MAJOR_VERSION >= 3 jpayne@68: return PyLong_FromLong((long)rint_x); jpayne@68: #else jpayne@68: return PyInt_FromLong((long)rint_x); jpayne@68: #endif jpayne@68: } jpayne@68: jpayne@68: /* Module definition stuff */ jpayne@68: jpayne@68: static PyMethodDef cpairwise2Methods[] = { jpayne@68: {"_make_score_matrix_fast", jpayne@68: (PyCFunction)cpairwise2__make_score_matrix_fast, METH_VARARGS, ""}, jpayne@68: {"rint", (PyCFunction)cpairwise2_rint, METH_VARARGS|METH_KEYWORDS, ""}, jpayne@68: {NULL, NULL, 0, NULL} jpayne@68: }; jpayne@68: jpayne@68: static char cpairwise2__doc__[] = jpayne@68: "Optimized C routines that complement pairwise2.py. These are called from within pairwise2.py.\n\ jpayne@68: \n\ jpayne@68: "; jpayne@68: jpayne@68: #if PY_MAJOR_VERSION >= 3 jpayne@68: jpayne@68: static struct PyModuleDef moduledef = { jpayne@68: PyModuleDef_HEAD_INIT, jpayne@68: "cpairwise2", jpayne@68: cpairwise2__doc__, jpayne@68: -1, jpayne@68: cpairwise2Methods, jpayne@68: NULL, jpayne@68: NULL, jpayne@68: NULL, jpayne@68: NULL jpayne@68: }; jpayne@68: jpayne@68: PyObject * jpayne@68: PyInit_cpairwise2(void) jpayne@68: jpayne@68: #else jpayne@68: jpayne@68: void jpayne@68: /* for Windows: _declspec(dllexport) initcpairwise2(void) */ jpayne@68: initcpairwise2(void) jpayne@68: #endif jpayne@68: jpayne@68: { jpayne@68: #if PY_MAJOR_VERSION >= 3 jpayne@68: PyObject* module = PyModule_Create(&moduledef); jpayne@68: if (module==NULL) return NULL; jpayne@68: return module; jpayne@68: #else jpayne@68: (void) Py_InitModule3("cpairwise2", cpairwise2Methods, cpairwise2__doc__); jpayne@68: #endif jpayne@68: }