annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pysam/libchtslib.pyx @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 # cython: language_level=3
jpayne@68 2 # cython: embedsignature=True
jpayne@68 3 # cython: profile=True
jpayne@68 4 # adds doc-strings for sphinx
jpayne@68 5
jpayne@68 6 ########################################################################
jpayne@68 7 ########################################################################
jpayne@68 8 ## Cython cimports
jpayne@68 9 ########################################################################
jpayne@68 10
jpayne@68 11 from posix.unistd cimport dup
jpayne@68 12 from libc.errno cimport errno
jpayne@68 13 from libc.stdint cimport INT32_MAX
jpayne@68 14 from cpython cimport PyBytes_FromStringAndSize
jpayne@68 15 from pysam.libchtslib cimport *
jpayne@68 16 from pysam.libcutils cimport force_bytes, force_str, charptr_to_str, charptr_to_str_w_len
jpayne@68 17 from pysam.libcutils cimport encode_filename, from_string_and_size, libc_whence_from_io
jpayne@68 18
jpayne@68 19
jpayne@68 20 ########################################################################
jpayne@68 21 ########################################################################
jpayne@68 22 ## Python imports
jpayne@68 23 ########################################################################
jpayne@68 24
jpayne@68 25 import os
jpayne@68 26 import io
jpayne@68 27 import re
jpayne@68 28 from warnings import warn
jpayne@68 29
jpayne@68 30
jpayne@68 31 ########################################################################
jpayne@68 32 ########################################################################
jpayne@68 33 ## Constants
jpayne@68 34 ########################################################################
jpayne@68 35
jpayne@68 36 __all__ = ['get_verbosity', 'set_verbosity', 'HFile', 'HTSFile']
jpayne@68 37
jpayne@68 38 # maximum genomic coordinace
jpayne@68 39 cdef int MAX_POS = (1 << 31) - 1
jpayne@68 40
jpayne@68 41 cdef tuple FORMAT_CATEGORIES = ('UNKNOWN', 'ALIGNMENTS', 'VARIANTS', 'INDEX', 'REGIONS')
jpayne@68 42 cdef tuple FORMATS = ('UNKNOWN', 'BINARY_FORMAT', 'TEXT_FORMAT', 'SAM', 'BAM', 'BAI', 'CRAM', 'CRAI',
jpayne@68 43 'VCF', 'BCF', 'CSI', 'GZI', 'TBI', 'BED')
jpayne@68 44 cdef tuple COMPRESSION = ('NONE', 'GZIP', 'BGZF', 'CUSTOM')
jpayne@68 45
jpayne@68 46
jpayne@68 47 ########################################################################
jpayne@68 48 ########################################################################
jpayne@68 49 ## Verbosity functions
jpayne@68 50 ########################################################################
jpayne@68 51
jpayne@68 52
jpayne@68 53 cpdef set_verbosity(int verbosity):
jpayne@68 54 """Set htslib's hts_verbose global variable to the specified value."""
jpayne@68 55 return hts_set_verbosity(verbosity)
jpayne@68 56
jpayne@68 57 cpdef get_verbosity():
jpayne@68 58 """Return the value of htslib's hts_verbose global variable."""
jpayne@68 59 return hts_get_verbosity()
jpayne@68 60
jpayne@68 61
jpayne@68 62 ########################################################################
jpayne@68 63 ########################################################################
jpayne@68 64 ## HFile wrapper class
jpayne@68 65 ########################################################################
jpayne@68 66
jpayne@68 67 cdef class HFile(object):
jpayne@68 68 cdef hFILE *fp
jpayne@68 69 cdef readonly object name, mode
jpayne@68 70
jpayne@68 71 def __init__(self, name, mode='r', closefd=True):
jpayne@68 72 self._open(name, mode, closefd=True)
jpayne@68 73
jpayne@68 74 def __dealloc__(self):
jpayne@68 75 self.close()
jpayne@68 76
jpayne@68 77 @property
jpayne@68 78 def closed(self):
jpayne@68 79 return self.fp == NULL
jpayne@68 80
jpayne@68 81 cdef _open(self, name, mode, closefd=True):
jpayne@68 82 self.name = name
jpayne@68 83 self.mode = mode
jpayne@68 84
jpayne@68 85 mode = force_bytes(mode)
jpayne@68 86
jpayne@68 87 if isinstance(name, int):
jpayne@68 88 if self.fp != NULL:
jpayne@68 89 name = dup(name)
jpayne@68 90 self.fp = hdopen(name, mode)
jpayne@68 91 else:
jpayne@68 92 name = encode_filename(name)
jpayne@68 93 self.fp = hopen(name, mode)
jpayne@68 94
jpayne@68 95 if not self.fp:
jpayne@68 96 raise IOError(errno, 'failed to open HFile', self.name)
jpayne@68 97
jpayne@68 98 def close(self):
jpayne@68 99 if self.fp == NULL:
jpayne@68 100 return
jpayne@68 101
jpayne@68 102 cdef hFILE *fp = self.fp
jpayne@68 103 self.fp = NULL
jpayne@68 104
jpayne@68 105 if hclose(fp) != 0:
jpayne@68 106 raise IOError(errno, 'failed to close HFile', self.name)
jpayne@68 107
jpayne@68 108 def fileno(self):
jpayne@68 109 if self.fp == NULL:
jpayne@68 110 raise IOError('operation on closed HFile')
jpayne@68 111 if isinstance(self.name, int):
jpayne@68 112 return self.name
jpayne@68 113 else:
jpayne@68 114 raise AttributeError('fileno not available')
jpayne@68 115
jpayne@68 116 def __enter__(self):
jpayne@68 117 return self
jpayne@68 118
jpayne@68 119 def __exit__(self, type, value, tb):
jpayne@68 120 self.close()
jpayne@68 121
jpayne@68 122 def __iter__(self):
jpayne@68 123 return self
jpayne@68 124
jpayne@68 125 def __next__(self):
jpayne@68 126 line = self.readline()
jpayne@68 127 if not line:
jpayne@68 128 raise StopIteration()
jpayne@68 129 return line
jpayne@68 130
jpayne@68 131 def flush(self):
jpayne@68 132 if self.fp == NULL:
jpayne@68 133 raise IOError('operation on closed HFile')
jpayne@68 134 if hflush(self.fp) != 0:
jpayne@68 135 raise IOError(herrno(self.fp), 'failed to flush HFile', self.name)
jpayne@68 136
jpayne@68 137 def isatty(self):
jpayne@68 138 if self.fp == NULL:
jpayne@68 139 raise IOError('operation on closed HFile')
jpayne@68 140 return False
jpayne@68 141
jpayne@68 142 def readable(self):
jpayne@68 143 return self.fp != NULL and 'r' in self.mode
jpayne@68 144
jpayne@68 145 def read(self, Py_ssize_t size=-1):
jpayne@68 146 if self.fp == NULL:
jpayne@68 147 raise IOError('operation on closed HFile')
jpayne@68 148
jpayne@68 149 if size == 0:
jpayne@68 150 return b''
jpayne@68 151
jpayne@68 152 cdef list parts = []
jpayne@68 153 cdef bytes part
jpayne@68 154 cdef Py_ssize_t chunk_size, ret, bytes_read = 0
jpayne@68 155 cdef char *cpart
jpayne@68 156
jpayne@68 157 while size == -1 or bytes_read < size:
jpayne@68 158 chunk_size = 4096
jpayne@68 159 if size != -1:
jpayne@68 160 chunk_size = min(chunk_size, size - bytes_read)
jpayne@68 161
jpayne@68 162 part = PyBytes_FromStringAndSize(NULL, chunk_size)
jpayne@68 163 cpart = <char *>part
jpayne@68 164 ret = hread(self.fp, <void *>cpart, chunk_size)
jpayne@68 165
jpayne@68 166 if ret < 0:
jpayne@68 167 IOError(herrno(self.fp), 'failed to read HFile', self.name)
jpayne@68 168 elif not ret:
jpayne@68 169 break
jpayne@68 170
jpayne@68 171 bytes_read += ret
jpayne@68 172
jpayne@68 173 if ret < chunk_size:
jpayne@68 174 part = cpart[:ret]
jpayne@68 175
jpayne@68 176 parts.append(part)
jpayne@68 177
jpayne@68 178 return b''.join(parts)
jpayne@68 179
jpayne@68 180 def readall(self):
jpayne@68 181 return self.read()
jpayne@68 182
jpayne@68 183 def readinto(self, buf):
jpayne@68 184 if self.fp == NULL:
jpayne@68 185 raise IOError('operation on closed HFile')
jpayne@68 186
jpayne@68 187 size = len(buf)
jpayne@68 188
jpayne@68 189 if size == 0:
jpayne@68 190 return size
jpayne@68 191
jpayne@68 192 mv = memoryview(buf)
jpayne@68 193 ret = hread(self.fp, <void *>mv, size)
jpayne@68 194
jpayne@68 195 if ret < 0:
jpayne@68 196 IOError(herrno(self.fp), 'failed to read HFile', self.name)
jpayne@68 197
jpayne@68 198 return ret
jpayne@68 199
jpayne@68 200 def readline(self, Py_ssize_t size=-1):
jpayne@68 201 if self.fp == NULL:
jpayne@68 202 raise IOError('operation on closed HFile')
jpayne@68 203
jpayne@68 204 if size == 0:
jpayne@68 205 return b''
jpayne@68 206
jpayne@68 207 cdef list parts = []
jpayne@68 208 cdef bytes part
jpayne@68 209 cdef Py_ssize_t chunk_size, ret, bytes_read = 0
jpayne@68 210 cdef char *cpart
jpayne@68 211
jpayne@68 212 while size == -1 or bytes_read < size:
jpayne@68 213 chunk_size = 4096
jpayne@68 214 if size != -1:
jpayne@68 215 chunk_size = min(chunk_size, size - bytes_read)
jpayne@68 216
jpayne@68 217 part = PyBytes_FromStringAndSize(NULL, chunk_size)
jpayne@68 218 cpart = <char *>part
jpayne@68 219
jpayne@68 220 # Python bytes objects allocate an extra byte for a null terminator
jpayne@68 221 ret = hgetln(cpart, chunk_size+1, self.fp)
jpayne@68 222
jpayne@68 223 if ret < 0:
jpayne@68 224 IOError(herrno(self.fp), 'failed to read HFile', self.name)
jpayne@68 225 elif not ret:
jpayne@68 226 break
jpayne@68 227
jpayne@68 228 bytes_read += ret
jpayne@68 229
jpayne@68 230 if ret < chunk_size:
jpayne@68 231 part = cpart[:ret]
jpayne@68 232 cpart = <char *>part
jpayne@68 233
jpayne@68 234 parts.append(part)
jpayne@68 235
jpayne@68 236 if cpart[ret-1] == b'\n':
jpayne@68 237 break
jpayne@68 238
jpayne@68 239 return b''.join(parts)
jpayne@68 240
jpayne@68 241 def readlines(self):
jpayne@68 242 return list(self)
jpayne@68 243
jpayne@68 244 def seek(self, Py_ssize_t offset, int whence=io.SEEK_SET):
jpayne@68 245 if self.fp == NULL:
jpayne@68 246 raise IOError('operation on closed HFile')
jpayne@68 247
jpayne@68 248 cdef Py_ssize_t off = hseek(self.fp, offset, libc_whence_from_io(whence))
jpayne@68 249
jpayne@68 250 if off < 0:
jpayne@68 251 raise IOError(herrno(self.fp), 'seek failed on HFile', self.name)
jpayne@68 252
jpayne@68 253 return off
jpayne@68 254
jpayne@68 255 def tell(self):
jpayne@68 256 if self.fp == NULL:
jpayne@68 257 raise IOError('operation on closed HFile')
jpayne@68 258
jpayne@68 259 ret = htell(self.fp)
jpayne@68 260
jpayne@68 261 if ret < 0:
jpayne@68 262 raise IOError(herrno(self.fp), 'tell failed on HFile', self.name)
jpayne@68 263
jpayne@68 264 return ret
jpayne@68 265
jpayne@68 266 def seekable(self):
jpayne@68 267 return self.fp != NULL
jpayne@68 268
jpayne@68 269 def truncate(self, size=None):
jpayne@68 270 raise NotImplementedError()
jpayne@68 271
jpayne@68 272 def writable(self):
jpayne@68 273 return self.fp != NULL and 'w' in self.mode
jpayne@68 274
jpayne@68 275 def write(self, bytes b):
jpayne@68 276 if self.fp == NULL:
jpayne@68 277 raise IOError('operation on closed HFile')
jpayne@68 278
jpayne@68 279 got = hwrite(self.fp, <void *>b, len(b))
jpayne@68 280
jpayne@68 281 if got < 0:
jpayne@68 282 raise IOError(herrno(self.fp), 'write failed on HFile', self.name)
jpayne@68 283
jpayne@68 284 return got
jpayne@68 285
jpayne@68 286 def writelines(self, lines):
jpayne@68 287 for line in lines:
jpayne@68 288 self.write(line)
jpayne@68 289
jpayne@68 290
jpayne@68 291 ########################################################################
jpayne@68 292 ########################################################################
jpayne@68 293 ## Helpers for backward compatibility to hide the difference between
jpayne@68 294 ## boolean properties and methods
jpayne@68 295 ########################################################################
jpayne@68 296
jpayne@68 297 class CallableValue(object):
jpayne@68 298 def __init__(self, value):
jpayne@68 299 self.value = value
jpayne@68 300 def __call__(self):
jpayne@68 301 return self.value
jpayne@68 302 def __bool__(self):
jpayne@68 303 return self.value
jpayne@68 304 def __nonzero__(self):
jpayne@68 305 return self.value
jpayne@68 306 def __eq__(self, other):
jpayne@68 307 return self.value == other
jpayne@68 308 def __ne__(self, other):
jpayne@68 309 return self.value != other
jpayne@68 310
jpayne@68 311
jpayne@68 312 CTrue = CallableValue(True)
jpayne@68 313 CFalse = CallableValue(False)
jpayne@68 314
jpayne@68 315
jpayne@68 316 ########################################################################
jpayne@68 317 ########################################################################
jpayne@68 318 ## HTSFile wrapper class (base class for AlignmentFile and VariantFile)
jpayne@68 319 ########################################################################
jpayne@68 320
jpayne@68 321 cdef class HTSFile(object):
jpayne@68 322 """
jpayne@68 323 Base class for HTS file types
jpayne@68 324 """
jpayne@68 325
jpayne@68 326 def __cinit__(self, *args, **kwargs):
jpayne@68 327 self.htsfile = NULL
jpayne@68 328 self.threads = 1
jpayne@68 329 self.duplicate_filehandle = True
jpayne@68 330
jpayne@68 331 def close(self):
jpayne@68 332 if self.htsfile:
jpayne@68 333 hts_close(self.htsfile)
jpayne@68 334 self.htsfile = NULL
jpayne@68 335
jpayne@68 336 def __dealloc__(self):
jpayne@68 337 if self.htsfile:
jpayne@68 338 hts_close(self.htsfile)
jpayne@68 339 self.htsfile = NULL
jpayne@68 340
jpayne@68 341 def flush(self):
jpayne@68 342 """Flush any buffered data to the underlying output stream."""
jpayne@68 343 if self.htsfile:
jpayne@68 344 if hts_flush(self.htsfile) < 0:
jpayne@68 345 raise OSError(errno, f'Flushing {type(self).__name__} failed', force_str(self.filename))
jpayne@68 346
jpayne@68 347 def check_truncation(self, ignore_truncation=False):
jpayne@68 348 """Check if file is truncated."""
jpayne@68 349 if not self.htsfile:
jpayne@68 350 return
jpayne@68 351
jpayne@68 352 if self.htsfile.format.compression != bgzf:
jpayne@68 353 return
jpayne@68 354
jpayne@68 355 cdef BGZF *bgzfp = hts_get_bgzfp(self.htsfile)
jpayne@68 356 if not bgzfp:
jpayne@68 357 return
jpayne@68 358
jpayne@68 359 cdef int ret = bgzf_check_EOF(bgzfp)
jpayne@68 360 if ret < 0:
jpayne@68 361 raise IOError(errno, 'error checking for EOF marker')
jpayne@68 362 elif ret == 0:
jpayne@68 363 msg = 'no BGZF EOF marker; file may be truncated'.format(self.filename)
jpayne@68 364 if ignore_truncation:
jpayne@68 365 warn(msg)
jpayne@68 366 else:
jpayne@68 367 raise IOError(msg)
jpayne@68 368
jpayne@68 369 def __enter__(self):
jpayne@68 370 return self
jpayne@68 371
jpayne@68 372 def __exit__(self, exc_type, exc_value, traceback):
jpayne@68 373 self.close()
jpayne@68 374 return False
jpayne@68 375
jpayne@68 376 @property
jpayne@68 377 def category(self):
jpayne@68 378 """General file format category. One of UNKNOWN, ALIGNMENTS,
jpayne@68 379 VARIANTS, INDEX, REGIONS"""
jpayne@68 380 if not self.htsfile:
jpayne@68 381 raise ValueError('metadata not available on closed file')
jpayne@68 382 return FORMAT_CATEGORIES[self.htsfile.format.category]
jpayne@68 383
jpayne@68 384 @property
jpayne@68 385 def format(self):
jpayne@68 386 """File format.
jpayne@68 387
jpayne@68 388 One of UNKNOWN, BINARY_FORMAT, TEXT_FORMAT, SAM, BAM,
jpayne@68 389 BAI, CRAM, CRAI, VCF, BCF, CSI, GZI, TBI, BED.
jpayne@68 390 """
jpayne@68 391 if not self.htsfile:
jpayne@68 392 raise ValueError('metadata not available on closed file')
jpayne@68 393 return FORMATS[self.htsfile.format.format]
jpayne@68 394
jpayne@68 395 @property
jpayne@68 396 def version(self):
jpayne@68 397 """Tuple of file format version numbers (major, minor)"""
jpayne@68 398 if not self.htsfile:
jpayne@68 399 raise ValueError('metadata not available on closed file')
jpayne@68 400 return self.htsfile.format.version.major, self.htsfile.format.version.minor
jpayne@68 401
jpayne@68 402 @property
jpayne@68 403 def compression(self):
jpayne@68 404 """File compression.
jpayne@68 405
jpayne@68 406 One of NONE, GZIP, BGZF, CUSTOM."""
jpayne@68 407 if not self.htsfile:
jpayne@68 408 raise ValueError('metadata not available on closed file')
jpayne@68 409 return COMPRESSION[self.htsfile.format.compression]
jpayne@68 410
jpayne@68 411 @property
jpayne@68 412 def description(self):
jpayne@68 413 """Vaguely human readable description of the file format"""
jpayne@68 414 if not self.htsfile:
jpayne@68 415 raise ValueError('metadata not available on closed file')
jpayne@68 416 cdef char *desc = hts_format_description(&self.htsfile.format)
jpayne@68 417 try:
jpayne@68 418 return charptr_to_str(desc)
jpayne@68 419 finally:
jpayne@68 420 free(desc)
jpayne@68 421
jpayne@68 422 @property
jpayne@68 423 def is_open(self):
jpayne@68 424 """return True if HTSFile is open and in a valid state."""
jpayne@68 425 return CTrue if self.htsfile != NULL else CFalse
jpayne@68 426
jpayne@68 427 @property
jpayne@68 428 def is_closed(self):
jpayne@68 429 """return True if HTSFile is closed."""
jpayne@68 430 return self.htsfile == NULL
jpayne@68 431
jpayne@68 432 @property
jpayne@68 433 def closed(self):
jpayne@68 434 """return True if HTSFile is closed."""
jpayne@68 435 return self.htsfile == NULL
jpayne@68 436
jpayne@68 437 @property
jpayne@68 438 def is_write(self):
jpayne@68 439 """return True if HTSFile is open for writing"""
jpayne@68 440 return self.htsfile != NULL and self.htsfile.is_write != 0
jpayne@68 441
jpayne@68 442 @property
jpayne@68 443 def is_read(self):
jpayne@68 444 """return True if HTSFile is open for reading"""
jpayne@68 445 return self.htsfile != NULL and self.htsfile.is_write == 0
jpayne@68 446
jpayne@68 447 @property
jpayne@68 448 def is_sam(self):
jpayne@68 449 """return True if HTSFile is reading or writing a SAM alignment file"""
jpayne@68 450 return self.htsfile != NULL and self.htsfile.format.format == sam
jpayne@68 451
jpayne@68 452 @property
jpayne@68 453 def is_bam(self):
jpayne@68 454 """return True if HTSFile is reading or writing a BAM alignment file"""
jpayne@68 455 return self.htsfile != NULL and self.htsfile.format.format == bam
jpayne@68 456
jpayne@68 457 @property
jpayne@68 458 def is_cram(self):
jpayne@68 459 """return True if HTSFile is reading or writing a BAM alignment file"""
jpayne@68 460 return self.htsfile != NULL and self.htsfile.format.format == cram
jpayne@68 461
jpayne@68 462 @property
jpayne@68 463 def is_vcf(self):
jpayne@68 464 """return True if HTSFile is reading or writing a VCF variant file"""
jpayne@68 465 return self.htsfile != NULL and self.htsfile.format.format == vcf
jpayne@68 466
jpayne@68 467 @property
jpayne@68 468 def is_bcf(self):
jpayne@68 469 """return True if HTSFile is reading or writing a BCF variant file"""
jpayne@68 470 return self.htsfile != NULL and self.htsfile.format.format == bcf
jpayne@68 471
jpayne@68 472 def reset(self):
jpayne@68 473 """reset file position to beginning of file just after the header.
jpayne@68 474
jpayne@68 475 Returns
jpayne@68 476 -------
jpayne@68 477
jpayne@68 478 The file position after moving the file pointer. : pointer
jpayne@68 479
jpayne@68 480 """
jpayne@68 481 return self.seek(self.start_offset)
jpayne@68 482
jpayne@68 483 def seek(self, uint64_t offset, int whence=io.SEEK_SET):
jpayne@68 484 """move file pointer to position *offset*, see :meth:`pysam.HTSFile.tell`."""
jpayne@68 485 if not self.is_open:
jpayne@68 486 raise ValueError('I/O operation on closed file')
jpayne@68 487 if self.is_stream:
jpayne@68 488 raise IOError('seek not available in streams')
jpayne@68 489
jpayne@68 490 whence = libc_whence_from_io(whence)
jpayne@68 491
jpayne@68 492 cdef int64_t ret
jpayne@68 493 if self.htsfile.format.compression == bgzf:
jpayne@68 494 with nogil:
jpayne@68 495 ret = bgzf_seek(hts_get_bgzfp(self.htsfile), offset, whence)
jpayne@68 496 elif self.htsfile.format.compression == no_compression:
jpayne@68 497 ret = 0 if (hseek(self.htsfile.fp.hfile, offset, whence) >= 0) else -1
jpayne@68 498 else:
jpayne@68 499 raise NotImplementedError("seek not implemented in files compressed by method {}".format(
jpayne@68 500 self.htsfile.format.compression))
jpayne@68 501 return ret
jpayne@68 502
jpayne@68 503 def tell(self):
jpayne@68 504 """return current file position, see :meth:`pysam.HTSFile.seek`."""
jpayne@68 505 if not self.is_open:
jpayne@68 506 raise ValueError('I/O operation on closed file')
jpayne@68 507 if self.is_stream:
jpayne@68 508 raise IOError('tell not available in streams')
jpayne@68 509
jpayne@68 510 cdef int64_t ret
jpayne@68 511 if self.htsfile.format.compression == bgzf:
jpayne@68 512 with nogil:
jpayne@68 513 ret = bgzf_tell(hts_get_bgzfp(self.htsfile))
jpayne@68 514 elif self.htsfile.format.compression == no_compression:
jpayne@68 515 ret = htell(self.htsfile.fp.hfile)
jpayne@68 516 elif self.htsfile.format.format == cram:
jpayne@68 517 with nogil:
jpayne@68 518 ret = htell(cram_fd_get_fp(self.htsfile.fp.cram))
jpayne@68 519 else:
jpayne@68 520 raise NotImplementedError("seek not implemented in files compressed by method {}".format(
jpayne@68 521 self.htsfile.format.compression))
jpayne@68 522
jpayne@68 523 return ret
jpayne@68 524
jpayne@68 525 cdef htsFile *_open_htsfile(self) except? NULL:
jpayne@68 526 cdef char *cfilename
jpayne@68 527 cdef char *cmode = self.mode
jpayne@68 528 cdef int fd, dup_fd, threads
jpayne@68 529
jpayne@68 530 threads = self.threads - 1
jpayne@68 531 if isinstance(self.filename, bytes):
jpayne@68 532 cfilename = self.filename
jpayne@68 533 with nogil:
jpayne@68 534 htsfile = hts_open(cfilename, cmode)
jpayne@68 535 if htsfile != NULL:
jpayne@68 536 hts_set_threads(htsfile, threads)
jpayne@68 537 return htsfile
jpayne@68 538 else:
jpayne@68 539 if isinstance(self.filename, int):
jpayne@68 540 fd = self.filename
jpayne@68 541 else:
jpayne@68 542 fd = self.filename.fileno()
jpayne@68 543
jpayne@68 544 if self.duplicate_filehandle:
jpayne@68 545 dup_fd = dup(fd)
jpayne@68 546 else:
jpayne@68 547 dup_fd = fd
jpayne@68 548
jpayne@68 549 # Replicate mode normalization done in hts_open_format
jpayne@68 550 smode = self.mode.replace(b'b', b'').replace(b'c', b'')
jpayne@68 551 if b'b' in self.mode:
jpayne@68 552 smode += b'b'
jpayne@68 553 elif b'c' in self.mode:
jpayne@68 554 smode += b'c'
jpayne@68 555 cmode = smode
jpayne@68 556
jpayne@68 557 hfile = hdopen(dup_fd, cmode)
jpayne@68 558 if hfile == NULL:
jpayne@68 559 raise IOError('Cannot create hfile')
jpayne@68 560
jpayne@68 561 try:
jpayne@68 562 # filename.name can be an int
jpayne@68 563 filename = str(self.filename.name)
jpayne@68 564 except AttributeError:
jpayne@68 565 filename = '<fd:{}>'.format(fd)
jpayne@68 566
jpayne@68 567 filename = encode_filename(filename)
jpayne@68 568 cfilename = filename
jpayne@68 569 with nogil:
jpayne@68 570 htsfile = hts_hopen(hfile, cfilename, cmode)
jpayne@68 571 if htsfile != NULL:
jpayne@68 572 hts_set_threads(htsfile, threads)
jpayne@68 573 return htsfile
jpayne@68 574
jpayne@68 575 def add_hts_options(self, format_options=None):
jpayne@68 576 """Given a list of key=value format option strings, add them to an open htsFile
jpayne@68 577 """
jpayne@68 578 cdef int rval
jpayne@68 579 cdef hts_opt *opts = NULL
jpayne@68 580
jpayne@68 581 if format_options:
jpayne@68 582 for format_option in format_options:
jpayne@68 583 rval = hts_opt_add(&opts, format_option)
jpayne@68 584 if rval != 0:
jpayne@68 585 if opts != NULL:
jpayne@68 586 hts_opt_free(opts)
jpayne@68 587 raise RuntimeError('Invalid format option ({}) specified'.format(format_option))
jpayne@68 588 if opts != NULL:
jpayne@68 589 rval = hts_opt_apply(self.htsfile, opts)
jpayne@68 590 if rval != 0:
jpayne@68 591 hts_opt_free(opts)
jpayne@68 592 raise RuntimeError('An error occurred while applying the requested format options')
jpayne@68 593 hts_opt_free(opts)
jpayne@68 594
jpayne@68 595 def parse_region(self, contig=None, start=None, stop=None,
jpayne@68 596 region=None, tid=None,
jpayne@68 597 reference=None, end=None):
jpayne@68 598 """parse alternative ways to specify a genomic region. A region can
jpayne@68 599 either be specified by :term:`contig`, `start` and
jpayne@68 600 `stop`. `start` and `stop` denote 0-based, half-open
jpayne@68 601 intervals. :term:`reference` and `end` are also accepted for
jpayne@68 602 backward compatibility as synonyms for :term:`contig` and
jpayne@68 603 `stop`, respectively.
jpayne@68 604
jpayne@68 605 Alternatively, a samtools :term:`region` string can be
jpayne@68 606 supplied.
jpayne@68 607
jpayne@68 608 If any of the coordinates are missing they will be replaced by
jpayne@68 609 the minimum (`start`) or maximum (`stop`) coordinate.
jpayne@68 610
jpayne@68 611 Note that region strings are 1-based inclusive, while `start`
jpayne@68 612 and `stop` denote an interval in 0-based, half-open
jpayne@68 613 coordinates (like BED files and Python slices).
jpayne@68 614
jpayne@68 615 If `contig` or `region` or are ``*``, unmapped reads at the end
jpayne@68 616 of a BAM file will be returned. Setting either to ``.`` will
jpayne@68 617 iterate from the beginning of the file.
jpayne@68 618
jpayne@68 619 Returns
jpayne@68 620 -------
jpayne@68 621
jpayne@68 622 tuple :
jpayne@68 623 a tuple of `flag`, :term:`tid`, `start` and
jpayne@68 624 `stop`. The flag indicates whether no coordinates were
jpayne@68 625 supplied and the genomic region is the complete genomic space.
jpayne@68 626
jpayne@68 627 Raises
jpayne@68 628 ------
jpayne@68 629
jpayne@68 630 ValueError
jpayne@68 631 for invalid or out of bounds regions.
jpayne@68 632
jpayne@68 633 """
jpayne@68 634 cdef int rtid
jpayne@68 635 cdef int32_t rstart
jpayne@68 636 cdef int32_t rstop
jpayne@68 637
jpayne@68 638 if reference is not None:
jpayne@68 639 if contig is not None:
jpayne@68 640 raise ValueError('contig and reference should not both be specified')
jpayne@68 641 contig = reference
jpayne@68 642
jpayne@68 643 if end is not None:
jpayne@68 644 if stop is not None:
jpayne@68 645 raise ValueError('stop and end should not both be specified')
jpayne@68 646 stop = end
jpayne@68 647
jpayne@68 648 if contig is None and tid is None and region is None:
jpayne@68 649 return 0, 0, 0, MAX_POS
jpayne@68 650
jpayne@68 651 rtid = -1
jpayne@68 652 rstart = 0
jpayne@68 653 rstop = MAX_POS
jpayne@68 654 if start is not None:
jpayne@68 655 try:
jpayne@68 656 rstart = start
jpayne@68 657 except OverflowError:
jpayne@68 658 raise ValueError('start out of range (%i)' % start)
jpayne@68 659
jpayne@68 660 if stop is not None:
jpayne@68 661 try:
jpayne@68 662 rstop = stop
jpayne@68 663 except OverflowError:
jpayne@68 664 raise ValueError('stop out of range (%i)' % stop)
jpayne@68 665
jpayne@68 666 if region:
jpayne@68 667 region = force_str(region)
jpayne@68 668 if ":" in region:
jpayne@68 669 contig, coord = region.split(":")
jpayne@68 670 parts = coord.split("-")
jpayne@68 671 rstart = int(parts[0]) - 1
jpayne@68 672 if len(parts) >= 1:
jpayne@68 673 rstop = int(parts[1])
jpayne@68 674 else:
jpayne@68 675 contig = region
jpayne@68 676
jpayne@68 677 if tid is not None:
jpayne@68 678 if not self.is_valid_tid(tid):
jpayne@68 679 raise IndexError('invalid tid')
jpayne@68 680 rtid = tid
jpayne@68 681 else:
jpayne@68 682 if contig == "*":
jpayne@68 683 rtid = HTS_IDX_NOCOOR
jpayne@68 684 elif contig == ".":
jpayne@68 685 rtid = HTS_IDX_START
jpayne@68 686 else:
jpayne@68 687 rtid = self.get_tid(contig)
jpayne@68 688 if rtid < 0:
jpayne@68 689 raise ValueError('invalid contig `%s`' % contig)
jpayne@68 690
jpayne@68 691 if rstart > rstop:
jpayne@68 692 raise ValueError('invalid coordinates: start (%i) > stop (%i)' % (rstart, rstop))
jpayne@68 693 if not 0 <= rstart < MAX_POS:
jpayne@68 694 raise ValueError('start out of range (%i)' % rstart)
jpayne@68 695 if not 0 <= rstop <= MAX_POS:
jpayne@68 696 raise ValueError('stop out of range (%i)' % rstop)
jpayne@68 697
jpayne@68 698 return 1, rtid, rstart, rstop
jpayne@68 699
jpayne@68 700 def is_valid_tid(self, tid):
jpayne@68 701 """
jpayne@68 702 return True if the numerical :term:`tid` is valid; False otherwise.
jpayne@68 703
jpayne@68 704 returns -1 if contig is not known.
jpayne@68 705 """
jpayne@68 706 raise NotImplementedError()
jpayne@68 707
jpayne@68 708 def is_valid_reference_name(self, contig):
jpayne@68 709 """
jpayne@68 710 return True if the contig name :term:`contig` is valid; False otherwise.
jpayne@68 711 """
jpayne@68 712 return self.get_tid(contig) != -1
jpayne@68 713
jpayne@68 714 def get_tid(self, contig):
jpayne@68 715 """
jpayne@68 716 return the numerical :term:`tid` corresponding to
jpayne@68 717 :term:`contig`
jpayne@68 718
jpayne@68 719 returns -1 if contig is not known.
jpayne@68 720 """
jpayne@68 721 raise NotImplementedError()
jpayne@68 722
jpayne@68 723 def get_reference_name(self, tid):
jpayne@68 724 """
jpayne@68 725 return :term:`contig` name corresponding to numerical :term:`tid`
jpayne@68 726 """
jpayne@68 727 raise NotImplementedError()