annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pysam/libchtslib.pyx @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 # cython: language_level=3
jpayne@69 2 # cython: embedsignature=True
jpayne@69 3 # cython: profile=True
jpayne@69 4 # adds doc-strings for sphinx
jpayne@69 5
jpayne@69 6 ########################################################################
jpayne@69 7 ########################################################################
jpayne@69 8 ## Cython cimports
jpayne@69 9 ########################################################################
jpayne@69 10
jpayne@69 11 from posix.unistd cimport dup
jpayne@69 12 from libc.errno cimport errno
jpayne@69 13 from libc.stdint cimport INT32_MAX
jpayne@69 14 from cpython cimport PyBytes_FromStringAndSize
jpayne@69 15 from pysam.libchtslib cimport *
jpayne@69 16 from pysam.libcutils cimport force_bytes, force_str, charptr_to_str, charptr_to_str_w_len
jpayne@69 17 from pysam.libcutils cimport encode_filename, from_string_and_size, libc_whence_from_io
jpayne@69 18
jpayne@69 19
jpayne@69 20 ########################################################################
jpayne@69 21 ########################################################################
jpayne@69 22 ## Python imports
jpayne@69 23 ########################################################################
jpayne@69 24
jpayne@69 25 import os
jpayne@69 26 import io
jpayne@69 27 import re
jpayne@69 28 from warnings import warn
jpayne@69 29
jpayne@69 30
jpayne@69 31 ########################################################################
jpayne@69 32 ########################################################################
jpayne@69 33 ## Constants
jpayne@69 34 ########################################################################
jpayne@69 35
jpayne@69 36 __all__ = ['get_verbosity', 'set_verbosity', 'HFile', 'HTSFile']
jpayne@69 37
jpayne@69 38 # maximum genomic coordinace
jpayne@69 39 cdef int MAX_POS = (1 << 31) - 1
jpayne@69 40
jpayne@69 41 cdef tuple FORMAT_CATEGORIES = ('UNKNOWN', 'ALIGNMENTS', 'VARIANTS', 'INDEX', 'REGIONS')
jpayne@69 42 cdef tuple FORMATS = ('UNKNOWN', 'BINARY_FORMAT', 'TEXT_FORMAT', 'SAM', 'BAM', 'BAI', 'CRAM', 'CRAI',
jpayne@69 43 'VCF', 'BCF', 'CSI', 'GZI', 'TBI', 'BED')
jpayne@69 44 cdef tuple COMPRESSION = ('NONE', 'GZIP', 'BGZF', 'CUSTOM')
jpayne@69 45
jpayne@69 46
jpayne@69 47 ########################################################################
jpayne@69 48 ########################################################################
jpayne@69 49 ## Verbosity functions
jpayne@69 50 ########################################################################
jpayne@69 51
jpayne@69 52
jpayne@69 53 cpdef set_verbosity(int verbosity):
jpayne@69 54 """Set htslib's hts_verbose global variable to the specified value."""
jpayne@69 55 return hts_set_verbosity(verbosity)
jpayne@69 56
jpayne@69 57 cpdef get_verbosity():
jpayne@69 58 """Return the value of htslib's hts_verbose global variable."""
jpayne@69 59 return hts_get_verbosity()
jpayne@69 60
jpayne@69 61
jpayne@69 62 ########################################################################
jpayne@69 63 ########################################################################
jpayne@69 64 ## HFile wrapper class
jpayne@69 65 ########################################################################
jpayne@69 66
jpayne@69 67 cdef class HFile(object):
jpayne@69 68 cdef hFILE *fp
jpayne@69 69 cdef readonly object name, mode
jpayne@69 70
jpayne@69 71 def __init__(self, name, mode='r', closefd=True):
jpayne@69 72 self._open(name, mode, closefd=True)
jpayne@69 73
jpayne@69 74 def __dealloc__(self):
jpayne@69 75 self.close()
jpayne@69 76
jpayne@69 77 @property
jpayne@69 78 def closed(self):
jpayne@69 79 return self.fp == NULL
jpayne@69 80
jpayne@69 81 cdef _open(self, name, mode, closefd=True):
jpayne@69 82 self.name = name
jpayne@69 83 self.mode = mode
jpayne@69 84
jpayne@69 85 mode = force_bytes(mode)
jpayne@69 86
jpayne@69 87 if isinstance(name, int):
jpayne@69 88 if self.fp != NULL:
jpayne@69 89 name = dup(name)
jpayne@69 90 self.fp = hdopen(name, mode)
jpayne@69 91 else:
jpayne@69 92 name = encode_filename(name)
jpayne@69 93 self.fp = hopen(name, mode)
jpayne@69 94
jpayne@69 95 if not self.fp:
jpayne@69 96 raise IOError(errno, 'failed to open HFile', self.name)
jpayne@69 97
jpayne@69 98 def close(self):
jpayne@69 99 if self.fp == NULL:
jpayne@69 100 return
jpayne@69 101
jpayne@69 102 cdef hFILE *fp = self.fp
jpayne@69 103 self.fp = NULL
jpayne@69 104
jpayne@69 105 if hclose(fp) != 0:
jpayne@69 106 raise IOError(errno, 'failed to close HFile', self.name)
jpayne@69 107
jpayne@69 108 def fileno(self):
jpayne@69 109 if self.fp == NULL:
jpayne@69 110 raise IOError('operation on closed HFile')
jpayne@69 111 if isinstance(self.name, int):
jpayne@69 112 return self.name
jpayne@69 113 else:
jpayne@69 114 raise AttributeError('fileno not available')
jpayne@69 115
jpayne@69 116 def __enter__(self):
jpayne@69 117 return self
jpayne@69 118
jpayne@69 119 def __exit__(self, type, value, tb):
jpayne@69 120 self.close()
jpayne@69 121
jpayne@69 122 def __iter__(self):
jpayne@69 123 return self
jpayne@69 124
jpayne@69 125 def __next__(self):
jpayne@69 126 line = self.readline()
jpayne@69 127 if not line:
jpayne@69 128 raise StopIteration()
jpayne@69 129 return line
jpayne@69 130
jpayne@69 131 def flush(self):
jpayne@69 132 if self.fp == NULL:
jpayne@69 133 raise IOError('operation on closed HFile')
jpayne@69 134 if hflush(self.fp) != 0:
jpayne@69 135 raise IOError(herrno(self.fp), 'failed to flush HFile', self.name)
jpayne@69 136
jpayne@69 137 def isatty(self):
jpayne@69 138 if self.fp == NULL:
jpayne@69 139 raise IOError('operation on closed HFile')
jpayne@69 140 return False
jpayne@69 141
jpayne@69 142 def readable(self):
jpayne@69 143 return self.fp != NULL and 'r' in self.mode
jpayne@69 144
jpayne@69 145 def read(self, Py_ssize_t size=-1):
jpayne@69 146 if self.fp == NULL:
jpayne@69 147 raise IOError('operation on closed HFile')
jpayne@69 148
jpayne@69 149 if size == 0:
jpayne@69 150 return b''
jpayne@69 151
jpayne@69 152 cdef list parts = []
jpayne@69 153 cdef bytes part
jpayne@69 154 cdef Py_ssize_t chunk_size, ret, bytes_read = 0
jpayne@69 155 cdef char *cpart
jpayne@69 156
jpayne@69 157 while size == -1 or bytes_read < size:
jpayne@69 158 chunk_size = 4096
jpayne@69 159 if size != -1:
jpayne@69 160 chunk_size = min(chunk_size, size - bytes_read)
jpayne@69 161
jpayne@69 162 part = PyBytes_FromStringAndSize(NULL, chunk_size)
jpayne@69 163 cpart = <char *>part
jpayne@69 164 ret = hread(self.fp, <void *>cpart, chunk_size)
jpayne@69 165
jpayne@69 166 if ret < 0:
jpayne@69 167 IOError(herrno(self.fp), 'failed to read HFile', self.name)
jpayne@69 168 elif not ret:
jpayne@69 169 break
jpayne@69 170
jpayne@69 171 bytes_read += ret
jpayne@69 172
jpayne@69 173 if ret < chunk_size:
jpayne@69 174 part = cpart[:ret]
jpayne@69 175
jpayne@69 176 parts.append(part)
jpayne@69 177
jpayne@69 178 return b''.join(parts)
jpayne@69 179
jpayne@69 180 def readall(self):
jpayne@69 181 return self.read()
jpayne@69 182
jpayne@69 183 def readinto(self, buf):
jpayne@69 184 if self.fp == NULL:
jpayne@69 185 raise IOError('operation on closed HFile')
jpayne@69 186
jpayne@69 187 size = len(buf)
jpayne@69 188
jpayne@69 189 if size == 0:
jpayne@69 190 return size
jpayne@69 191
jpayne@69 192 mv = memoryview(buf)
jpayne@69 193 ret = hread(self.fp, <void *>mv, size)
jpayne@69 194
jpayne@69 195 if ret < 0:
jpayne@69 196 IOError(herrno(self.fp), 'failed to read HFile', self.name)
jpayne@69 197
jpayne@69 198 return ret
jpayne@69 199
jpayne@69 200 def readline(self, Py_ssize_t size=-1):
jpayne@69 201 if self.fp == NULL:
jpayne@69 202 raise IOError('operation on closed HFile')
jpayne@69 203
jpayne@69 204 if size == 0:
jpayne@69 205 return b''
jpayne@69 206
jpayne@69 207 cdef list parts = []
jpayne@69 208 cdef bytes part
jpayne@69 209 cdef Py_ssize_t chunk_size, ret, bytes_read = 0
jpayne@69 210 cdef char *cpart
jpayne@69 211
jpayne@69 212 while size == -1 or bytes_read < size:
jpayne@69 213 chunk_size = 4096
jpayne@69 214 if size != -1:
jpayne@69 215 chunk_size = min(chunk_size, size - bytes_read)
jpayne@69 216
jpayne@69 217 part = PyBytes_FromStringAndSize(NULL, chunk_size)
jpayne@69 218 cpart = <char *>part
jpayne@69 219
jpayne@69 220 # Python bytes objects allocate an extra byte for a null terminator
jpayne@69 221 ret = hgetln(cpart, chunk_size+1, self.fp)
jpayne@69 222
jpayne@69 223 if ret < 0:
jpayne@69 224 IOError(herrno(self.fp), 'failed to read HFile', self.name)
jpayne@69 225 elif not ret:
jpayne@69 226 break
jpayne@69 227
jpayne@69 228 bytes_read += ret
jpayne@69 229
jpayne@69 230 if ret < chunk_size:
jpayne@69 231 part = cpart[:ret]
jpayne@69 232 cpart = <char *>part
jpayne@69 233
jpayne@69 234 parts.append(part)
jpayne@69 235
jpayne@69 236 if cpart[ret-1] == b'\n':
jpayne@69 237 break
jpayne@69 238
jpayne@69 239 return b''.join(parts)
jpayne@69 240
jpayne@69 241 def readlines(self):
jpayne@69 242 return list(self)
jpayne@69 243
jpayne@69 244 def seek(self, Py_ssize_t offset, int whence=io.SEEK_SET):
jpayne@69 245 if self.fp == NULL:
jpayne@69 246 raise IOError('operation on closed HFile')
jpayne@69 247
jpayne@69 248 cdef Py_ssize_t off = hseek(self.fp, offset, libc_whence_from_io(whence))
jpayne@69 249
jpayne@69 250 if off < 0:
jpayne@69 251 raise IOError(herrno(self.fp), 'seek failed on HFile', self.name)
jpayne@69 252
jpayne@69 253 return off
jpayne@69 254
jpayne@69 255 def tell(self):
jpayne@69 256 if self.fp == NULL:
jpayne@69 257 raise IOError('operation on closed HFile')
jpayne@69 258
jpayne@69 259 ret = htell(self.fp)
jpayne@69 260
jpayne@69 261 if ret < 0:
jpayne@69 262 raise IOError(herrno(self.fp), 'tell failed on HFile', self.name)
jpayne@69 263
jpayne@69 264 return ret
jpayne@69 265
jpayne@69 266 def seekable(self):
jpayne@69 267 return self.fp != NULL
jpayne@69 268
jpayne@69 269 def truncate(self, size=None):
jpayne@69 270 raise NotImplementedError()
jpayne@69 271
jpayne@69 272 def writable(self):
jpayne@69 273 return self.fp != NULL and 'w' in self.mode
jpayne@69 274
jpayne@69 275 def write(self, bytes b):
jpayne@69 276 if self.fp == NULL:
jpayne@69 277 raise IOError('operation on closed HFile')
jpayne@69 278
jpayne@69 279 got = hwrite(self.fp, <void *>b, len(b))
jpayne@69 280
jpayne@69 281 if got < 0:
jpayne@69 282 raise IOError(herrno(self.fp), 'write failed on HFile', self.name)
jpayne@69 283
jpayne@69 284 return got
jpayne@69 285
jpayne@69 286 def writelines(self, lines):
jpayne@69 287 for line in lines:
jpayne@69 288 self.write(line)
jpayne@69 289
jpayne@69 290
jpayne@69 291 ########################################################################
jpayne@69 292 ########################################################################
jpayne@69 293 ## Helpers for backward compatibility to hide the difference between
jpayne@69 294 ## boolean properties and methods
jpayne@69 295 ########################################################################
jpayne@69 296
jpayne@69 297 class CallableValue(object):
jpayne@69 298 def __init__(self, value):
jpayne@69 299 self.value = value
jpayne@69 300 def __call__(self):
jpayne@69 301 return self.value
jpayne@69 302 def __bool__(self):
jpayne@69 303 return self.value
jpayne@69 304 def __nonzero__(self):
jpayne@69 305 return self.value
jpayne@69 306 def __eq__(self, other):
jpayne@69 307 return self.value == other
jpayne@69 308 def __ne__(self, other):
jpayne@69 309 return self.value != other
jpayne@69 310
jpayne@69 311
jpayne@69 312 CTrue = CallableValue(True)
jpayne@69 313 CFalse = CallableValue(False)
jpayne@69 314
jpayne@69 315
jpayne@69 316 ########################################################################
jpayne@69 317 ########################################################################
jpayne@69 318 ## HTSFile wrapper class (base class for AlignmentFile and VariantFile)
jpayne@69 319 ########################################################################
jpayne@69 320
jpayne@69 321 cdef class HTSFile(object):
jpayne@69 322 """
jpayne@69 323 Base class for HTS file types
jpayne@69 324 """
jpayne@69 325
jpayne@69 326 def __cinit__(self, *args, **kwargs):
jpayne@69 327 self.htsfile = NULL
jpayne@69 328 self.threads = 1
jpayne@69 329 self.duplicate_filehandle = True
jpayne@69 330
jpayne@69 331 def close(self):
jpayne@69 332 if self.htsfile:
jpayne@69 333 hts_close(self.htsfile)
jpayne@69 334 self.htsfile = NULL
jpayne@69 335
jpayne@69 336 def __dealloc__(self):
jpayne@69 337 if self.htsfile:
jpayne@69 338 hts_close(self.htsfile)
jpayne@69 339 self.htsfile = NULL
jpayne@69 340
jpayne@69 341 def flush(self):
jpayne@69 342 """Flush any buffered data to the underlying output stream."""
jpayne@69 343 if self.htsfile:
jpayne@69 344 if hts_flush(self.htsfile) < 0:
jpayne@69 345 raise OSError(errno, f'Flushing {type(self).__name__} failed', force_str(self.filename))
jpayne@69 346
jpayne@69 347 def check_truncation(self, ignore_truncation=False):
jpayne@69 348 """Check if file is truncated."""
jpayne@69 349 if not self.htsfile:
jpayne@69 350 return
jpayne@69 351
jpayne@69 352 if self.htsfile.format.compression != bgzf:
jpayne@69 353 return
jpayne@69 354
jpayne@69 355 cdef BGZF *bgzfp = hts_get_bgzfp(self.htsfile)
jpayne@69 356 if not bgzfp:
jpayne@69 357 return
jpayne@69 358
jpayne@69 359 cdef int ret = bgzf_check_EOF(bgzfp)
jpayne@69 360 if ret < 0:
jpayne@69 361 raise IOError(errno, 'error checking for EOF marker')
jpayne@69 362 elif ret == 0:
jpayne@69 363 msg = 'no BGZF EOF marker; file may be truncated'.format(self.filename)
jpayne@69 364 if ignore_truncation:
jpayne@69 365 warn(msg)
jpayne@69 366 else:
jpayne@69 367 raise IOError(msg)
jpayne@69 368
jpayne@69 369 def __enter__(self):
jpayne@69 370 return self
jpayne@69 371
jpayne@69 372 def __exit__(self, exc_type, exc_value, traceback):
jpayne@69 373 self.close()
jpayne@69 374 return False
jpayne@69 375
jpayne@69 376 @property
jpayne@69 377 def category(self):
jpayne@69 378 """General file format category. One of UNKNOWN, ALIGNMENTS,
jpayne@69 379 VARIANTS, INDEX, REGIONS"""
jpayne@69 380 if not self.htsfile:
jpayne@69 381 raise ValueError('metadata not available on closed file')
jpayne@69 382 return FORMAT_CATEGORIES[self.htsfile.format.category]
jpayne@69 383
jpayne@69 384 @property
jpayne@69 385 def format(self):
jpayne@69 386 """File format.
jpayne@69 387
jpayne@69 388 One of UNKNOWN, BINARY_FORMAT, TEXT_FORMAT, SAM, BAM,
jpayne@69 389 BAI, CRAM, CRAI, VCF, BCF, CSI, GZI, TBI, BED.
jpayne@69 390 """
jpayne@69 391 if not self.htsfile:
jpayne@69 392 raise ValueError('metadata not available on closed file')
jpayne@69 393 return FORMATS[self.htsfile.format.format]
jpayne@69 394
jpayne@69 395 @property
jpayne@69 396 def version(self):
jpayne@69 397 """Tuple of file format version numbers (major, minor)"""
jpayne@69 398 if not self.htsfile:
jpayne@69 399 raise ValueError('metadata not available on closed file')
jpayne@69 400 return self.htsfile.format.version.major, self.htsfile.format.version.minor
jpayne@69 401
jpayne@69 402 @property
jpayne@69 403 def compression(self):
jpayne@69 404 """File compression.
jpayne@69 405
jpayne@69 406 One of NONE, GZIP, BGZF, CUSTOM."""
jpayne@69 407 if not self.htsfile:
jpayne@69 408 raise ValueError('metadata not available on closed file')
jpayne@69 409 return COMPRESSION[self.htsfile.format.compression]
jpayne@69 410
jpayne@69 411 @property
jpayne@69 412 def description(self):
jpayne@69 413 """Vaguely human readable description of the file format"""
jpayne@69 414 if not self.htsfile:
jpayne@69 415 raise ValueError('metadata not available on closed file')
jpayne@69 416 cdef char *desc = hts_format_description(&self.htsfile.format)
jpayne@69 417 try:
jpayne@69 418 return charptr_to_str(desc)
jpayne@69 419 finally:
jpayne@69 420 free(desc)
jpayne@69 421
jpayne@69 422 @property
jpayne@69 423 def is_open(self):
jpayne@69 424 """return True if HTSFile is open and in a valid state."""
jpayne@69 425 return CTrue if self.htsfile != NULL else CFalse
jpayne@69 426
jpayne@69 427 @property
jpayne@69 428 def is_closed(self):
jpayne@69 429 """return True if HTSFile is closed."""
jpayne@69 430 return self.htsfile == NULL
jpayne@69 431
jpayne@69 432 @property
jpayne@69 433 def closed(self):
jpayne@69 434 """return True if HTSFile is closed."""
jpayne@69 435 return self.htsfile == NULL
jpayne@69 436
jpayne@69 437 @property
jpayne@69 438 def is_write(self):
jpayne@69 439 """return True if HTSFile is open for writing"""
jpayne@69 440 return self.htsfile != NULL and self.htsfile.is_write != 0
jpayne@69 441
jpayne@69 442 @property
jpayne@69 443 def is_read(self):
jpayne@69 444 """return True if HTSFile is open for reading"""
jpayne@69 445 return self.htsfile != NULL and self.htsfile.is_write == 0
jpayne@69 446
jpayne@69 447 @property
jpayne@69 448 def is_sam(self):
jpayne@69 449 """return True if HTSFile is reading or writing a SAM alignment file"""
jpayne@69 450 return self.htsfile != NULL and self.htsfile.format.format == sam
jpayne@69 451
jpayne@69 452 @property
jpayne@69 453 def is_bam(self):
jpayne@69 454 """return True if HTSFile is reading or writing a BAM alignment file"""
jpayne@69 455 return self.htsfile != NULL and self.htsfile.format.format == bam
jpayne@69 456
jpayne@69 457 @property
jpayne@69 458 def is_cram(self):
jpayne@69 459 """return True if HTSFile is reading or writing a BAM alignment file"""
jpayne@69 460 return self.htsfile != NULL and self.htsfile.format.format == cram
jpayne@69 461
jpayne@69 462 @property
jpayne@69 463 def is_vcf(self):
jpayne@69 464 """return True if HTSFile is reading or writing a VCF variant file"""
jpayne@69 465 return self.htsfile != NULL and self.htsfile.format.format == vcf
jpayne@69 466
jpayne@69 467 @property
jpayne@69 468 def is_bcf(self):
jpayne@69 469 """return True if HTSFile is reading or writing a BCF variant file"""
jpayne@69 470 return self.htsfile != NULL and self.htsfile.format.format == bcf
jpayne@69 471
jpayne@69 472 def reset(self):
jpayne@69 473 """reset file position to beginning of file just after the header.
jpayne@69 474
jpayne@69 475 Returns
jpayne@69 476 -------
jpayne@69 477
jpayne@69 478 The file position after moving the file pointer. : pointer
jpayne@69 479
jpayne@69 480 """
jpayne@69 481 return self.seek(self.start_offset)
jpayne@69 482
jpayne@69 483 def seek(self, uint64_t offset, int whence=io.SEEK_SET):
jpayne@69 484 """move file pointer to position *offset*, see :meth:`pysam.HTSFile.tell`."""
jpayne@69 485 if not self.is_open:
jpayne@69 486 raise ValueError('I/O operation on closed file')
jpayne@69 487 if self.is_stream:
jpayne@69 488 raise IOError('seek not available in streams')
jpayne@69 489
jpayne@69 490 whence = libc_whence_from_io(whence)
jpayne@69 491
jpayne@69 492 cdef int64_t ret
jpayne@69 493 if self.htsfile.format.compression == bgzf:
jpayne@69 494 with nogil:
jpayne@69 495 ret = bgzf_seek(hts_get_bgzfp(self.htsfile), offset, whence)
jpayne@69 496 elif self.htsfile.format.compression == no_compression:
jpayne@69 497 ret = 0 if (hseek(self.htsfile.fp.hfile, offset, whence) >= 0) else -1
jpayne@69 498 else:
jpayne@69 499 raise NotImplementedError("seek not implemented in files compressed by method {}".format(
jpayne@69 500 self.htsfile.format.compression))
jpayne@69 501 return ret
jpayne@69 502
jpayne@69 503 def tell(self):
jpayne@69 504 """return current file position, see :meth:`pysam.HTSFile.seek`."""
jpayne@69 505 if not self.is_open:
jpayne@69 506 raise ValueError('I/O operation on closed file')
jpayne@69 507 if self.is_stream:
jpayne@69 508 raise IOError('tell not available in streams')
jpayne@69 509
jpayne@69 510 cdef int64_t ret
jpayne@69 511 if self.htsfile.format.compression == bgzf:
jpayne@69 512 with nogil:
jpayne@69 513 ret = bgzf_tell(hts_get_bgzfp(self.htsfile))
jpayne@69 514 elif self.htsfile.format.compression == no_compression:
jpayne@69 515 ret = htell(self.htsfile.fp.hfile)
jpayne@69 516 elif self.htsfile.format.format == cram:
jpayne@69 517 with nogil:
jpayne@69 518 ret = htell(cram_fd_get_fp(self.htsfile.fp.cram))
jpayne@69 519 else:
jpayne@69 520 raise NotImplementedError("seek not implemented in files compressed by method {}".format(
jpayne@69 521 self.htsfile.format.compression))
jpayne@69 522
jpayne@69 523 return ret
jpayne@69 524
jpayne@69 525 cdef htsFile *_open_htsfile(self) except? NULL:
jpayne@69 526 cdef char *cfilename
jpayne@69 527 cdef char *cmode = self.mode
jpayne@69 528 cdef int fd, dup_fd, threads
jpayne@69 529
jpayne@69 530 threads = self.threads - 1
jpayne@69 531 if isinstance(self.filename, bytes):
jpayne@69 532 cfilename = self.filename
jpayne@69 533 with nogil:
jpayne@69 534 htsfile = hts_open(cfilename, cmode)
jpayne@69 535 if htsfile != NULL:
jpayne@69 536 hts_set_threads(htsfile, threads)
jpayne@69 537 return htsfile
jpayne@69 538 else:
jpayne@69 539 if isinstance(self.filename, int):
jpayne@69 540 fd = self.filename
jpayne@69 541 else:
jpayne@69 542 fd = self.filename.fileno()
jpayne@69 543
jpayne@69 544 if self.duplicate_filehandle:
jpayne@69 545 dup_fd = dup(fd)
jpayne@69 546 else:
jpayne@69 547 dup_fd = fd
jpayne@69 548
jpayne@69 549 # Replicate mode normalization done in hts_open_format
jpayne@69 550 smode = self.mode.replace(b'b', b'').replace(b'c', b'')
jpayne@69 551 if b'b' in self.mode:
jpayne@69 552 smode += b'b'
jpayne@69 553 elif b'c' in self.mode:
jpayne@69 554 smode += b'c'
jpayne@69 555 cmode = smode
jpayne@69 556
jpayne@69 557 hfile = hdopen(dup_fd, cmode)
jpayne@69 558 if hfile == NULL:
jpayne@69 559 raise IOError('Cannot create hfile')
jpayne@69 560
jpayne@69 561 try:
jpayne@69 562 # filename.name can be an int
jpayne@69 563 filename = str(self.filename.name)
jpayne@69 564 except AttributeError:
jpayne@69 565 filename = '<fd:{}>'.format(fd)
jpayne@69 566
jpayne@69 567 filename = encode_filename(filename)
jpayne@69 568 cfilename = filename
jpayne@69 569 with nogil:
jpayne@69 570 htsfile = hts_hopen(hfile, cfilename, cmode)
jpayne@69 571 if htsfile != NULL:
jpayne@69 572 hts_set_threads(htsfile, threads)
jpayne@69 573 return htsfile
jpayne@69 574
jpayne@69 575 def add_hts_options(self, format_options=None):
jpayne@69 576 """Given a list of key=value format option strings, add them to an open htsFile
jpayne@69 577 """
jpayne@69 578 cdef int rval
jpayne@69 579 cdef hts_opt *opts = NULL
jpayne@69 580
jpayne@69 581 if format_options:
jpayne@69 582 for format_option in format_options:
jpayne@69 583 rval = hts_opt_add(&opts, format_option)
jpayne@69 584 if rval != 0:
jpayne@69 585 if opts != NULL:
jpayne@69 586 hts_opt_free(opts)
jpayne@69 587 raise RuntimeError('Invalid format option ({}) specified'.format(format_option))
jpayne@69 588 if opts != NULL:
jpayne@69 589 rval = hts_opt_apply(self.htsfile, opts)
jpayne@69 590 if rval != 0:
jpayne@69 591 hts_opt_free(opts)
jpayne@69 592 raise RuntimeError('An error occurred while applying the requested format options')
jpayne@69 593 hts_opt_free(opts)
jpayne@69 594
jpayne@69 595 def parse_region(self, contig=None, start=None, stop=None,
jpayne@69 596 region=None, tid=None,
jpayne@69 597 reference=None, end=None):
jpayne@69 598 """parse alternative ways to specify a genomic region. A region can
jpayne@69 599 either be specified by :term:`contig`, `start` and
jpayne@69 600 `stop`. `start` and `stop` denote 0-based, half-open
jpayne@69 601 intervals. :term:`reference` and `end` are also accepted for
jpayne@69 602 backward compatibility as synonyms for :term:`contig` and
jpayne@69 603 `stop`, respectively.
jpayne@69 604
jpayne@69 605 Alternatively, a samtools :term:`region` string can be
jpayne@69 606 supplied.
jpayne@69 607
jpayne@69 608 If any of the coordinates are missing they will be replaced by
jpayne@69 609 the minimum (`start`) or maximum (`stop`) coordinate.
jpayne@69 610
jpayne@69 611 Note that region strings are 1-based inclusive, while `start`
jpayne@69 612 and `stop` denote an interval in 0-based, half-open
jpayne@69 613 coordinates (like BED files and Python slices).
jpayne@69 614
jpayne@69 615 If `contig` or `region` or are ``*``, unmapped reads at the end
jpayne@69 616 of a BAM file will be returned. Setting either to ``.`` will
jpayne@69 617 iterate from the beginning of the file.
jpayne@69 618
jpayne@69 619 Returns
jpayne@69 620 -------
jpayne@69 621
jpayne@69 622 tuple :
jpayne@69 623 a tuple of `flag`, :term:`tid`, `start` and
jpayne@69 624 `stop`. The flag indicates whether no coordinates were
jpayne@69 625 supplied and the genomic region is the complete genomic space.
jpayne@69 626
jpayne@69 627 Raises
jpayne@69 628 ------
jpayne@69 629
jpayne@69 630 ValueError
jpayne@69 631 for invalid or out of bounds regions.
jpayne@69 632
jpayne@69 633 """
jpayne@69 634 cdef int rtid
jpayne@69 635 cdef int32_t rstart
jpayne@69 636 cdef int32_t rstop
jpayne@69 637
jpayne@69 638 if reference is not None:
jpayne@69 639 if contig is not None:
jpayne@69 640 raise ValueError('contig and reference should not both be specified')
jpayne@69 641 contig = reference
jpayne@69 642
jpayne@69 643 if end is not None:
jpayne@69 644 if stop is not None:
jpayne@69 645 raise ValueError('stop and end should not both be specified')
jpayne@69 646 stop = end
jpayne@69 647
jpayne@69 648 if contig is None and tid is None and region is None:
jpayne@69 649 return 0, 0, 0, MAX_POS
jpayne@69 650
jpayne@69 651 rtid = -1
jpayne@69 652 rstart = 0
jpayne@69 653 rstop = MAX_POS
jpayne@69 654 if start is not None:
jpayne@69 655 try:
jpayne@69 656 rstart = start
jpayne@69 657 except OverflowError:
jpayne@69 658 raise ValueError('start out of range (%i)' % start)
jpayne@69 659
jpayne@69 660 if stop is not None:
jpayne@69 661 try:
jpayne@69 662 rstop = stop
jpayne@69 663 except OverflowError:
jpayne@69 664 raise ValueError('stop out of range (%i)' % stop)
jpayne@69 665
jpayne@69 666 if region:
jpayne@69 667 region = force_str(region)
jpayne@69 668 if ":" in region:
jpayne@69 669 contig, coord = region.split(":")
jpayne@69 670 parts = coord.split("-")
jpayne@69 671 rstart = int(parts[0]) - 1
jpayne@69 672 if len(parts) >= 1:
jpayne@69 673 rstop = int(parts[1])
jpayne@69 674 else:
jpayne@69 675 contig = region
jpayne@69 676
jpayne@69 677 if tid is not None:
jpayne@69 678 if not self.is_valid_tid(tid):
jpayne@69 679 raise IndexError('invalid tid')
jpayne@69 680 rtid = tid
jpayne@69 681 else:
jpayne@69 682 if contig == "*":
jpayne@69 683 rtid = HTS_IDX_NOCOOR
jpayne@69 684 elif contig == ".":
jpayne@69 685 rtid = HTS_IDX_START
jpayne@69 686 else:
jpayne@69 687 rtid = self.get_tid(contig)
jpayne@69 688 if rtid < 0:
jpayne@69 689 raise ValueError('invalid contig `%s`' % contig)
jpayne@69 690
jpayne@69 691 if rstart > rstop:
jpayne@69 692 raise ValueError('invalid coordinates: start (%i) > stop (%i)' % (rstart, rstop))
jpayne@69 693 if not 0 <= rstart < MAX_POS:
jpayne@69 694 raise ValueError('start out of range (%i)' % rstart)
jpayne@69 695 if not 0 <= rstop <= MAX_POS:
jpayne@69 696 raise ValueError('stop out of range (%i)' % rstop)
jpayne@69 697
jpayne@69 698 return 1, rtid, rstart, rstop
jpayne@69 699
jpayne@69 700 def is_valid_tid(self, tid):
jpayne@69 701 """
jpayne@69 702 return True if the numerical :term:`tid` is valid; False otherwise.
jpayne@69 703
jpayne@69 704 returns -1 if contig is not known.
jpayne@69 705 """
jpayne@69 706 raise NotImplementedError()
jpayne@69 707
jpayne@69 708 def is_valid_reference_name(self, contig):
jpayne@69 709 """
jpayne@69 710 return True if the contig name :term:`contig` is valid; False otherwise.
jpayne@69 711 """
jpayne@69 712 return self.get_tid(contig) != -1
jpayne@69 713
jpayne@69 714 def get_tid(self, contig):
jpayne@69 715 """
jpayne@69 716 return the numerical :term:`tid` corresponding to
jpayne@69 717 :term:`contig`
jpayne@69 718
jpayne@69 719 returns -1 if contig is not known.
jpayne@69 720 """
jpayne@69 721 raise NotImplementedError()
jpayne@69 722
jpayne@69 723 def get_reference_name(self, tid):
jpayne@69 724 """
jpayne@69 725 return :term:`contig` name corresponding to numerical :term:`tid`
jpayne@69 726 """
jpayne@69 727 raise NotImplementedError()