annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pysam/libcbgzf.pyx @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 # cython: language_level=3
jpayne@68 2 """Functions that read and write block gzipped files.
jpayne@68 3
jpayne@68 4 The user of the file doesn't have to worry about the compression
jpayne@68 5 and random access is allowed if an index file is present."""
jpayne@68 6
jpayne@68 7 # based on Python 3.5's gzip module
jpayne@68 8
jpayne@68 9 import io
jpayne@68 10
jpayne@68 11 from libc.stdint cimport int8_t, int16_t, int32_t, int64_t
jpayne@68 12 from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
jpayne@68 13 from libc.stdio cimport SEEK_SET
jpayne@68 14 from libc.stdlib cimport malloc, calloc, realloc, free
jpayne@68 15
jpayne@68 16 from cpython.object cimport PyObject
jpayne@68 17 from cpython.bytes cimport PyBytes_FromStringAndSize, _PyBytes_Resize
jpayne@68 18
jpayne@68 19 from pysam.libcutils cimport force_bytes, encode_filename
jpayne@68 20 from pysam.libchtslib cimport bgzf_open, bgzf_index_build_init, bgzf_write, bgzf_read, \
jpayne@68 21 bgzf_flush, bgzf_index_dump, bgzf_close, bgzf_seek, \
jpayne@68 22 bgzf_tell, bgzf_getline, kstring_t, BGZF
jpayne@68 23
jpayne@68 24 __all__ = ["BGZFile"]  # the only name exported by a star-import of this module
jpayne@68 25
jpayne@68 26
jpayne@68 27 BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE  # bytes requested per bgzf_read() call when BGZFile.read() reads to end of file
jpayne@68 28
jpayne@68 29
cdef class BGZFile(object):
    """The BGZFile class simulates most of the methods of a file object with
    the exception of the truncate() method.

    This class only supports opening files in binary mode. If you need to open a
    compressed file in text mode, use the gzip.open() function.

    Lines returned by readline() and by iteration do not carry their
    trailing newline.
    """
    def __init__(self, filename, mode=None, index=None):
        """Constructor for the BGZFile class.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or
        'xb' depending on whether the file will be read or written. The
        default is 'rb'. A mode of 'r' is equivalent to one of 'rb', and
        similarly for 'w' and 'wb', 'a' and 'ab', and 'x' and 'xb'.

        If *index* is given and the file is opened for writing, index
        building is initialised here and the index is dumped to that path
        by close().

        Raises ValueError for text modes ('t' or 'U' in *mode*) and IOError
        if the file cannot be opened or index building cannot be set up.
        """
        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if not mode:
            mode = 'rb'
        elif 'b' not in mode:
            mode += 'b'

        mode = force_bytes(mode)

        self.name = encode_filename(filename)
        self.index = encode_filename(index) if index is not None else None

        self.bgzf = bgzf_open(self.name, mode)
        # A failed open yields a NULL handle; dereferencing it below would
        # crash the interpreter instead of raising.
        if self.bgzf == NULL:
            raise IOError("Could not open BGZFile {!r}".format(filename))

        if self.bgzf.is_write and index is not None and bgzf_index_build_init(self.bgzf) < 0:
            raise IOError('Error building bgzf index')

    def __dealloc__(self):
        self.close()

    def write(self, data):
        """Compress and write *data*; return the number of bytes consumed.

        *data* may be bytes or any object supporting the buffer protocol.
        Raises ValueError on a closed file and IOError if the file was
        opened for reading or the underlying write fails.
        """
        cdef bytes payload
        cdef Py_ssize_t length

        if not self.bgzf:
            raise ValueError("write() on closed BGZFile object")

        if not self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "write() on read-only BGZFile object")

        if isinstance(data, bytes):
            payload = data
        else:
            # A memoryview object cannot be cast to char* directly, so the
            # buffer contents are materialised as bytes first.
            payload = memoryview(data).tobytes()
        length = len(payload)

        if length > 0 and bgzf_write(self.bgzf, <char *>payload, length) < 0:
            raise IOError('BGZFile write failed')

        return length

    def read(self, size=-1):
        """Read and return up to *size* uncompressed bytes.

        A negative *size* or None reads until end of file; a *size* of 0
        returns b''. Raises ValueError on a closed file and IOError if
        the file was opened for writing or the underlying read fails.
        """
        cdef ssize_t read_size
        cdef Py_ssize_t bufsize
        cdef bytes chunk

        if not self.bgzf:
            raise ValueError("read() on closed BGZFile object")

        if self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "read() on write-only BGZFile object")

        if size is None or size < 0:
            bufsize = BUFFER_SIZE
            chunks = []
            while True:
                # Allocate an uninitialised bytes object and let bgzf_read
                # fill it in place before it becomes visible to Python.
                chunk = PyBytes_FromStringAndSize(NULL, bufsize)
                read_size = bgzf_read(self.bgzf, <char *>chunk, bufsize)
                if read_size < 0:
                    raise IOError('Error reading from BGZFile')
                if read_size == 0:
                    break
                if read_size < bufsize:
                    chunk = chunk[:read_size]
                chunks.append(chunk)
            return b''.join(chunks)

        if size == 0:
            return b''

        chunk = PyBytes_FromStringAndSize(NULL, size)
        read_size = bgzf_read(self.bgzf, <char *>chunk, size)
        if read_size < 0:
            raise IOError('Error reading from BGZFile')
        if read_size < size:
            chunk = chunk[:read_size]
        return chunk

    @property
    def closed(self):
        """True once the underlying BGZF handle has been released."""
        return self.bgzf == NULL

    def close(self):
        """Flush pending output, dump the index if requested, and close.

        Calling close() on an already closed object is a no-op. The
        underlying handle is always released, even when flushing or
        dumping the index fails; the corresponding IOError is still raised.
        """
        cdef int ret

        if not self.bgzf:
            return

        try:
            if self.bgzf.is_write:
                if bgzf_flush(self.bgzf) < 0:
                    raise IOError('Error flushing BGZFile object')
                # Index building is only initialised in write mode (see
                # __init__), so that is the only case with an index to dump.
                if self.index and bgzf_index_dump(self.bgzf, self.index, NULL) < 0:
                    raise IOError('Cannot write index')
        finally:
            ret = bgzf_close(self.bgzf)
            self.bgzf = NULL

        if ret < 0:
            raise IOError('Error closing BGZFile object')

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def flush(self):
        """Flush buffered output; a no-op for closed or read-only objects."""
        if not self.bgzf:
            return

        if self.bgzf.is_write and bgzf_flush(self.bgzf) < 0:
            raise IOError('Error flushing BGZFile object')

    def fileno(self):
        """BGZFile does not expose a file descriptor.

        Always raises AttributeError.
        """
        raise AttributeError('fileno')

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file.

        Raises ValueError on a closed file and IOError if the file was
        opened for writing or the seek fails.
        '''
        if not self.bgzf:
            raise ValueError("rewind() on closed BGZFile object")
        if self.bgzf.is_write:
            raise IOError("Can't rewind in write mode")
        if bgzf_seek(self.bgzf, 0, SEEK_SET) < 0:
            raise IOError('Error seeking BGZFFile object')

    def readable(self):
        """Return True if the file was opened for reading.

        Raises ValueError on a closed file.
        """
        if not self.bgzf:
            raise ValueError("readable() on closed BGZFile object")
        return not self.bgzf.is_write

    def writable(self):
        """Return True if the file is open and was opened for writing."""
        return self.bgzf != NULL and self.bgzf.is_write != 0

    def seekable(self):
        return True

    def tell(self):
        """Return the current position as reported by bgzf_tell().

        Raises ValueError on a closed file and IOError on failure.
        """
        if not self.bgzf:
            raise ValueError("tell() on closed BGZFile object")
        cdef int64_t off = bgzf_tell(self.bgzf)
        if off < 0:
            raise IOError('Error in tell on BGZFFile object')

        return off

    def seek(self, offset, whence=io.SEEK_SET):
        """Move to *offset*, which must be relative to the start of the file.

        Only whence == io.SEEK_SET is supported; anything else raises
        ValueError. Returns the non-negative status value of bgzf_seek()
        and raises IOError if the seek fails.

        NOTE(review): htslib documents the offset as a BGZF virtual offset
        such as the value returned by tell() -- confirm against callers.
        """
        if not self.bgzf:
            raise ValueError("seek() on closed BGZFile object")
        if whence != io.SEEK_SET:
            raise ValueError('Only seeking from the start of the file (io.SEEK_SET) is supported')

        cdef int64_t off = bgzf_seek(self.bgzf, offset, SEEK_SET)
        if off < 0:
            raise IOError('Error seeking BGZFFile object')

        return off

    def _getline(self):
        """Return the next line as bytes, or None at end of file.

        Shared by readline() and __next__() so that an empty line (b'')
        can be told apart from end of file.
        """
        cdef kstring_t line
        cdef int ret

        if not self.bgzf:
            raise ValueError("readline() on closed BGZFile object")

        line.l = line.m = 0
        line.s = NULL

        ret = bgzf_getline(self.bgzf, b'\n', &line)
        try:
            if ret == -1:
                # -1 is the end-of-file marker.
                return None
            if ret < -1:
                # Any other negative value is treated as a read error.
                raise IOError('Error reading line in BGZFFile object')
            return line.s[:line.l]
        finally:
            # The kstring buffer is owned by us whatever the outcome.
            if line.m:
                free(line.s)

    def readline(self, size=-1):
        """Read one line and return it as bytes; b'' at end of file.

        *size* is accepted for file-object compatibility but is ignored.
        Raises ValueError on a closed file and IOError on a read error.
        """
        line = self._getline()
        return b'' if line is None else line

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line; blank lines are yielded as b''."""
        line = self._getline()
        if line is None:
            raise StopIteration()
        return line