annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pysam/libcbgzf.pyx @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 # cython: language_level=3
jpayne@69 2 """Functions that read and write block gzipped files.
jpayne@69 3
jpayne@69 4 The user of the file doesn't have to worry about the compression
jpayne@69 5 and random access is allowed if an index file is present."""
jpayne@69 6
jpayne@69 7 # based on Python 3.5's gzip module
jpayne@69 8
jpayne@69 9 import io
jpayne@69 10
jpayne@69 11 from libc.stdint cimport int8_t, int16_t, int32_t, int64_t
jpayne@69 12 from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
jpayne@69 13 from libc.stdio cimport SEEK_SET
jpayne@69 14 from libc.stdlib cimport malloc, calloc, realloc, free
jpayne@69 15
jpayne@69 16 from cpython.object cimport PyObject
jpayne@69 17 from cpython.bytes cimport PyBytes_FromStringAndSize, _PyBytes_Resize
jpayne@69 18
jpayne@69 19 from pysam.libcutils cimport force_bytes, encode_filename
jpayne@69 20 from pysam.libchtslib cimport bgzf_open, bgzf_index_build_init, bgzf_write, bgzf_read, \
jpayne@69 21 bgzf_flush, bgzf_index_dump, bgzf_close, bgzf_seek, \
jpayne@69 22 bgzf_tell, bgzf_getline, kstring_t, BGZF
jpayne@69 23
jpayne@69 24 __all__ = ["BGZFile"]
jpayne@69 25
jpayne@69 26
jpayne@69 27 BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE
jpayne@69 28
jpayne@69 29
cdef class BGZFile(object):
    """File-like object for reading or writing block gzipped (BGZF) files.

    The BGZFile class simulates most of the methods of a file object with
    the exception of the truncate() method.  All I/O is delegated to the
    htslib ``bgzf_*`` functions operating on the handle stored in
    ``self.bgzf``; a NULL handle means the object is closed.

    This class only supports opening files in binary mode. If you need to open a
    compressed file in text mode, use the gzip.open() function.
    """
    def __init__(self, filename, mode=None, index=None):
        """Constructor for the BGZFile class.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or
        'xb' depending on whether the file will be read or written. The default
        is 'rb'.  A mode of 'r' is equivalent to one of 'rb', and similarly
        for 'w' and 'wb', 'a' and 'ab', and 'x' and 'xb'.

        If *index* is given and the file is opened for writing, index
        building is initialised here and the index is written to that
        path when the file is closed.

        Raises ValueError for text ('t') or universal-newline ('U') modes and
        IOError if the file cannot be opened or index building cannot be
        initialised.
        """
        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if not mode:
            mode = 'rb'
        elif 'b' not in mode:
            mode += 'b'

        mode = force_bytes(mode)

        self.name = encode_filename(filename)
        self.index = encode_filename(index) if index is not None else None

        self.bgzf = bgzf_open(self.name, mode)
        # The handle must be checked before any field of it is read:
        # dereferencing a NULL handle below would crash the interpreter.
        if self.bgzf == NULL:
            raise IOError(
                "Could not open {!r} with mode {!r}".format(filename, mode))

        if self.bgzf.is_write and index is not None and bgzf_index_build_init(self.bgzf) < 0:
            raise IOError('Error building bgzf index')

    def __dealloc__(self):
        self.close()

    def write(self, data):
        """Write *data* to the file and return the number of bytes written.

        *data* may be ``bytes`` or any object supporting the buffer protocol.
        Raises ValueError if the file is closed and IOError if the file was
        opened for reading or the underlying write fails.
        """
        if not self.bgzf:
            raise ValueError("write() on closed BGZFile object")

        if not self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "write() on read-only BGZFile object")

        if not isinstance(data, bytes):
            # Accept any data that supports the buffer protocol.  It is
            # copied into a bytes object because the <char *> cast below is
            # only applied to a bytes object here, not to a memoryview.
            data = memoryview(data).tobytes()
        length = len(data)

        if length > 0 and bgzf_write(self.bgzf, <char *>data, length) < 0:
            raise IOError('BGZFile write failed')

        return length

    def read(self, size=-1):
        """Read and return up to *size* uncompressed bytes.

        A negative *size* (the default) or ``None`` reads until no more data
        is returned; ``size == 0`` returns ``b''`` without touching the file.
        Raises ValueError if the file is closed and IOError if the file was
        opened for writing or the underlying read fails.
        """
        cdef ssize_t read_size

        if not self.bgzf:
            raise ValueError("read() on closed BGZFile object")

        if self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "read() on write-only BGZFile object")

        if size is None:
            # Follow the usual file-object convention: None means "read all".
            size = -1

        if size < 0:
            chunks = []
            while True:
                # Allocate an uninitialised bytes object and let bgzf_read
                # fill it in place; it is trimmed below on a short read.
                chunk = PyBytes_FromStringAndSize(NULL, BUFFER_SIZE)
                read_size = bgzf_read(self.bgzf, <char *>chunk, BUFFER_SIZE)
                if read_size < 0:
                    raise IOError('Error reading from BGZFile')
                elif not read_size:
                    break
                elif read_size < BUFFER_SIZE:
                    chunk = chunk[:read_size]
                chunks.append(chunk)
            return b''.join(chunks)

        elif size > 0:
            chunk = PyBytes_FromStringAndSize(NULL, size)
            read_size = bgzf_read(self.bgzf, <char *>chunk, size)
            if read_size < 0:
                raise IOError('Error reading from BGZFile')
            elif read_size < size:
                chunk = chunk[:read_size]
            return chunk
        else:
            return b''

    @property
    def closed(self):
        """True if the file has been closed (the handle is NULL)."""
        return self.bgzf == NULL

    def close(self):
        """Flush pending output, write the index if requested, and close.

        Calling close() on an already closed object is a no-op.  The handle
        is always closed and reset to NULL, even when flushing or writing the
        index fails; the first failure is then reported as IOError.
        """
        cdef int ret

        if not self.bgzf:
            return

        error = None
        # Flushing and index dumping only apply to files opened for writing;
        # index building is only initialised for such files in __init__.
        if self.bgzf.is_write:
            if bgzf_flush(self.bgzf) < 0:
                error = 'Error flushing BGZFile object'
            elif self.index and bgzf_index_dump(self.bgzf, self.index, NULL) < 0:
                error = 'Cannot write index'

        # Close unconditionally so that an earlier failure does not leak the
        # handle or leave the object looking open.
        ret = bgzf_close(self.bgzf)
        self.bgzf = NULL

        if error is not None:
            raise IOError(error)
        if ret < 0:
            raise IOError('Error closing BGZFile object')

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def flush(self):
        """Flush buffered output; a no-op for closed or read-only files."""
        if not self.bgzf:
            return

        if self.bgzf.is_write and bgzf_flush(self.bgzf) < 0:
            raise IOError('Error flushing BGZFile object')

    def fileno(self):
        """Always raise AttributeError.

        This class does not expose a file descriptor for the opened file.
        """
        raise AttributeError('fileno')

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file.

        Raises ValueError if the file is closed and IOError if the file was
        opened for writing or the seek fails.
        '''
        if not self.bgzf:
            raise ValueError("rewind() on closed BGZFile object")
        # Rewinding is only meaningful for files opened for reading.
        if self.bgzf.is_write:
            raise IOError("Can't rewind in write mode")
        if bgzf_seek(self.bgzf, 0, SEEK_SET) < 0:
            raise IOError('Error seeking BGZFFile object')

    def readable(self):
        """Return True if the file was opened for reading.

        Raises ValueError if the file is closed.
        """
        if not self.bgzf:
            raise ValueError("readable() on closed BGZFile object")
        return self.bgzf != NULL and not self.bgzf.is_write

    def writable(self):
        """Return a true value if the file is open and was opened for writing."""
        return self.bgzf != NULL and self.bgzf.is_write

    def seekable(self):
        """Return True unconditionally."""
        return True

    def tell(self):
        """Return the current position as reported by bgzf_tell().

        Raises ValueError if the file is closed and IOError if the reported
        position is negative.
        """
        cdef int64_t off

        if not self.bgzf:
            raise ValueError("tell() on closed BGZFile object")
        off = bgzf_tell(self.bgzf)
        if off < 0:
            raise IOError('Error in tell on BGZFFile object')

        return off

    def seek(self, offset, whence=io.SEEK_SET):
        """Seek to *offset*, which is handed unchanged to bgzf_seek().

        Only ``whence == io.SEEK_SET`` is supported; any other value raises
        ValueError.  Returns the non-negative value returned by bgzf_seek().
        Raises ValueError if the file is closed and IOError if the seek
        fails.
        """
        cdef int64_t off

        if not self.bgzf:
            raise ValueError("seek() on closed BGZFile object")
        # Compare by value: identity comparison of integers is unreliable.
        if whence != io.SEEK_SET:
            raise ValueError('Seek from end not supported')

        off = bgzf_seek(self.bgzf, offset, SEEK_SET)
        if off < 0:
            raise IOError('Error seeking BGZFFile object')

        return off

    def _readline_or_none(self):
        """Return the next line as bytes, or None when no line is left.

        The line is the content of the buffer filled by bgzf_getline() with
        ``\\n`` as delimiter.  Returning None for end-of-file lets callers
        tell an empty line (``b''``) apart from the end of the file.
        """
        cdef kstring_t line
        cdef int ret

        if not self.bgzf:
            raise ValueError("readline() on closed BGZFile object")

        if self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "readline() on write-only BGZFile object")

        line.l = line.m = 0
        line.s = NULL

        try:
            ret = bgzf_getline(self.bgzf, b'\n', &line)
            if ret == -1:
                return None
            if ret < -1:
                raise IOError('Error reading line in BGZFFile object')
            return line.s[:line.l]
        finally:
            # The buffer filled by bgzf_getline is released on every path.
            if line.m:
                free(line.s)

    def readline(self, size=-1):
        """Read one line and return it as bytes; ``b''`` at end of file.

        The *size* argument is accepted for file-object compatibility but is
        not used.  Because ``b''`` is also returned for an empty line, iterate
        over the object instead when the two cases must be distinguished.
        Raises ValueError if the file is closed and IOError on read errors.
        """
        line = self._readline_or_none()
        return b'' if line is None else line

    def __iter__(self):
        return self

    def __next__(self):
        # Stop only at the real end of file, not at the first empty line.
        line = self._readline_or_none()
        if line is None:
            raise StopIteration()
        return line