comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pysam/libcbgzf.pyx @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 # cython: language_level=3
2 """Functions that read and write block gzipped files.
3
4 The user of the file doesn't have to worry about the compression
5 and random access is allowed if an index file is present."""
6
7 # based on Python 3.5's gzip module
8
9 import io
10
11 from libc.stdint cimport int8_t, int16_t, int32_t, int64_t
12 from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
13 from libc.stdio cimport SEEK_SET
14 from libc.stdlib cimport malloc, calloc, realloc, free
15
16 from cpython.object cimport PyObject
17 from cpython.bytes cimport PyBytes_FromStringAndSize, _PyBytes_Resize
18
19 from pysam.libcutils cimport force_bytes, encode_filename
20 from pysam.libchtslib cimport bgzf_open, bgzf_index_build_init, bgzf_write, bgzf_read, \
21 bgzf_flush, bgzf_index_dump, bgzf_close, bgzf_seek, \
22 bgzf_tell, bgzf_getline, kstring_t, BGZF
23
# Names exported by ``from pysam.libcbgzf import *``.
__all__ = ["BGZFile"]


# Chunk size, in bytes, used by BGZFile.read() when it is asked to read
# until the end of the file.
BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE
28
29
cdef class BGZFile(object):
    """The BGZFile class simulates most of the methods of a file object with
    the exception of the truncate() method.

    This class only supports opening files in binary mode. If you need to open a
    compressed file in text mode, use the gzip.open() function.

    The object wraps a ``BGZF`` handle obtained from ``bgzf_open``; the handle
    is NULL once the file has been closed.
    """
    def __init__(self, filename, mode=None, index=None):
        """Constructor for the BGZFile class.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or
        'xb' depending on whether the file will be read or written. The default
        is 'rb'. A mode of 'r' is equivalent to one of 'rb', and similarly for
        'w' and 'wb', 'a' and 'ab', and 'x' and 'xb'.

        If *index* is given and the file is opened for writing, index building
        is initialised here and the index is dumped to *index* on close().

        Raises ValueError for text modes ('t' or 'U' in *mode*) and IOError if
        the file cannot be opened or index building cannot be initialised.
        """
        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if not mode:
            mode = 'rb'
        elif 'b' not in mode:
            mode += 'b'

        # keep the textual mode around for error reporting
        requested_mode = mode
        mode = force_bytes(mode)

        self.name = encode_filename(filename)
        self.index = encode_filename(index) if index is not None else None

        self.bgzf = bgzf_open(self.name, mode)
        if self.bgzf == NULL:
            # Without this check the is_write access below would dereference
            # a NULL pointer when the file cannot be opened.
            raise IOError("Could not open BGZFile {!r} with mode {!r}".format(
                filename, requested_mode))

        if self.bgzf.is_write and index is not None and bgzf_index_build_init(self.bgzf) < 0:
            raise IOError('Error building bgzf index')

    def __dealloc__(self):
        # close() is a no-op when the handle is already NULL.
        self.close()

    def write(self, data):
        """Write *data* to the file and return the number of bytes written.

        *data* may be bytes or any object supporting the buffer protocol.

        Raises ValueError if the file is closed and IOError if the file is
        not open for writing or the write fails.
        """
        if not self.bgzf:
            raise ValueError("write() on closed BGZFile object")

        if not self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "write() on read-only BGZFile object")

        if isinstance(data, bytes):
            payload = data
        else:
            # Accept any data that supports the buffer protocol.  A
            # memoryview object cannot be coerced to char* directly, so
            # copy its contents into a bytes object first.
            payload = memoryview(data).tobytes()

        length = len(payload)

        if length > 0 and bgzf_write(self.bgzf, <char *>payload, length) < 0:
            raise IOError('BGZFile write failed')

        return length

    def read(self, size=-1):
        """Read and return up to *size* uncompressed bytes.

        A negative *size* (the default) or None reads until bgzf_read()
        reports no more data; a *size* of 0 returns b''.

        Raises ValueError if the file is closed and IOError if the file is
        open for writing or the read fails.
        """
        cdef ssize_t read_size

        if not self.bgzf:
            raise ValueError("read() on closed BGZFile object")

        if self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "read() on write-only BGZFile object")

        if size is None:
            size = -1

        if size == 0:
            return b''

        if size > 0:
            # allocate an uninitialised bytes object and let htslib fill it
            chunk = PyBytes_FromStringAndSize(NULL, size)
            read_size = bgzf_read(self.bgzf, <char *>chunk, size)
            if read_size < 0:
                raise IOError('Error reading from BGZFile')
            if read_size < size:
                chunk = chunk[:read_size]
            return chunk

        # size < 0: read BUFFER_SIZE-sized chunks until nothing is returned
        chunks = []
        while True:
            chunk = PyBytes_FromStringAndSize(NULL, BUFFER_SIZE)
            read_size = bgzf_read(self.bgzf, <char *>chunk, BUFFER_SIZE)
            if read_size < 0:
                raise IOError('Error reading from BGZFile')
            if read_size == 0:
                break
            if read_size < BUFFER_SIZE:
                chunk = chunk[:read_size]
            chunks.append(chunk)
        return b''.join(chunks)

    @property
    def closed(self):
        """True once the underlying handle has been released."""
        return self.bgzf == NULL

    def close(self):
        """Flush pending data, write the index if requested, and close.

        Calling close() on an already closed file does nothing.  The
        underlying handle is always released, even when flushing or index
        writing fails; the first failure is then reported as IOError.
        """
        cdef int ret

        if not self.bgzf:
            return

        error = None
        if self.bgzf.is_write:
            if bgzf_flush(self.bgzf) < 0:
                error = 'Error flushing BGZFile object'
            elif self.index and bgzf_index_dump(self.bgzf, self.index, NULL) < 0:
                # The index is only initialised for writers (see __init__),
                # so it is only dumped for writers.
                error = 'Cannot write index'

        # Release the handle before raising so that a failed flush or index
        # dump neither leaks it nor leaves the object looking open.
        ret = bgzf_close(self.bgzf)
        self.bgzf = NULL

        if error is not None:
            raise IOError(error)
        if ret < 0:
            raise IOError('Error closing BGZFile object')

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def flush(self):
        """Flush buffered data of a file opened for writing.

        Does nothing for closed files and for files opened for reading.
        Raises IOError if flushing fails.
        """
        if not self.bgzf:
            return

        if self.bgzf.is_write and bgzf_flush(self.bgzf) < 0:
            raise IOError('Error flushing BGZFile object')

    def fileno(self):
        """File descriptors are not exposed by BGZFile.

        Always raises AttributeError.
        """
        raise AttributeError('fileno')

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file.

        Raises ValueError if the file is closed and IOError if the file is
        open for writing or the seek fails.'''
        if not self.bgzf:
            raise ValueError("rewind() on closed BGZFile object")
        if self.bgzf.is_write:
            raise IOError("Can't rewind in write mode")
        if bgzf_seek(self.bgzf, 0, SEEK_SET) < 0:
            raise IOError('Error seeking BGZFFile object')

    def readable(self):
        """Return True if the file is open for reading.

        Raises ValueError if the file is closed.
        """
        if not self.bgzf:
            raise ValueError("readable() on closed BGZFile object")
        return not self.bgzf.is_write

    def writable(self):
        """Return a true value if the file is open and was opened for writing."""
        return self.bgzf != NULL and self.bgzf.is_write

    def seekable(self):
        """Always returns True."""
        return True

    def tell(self):
        """Return the current position as reported by bgzf_tell().

        Raises ValueError if the file is closed and IOError if bgzf_tell()
        reports a negative value.
        """
        cdef int64_t off

        if not self.bgzf:
            raise ValueError("tell() on closed BGZFile object")

        off = bgzf_tell(self.bgzf)
        if off < 0:
            raise IOError('Error in tell on BGZFFile object')

        return off

    def seek(self, offset, whence=io.SEEK_SET):
        """Seek to *offset*; only ``whence=io.SEEK_SET`` is supported.

        Returns the (non-negative) value returned by bgzf_seek().
        NOTE(review): htslib documents that value as 0 on success rather
        than the new position -- confirm before relying on it.

        Raises ValueError if the file is closed or *whence* is not
        io.SEEK_SET, and IOError if the seek fails.
        """
        cdef int64_t off

        if not self.bgzf:
            raise ValueError("seek() on closed BGZFile object")
        # compare by value: identity of int objects is an implementation detail
        if whence != io.SEEK_SET:
            raise ValueError('Seek from end not supported')

        off = bgzf_seek(self.bgzf, offset, SEEK_SET)
        if off < 0:
            raise IOError('Error seeking BGZFFile object')

        return off

    def _getline(self):
        """Read one line with bgzf_getline().

        Returns the line as bytes, or None when bgzf_getline() signals end
        of file (-1).  Any other negative return value raises IOError.
        The temporary kstring buffer is always freed.
        """
        cdef kstring_t line
        cdef int ret

        if not self.bgzf:
            raise ValueError("readline() on closed BGZFile object")

        if self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "readline() on write-only BGZFile object")

        line.l = line.m = 0
        line.s = NULL

        try:
            ret = bgzf_getline(self.bgzf, b'\n', &line)
            if ret == -1:
                return None
            if ret < -1:
                raise IOError('Error reading line in BGZFFile object')
            return line.s[:line.l]
        finally:
            if line.m:
                free(line.s)

    def readline(self, size=-1):
        """Read and return the next line, or b'' at end of file.

        The *size* argument is accepted for file-object compatibility but
        is not used.  The line is returned exactly as produced by
        bgzf_getline().
        NOTE(review): htslib strips the line delimiter, so a blank line also
        yields b''; use iteration to tell blank lines from end of file.

        Raises ValueError if the file is closed and IOError if the file is
        open for writing or reading fails.
        """
        line = self._getline()
        return b'' if line is None else line

    def __iter__(self):
        return self

    def __next__(self):
        # End of iteration is decided by the end-of-file signal, not by an
        # empty line, so blank lines do not terminate the loop early.
        line = self._getline()
        if line is None:
            raise StopIteration()
        return line