Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pysam/libcbgzf.pyx @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 # cython: language_level=3 | |
2 """Functions that read and write block gzipped files. | |
3 | |
4 The user of the file doesn't have to worry about the compression | |
5 and random access is allowed if an index file is present.""" | |
6 | |
7 # based on Python 3.5's gzip module | |
8 | |
9 import io | |
10 | |
11 from libc.stdint cimport int8_t, int16_t, int32_t, int64_t | |
12 from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t | |
13 from libc.stdio cimport SEEK_SET | |
14 from libc.stdlib cimport malloc, calloc, realloc, free | |
15 | |
16 from cpython.object cimport PyObject | |
17 from cpython.bytes cimport PyBytes_FromStringAndSize, _PyBytes_Resize | |
18 | |
19 from pysam.libcutils cimport force_bytes, encode_filename | |
20 from pysam.libchtslib cimport bgzf_open, bgzf_index_build_init, bgzf_write, bgzf_read, \ | |
21 bgzf_flush, bgzf_index_dump, bgzf_close, bgzf_seek, \ | |
22 bgzf_tell, bgzf_getline, kstring_t, BGZF | |
23 | |
24 __all__ = ["BGZFile"] | |
25 | |
26 | |
27 BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE | |
28 | |
29 | |
cdef class BGZFile(object):
    """File-like wrapper around an htslib BGZF (block gzipped) handle.

    The class simulates most of the methods of a binary file object, with
    the exception of ``truncate()``.

    Only binary mode is supported.  If you need to open a compressed file
    in text mode, use the gzip.open() function.
    """
    def __init__(self, filename, mode=None, index=None):
        """Open *filename* as a block gzipped file.

        Parameters
        ----------
        filename
            Path of the file to open; passed through ``encode_filename``.
        mode
            Any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x' or 'xb'.  A missing
            'b' is appended automatically; an empty or missing mode means
            'rb'.  The resulting string is handed to ``bgzf_open``.
        index
            Optional path of an index file.  For files opened for writing,
            index building is enabled and the index is written to this
            path when the file is closed.

        Raises
        ------
        ValueError
            If *mode* requests text ('t') or universal newline ('U') mode.
        IOError
            If the file cannot be opened or index building cannot be
            initialised.
        """
        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if not mode:
            mode = 'rb'
        elif 'b' not in mode:
            mode += 'b'

        mode = force_bytes(mode)

        self.name = encode_filename(filename)
        self.index = encode_filename(index) if index is not None else None

        self.bgzf = bgzf_open(self.name, mode)
        # bgzf_open signals failure with a NULL handle; dereferencing it
        # below would crash the interpreter, so fail loudly here instead.
        if self.bgzf == NULL:
            raise IOError('Error opening BGZFile {!r}'.format(filename))

        if self.bgzf.is_write and index is not None and bgzf_index_build_init(self.bgzf) < 0:
            raise IOError('Error building bgzf index')

    def __dealloc__(self):
        self.close()

    def write(self, data):
        """Compress and write *data*; return the number of bytes consumed.

        *data* may be ``bytes`` or any object supporting the buffer
        protocol.

        Raises ValueError if the file is closed and IOError if the file was
        not opened for writing or the underlying write fails.
        """
        if not self.bgzf:
            raise ValueError("write() on closed BGZFile object")

        if not self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "write() on read-only BGZFile object")

        if not isinstance(data, bytes):
            # Accept any data that supports the buffer protocol.  The data
            # is copied into a bytes object because the <char *> coercion
            # below is only valid for bytes-like string objects, not for a
            # memoryview.
            data = memoryview(data).tobytes()
        length = len(data)

        if length > 0 and bgzf_write(self.bgzf, <char *>data, length) < 0:
            raise IOError('BGZFile write failed')

        return length

    def read(self, size=-1):
        """Read and return up to *size* uncompressed bytes.

        A negative *size* (the default) or ``None`` reads until end of
        file.  ``size == 0`` returns ``b''`` without touching the file.

        Raises ValueError if the file is closed and IOError if the file was
        opened for writing or the underlying read fails.
        """
        cdef ssize_t read_size

        if not self.bgzf:
            raise ValueError("read() on closed BGZFile object")

        if self.bgzf.is_write:
            import errno
            raise IOError(errno.EBADF, "read() on write-only BGZFile object")

        # Mirror the io convention that None means "read everything".
        if size is None:
            size = -1

        if size < 0:
            chunks = []
            while 1:
                # Fresh, uninitialised bytes object that bgzf_read fills in
                # place; it is not shared with anyone until appended below.
                chunk = PyBytes_FromStringAndSize(NULL, BUFFER_SIZE)
                read_size = bgzf_read(self.bgzf, <char *>chunk, BUFFER_SIZE)
                if read_size < 0:
                    raise IOError('Error reading from BGZFile')
                elif not read_size:
                    break
                elif read_size < BUFFER_SIZE:
                    # Short read: drop the unfilled tail of the buffer.
                    chunk = chunk[:read_size]
                chunks.append(chunk)
            return b''.join(chunks)

        elif size > 0:
            chunk = PyBytes_FromStringAndSize(NULL, size)
            read_size = bgzf_read(self.bgzf, <char *>chunk, size)
            if read_size < 0:
                raise IOError('Error reading from BGZFile')
            elif read_size < size:
                chunk = chunk[:read_size]
            return chunk
        else:
            return b''

    @property
    def closed(self):
        """True once the underlying BGZF handle has been released."""
        return self.bgzf == NULL

    def close(self):
        """Flush pending data, write the index if requested, and close.

        Calling close() on an already closed file is a no-op.  The handle
        is always released, even when flushing or writing the index fails;
        the first error encountered is then raised as IOError.
        """
        cdef int ret

        if not self.bgzf:
            return

        error = None
        if self.bgzf.is_write:
            if bgzf_flush(self.bgzf) < 0:
                error = 'Error flushing BGZFile object'
            # Index building is only initialised for write handles (see
            # __init__), so only those have an index to dump.
            elif self.index and bgzf_index_dump(self.bgzf, self.index, NULL) < 0:
                error = 'Cannot write index'

        # Release the handle unconditionally so that a failed flush or
        # index dump does not leak it or make later close() calls fail again.
        ret = bgzf_close(self.bgzf)
        self.bgzf = NULL

        if error is not None:
            raise IOError(error)
        if ret < 0:
            raise IOError('Error closing BGZFile object')

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def flush(self):
        """Flush buffered data of a file opened for writing.

        Does nothing for closed files and for files opened for reading.
        Raises IOError if the underlying flush fails.
        """
        if not self.bgzf:
            return

        if self.bgzf.is_write and bgzf_flush(self.bgzf) < 0:
            raise IOError('Error flushing BGZFile object')

    def fileno(self):
        """Not supported: always raises AttributeError.

        No file descriptor is exposed by this class.
        """
        raise AttributeError('fileno')

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file.

        Only valid for files opened for reading.  Raises ValueError if the
        file is closed and IOError if the file was opened for writing or
        the seek fails.
        '''
        if not self.bgzf:
            raise ValueError("rewind() on closed BGZFile object")
        # Rewinding is refused for write handles (the check used to be
        # inverted and rejected read handles instead).
        if self.bgzf.is_write:
            raise IOError("Can't rewind in write mode")
        if bgzf_seek(self.bgzf, 0, SEEK_SET) < 0:
            raise IOError('Error seeking BGZFFile object')

    def readable(self):
        """Return True if the file was opened for reading.

        Raises ValueError if the file is closed.
        """
        if not self.bgzf:
            raise ValueError("readable() on closed BGZFile object")
        return not self.bgzf.is_write

    def writable(self):
        """Return True if the file is open and was opened for writing."""
        return self.bgzf != NULL and self.bgzf.is_write

    def seekable(self):
        """Always returns True."""
        return True

    def tell(self):
        """Return the current position as reported by ``bgzf_tell``.

        Raises ValueError if the file is closed and IOError on failure.
        """
        cdef int64_t off

        if not self.bgzf:
            raise ValueError("tell() on closed BGZFile object")
        off = bgzf_tell(self.bgzf)
        if off < 0:
            raise IOError('Error in tell on BGZFFile object')

        return off

    def seek(self, offset, whence=io.SEEK_SET):
        """Move to *offset*, which must be relative to the file start.

        *offset* is passed unchanged to ``bgzf_seek``.  Only
        ``io.SEEK_SET`` is accepted for *whence*.

        Returns the value returned by ``bgzf_seek``.
        NOTE(review): htslib documents that value as a status code (0 on
        success) rather than the new position; it is returned unchanged
        for backward compatibility.

        Raises ValueError if the file is closed or *whence* is not
        ``io.SEEK_SET``, and IOError if the seek fails.
        """
        cdef int64_t off

        if not self.bgzf:
            raise ValueError("seek() on closed BGZFile object")
        # Compare by value: identity comparison of ints is unreliable.
        if whence != io.SEEK_SET:
            raise ValueError('Seek from end not supported')

        off = bgzf_seek(self.bgzf, offset, SEEK_SET)
        if off < 0:
            raise IOError('Error seeking BGZFFile object')

        return off

    def _getline(self):
        """Read the next line; return None at end of file.

        The line is returned as bytes without the trailing newline, as
        delivered by ``bgzf_getline``.  Returning None for end of file lets
        callers tell it apart from an empty line (``b''``).

        Raises ValueError if the file is closed and IOError on read errors.
        """
        cdef kstring_t line
        cdef int ret

        if not self.bgzf:
            raise ValueError("readline() on closed BGZFile object")

        line.l = line.m = 0
        line.s = NULL

        ret = bgzf_getline(self.bgzf, b'\n', &line)
        try:
            # -1 means end of file; anything below that is an error code.
            if ret < -1:
                raise IOError('Error reading line in BGZFFile object')
            if ret == -1:
                return None
            return line.s[:line.l]
        finally:
            # The kstring buffer is allocated by htslib and owned by us.
            if line.m:
                free(line.s)

    def readline(self, size=-1):
        """Read and return the next line without its trailing newline.

        Returns ``b''`` at end of file.  Because the newline is stripped,
        an empty line in the file also yields ``b''``; iterate over the
        file object if the two cases must be distinguished.

        *size* is accepted for file-API compatibility but is ignored.

        Raises ValueError if the file is closed and IOError on read errors.
        """
        line = self._getline()
        return b'' if line is None else line

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line, stopping only at end of file.

        Empty lines are yielded as ``b''`` and do not end the iteration.
        """
        line = self._getline()
        if line is None:
            raise StopIteration()
        return line