comparison: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/dbm/dumb.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author | jpayne
date | Tue, 18 Mar 2025 17:55:14 -0400
parents |
children |

comparing 67:0e9998148a16 with 69:33d812a61356
1 """A dumb and slow but simple dbm clone. | |
2 | |
3 For database spam, spam.dir contains the index (a text file), | |
4 spam.bak *may* contain a backup of the index (also a text file), | |
5 while spam.dat contains the data (a binary file). | |
6 | |
7 XXX TO DO: | |
8 | |
9 - seems to contain a bug when updating... | |
10 | |
11 - reclaim free space (currently, space once occupied by deleted or expanded | |
12 items is never reused) | |
13 | |
14 - support concurrent access (currently, if two processes take turns making | |
15 updates, they can mess up the index) | |
16 | |
17 - support efficient access to large databases (currently, the whole index | |
18 is read when the database is opened, and some updates rewrite the whole index) | |
19 | |
20 - support opening for read-only (flag = 'm') | |
21 | |
22 """ | |
23 | |
24 import ast as _ast | |
25 import io as _io | |
26 import os as _os | |
27 import collections.abc | |
28 | |
29 __all__ = ["error", "open"] | |
30 | |
31 _BLOCKSIZE = 512 | |
32 | |
33 error = OSError | |
34 | |
35 class _Database(collections.abc.MutableMapping): | |
36 | |
37 # The on-disk directory and data files can remain in mutually | |
38 # inconsistent states for an arbitrarily long time (see comments | |
39 # at the end of __setitem__). This is only repaired when _commit() | |
40 # gets called. One place _commit() gets called is from __del__(), | |
41 # and if that occurs at program shutdown time, module globals may | |
42 # already have gotten rebound to None. Since it's crucial that | |
43 # _commit() finish successfully, we can't ignore shutdown races | |
44 # here, and _commit() must not reference any globals. | |
45 _os = _os # for _commit() | |
46 _io = _io # for _commit() | |
47 | |
48 def __init__(self, filebasename, mode, flag='c'): | |
49 self._mode = mode | |
50 self._readonly = (flag == 'r') | |
51 | |
52 # The directory file is a text file. Each line looks like | |
53 # "%r, (%d, %d)\n" % (key, pos, siz) | |
54 # where key is the string key, pos is the offset into the dat | |
55 # file of the associated value's first byte, and siz is the number | |
56 # of bytes in the associated value. | |
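        # For example, a key b'foo' whose 3-byte value starts at offset 512
        # of the .dat file would appear in the .dir file as the line
        #     'foo', (512, 3)
        # (an illustrative entry; the key and offsets are made up).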
        self._dirfile = filebasename + '.dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + '.dat'
        self._bakfile = filebasename + '.bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Handle the creation
        self._create(flag)
        self._update(flag)

    def _create(self, flag):
        if flag == 'n':
            for filename in (self._datfile, self._bakfile, self._dirfile):
                try:
                    _os.remove(filename)
                except OSError:
                    pass
        # Mod by Jack: create data file if needed
        try:
            f = _io.open(self._datfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
                self._chmod(self._datfile)
        else:
            f.close()

    # Read directory file into the in-memory index dict.
    def _update(self, flag):
        self._modified = False
        self._index = {}
        try:
            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            self._modified = True
        else:
            with f:
                for line in f:
                    line = line.rstrip()
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    key = key.encode('Latin-1')
                    self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except OSError:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except OSError:
            pass

        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.items():
                # Use Latin-1 since it has no qualms with any value in any
                # position; UTF-8, though, does care sometimes.
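                # (For instance, the single byte 0xff decodes to 'ÿ' under
                # Latin-1 and encodes back to the same byte, whereas on its
                # own it is not valid UTF-8.)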
134 entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair) | |
135 f.write(entry) | |
136 | |
137 sync = _commit | |
138 | |
139 def _verify_open(self): | |
140 if self._index is None: | |
141 raise error('DBM object has already been closed') | |
142 | |
143 def __getitem__(self, key): | |
144 if isinstance(key, str): | |
145 key = key.encode('utf-8') | |
146 self._verify_open() | |
147 pos, siz = self._index[key] # may raise KeyError | |
148 with _io.open(self._datfile, 'rb') as f: | |
149 f.seek(pos) | |
150 dat = f.read(siz) | |
151 return dat | |
152 | |
153 # Append val to the data file, starting at a _BLOCKSIZE-aligned | |
154 # offset. The data file is first padded with NUL bytes (if needed) | |
155 # to get to an aligned offset. Return pair | |
156 # (starting offset of val, len(val)) | |
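    # For example, with _BLOCKSIZE = 512: if the data file currently ends at
    # offset 700, the next value is written at offset 1024
    # (((700 + 512 - 1) // 512) * 512), after 324 NUL padding bytes.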
    def _addval(self, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(b'\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))

    def __setitem__(self, key, val):
        if self._readonly:
            raise error('The database is opened for reading only')
        if isinstance(key, str):
            key = key.encode('utf-8')
        elif not isinstance(key, (bytes, bytearray)):
            raise TypeError("keys must be bytes or strings")
        if isinstance(val, str):
            val = val.encode('utf-8')
        elif not isinstance(val, (bytes, bytearray)):
            raise TypeError("values must be bytes or strings")
        self._verify_open()
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
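            # For example, a 600-byte old value occupies two 512-byte blocks
            # ((600 + 511) // 512 == 2), so a replacement of up to 1024 bytes
            # is written in place; a 1025-byte replacement needs three blocks
            # and is appended to the end of the data file instead.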
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        if self._readonly:
            raise error('The database is opened for reading only')
        if isinstance(key, str):
            key = key.encode('utf-8')
        self._verify_open()
        self._modified = True
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        try:
            return list(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def items(self):
        self._verify_open()
        return [(key, self[key]) for key in self._index.keys()]

    def __contains__(self, key):
        if isinstance(key, str):
            key = key.encode('utf-8')
        try:
            return key in self._index
        except TypeError:
            if self._index is None:
                raise error('DBM object has already been closed') from None
            else:
                raise

    def iterkeys(self):
        try:
            return iter(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None
    __iter__ = iterkeys

    def __len__(self):
        try:
            return len(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def close(self):
        try:
            self._commit()
        finally:
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        self._os.chmod(file, self._mode)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

def open(file, flag='c', mode=0o666):
    """Open the database file, filename, and return corresponding object.

    The flag argument controls how the database is opened: 'r' opens an
    existing database for reading only, 'w' opens an existing database for
    reading and writing, 'c' (the default) opens the database for reading
    and writing and creates it if it does not exist, and 'n' always creates
    a new, empty database open for reading and writing.  Any other value
    raises ValueError.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0o666 (and
    will be modified by the prevailing umask).

    """

    # Modify mode depending on the umask
    try:
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        pass
    else:
        # Turn off any bits that are set in the umask
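        # (For example, with the default mode 0o666 and a typical umask of
        # 0o022, newly created files get mode 0o644.)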
        mode = mode & (~um)
    if flag not in ('r', 'w', 'c', 'n'):
        raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")
    return _Database(file, mode, flag=flag)
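
# A minimal usage sketch (illustrative only: the 'example_db' basename and the
# stored key/value are made-up names).  open() returns a _Database instance,
# which supports both the mapping protocol and the context-manager protocol,
# so it can be used like any other dbm backend.
if __name__ == "__main__":
    with open("example_db", "c") as _db:   # creates example_db.dat if needed
        _db["hello"] = "world"             # str keys/values are encoded as UTF-8
        assert _db[b"hello"] == b"world"   # values always come back as bytes
    # Leaving the with-block calls close(), which commits the in-memory index
    # to example_db.dir (and backs up any previous index to example_db.bak).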