comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/dbm/dumb.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 """A dumb and slow but simple dbm clone.
2
3 For database spam, spam.dir contains the index (a text file),
4 spam.bak *may* contain a backup of the index (also a text file),
5 while spam.dat contains the data (a binary file).
6
7 XXX TO DO:
8
9 - seems to contain a bug when updating...
10
11 - reclaim free space (currently, space once occupied by deleted or expanded
12 items is never reused)
13
14 - support concurrent access (currently, if two processes take turns making
15 updates, they can mess up the index)
16
17 - support efficient access to large databases (currently, the whole index
18 is read when the database is opened, and some updates rewrite the whole index)
19
20 - support opening for read-only (flag = 'm')
21
22 """
23
24 import ast as _ast
25 import io as _io
26 import os as _os
27 import collections.abc
28
29 __all__ = ["error", "open"]
30
31 _BLOCKSIZE = 512
32
33 error = OSError
34
class _Database(collections.abc.MutableMapping):
    """Mapping of bytes keys to bytes values backed by three files.

    For a base name ``spam`` the files are ``spam.dir`` (text index, one
    ``"%r, (%d, %d)\\n" % (key, pos, siz)`` line per key), ``spam.dat``
    (binary values, each starting at a _BLOCKSIZE-aligned offset) and
    ``spam.bak`` (previous version of the index, written by _commit()).

    str keys and values are encoded as UTF-8; bytearray keys are converted
    to bytes so that they can be used as index keys.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _io = _io       # for _commit()

    # Class-level defaults: __del__ is close(), which calls _commit().  If
    # __init__() raises before these are assigned on the instance, the
    # finalizer must still find them instead of raising AttributeError.
    _index = None
    _modified = False

    def __init__(self, filebasename, mode, flag='c'):
        """Open (and, for flags 'c' and 'n', create) the database files.

        filebasename -- str, bytes or path-like base name of the files
        mode         -- permission bits passed to chmod() for the files
        flag         -- 'r' (read-only), 'w', 'c' (default) or 'n'

        Raises OSError when a file is missing and flag is not 'c' or 'n',
        and TypeError when filebasename is not a valid file name type.
        """
        # Accept bytes and os.PathLike names as well as str.
        filebasename = self._os.fsdecode(filebasename)
        self._mode = mode
        self._readonly = (flag == 'r')

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + '.dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + '.dat'
        self._bakfile = filebasename + '.bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Handle the creation
        self._create(flag)
        self._update(flag)

    @staticmethod
    def _coerce_key(key):
        """Return key in the form used by the index.

        str is encoded as UTF-8 and bytearray (unhashable) is copied to
        bytes; any other object is returned unchanged.
        """
        if isinstance(key, str):
            return key.encode('utf-8')
        if isinstance(key, bytearray):
            return bytes(key)
        return key

    def _create(self, flag):
        """Remove old files for flag 'n' and make sure the data file exists.

        Raises OSError if the data file cannot be opened and flag is not
        'c' or 'n'.
        """
        if flag == 'n':
            for filename in (self._datfile, self._bakfile, self._dirfile):
                try:
                    _os.remove(filename)
                except OSError:
                    pass
        # Mod by Jack: create data file if needed
        try:
            f = _io.open(self._datfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            with _io.open(self._datfile, 'w', encoding="Latin-1"):
                self._chmod(self._datfile)
        else:
            f.close()

    # Read directory file into the in-memory index dict.
    def _update(self, flag):
        """Load the directory file into self._index.

        A missing directory file is an error unless flag is 'c' or 'n', in
        which case the index starts empty and is marked as modified so
        that _commit() writes the file.
        """
        self._modified = False
        self._index = {}
        try:
            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            self._modified = True
        else:
            with f:
                for line in f:
                    line = line.rstrip()
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    key = key.encode('Latin-1')
                    self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index."""
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except OSError:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except OSError:
            pass

        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.items():
                # Use Latin-1 since it has no qualms with any value in any
                # position; UTF-8, though, does care sometimes.
                entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
                f.write(entry)

    sync = _commit

    def _verify_open(self):
        """Raise error if the database has already been closed."""
        if self._index is None:
            raise error('DBM object has already been closed')

    def __getitem__(self, key):
        """Return the bytes stored under key; raise KeyError if absent."""
        key = self._coerce_key(key)
        self._verify_open()
        pos, siz = self._index[key]     # may raise KeyError
        with _io.open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(b'\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))

    def __setitem__(self, key, val):
        """Store val under key.

        Raises error when the database is read-only or closed, and
        TypeError when key or val is not str, bytes or bytearray.
        """
        if self._readonly:
            raise error('The database is opened for reading only')
        if isinstance(key, str):
            key = key.encode('utf-8')
        elif isinstance(key, bytearray):
            # bytearray is unhashable, so it cannot be an index key as is.
            key = bytes(key)
        elif not isinstance(key, bytes):
            raise TypeError("keys must be bytes or strings")
        if isinstance(val, str):
            val = val.encode('utf-8')
        elif not isinstance(val, (bytes, bytearray)):
            raise TypeError("values must be bytes or strings")
        self._verify_open()
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key from the index and rewrite the directory file.

        Raises error when the database is read-only or closed, and
        KeyError when key is absent.
        """
        if self._readonly:
            raise error('The database is opened for reading only')
        key = self._coerce_key(key)
        self._verify_open()
        self._modified = True
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys (bytes objects)."""
        try:
            return list(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def items(self):
        """Return a list of (key, value) pairs."""
        self._verify_open()
        return [(key, self[key]) for key in self._index.keys()]

    def __contains__(self, key):
        """Return True if key is in the index."""
        key = self._coerce_key(key)
        try:
            return key in self._index
        except TypeError:
            if self._index is None:
                raise error('DBM object has already been closed') from None
            else:
                raise

    def iterkeys(self):
        """Return an iterator over the keys."""
        try:
            return iter(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys."""
        try:
            return len(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def close(self):
        """Commit pending index changes and mark the object as closed."""
        try:
            self._commit()
        finally:
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply the permission bits given at construction to file."""
        self._os.chmod(file, self._mode)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
289
def open(file, flag='c', mode=0o666):
    """Open the database with base name file and return the database object.

    The flag argument selects how the database is opened:

      'r' -- open an existing database for reading only
      'w' -- open an existing database for reading and writing
      'c' -- open for reading and writing, creating the files if they
             do not exist (default)
      'n' -- remove any existing files and create a new, empty database

    With 'r' or 'w' a missing data or directory file raises OSError.  Any
    other flag value raises ValueError.

    The optional mode argument is the UNIX mode applied to the files the
    database creates or rewrites.  It defaults to octal code 0o666 (and
    will be modified by the prevailing umask).

    """

    # Validate first so that a bad flag has no side effects at all (the
    # umask query below temporarily changes process state).
    if flag not in ('r', 'w', 'c', 'n'):
        raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")

    # Modify mode depending on the umask
    try:
        # os.umask() can only be read by setting it, so restore it at once.
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)
    return _Database(file, mode, flag=flag)