annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/dbm/dumb.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 """A dumb and slow but simple dbm clone.
jpayne@68 2
jpayne@68 3 For database spam, spam.dir contains the index (a text file),
jpayne@68 4 spam.bak *may* contain a backup of the index (also a text file),
jpayne@68 5 while spam.dat contains the data (a binary file).
jpayne@68 6
jpayne@68 7 XXX TO DO:
jpayne@68 8
jpayne@68 9 - seems to contain a bug when updating...
jpayne@68 10
jpayne@68 11 - reclaim free space (currently, space once occupied by deleted or expanded
jpayne@68 12 items is never reused)
jpayne@68 13
jpayne@68 14 - support concurrent access (currently, if two processes take turns making
jpayne@68 15 updates, they can mess up the index)
jpayne@68 16
jpayne@68 17 - support efficient access to large databases (currently, the whole index
jpayne@68 18 is read when the database is opened, and some updates rewrite the whole index)
jpayne@68 19
jpayne@68 20 - support opening for read-only (flag = 'm')
jpayne@68 21
jpayne@68 22 """
jpayne@68 23
jpayne@68 24 import ast as _ast
jpayne@68 25 import io as _io
jpayne@68 26 import os as _os
jpayne@68 27 import collections.abc
jpayne@68 28
# Names exported by "from <this module> import *".
__all__ = ["error", "open"]

# Values in the data file start at byte offsets that are multiples of
# this size (see _Database._addval()).
_BLOCKSIZE = 512

# The exception raised by this module for database-level failures
# (closed database, write to a read-only database) is plain OSError.
error = OSError
jpayne@68 34
class _Database(collections.abc.MutableMapping):
    """Dictionary-like database stored in three plain files.

    For a database opened with base name ``spam``:

    * ``spam.dir`` is the index, a text file with one line per key,
    * ``spam.dat`` is the data file holding the raw bytes of the values,
    * ``spam.bak`` may hold a backup of the previous index.

    Keys and values are stored as bytes.  ``str`` keys and values are
    encoded as UTF-8 and ``bytearray`` keys are converted to ``bytes``
    before use.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _io = _io       # for _commit()

    # Class-level default: if __init__ raises before the instance
    # attribute is assigned, close() (and therefore __del__) must still
    # find an _index and treat the object as already closed.
    _index = None

    def __init__(self, filebasename, mode, flag='c'):
        """Open (and, depending on *flag*, create) the database files.

        filebasename -- path prefix; '.dir', '.dat' and '.bak' are appended
        mode         -- permission bits applied to the files via os.chmod()
        flag         -- 'r' makes the mapping read-only; 'c' and 'n' allow
                        missing files to be created; 'n' removes existing
                        files first

        Raises OSError when a required file is missing and *flag* is
        neither 'c' nor 'n'.
        """
        self._mode = mode
        self._readonly = (flag == 'r')

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + '.dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + '.dat'
        self._bakfile = filebasename + '.bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Handle the creation
        self._create(flag)
        self._update(flag)

    @staticmethod
    def _encode_key(key):
        """Return *key* in the hashable bytes form used by the index.

        str is encoded as UTF-8 and bytearray is copied into bytes
        (a bytearray is unhashable and so cannot be looked up in a dict).
        Anything else is returned unchanged.
        """
        if isinstance(key, str):
            return key.encode('utf-8')
        if isinstance(key, bytearray):
            return bytes(key)
        return key

    def _create(self, flag):
        """Make sure the data file exists, honouring *flag*.

        With flag 'n' any existing data, backup and directory files are
        removed first.  A missing data file is created for flags 'c' and
        'n'; for any other flag the OSError from opening it propagates.
        """
        if flag == 'n':
            for filename in (self._datfile, self._bakfile, self._dirfile):
                try:
                    _os.remove(filename)
                except OSError:
                    # Best effort: the file may simply not exist.
                    pass
        # Mod by Jack: create data file if needed
        try:
            f = _io.open(self._datfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
                self._chmod(self._datfile)
        else:
            f.close()

    # Read directory file into the in-memory index dict.
    def _update(self, flag):
        """Load the directory file into self._index.

        A missing directory file is tolerated for flags 'c' and 'n' (the
        database is then marked as modified so that the file gets written
        by _commit()); for any other flag the OSError propagates.
        """
        self._modified = False
        self._index = {}
        try:
            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            self._modified = True
        else:
            with f:
                for line in f:
                    line = line.rstrip()
                    if not line:
                        # A blank line carries no entry; feeding it to
                        # literal_eval() would raise SyntaxError and make
                        # the whole database unopenable.
                        continue
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    key = key.encode('Latin-1')
                    self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index.

        Does nothing when the database is closed or unmodified.
        """
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except OSError:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except OSError:
            pass

        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.items():
                # Use Latin-1 since it has no qualms with any value in any
                # position; UTF-8, though, does care sometimes.
                entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
                f.write(entry)

    sync = _commit

    def _verify_open(self):
        """Raise error (OSError) if the database has been closed."""
        if self._index is None:
            raise error('DBM object has already been closed')

    def __getitem__(self, key):
        """Return the bytes stored under *key*; KeyError if absent."""
        key = self._encode_key(key)
        self._verify_open()
        pos, siz = self._index[key]     # may raise KeyError
        with _io.open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(b'\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))

    def __setitem__(self, key, val):
        """Store *val* under *key*.

        Raises error (OSError) on a read-only or closed database and
        TypeError when key or value is not str, bytes or bytearray.
        """
        if self._readonly:
            raise error('The database is opened for reading only')
        if not isinstance(key, (str, bytes, bytearray)):
            raise TypeError("keys must be bytes or strings")
        # Normalise after the type check so that a bytearray key, which
        # the check accepts, becomes hashable for the index dict.
        key = self._encode_key(key)
        if isinstance(val, str):
            val = val.encode('utf-8')
        elif not isinstance(val, (bytes, bytearray)):
            raise TypeError("values must be bytes or strings")
        self._verify_open()
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove *key* and rewrite the directory file.

        Raises error (OSError) on a read-only or closed database and
        KeyError when the key is absent.
        """
        if self._readonly:
            raise error('The database is opened for reading only')
        key = self._encode_key(key)
        self._verify_open()
        # The blocks used by the associated value are lost.
        del self._index[key]
        # Only flag the database as modified once the delete succeeded;
        # a KeyError above leaves the index untouched.
        self._modified = True
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys (bytes objects)."""
        self._verify_open()
        return list(self._index)

    def items(self):
        """Return a list of (key, value) pairs."""
        self._verify_open()
        return [(key, self[key]) for key in self._index.keys()]

    def __contains__(self, key):
        """Return True if *key* is present in the index."""
        key = self._encode_key(key)
        self._verify_open()
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys."""
        self._verify_open()
        return iter(self._index)
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys."""
        self._verify_open()
        return len(self._index)

    def close(self):
        """Commit pending index changes and mark the database closed.

        Safe to call more than once; later calls find nothing to commit.
        """
        try:
            self._commit()
        finally:
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        # Goes through self._os (not the global) because _commit() calls
        # this and must not reference module globals.
        self._os.chmod(file, self._mode)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
jpayne@68 288
jpayne@68 289
def open(file, flag='c', mode=0o666):
    """Open the database file, filename, and return corresponding object.

    The flag argument selects how the database is opened:

        'r' -- open an existing database for reading only
        'w' -- open an existing database for reading and writing
        'c' -- open for reading and writing, creating the database if
               it does not exist (the default)
        'n' -- remove any existing database files and start with an
               empty database, open for reading and writing

    Any other value raises ValueError.  Opening with 'r' or 'w' raises
    OSError if the database files do not exist.

    The optional mode argument is the UNIX permission mode applied to
    the database files.  It defaults to octal code 0o666; bits that are
    set in the prevailing umask are cleared before it is used.

    """
    # Reject a bad flag first, before touching the process-wide umask.
    if flag not in ('r', 'w', 'c', 'n'):
        raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")

    # Modify mode depending on the umask.  The only way to read the
    # umask is to set it, so set it to 0 and immediately restore it.
    try:
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        # Platform without os.umask(): use mode unchanged.
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)
    return _Database(file, mode, flag=flag)