# (Be in -*- python -*- mode.) # # ==================================================================== # Copyright (c) 2000-2006 CollabNet. All rights reserved. # # This software is licensed as described in the file COPYING, which # you should have received as part of this distribution. The terms # are also available at http://subversion.tigris.org/license-1.html. # If newer versions of this license are posted there, you may use a # newer version instead, at your option. # # This software consists of voluntary contributions made by many # individuals. For exact contribution history, see the revision # history and logs, available at http://cvs2svn.tigris.org/. # ==================================================================== """This module contains database facilities used by cvs2svn.""" from __future__ import generators import sys import os import marshal import cStringIO import cPickle from cvs2svn_lib.boolean import * from cvs2svn_lib.common import warning_prefix from cvs2svn_lib.common import error_prefix from cvs2svn_lib.primed_pickle import get_memos from cvs2svn_lib.primed_pickle import PrimedPickler from cvs2svn_lib.primed_pickle import PrimedUnpickler # DBM module selection # 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3, # so that the dbhash module used by anydbm will use bsddb3. try: import bsddb3 sys.modules['bsddb'] = sys.modules['bsddb3'] except ImportError: pass # 2. These DBM modules are not good for cvs2svn. import anydbm if (anydbm._defaultmod.__name__ == 'dumbdbm' or anydbm._defaultmod.__name__ == 'dbm'): sys.stderr.write( error_prefix + ': your installation of Python does not contain a suitable\n' + 'DBM module -- cvs2svn cannot continue.\n' + 'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n') sys.exit(1) # 3. If we are using the old bsddb185 module, then try prefer gdbm instead. # Unfortunately, gdbm appears not to be trouble free, either. if hasattr(anydbm._defaultmod, 'bsddb') \ and not hasattr(anydbm._defaultmod.bsddb, '__version__'): try: gdbm = __import__('gdbm') except ImportError: sys.stderr.write(warning_prefix + ': The version of the bsddb module found ' 'on your computer has been reported to malfunction on some datasets, ' 'causing KeyError exceptions. You may wish to upgrade your Python to ' 'version 2.3 or later.\n') else: anydbm._defaultmod = gdbm # Always use these constants for opening databases. DB_OPEN_READ = 'r' DB_OPEN_WRITE = 'w' DB_OPEN_NEW = 'n' class AbstractDatabase: """An abstract base class for anydbm-based databases.""" def __init__(self, filename, mode): """A convenience function for opening an anydbm database.""" # pybsddb3 has a bug which prevents it from working with # Berkeley DB 4.2 if you open the db with 'n' ("new"). This # causes the DB_TRUNCATE flag to be passed, which is disallowed # for databases protected by lock and transaction support # (bsddb databases use locking from bsddb version 4.2.4 onwards). # # Therefore, manually perform the removal (we can do this, because # we know that for bsddb - but *not* anydbm in general - the database # consists of one file with the name we specify, rather than several # based on that name). if mode == 'n' and anydbm._defaultmod.__name__ == 'dbhash': if os.path.isfile(filename): os.unlink(filename) mode = 'c' self.db = anydbm.open(filename, mode) # Import implementations for many mapping interface methods. Note # that we specifically do not do this for any method which handles # *values*, because our derived classes define __getitem__ and # __setitem__ to override the storage of values, and grabbing # methods directly from the dbm object would bypass this. for meth_name in ('__delitem__', 'keys', '__iter__', 'has_key', '__contains__', 'iterkeys', 'clear'): meth_ref = getattr(self.db, meth_name, None) if meth_ref: setattr(self, meth_name, meth_ref) def __delitem__(self, key): # gdbm defines a __delitem__ method, but it cannot be assigned. So # this method provides a fallback definition via explicit delegation: del self.db[key] def __iter__(self): for key in self.keys(): yield key def has_key(self, key): try: self.db[key] return True except KeyError: return False def __contains__(self, key): return self.has_key(key) def iterkeys(self): return self.__iter__() def clear(self): for key in self.keys(): del self[key] def items(self): return [(key, self[key],) for key in self.keys()] def values(self): return [self[key] for key in self.keys()] def get(self, key, default=None): try: return self[key] except KeyError: return default class SDatabase(AbstractDatabase): """A database that can only store strings.""" def __getitem__(self, key): return self.db[key] def __setitem__(self, key, value): self.db[key] = value class Database(AbstractDatabase): """A database that uses the marshal module to store built-in types.""" def __getitem__(self, key): return marshal.loads(self.db[key]) def __setitem__(self, key, value): self.db[key] = marshal.dumps(value) class PDatabase(AbstractDatabase): """A database that uses the cPickle module to store arbitrary objects.""" def __getitem__(self, key): return cPickle.loads(self.db[key]) def __setitem__(self, key, value): self.db[key] = cPickle.dumps(value, -1) class PrimedPDatabase(AbstractDatabase): """A database that uses cPickle module to store arbitrary objects. The Pickler and Unpickler are 'primed' by pre-pickling PRIMER, which can be an arbitrary object (e.g., a list of objects that are expected to occur frequently in the database entries). From then on, if objects within individual database entries are recognized from PRIMER, then only their persistent IDs need to be pickled instead of the whole object. Concretely, when a new database is created, the pickler memo and unpickler memo for PRIMER are computed, pickled, and stored in db[self.primer_key] as a tuple. When an existing database is opened for reading or update, the pickler and unpickler memos are read from db[self.primer_key]. In either case, these memos are used to initialize a PrimedPickler and PrimedUnpickler, which are used for future write and read accesses respectively. Since the database entry with key self.primer_key is used to store the memo, self.primer_key may not be used as a key for normal entries.""" primer_key = '_' def __init__(self, filename, mode, primer): AbstractDatabase.__init__(self, filename, mode) if mode == DB_OPEN_NEW: pickler_memo, unpickler_memo = get_memos(primer) self.db[self.primer_key] = \ cPickle.dumps((pickler_memo, unpickler_memo,)) else: (pickler_memo, unpickler_memo,) = \ cPickle.loads(self.db[self.primer_key]) self.primed_pickler = PrimedPickler(pickler_memo) self.primed_unpickler = PrimedUnpickler(unpickler_memo) def __getitem__(self, key): return self.primed_unpickler.loads(self.db[key]) def __setitem__(self, key, value): self.db[key] = self.primed_pickler.dumps(value)