From f5bcd9012493cbeaa05bb5384b708102241fdd45 Mon Sep 17 00:00:00 2001 From: ARF Date: Wed, 6 May 2015 15:23:21 +0200 Subject: [PATCH] include _arr1 cache in the abstraction This allows implementations of the abstraction layer to cache their data structures just like we currently do for numpy --- bcolz/ctable.py | 47 ++++++++++++++++++++++++++++++++++++++++------- bcolz/defaults.py | 15 +++++---------- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/bcolz/ctable.py b/bcolz/ctable.py index 587cf91e..50724120 100644 --- a/bcolz/ctable.py +++ b/bcolz/ctable.py @@ -17,6 +17,7 @@ import os import shutil from .py2help import _inttypes, _strtypes, imap, xrange +from weakref import WeakSet _inttypes += (np.integer,) islice = itertools.islice @@ -222,8 +223,32 @@ def __init__(self, columns=None, names=None, **kwargs): # Attach the attrs to this object self.attrs = attrs.attrs(self.rootdir, self.mode, _new=_new) - # Cache a structured array of len 1 for ctable[int] acceleration - self._arr1 = np.empty(shape=(1,), dtype=self.dtype) + # Initialise output structure cache + self._outstruc_update_cache() + + def __new__(cls, *args, **kwargs): + # keep track of all ctable instances to be able to update their + # output structure caches when the output processor changes + if not hasattr(cls, '_instances'): + cls._instances = WeakSet() + new_instance = object.__new__(cls) + cls._instances.add(new_instance) + return new_instance + + @classmethod + def _update_outstruc_processor(cls, processor): + bcolz.ctable._outstruc_allocate = processor.allocate + bcolz.ctable._outstruc_update_cache = processor.update_cache + bcolz.ctable._outstruc_fromindices = processor.fromindices + bcolz.ctable._outstruc_fromboolarr = processor.fromboolarr + assert hasattr(processor, '__setitem__') + + if not hasattr(cls, '_instances'): + return + + for instance in cls._instances: + instance._outstruc_update_cache() + def create_ctable(self, columns, names, **kwargs): """Create a ctable anew.""" @@ -487,8 +512,9 @@ def addcol(self, newcol, name=None, pos=None, move=False, **kwargs): # Insert the column self.cols.insert(name, pos, newcol) - # Update _arr1 - self._arr1 = np.empty(shape=(1,), dtype=self.dtype) + # Update output structure cache + self._outstruc_update_cache() + if self.auto_flush: self.flush() @@ -540,8 +566,9 @@ def delcol(self, name=None, pos=None, keep=False): if not keep: col.purge() - # Update _arr1 - self._arr1 = np.empty(shape=(1,), dtype=self.dtype) + # Update output structure cache + self._outstruc_update_cache() + if self.auto_flush: self.flush() @@ -1232,6 +1259,7 @@ def __str__(self): # if a custom output structure is configured, use numpy for # bcolz string representation for consistent output formatting current_allocate_fn = self._outstruc_allocate + OutputStructure_numpy.update_cache(self) def tmp_allocate(*args, **kwargs): return OutputStructure_numpy.allocate(self, *args, **kwargs) self._outstruc_allocate = tmp_allocate @@ -1239,6 +1267,7 @@ def tmp_allocate(*args, **kwargs): result = array2string(self) del self._outstruc_allocate + self._outstruc_update_cache() return result def __repr__(self): @@ -1256,11 +1285,15 @@ def __repr__(self): class OutputStructure_numpy(object): + @staticmethod + def update_cache(ctable_): + ctable_._outstruc_cache = np.empty(shape=(1,), dtype=ctable_.dtype) + @staticmethod def allocate(ctable_, size, dtype=None): result = object.__new__(OutputStructure_numpy) if size == 1: - result.ra = ctable_._arr1.copy() + result.ra = ctable_._outstruc_cache.copy() else: result.ra = np.empty(size, dtype) return result diff --git a/bcolz/defaults.py b/bcolz/defaults.py index 6433445a..3200a530 100644 --- a/bcolz/defaults.py +++ b/bcolz/defaults.py @@ -80,17 +80,12 @@ def ctable_out_implementation(self, value): if value is None: value = OutputStructure_numpy try: - bcolz.ctable._outstruc_allocate = value.allocate - bcolz.ctable._outstruc_fromindices = value.fromindices - bcolz.ctable._outstruc_fromboolarr = value.fromboolarr - assert hasattr(value, '__setitem__') + bcolz.ctable._update_outstruc_processor(value) + self.__ctable_out_implementation = value except (AttributeError, AssertionError): - value = OutputStructure_numpy - bcolz.ctable._outstruc_allocate = value.allocate - bcolz.ctable._outstruc_fromindices = value.fromindices - bcolz.ctable._outstruc_fromboolarr = value.fromboolarr - raise NotImplementedError('The output structure implementation is incomplete') - self.__ctable_out_implementation = value + bcolz.ctable._update_outstruc_processor(OutputStructure_numpy) + raise NotImplementedError( + 'The output structure implementation is incomplete') @property def cparams(self):