In a current project I have been part of, I have had to deal with a relatively large amount of mostly static data that needs to be readily available in my application. As most of the data is static, and doesn’t change, but drives the application’s functionality, I figured I could store the data in structured files (I chose YAML as the format, for the human-readable, almost Python-like format to the files). More on that later.
First, I need a base class to build my data objects with. There are several features I want:
- The object should be usable as a non-mutable mapping (use the collections.abc.Mapping base class).
- Views should be views of the data and make sense.
- All keys should be accessible as attributes as well as through normal getitem access. I like this flexibility in languages like JavaScript, and find that writing object.item is simpler and cleaner than object['item']. However, I find object['item'] easier to write than getattr(object, 'item'), so I definitely still want #1 above.
- Note: This means all keys must be usable as attribute names, and follow the Python variable naming restrictions.
- Values should be lazily-evaluated. So if the value is a tree structure loaded from a YAML file, don’t load the YAML file until accessed.
- However, if the YAML file is loaded, load everything in that tree, don’t wait for access on these elements.
- Items not loaded should be obviously shown as not loaded until they are loaded.
- Assume that the data structure is immutable — the number of items in the structure, and the names (keys) for those structures are immutable.
- This means it is possible for the objects to be hashable.
- However, there should be a way to break this and allow for new elements, just in case.
- Data structures should be pickleable
import collections.abc
class BaseConfig(collections.abc.Mapping):
    """
    Skeleton of the config base class.

    Only the three abstract methods demanded by
    :class:`collections.abc.Mapping` are declared; they are placeholders
    that get real bodies later on.
    """

    def __getitem__(self, key):  # required by collections.abc.Mapping
        pass

    def __iter__(self):  # required by collections.abc.Mapping
        pass

    def __len__(self):  # required by collections.abc.Mapping
        pass
Inheriting from collections.abc.Mapping gives me these mixin methods for free: __contains__, keys, items, values, get, __eq__, and __ne__
I also have some abstract methods I need to write code for:
__getitem__, __iter__, __len__
Before I get further into these particular methods, I need to focus on item #4 above. These config objects are assumed to be immutable — the number and names of their items is a known quantity. There are really two states for every element of the config: a not-loaded state and a loaded state. The not-loaded state should have a way to load the element (a callable), and the loaded state is essentially the memoized value for that element. These can be defined through a couple of descriptors:
import typing
class ConfigSimpleAttr:
    """
    Simple loaded attribute descriptor for Config objects.

    Wraps an already-computed value.  Reading returns that value; assigning
    or deleting through an instance raises :class:`AttributeError`.
    """
    __slots__ = ('value', )

    def __init__(self, value: typing.Any):
        self.value = value

    def __get__(self, inst, cls):
        # The stored value is returned for instance and class access alike.
        return self.value

    def __set__(self, inst, value):
        raise AttributeError("can't set attribute")

    def __delete__(self, inst):
        raise AttributeError("can't delete attribute")
class ConfigLoadableAttr:
    """
    Loadable attribute descriptor for Config objects.

    Holds a zero-argument loader.  The first read through an instance calls
    the loader, replaces this descriptor on the owning class with a
    ``ConfigSimpleAttr`` holding the result, and moves ``name`` from the
    instance's ``_funcs_`` set to its ``_attrs_`` set.
    """
    __slots__ = ('name', 'func')

    def __init__(self, name: str, func: typing.Callable):
        self.name = name
        self.func = func

    def __get__(self, inst, cls):
        if inst is None:
            # Class-level access (introspection, help(), ...).  There is no
            # instance whose bookkeeping could be updated, so return the
            # descriptor itself instead of loading and then failing on
            # ``None._attrs_`` with the descriptor already swapped out.
            return self
        ret = self.func()
        # Memoize: later reads hit the simple descriptor, not the loader.
        setattr(cls, self.name, ConfigSimpleAttr(ret))
        inst._attrs_ = inst._attrs_ | {self.name}
        inst._funcs_ = inst._funcs_ - {self.name}
        return ret

    def __set__(self, inst, value):
        raise AttributeError("can't set attribute")

    def __delete__(self, inst):
        raise AttributeError("can't delete attribute")
We then need a mechanism for setting up these descriptor attributes in the BaseConfig class:
class BaseConfig(collections.abc.Mapping):
    """
    Config base class: construction and attribute registration.

    ``_attrs_`` holds the names whose values are loaded, ``_funcs_`` the
    names that still have to be loaded on first access.
    """

    def __init__(self, *, attrs: typing.Iterable):
        """
        :param attrs: iterable of keyword dicts, each passed to
            :py:meth:`_set_attr`; a falsy value registers nothing.
        """
        self._funcs_ = frozenset()
        self._attrs_ = frozenset()
        # Plain loop instead of building a throwaway tuple for side effects.
        for spec in attrs or ():
            self._set_attr(**spec)

    def _set_attr(
        self,
        name: str,
        func: typing.Callable,
        doc: typing.Optional[str]=None,
        preload: bool=False
    ):
        """
        Sets up an attribute for the config.

        :param name: attribute (and key) name.
        :param func: zero-argument callable producing the value.
        :param doc: accepted for API compatibility; none of the descriptors
            store a docstring, so it is currently unused.
        :param preload: call ``func`` now instead of on first access.
        """
        if preload:
            # Loaded
            attr = ConfigSimpleAttr(func())
            self._funcs_ = self._funcs_ - {name}
            self._attrs_ = self._attrs_ | {name}
        else:
            # Loadable
            attr = ConfigLoadableAttr(name, func)
            self._funcs_ = self._funcs_ | {name}
            self._attrs_ = self._attrs_ - {name}
        # Descriptors only work when installed on the class.
        setattr(type(self), name, attr)
Next, we should define a special singleton object, that is similar in function and nature to NotImplemented, None, False, and True. This would be a new object that is used to show that an attribute is not loaded yet:
class SingletonMeta(type):
    """
    Metaclass that gives every class it creates exactly one instance.

    The optional ``slots`` class keyword becomes the new class's
    ``__slots__`` unless the class body already declares its own.
    """
    __slots__ = ()

    def __new__(cls, name, bases, namespace, slots=()):
        # ``__slots__`` only has an effect if it is in the namespace when
        # the class object is built, so inject it here and not in __init__.
        namespace = dict(namespace)
        namespace.setdefault('__slots__', slots)
        return super().__new__(cls, name, bases, namespace)

    def __init__(cls, name, bases, namespace, slots=()):
        super().__init__(name, bases, namespace)
        original_new = cls.__new__

        def my_new(cls, *args, **kwargs):
            # Look only at the class's own namespace: an inherited
            # ``_instance`` belongs to a parent singleton and must not be
            # handed out as an instance of the subclass.
            if '_instance' not in cls.__dict__:
                cls._instance = original_new(cls, *args, **kwargs)
            return cls._instance

        cls.__new__ = staticmethod(my_new)
class NotLoadedType(metaclass=SingletonMeta):
    """
    Singleton that a Config object's elements are set to before they get
    loaded.
    """

    def __str__(self):
        return 'Not Loaded'

    def __repr__(self):
        # An empty repr makes unloaded values vanish from dict reprs
        # (``{'a': }``); return a visible marker instead.
        return '<NotLoaded>'

    def __bool__(self):
        return False

    def __hash__(self):
        return hash(type(self).__name__)

    def __reduce__(self):
        # Unpickling calls the class again and so gets the singleton back.
        return (NotLoadedType, ())

    def __call__(self):
        raise TypeError(
            "'{name}' object is not callable".format(
                name=type(self).__name__
            )
        )
# Module-level marker used for config elements that are not loaded yet.
NotLoaded = NotLoadedType()
class BaseConfig(collections.abc.Mapping):
    """Config base class: private generators over keys and items."""

    def __gen_keys(self):
        """Yield every key, loaded or not, in sorted order."""
        yield from sorted(self._attrs_ | self._funcs_)

    def __gen_items(self):
        """
        Yield ``(key, value)`` pairs without triggering any loader: loaded
        keys come first with their values, then the not-yet-loaded keys
        paired with ``NotLoaded``.
        """
        yield from ((key, getattr(self, key)) for key in self._attrs_)
        yield from ((key, NotLoaded) for key in self._funcs_)
class BaseConfig(collections.abc.Mapping):
    """Config base class: the abstract methods required by ``Mapping``."""

    def __getitem__(self, key):
        """
        Return self[key].

        :raises KeyError: if ``key`` is not a config key.  The mapping
            protocol expects ``KeyError`` here; a bare ``getattr`` would
            raise ``AttributeError`` or return unrelated attributes such as
            methods.
        """
        if key not in self._attrs_ and key not in self._funcs_:
            raise KeyError(key)
        return getattr(self, key)

    def __iter__(self):
        "Return iter(self)."
        yield from self.__gen_keys()

    def __len__(self):
        "Return len(self)."
        # Counting does not need the keys sorted or copied into a tuple.
        return len(self._attrs_ | self._funcs_)
class ConfigValuesView(collections.abc.ValuesView):
    "View of a config object's values."
    __slots__ = ()

    def __repr__(self):
        # Shown as ``ConfigValuesView((v1, v2, ...))``.
        return ''.join((type(self).__name__, '(', repr(tuple(self)), ')'))
class ConfigKeysView(collections.abc.KeysView):
    "View of a config object's keys."
    __slots__ = ()

    def __repr__(self):
        # Shown as ``ConfigKeysView({'k1', 'k2', ...})``.
        return ''.join((type(self).__name__, '(', repr(set(self)), ')'))
class ConfigItemsView(collections.abc.ItemsView):
    "View of a config object's (key, value) items."
    __slots__ = ()

    def __repr__(self):
        # Shown as ``ConfigItemsView(((k1, v1), (k2, v2), ...))``.
        return ''.join((type(self).__name__, '(', repr(tuple(self)), ')'))
class BaseConfig(collections.abc.Mapping):
    """Config base class: membership test, views and ``get``."""

    def __contains__(self, key):
        "Return key in self."
        # Direct membership test; no need to sort and rebuild a set.
        return key in self._attrs_ or key in self._funcs_

    def keys(self):
        """
        Returns a set-like object providing a view of the config object's
        keys.
        """
        return ConfigKeysView(self)

    def values(self):
        """
        Returns an object providing a view of the config object's values.
        """
        return ConfigValuesView(self)

    def items(self):
        """
        Returns a set-like object providing a view of the config object's
        items.
        """
        return ConfigItemsView(self)

    def get(self, key, default=None):
        "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."
        # Only real config keys count; a bare ``getattr`` would also hand
        # back methods and private attributes such as ``keys``.
        if key in self:
            return getattr(self, key)
        return default
String representations of the BaseConfig need to be made. Since this functions much like a dict, and I like the clean representation of dicts, I simply make these act as if it is a dict:
class BaseConfig(collections.abc.Mapping):
    """Config base class: dict-like string representations."""

    def __str__(self):
        "Return str(self)."
        # Built from the private items generator, so nothing gets loaded.
        return str(dict(self.__gen_items()))

    def __repr__(self):
        "Return repr(self)."
        return repr(dict(self.__gen_items()))
import copy
class BaseConfig(collections.abc.Mapping):
    """Config base class: support for the :py:mod:`copy` module."""

    def __copy__(self):
        """
        For use with the :py:func:`copy.copy` function.

        Returns a plain ``dict`` of the current items (unloaded entries
        appear as ``NotLoaded``), not another config object.
        """
        return copy.copy(dict(self.__gen_items()))

    def copy(self):
        "D.copy() -> a shallow copy of D"
        return copy.copy(self)

    def __deepcopy__(self, memo):
        "For use with the :py:func:`copy.deepcopy` function."
        try:
            return memo[id(self)]
        except KeyError:
            # NOTE(review): ``unpack_element`` is not defined in this
            # file; it presumably converts the config into plain
            # containers -- confirm where it comes from.
            memo[id(self)] = unpack_element(self, memo=memo)
            return memo[id(self)]
Additionally, I overload the __dict__ attribute into a property that uses a read-only types.MappingProxyType as a mapping view of the config object. For completeness’ sake, I also implement the __sizeof__() method:
import types
class BaseConfig(collections.abc.Mapping):
    """Config base class: read-only ``__dict__`` and ``__sizeof__``."""

    @property
    def __dict__(self):
        # ``vars(obj)`` and ``obj.__dict__`` get a read-only mapping view
        # of the config itself.
        return types.MappingProxyType(self)

    def __sizeof__(self):
        "Return sys.getsizeof(self)."
        # Local import: this snippet never imported ``sys`` at module level.
        import sys
        return sys.getsizeof(vars(self))
class ConfigSetableAttr:
    """
    Setable attribute descriptor for Config objects.

    Placeholder for a name that has no value yet.  Reading or deleting it
    raises :class:`AttributeError`.  Assigning a zero-argument callable
    calls it, replaces this descriptor on the instance's class with a
    ``ConfigSimpleAttr`` holding the result, and moves ``name`` from the
    instance's ``_setables_`` set to its ``_attrs_`` set.
    """
    __slots__ = ('name', )

    def __init__(self, name: str):
        self.name = name

    def __get__(self, inst, cls):
        raise AttributeError(
            "'{typename}' object has no attribute '{name}'".format(
                typename=cls.__name__,
                name=self.name
            )
        )

    def __set__(self, inst, func: typing.Callable):
        # The assigned object is a loader, not the value itself.
        ret = func()
        setattr(type(inst), self.name, ConfigSimpleAttr(ret))
        inst._attrs_ = inst._attrs_ | {self.name}
        inst._setables_ = inst._setables_ - {self.name}

    def __delete__(self, inst):
        raise AttributeError(
            "'{typename}' object has no attribute '{name}'".format(
                typename=type(inst).__name__,
                name=self.name
            )
        )
class BaseConfig(collections.abc.Mapping):
    """
    Config base class: attribute registration (including setable
    attributes) and item assignment.
    """

    def _set_attr(
        self,
        name: str,
        func: typing.Optional[typing.Callable]=None,
        doc: typing.Optional[str]=None,
        preload: bool=False
    ):
        """
        Sets up an attribute for the config.

        :param name: attribute (and key) name.
        :param func: zero-argument callable producing the value, or
            ``None`` to register a setable placeholder.
        :param doc: accepted for API compatibility; none of the descriptors
            store a docstring, so it is currently unused.
        :param preload: call ``func`` now instead of on first access.
        """
        if func is None:
            # Setable
            attr = ConfigSetableAttr(name)
            home = '_setables_'
        elif preload:
            # Loaded
            attr = ConfigSimpleAttr(func())
            home = '_attrs_'
        else:
            # Loadable
            attr = ConfigLoadableAttr(name, func)
            home = '_funcs_'
        # A name lives in exactly one tracking set.  The default covers an
        # ``__init__`` that has not created ``_setables_``.
        for tracker in ('_funcs_', '_attrs_', '_setables_'):
            names = getattr(self, tracker, frozenset())
            if tracker == home:
                names = names | {name}
            else:
                names = names - {name}
            setattr(self, tracker, names)
        setattr(type(self), name, attr)

    def __setitem__(self, key, value):
        """
        Set self[key] by delegating to attribute assignment.

        :raises TypeError: if the attribute refuses assignment.
        """
        try:
            setattr(self, key, value)
        except AttributeError as err:
            raise TypeError(
                "'{name}' object does not support item assignment".format(
                    name=type(self).__name__
                )
            ) from err
Now, we almost have a working BaseConfig class implementation… But there is a fundamental problem with the class as it stands. Attributes assigned this way are assigned to the class definition (like properties), rather than the object instance. So… all objects created from the same class end up sharing the same attributes. This is most likely not a desirable result. So… we need to modify the __new__() method for the BaseConfig class to make a dummy subclass of the original class, to use for the instance. This means each instance of the class effectively has its own class definition, and it separates the attributes cleanly:
class BaseConfig(collections.abc.Mapping):
    """Config base class: per-instance subclass factory."""
    # The bookkeeping sets need real slots; an empty ``__slots__`` would
    # make assigning ``_funcs_``/``_attrs_``/``_setables_`` fail.
    __slots__ = ('_funcs_', '_attrs_', '_setables_')

    def __new__(cls, *args, **kwargs):
        """
        Constructs a new instance. This functions like a factory, and will
        make a dummy subclass of the class before sending to
        :py:meth:`__init__`, in order to ensure that properties (attributes)
        do not bleed across instances of the class.
        """
        if hasattr(cls, '__factory_subclass__'):
            # Already a factory subclass: allocate directly.  ``cls`` has
            # to be passed explicitly, and ``object.__new__`` rejects extra
            # arguments, so the constructor arguments are not forwarded.
            return super().__new__(cls)
        new_cls = type(cls.__name__, (cls, ), {
            # The slots are inherited; the subclass adds none of its own.
            '__slots__': (),
            '__module__': '.'.join((
                cls.__module__,
                cls.__name__,
                'subclass'
            )),
            '__factory_subclass__': True,
            '__doc__': '\n'.join((
                'Factory-generated specialized {name} subclass.'.format(
                    name=cls.__name__
                ),
                cls.__doc__ if cls.__doc__ is not None else ''
            ))
        })
        return super().__new__(new_cls)
One final tweak for the class is to make it possible to serialize it through pickle. To accomplish this, I am going to make the class implement the pickle protocol methods of __getstate__() and __setstate__(), as well as force subclasses to implement the __reduce__() method by making it abstract:
import abc


class BaseConfig(collections.abc.Mapping):
    """Config base class: pickle support."""

    def __getstate__(self):
        """
        Return the picklable state: the set of setable names under the
        ``_setables_`` key plus every loaded attribute by name.  Attributes
        that are not loaded yet are not part of the state.
        """
        return dict(
            _setables_=self._setables_,
            **{name: getattr(self, name) for name in self._attrs_}
        )

    def __setstate__(self, state):
        """
        Restore state produced by :py:meth:`__getstate__`.

        Descriptors are reinstalled on ``type(self)`` and the tracking sets
        are updated to match, so restored names count as loaded keys.
        """
        loaded = set()
        for key, value in state.items():
            if key == '_setables_':
                self._setables_ = value
                for name in value:
                    setattr(type(self), name, ConfigSetableAttr(name))
            else:
                setattr(type(self), key, ConfigSimpleAttr(value))
                loaded.add(key)
        # Keep the bookkeeping in step with the descriptors just installed;
        # the defaults cover objects whose ``__init__`` never ran.
        self._attrs_ = getattr(self, '_attrs_', frozenset()) | loaded
        self._funcs_ = getattr(self, '_funcs_', frozenset()) - loaded

    @abc.abstractmethod
    def __reduce__(self):
        """
        Must be implemented by subclasses.  A typical implementation has
        this shape (class and constructor arguments are subclass-specific)::

            def __reduce__(self):
                return (<class>, (<args>), self.__getstate__())
        """
        pass
The final result is a functional base class that will be used to build from for later blog posts, touching on converting to/from dicts, then YAML files into them, and finally making a config object masquerade as a module (causing it to function like a singleton). Here is the complete implementation from the above pieces:
import abc
import collections.abc
import copy
import sys
import types
import typing
class SingletonMeta(type):
    """
    Metaclass that gives every class it creates exactly one instance.

    The optional ``slots`` class keyword becomes the new class's
    ``__slots__`` unless the class body already declares its own.
    """
    __slots__ = ()

    def __new__(cls, name, bases, namespace, slots=()):
        # ``__slots__`` only has an effect if it is in the namespace when
        # the class object is built, so inject it here and not in __init__.
        namespace = dict(namespace)
        namespace.setdefault('__slots__', slots)
        return super().__new__(cls, name, bases, namespace)

    def __init__(cls, name, bases, namespace, slots=()):
        super().__init__(name, bases, namespace)
        original_new = cls.__new__

        def my_new(cls, *args, **kwargs):
            # Look only at the class's own namespace: an inherited
            # ``_instance`` belongs to a parent singleton and must not be
            # handed out as an instance of the subclass.
            if '_instance' not in cls.__dict__:
                cls._instance = original_new(cls, *args, **kwargs)
            return cls._instance

        cls.__new__ = staticmethod(my_new)
class NotLoadedType(metaclass=SingletonMeta):
    """
    Singleton that a Config object's elements are set to before they get
    loaded.
    """

    def __str__(self):
        return 'Not Loaded'

    def __repr__(self):
        # An empty repr makes unloaded values vanish from dict reprs
        # (``{'a': }``); return a visible marker instead.
        return '<NotLoaded>'

    def __bool__(self):
        return False

    def __hash__(self):
        return hash(type(self).__name__)

    def __reduce__(self):
        # Unpickling calls the class again and so gets the singleton back.
        return (NotLoadedType, ())

    def __call__(self):
        raise TypeError(
            "'{name}' object is not callable".format(
                name=type(self).__name__
            )
        )
# Module-level marker used for config elements that are not loaded yet.
NotLoaded = NotLoadedType()
class ConfigValuesView(collections.abc.ValuesView):
    "View of a config object's values."
    __slots__ = ()

    def __repr__(self):
        # Shown as ``ConfigValuesView((v1, v2, ...))``.
        return ''.join((type(self).__name__, '(', repr(tuple(self)), ')'))
class ConfigKeysView(collections.abc.KeysView):
    "View of a config object's keys."
    __slots__ = ()

    def __repr__(self):
        # Shown as ``ConfigKeysView({'k1', 'k2', ...})``.
        return ''.join((type(self).__name__, '(', repr(set(self)), ')'))
class ConfigItemsView(collections.abc.ItemsView):
    "View of a config object's (key, value) items."
    __slots__ = ()

    def __repr__(self):
        # Shown as ``ConfigItemsView(((k1, v1), (k2, v2), ...))``.
        return ''.join((type(self).__name__, '(', repr(tuple(self)), ')'))
class ConfigSimpleAttr:
    """
    Simple loaded attribute descriptor for Config objects.

    Wraps an already-computed value.  Reading returns that value; assigning
    or deleting through an instance raises :class:`AttributeError`.
    """
    __slots__ = ('value', )

    def __init__(self, value: typing.Any):
        self.value = value

    def __get__(self, inst, cls):
        # The stored value is returned for instance and class access alike.
        return self.value

    def __set__(self, inst, value):
        raise AttributeError("can't set attribute")

    def __delete__(self, inst):
        raise AttributeError("can't delete attribute")
class ConfigLoadableAttr:
    """
    Loadable attribute descriptor for Config objects.

    Holds a zero-argument loader.  The first read through an instance calls
    the loader, replaces this descriptor on the owning class with a
    ``ConfigSimpleAttr`` holding the result, and moves ``name`` from the
    instance's ``_funcs_`` set to its ``_attrs_`` set.
    """
    __slots__ = ('name', 'func')

    def __init__(self, name: str, func: typing.Callable):
        self.name = name
        self.func = func

    def __get__(self, inst, cls):
        if inst is None:
            # Class-level access (introspection, help(), ...).  There is no
            # instance whose bookkeeping could be updated, so return the
            # descriptor itself instead of loading and then failing on
            # ``None._attrs_`` with the descriptor already swapped out.
            return self
        ret = self.func()
        # Memoize: later reads hit the simple descriptor, not the loader.
        setattr(cls, self.name, ConfigSimpleAttr(ret))
        inst._attrs_ = inst._attrs_ | {self.name}
        inst._funcs_ = inst._funcs_ - {self.name}
        return ret

    def __set__(self, inst, value):
        raise AttributeError("can't set attribute")

    def __delete__(self, inst):
        raise AttributeError("can't delete attribute")
class ConfigSetableAttr:
    """
    Setable attribute descriptor for Config objects.

    Placeholder for a name that has no value yet.  Reading or deleting it
    raises :class:`AttributeError`.  Assigning a zero-argument callable
    calls it, replaces this descriptor on the instance's class with a
    ``ConfigSimpleAttr`` holding the result, and moves ``name`` from the
    instance's ``_setables_`` set to its ``_attrs_`` set.
    """
    __slots__ = ('name', )

    def __init__(self, name: str):
        self.name = name

    def __get__(self, inst, cls):
        raise AttributeError(
            "'{typename}' object has no attribute '{name}'".format(
                typename=cls.__name__,
                name=self.name
            )
        )

    def __set__(self, inst, func: typing.Callable):
        # The assigned object is a loader, not the value itself.
        ret = func()
        setattr(type(inst), self.name, ConfigSimpleAttr(ret))
        inst._attrs_ = inst._attrs_ | {self.name}
        inst._setables_ = inst._setables_ - {self.name}

    def __delete__(self, inst):
        raise AttributeError(
            "'{typename}' object has no attribute '{name}'".format(
                typename=type(inst).__name__,
                name=self.name
            )
        )
class BaseConfig(collections.abc.Mapping):
__stots__ = ()
def __init__(self, *, attrs: typing.Iterable):
self._funcs_ = frozenset()
self._attrs_ = frozenset()
if attrs:
tuple(map(lambda a: self._set_attr(**a), attrs))
def __new__(cls, *args, **kwargs):
"""
Constructs a new instance. This functions like a factory, and will
make a dummy subclass of the class before sending to
:py:meth:`__init__`, in order to ensure that properties (attributes)
do not bleed across instances of the class.
"""
if hasattr(cls, '__factory_subclass__'):
return super().__new__(*args, **kwargs)
else:
new_cls_name = cls.__name__
new_cls = type(new_cls_name, (cls, ), {
'__slots__': cls.__slots__,
'__module__': '.'.join((
cls.__module__,
cls.__name__,
'subclass'
)),
'__factory_subclass__': True,
'__doc__': 'n'.join((
'Factory-generated specialized subclass.'.format(
name=cls.__name__
),
cls.__doc__ if cls.__doc__ is not None else ''
))
})
return super().__new__(new_cls)
def _set_attr(
self,
name: str,
func: typing.Optional[typing.Callable]=None,
doc: typing.Optional[str]=None,
preload: bool=False
):
"Sets up an attribute for the config."
if doc is None:
doc = 'The {name} attribute.'.format(name=name)
if func is None:
# Setable
attr = ConfigSetableAttr(name)
self._funcs_ = self._funcs_ - {name}
self._attrs_ = self._attrs_ - {name}
self._setables_ = self._setables_ | {name}
elif preload:
# Loaded
attr = ConfigSimpleAttr(func())
self._funcs_ = self._funcs_ - {name}
self._attrs_ = self._attrs_ | {name}
self._setables_ = self._setables_ - {name}
else:
# Loadable
attr = ConfigLoadableAttr(name, func)
self._funcs_ = self._funcs_ | {name}
self._attrs_ = self._attrs_ - {name}
self._setables_ = self._setables_ - {name}
setattr(type(self), name, attr)
def __gen_keys(self):
yield from sorted(self._attrs_ | self._funcs_)
def __gen_items(self):
yield from ((key, getattr(self, key)) for key in self._attrs_)
yield from ((key, NotLoaded) for key in self._funcs_)
def __hash__(self):
return hash(
(type(self).__module__, type(self).__qualname__) +
tuple(self.__slots__) +
tuple(self.__gen_keys())
)
@property
def __dict__(self):
return types.MappingProxyType(self)
def __contains__(self, key):
"Return key in self."
return key in set(self.__gen_keys())
def __repr__(self):
"Return repr(self)."
return repr(dict(self.__gen_items()))
def __getitem__(self, key):
"Return self[key]."
return getattr(self, key)
def __setitem__(self, key, value):
try:
setattr(self, key, value)
except AttributeError:
raise TypeError(
"'{name}' object does not support item assignment".format(
name=type(self).__name__
)
)
def __str__(self):
"Return str(self)."
return str(dict(self.__gen_items()))
def __sizeof__(self):
"Return sys.getsizeof(self)."
return sys.getsizeof(vars(self))
def __len__(self):
"Return len(self)."
return len(tuple(self.__gen_keys()))
def __iter__(self):
"Return iter(self)."
yield from self.__gen_keys()
def keys(self):
"""
Returns a set-like object providing a view of the config object's
keys.
"""
return ConfigKeysView(self)
def values(self):
"""
Returns a set-like object providing a view of the config object's
values.
"""
return ConfigValuesView(self)
def items(self):
"""
Returns a set-like object providing a view of the config object's
items.
"""
return ConfigItemsView(self)
def copy(self):
"D.copy() -> a shallow copy of D"
return copy.copy(self)
def __copy__(self):
"For use with the :py:func:`copy.copy` function."
return copy.copy(dict(self.__gen_items()))
def get(self, key, default=None):
"D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."
return getattr(self, key, default)
def __deepcopy__(self, memo):
"For use with the :py:func:`copy.deepcopy` function."
try:
return memo[id(self)]
except KeyError:
memo[id(self)] = unpack_element(self, memo=memo)
return memo[id(self)]
def __getstate__(self):
return dict(
_setables_=self._setables_,
**{name: getattr(self, name) for name in self._attrs_}
)
def __setstate__(self, state):
for key, value in state.items():
if key == '_setables_':
self._setables_ = value
for name in value:
setattr(type(self), name, ConfigSetableAttr(name))
else:
setattr(type(self), key, ConfigSimpleAttr(value))
@abc.abstractmethod
def __reduce__(self):
pass