Overriding the `[]` operator in a dictionary of dictionaries - python-3.x

I am trying to implement a class which provides a dictionary with a default value:
from copy import deepcopy
class Dict:
    """Dictionary-like wrapper that yields a default for missing keys.

    Looking up a key that was never assigned returns a deep copy of
    ``default``. The copy is not stored, so every lookup of a missing key
    produces a new, independent object.
    """

    def __init__(self, default) -> None:
        self.default = default  # template that is deep-copied for missing keys
        self.values = {}  # key/value pairs assigned through __setitem__

    def __getitem__(self, key):
        """Return the stored value, or a fresh deep copy of the default."""
        return self.values[key] if key in self.values else deepcopy(self.default)

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``."""
        self.values[key] = value
It works as expected when the default value is "plain" (42 in the example below):
KEY = 'k'
d = Dict(42)
print(d[KEY]) # prints 42
d[KEY] = 53
print(d[KEY]) # prints 53
But it doesn't work as expected when the default value is by itself a Dict object:
KEY1 = 'k1'
KEY2 = 'k2'
d = Dict(Dict(42))
print(d[KEY1][KEY2]) # prints 42
d[KEY1][KEY2] = 53
print(d[KEY1][KEY2]) # prints 42
I have tried to debug that by adding various printouts within the class functions, but I haven't been able to figure it out.
What exactly am I doing wrong here?

The immediate problem is in your __getitem__ method:
def __getitem__(self, key):
return self.values[key] if key in self.values else deepcopy(self.default)
Because you're only returning a value here, but not actually setting it, the returned value isn't useful. If you request a key that doesn't exist, the method is equivalent to:
def __getitem__(self, key):
return deepcopy(self.default)
So when you write:
d[KEY1][KEY2] = 53
You're successfully setting a value for KEY2, but only in the dictionary returned by __getitem__. You probably want to use the dictionary setdefault method, which will set the key in self.values if it doesn't exist (in addition to returning it):
def __getitem__(self, key):
    """Return the value for ``key``, creating it from the default if missing.

    A missing key is populated with a deep copy of ``self.default`` and that
    stored copy is returned, so later mutations of it are kept.
    """
    # Copy lazily: ``setdefault(key, deepcopy(self.default))`` would build
    # (and usually throw away) a deep copy on every lookup, even on a hit.
    if key not in self.values:
        self.values[key] = deepcopy(self.default)
    return self.values[key]
With this implementation:
>>> KEY1 = 'k1'
>>> KEY2 = 'k2'
>>> d = Dict(Dict(42))
>>> print(d[KEY1][KEY2])
42
>>> d[KEY1][KEY2] = 53
>>> print(d[KEY1][KEY2])
53
But as I mentioned in my comment, a better solution is just to use the existing defaultdict implementation:
>>> from collections import defaultdict
>>> d = defaultdict(lambda: defaultdict(lambda: 42))
>>> d[KEY1][KEY2]
42
>>> d[KEY1][KEY2]=53
>>> d[KEY1][KEY2]
53
(The difference between defaultdict and the class you implemented is that the default must be a callable. Here I've used lambda expressions, but you could also use actual functions, classes, etc.)

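Because you are using deepcopy, each lookup of a missing key returns a fresh copy that is not connected to the dictionary, so changes made to that copy are lost.
You have to return the object itself, without deepcopy. (Be aware that every missing key then shares the same default object.)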
def __getitem__(self, key):
return self.values[key] if key in self.values else self.default
Now it should work as expected.

Related

Python: Subclassing a dict to have two keys and a defaultvalue

Following the two very readable tutorials 1 and 2, I would like to create a dictionary with two keys that gives a default value in case the key pair does not exist.
I managed to fulfill the first condition with
from collections import defaultdict
class DictX(dict):
    """Dict subclass intended to be keyed by ``(key1, key2)`` pairs.

    Subscript access such as ``d[3, 5]`` is handled by ``dict`` itself with
    the tuple ``(3, 5)`` as key; it does not go through the attribute hooks
    defined below.
    """

    def __getattr__(self, key1 = None, key2 = None):
        """Look up ``(key1, key2)``; raise AttributeError if it is absent."""
        try:
            return self[(key1,key2)]
        # This is an idea of how to implement the default value, but it does
        # not seem to work:
        # except KeyError as k:
        #     self[(key1,key2)] = 0.
        #     return self[(key1,key2)]
        ## or just return 0
        except KeyError as k:
            raise AttributeError(k)

    # NOTE: attribute assignment (``obj.name = value``) passes only
    # ``(name, value)``, so this three-argument form can only be called
    # explicitly.
    def __setattr__(self, key1, key2, value):
        """Store ``value`` under the pair ``(key1, key2)``."""
        self[(key1,key2)] = value

    def __delattr__(self, key):
        """Delete ``key``; raise AttributeError if it is absent."""
        try:
            del self[key]
        except KeyError as k:
            raise AttributeError(k)

    def __repr__(self):
        return '<DictX ' + dict.__repr__(self) + '>'
sampledict = DictX()
sampledict[3,5] = 5
sampledict[1,4] = 4
print("Checking the dict ",sampledict[1,4])
# This line is going to throw an error
print("Checking the default dict ",sampledict[3,6])
How do I code the defaultvalue behaviour?
Pro-Question:
If I just give one value sampledict[1,] or sampledict[1,:], I would like to get a list of all key - value pairs that start with 1. Is that possible?

How to type hint a dict from classes to instances?

I've got a dict that maps classes to instances of those classes. How can I type-hint this in Python 3?
from typing import Dict
d : Dict[???, ???] = {}
d[int] = 0
d[str] = "hello world"
I'm not sure you can enforce that the value must be an instance of the key through type hints, but if you're open to extending the dict class, you can override its __setitem__():
from typing import Any, Type


class TypedDict(dict):
    """Dict mapping classes to instances of those classes.

    Item assignment is validated at runtime: ``value`` must be an instance
    of ``key``.

    NOTE: only ``d[key] = value`` is checked here; the constructor,
    ``update()`` and ``setdefault()`` are not overridden. The name also
    shadows ``typing.TypedDict`` if both are imported into one module.
    """

    def __setitem__(self, key: Type, value: Any):
        """Store ``value`` under ``key`` after checking ``isinstance``.

        Raises:
            TypeError: if ``value`` is not an instance of ``key``.
        """
        if not isinstance(value, key):
            raise TypeError("TypedDict values must be instances of their keys")
        super().__setitem__(key, value)
To use this:
>>> td = TypedDict()
>>> td[int] = 0
{int: 0}
>>> td[float] = 1.0
{int: 0, float: 1.0}
>>> td[str] = -1
TypeError: TypedDict values must be instances of their keys

Alternative solution not using closures

I have a class Data which I want to filter using the below api.
# Example: filter using where
inpt = {"a":np.array((1,2,3,4,2,5,6,2,3,3,2,1)),
"b":np.random.rand(12)}
data = (Data(inpt)
.where(col("a").equals(3)) # This is how where should be called.
)
data
where is a method from class Data
col("a").equals(3) is syntactic sugar for inpt["a"] == 3
I am able to achieve this using another class Expr which handles all the functionality within Data.where() using closures. Reason for this being that Expr doesn't have access to Data.
Question: can someone provide me with an alternative approach not involving closures? My goal is to learn new approaches / directions.
Here is my code:
from __future__ import annotations
from typing import Dict, Any
import numpy as np
class Data:
    """Named columns that can be filtered with an ``Expr``.

    ``data`` maps column names to values that support mask indexing
    (the example in the question uses NumPy arrays).
    """

    def __init__(self, data: Dict):
        self._data = data

    def where(self, e: "Expr") -> "Data":
        """Keep only the entries selected by ``e`` and return ``self``."""
        idx = e.collect(self)
        # Build a new dict instead of assigning into the one handed to
        # __init__, so the caller's input mapping is left untouched.
        self._data = {k: v[idx] for k, v in self._data.items()}
        return self

    def __repr__(self):
        return str(self._data)


class Expr:
    """Deferred expression: a pipeline of functions applied by ``collect``."""

    def __init__(self):
        self.fs = []  # steps, applied in insertion order

    def col(self, s: str) -> "Expr":
        """Append a step that selects column ``s`` from a ``Data``."""
        self.fs.append(lambda x: x._data[s])
        return self

    def equals(self, el: Any) -> "Expr":
        """Append a step that compares the running value with ``el``."""
        self.fs.append(lambda x: x == el)
        return self

    def collect(self, x: "Data") -> Any:
        """Feed ``x`` through every recorded step and return the result."""
        args = x
        for f in self.fs:
            args = f(args)
        return args


def col(s: str) -> "Expr":
    """Start a new expression that selects column ``s``."""
    return Expr().col(s)
I don't really understand the point. Maybe if you give an example of what you're actually trying to do?
If you already know the right key, you can just check directly. If you want to find the right key, the pythonic way is to use a list comprehension.
In [2]: inpt = {
...: "a": (1,2,3,4,2,5,6,2,3,3,2,1),
...: "b": 3,
...: }
In [3]: inpt["a"] == 3
Out[3]: False
In [4]: inpt["b"] == 3
Out[4]: True
In [5]: [key for key, value in inpt.items() if value == 3][0]
Out[5]: 'b'
In [8]: from typing import Sequence
In [9]: [key for key, value in inpt.items() if isinstance(value, Sequence) and 3 in value][0]
Out[9]: 'a'

Recursively iterate through a nested dict and return value of the first matching key

I have a deeply nested dict and need to iterate through it and return the value corresponding to the key argument, second argument of my function.
For example, with
tree = {"a": 12, "g":{ "b": 2, "c": 4}, "d":5}
tree_traverse(tree, "d") should return 5
Here is my code:
def tree_traverse(tree, key):
    """Search the nested dict ``tree`` for ``key`` (version from the question).

    Returns the value of a matching non-dict entry at the current level.
    The result of the recursive call is not returned, so a match inside a
    nested dict yields ``None``.
    """
    for k,v in tree.items():
        if isinstance(v, dict):
            tree_traverse(v, key)
        elif k == key:
            return v
The problem I have is that this function returns None if it doesn't find the matching key once it's done iterating through the deepest nested dict.
I don't want it to return anything before the matching key is found.
I didn't find a solution in another thread, most of them use print statements and don't return anything so I guess it avoids this issue.
You have to check whether the recursive call actually found something so you can continue the loop. E.g. try the following:
def tree_traverse(tree, key):
    """Return the value of the first ``key`` found in the nested dict ``tree``.

    A key at the current level wins over one in a nested dict. Returns
    ``None`` when the key is not found, so a stored ``None`` cannot be
    told apart from a missing key.
    """
    if key in tree:
        return tree[key]
    for v in tree.values():
        # Only descend into nested dicts; keep looping while they report
        # "not found" (None).
        if isinstance(v, dict) and (found := tree_traverse(v, key)) is not None:
            return found
    return None
Here we instantiate an object when the function is created, that all executions of the function will share, called _marker. We return this object if we don't find the key. (You could also use None here, but None is frequently a meaningful value.)
def tree_traverse(tree, key, *, _marker=object()):
    """Return the value of the first ``key`` in ``tree``, else ``_marker``.

    ``_marker`` is a unique sentinel meaning "not found", which lets
    ``None`` be returned as a genuine value.
    """
    for k, v in tree.items():
        # Compare the key before descending, so a key whose value is itself
        # a dict can be found too.
        if k == key:
            return v
        if isinstance(v, dict):
            res = tree_traverse(v, key, _marker=_marker)
            if res is not _marker:
                return res
    return _marker
def find(tree, key):
    """Return the value of the first ``key`` found in the nested dict ``tree``.

    Raises:
        KeyError: if ``tree_traverse`` reports that ``key`` was not found.
    """
    # A fresh sentinel per call cannot collide with any value stored in
    # the tree.
    _marker = object()
    res = tree_traverse(tree, key, _marker=_marker)
    if res is _marker:
        raise KeyError("Key {} not found".format(key))
    return res
I use tree_traverse as a helper function because we want different behaviour at the outermost layer of our recursion (throw an error) than we want inside (return a _marker object).
A NestedDict can solve the problem
from ndicts import NestedDict
def tree_traverse(tree, k):
nd = NestedDict(tree)
for key, value in nd.items():
if k in key:
return value
>>> tree = {"a": 12, "g":{ "b": 2, "c": 4}, "d":5}
>>> tree_traverse(tree, "d")
5
To install ndicts, run: pip install ndicts

Interaction between __hash__ and __eq__ in Python

I wrote this simple code and I was trying to understand what is going on exactly. I created two equal objects and put only one of them in a dictionary.
Then, using the second object as a key, I try to print the name attribute of its value.
Thanks to my hash function, the dictionary returns the value of the hash corresponding to the key I inserted, which is the same for obj1 and obj2.
Here is my question: does my hash function check that the two objects are indeed equal, or is it a case of collision?
I hope the question is clear.
class Test:
    """Object whose equality and hash are both derived from ``name``."""

    def __init__(self, name):
        self.name = name

    def __eq__(self, other):
        """Equal when ``other`` is an instance of this type with the same name."""
        return (isinstance(other, type(self)) and self.name == other.name)

    def __hash__(self):
        """Hash on ``name`` so that equal objects hash equally."""
        return hash(self.name)
obj1 = Test('abc')
obj2 = Test('abc')
d = {}
d[obj1] = obj1
print(d[obj2].name)
You can easily figure this out by testing a few combinations. Consider these two types:
class AlwaysEqualConstantHash:
    """Demo type: every instance hashes to 4 and compares equal to anything.

    Both hooks print a trace line so the calls a dict makes are visible.
    """

    def __eq__(self, other):
        print('AlwaysEqualConstantHash eq')
        return True

    def __hash__(self):
        print('AlwaysEqualConstantHash hash')
        return 4
class NeverEqualConstantHash:
    """Demo type: every instance hashes to 4 but never compares equal.

    Both hooks print a trace line so the calls a dict makes are visible.
    """

    def __eq__(self, other):
        print('NeverEqualConstantHash eq')
        return False

    def __hash__(self):
        print('NeverEqualConstantHash hash')
        return 4
Now let’s put this inside a dictionary and see what happens:
>>> d = {}
>>> d[AlwaysEqualConstantHash()] = 'a'
AlwaysEqualConstantHash hash
>>> d[AlwaysEqualConstantHash()]
AlwaysEqualConstantHash hash
AlwaysEqualConstantHash eq
'a'
>>> d[AlwaysEqualConstantHash()] = 'b'
AlwaysEqualConstantHash hash
AlwaysEqualConstantHash eq
>>> d
{<__main__.AlwaysEqualConstantHash object at 0x00000083E8174A90>: 'b'}
As you can see, the hash is used all the time to address the element in the dictionary. And as soon as there is an element with the same hash inside the dictionary, the equality comparison is also made to figure out whether the existing element is equal to the new one. So since all our new AlwaysEqualConstantHash objects are equal to one another, they all can be used as the same key in the dictionary.
>>> d = {}
>>> d[NeverEqualConstantHash()] = 'a'
NeverEqualConstantHash hash
>>> d[NeverEqualConstantHash()]
NeverEqualConstantHash hash
NeverEqualConstantHash eq
Traceback (most recent call last):
File "<pyshell#56>", line 1, in <module>
d[NeverEqualConstantHash()]
KeyError: <__main__.NeverEqualConstantHash object at 0x00000083E8186BA8>
>>> d[NeverEqualConstantHash()] = 'b'
NeverEqualConstantHash hash
NeverEqualConstantHash eq
>>> d
{<__main__.NeverEqualConstantHash object at 0x00000083E8186F60>: 'a', <__main__.NeverEqualConstantHash object at 0x00000083E8186FD0>: 'b'}
For the NeverEqualConstantHash this is very different. The hash is also used all the time but since a new object is never equal to another, we cannot retrieve the existing objects that way.
>>> x = NeverEqualConstantHash()
>>> d[x] = 'foo'
NeverEqualConstantHash hash
NeverEqualConstantHash eq
NeverEqualConstantHash eq
>>> d[x]
NeverEqualConstantHash hash
NeverEqualConstantHash eq
NeverEqualConstantHash eq
'foo'
If we use the exact same key though, we can still retrieve the element since it won’t need to compare to itself using __eq__. We also see how the __eq__ is being called for every existing element with the same hash in order to check whether this new object is equal or not to another.
So yeah, the hash is being used to quickly sort the element into the dictionary. And the hash must be equal for elements that are considered equal. Only when the hash matches that of an existing element is __eq__ used, to determine whether both keys refer to the same element.

Resources