Skip to content

Commit 962f24a

Browse files
Add IdMapping video
1 parent a559748 commit 962f24a

File tree

4 files changed

+356
-0
lines changed

4 files changed

+356
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ James and his team are available for consulting, contracting, code reviews, and
1212

1313
| N | Code | Video |
1414
|-----| --- |--- |
15+
| 128 | [src](videos/128_id_mapping) | [A forbidden Python technique to put ANYTHING in a dict or set.](https://youtu.be/NpdNDTncxwA) |
1516
| 127 | [src](videos/127_accidentally_quadratic_unique_sum) | [Don't make this big O mistake!](https://youtu.be/PXWL_Xzyrp4) |
1617
| 126 | [src](videos/126_attrs_revolution) | [Why I prefer attrs over dataclasses](https://youtu.be/1S2h11XronA) |
1718
| 125 | [src](videos/125_type_or_class) | [type(obj) vs. obj.__class__ in Python, and changing an object's class.](https://youtu.be/5vpdzRbfTIM) |

videos/128_id_mapping/demo.py

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# my_dict = {}
2+
#
3+
# not_hashable = [1, 2, 3]
4+
# my_dict[not_hashable] = ... # ERROR
5+
#
6+
# val = my_dict[not_hashable]
7+
8+
9+
def set_in(set_mem, x):
    """Toy model of a hash-set membership test using separate chaining.

    ``set_mem`` is a sequence of buckets; each bucket is either ``None`` or the
    head of a linked list whose nodes expose ``.value`` and ``.next``.
    Returns True if some node in the bucket selected by ``hash(x)`` holds a
    value equal to ``x``, otherwise False.
    """
    # Hash the candidate down to a bucket, then walk that bucket's chain until
    # we either run off the end or find an equal value.
    node = set_mem[hash(x) % len(set_mem)]
    while node is not None and not (node.value == x):
        node = node.next
    return node is not None
17+
18+
19+
class UnsafeList(list):
    """A list that claims to be hashable by hashing its current contents.

    Because the hash is derived from the (mutable) contents, it changes
    whenever the list is mutated -- which is what makes it unsafe as a
    dict key or set member.
    """

    def __hash__(self):
        # Snapshot the current elements as a tuple and reuse the tuple's hash.
        snapshot = tuple(self)
        return hash(snapshot)
22+
23+
24+
# Store a value under a content-hashed list, then mutate the list: the key now
# hashes differently from when it was inserted.
x = UnsafeList([1, 2, 3])
my_dict = {x: "subscribe"}
x[0] = 0


# Left commented out on purpose: looking the mutated key up again fails.
# print(my_dict[x])  # KeyError!
30+
31+
32+
class Hashable:
    """Empty class: it defines neither __eq__ nor __hash__ itself."""

    pass
    # __hash__ defined automatically
    # __eq__ defined automatically
36+
37+
38+
class NotHashable:
    """Class that overrides __eq__ (as identity) without defining __hash__."""

    def __eq__(self, other):
        # Equal only to itself.
        return self is other

    # __hash__ set to None automatically
43+
44+
45+
class SadSet:
    """A set look-alike backed by a plain list.

    Works for unhashable values because membership is decided by scanning the
    list (``in``), at the cost of a linear search on every operation.
    """

    def __init__(self):
        # Insertion-ordered storage of the distinct values added so far.
        self.data = list()

    def add(self, value):
        """Append ``value`` unless the list already contains it."""
        if value in self.data:
            return
        self.data.append(value)

    def remove(self, value):
        """Remove ``value`` if present; silently do nothing otherwise."""
        if value not in self.data:
            return
        self.data.remove(value)

    def __contains__(self, value):
        present = value in self.data
        return present

    def __len__(self):
        size = len(self.data)
        return size

    def __iter__(self):
        # Iterate in insertion order.
        return iter(self.data)
65+
66+
67+
class A:
    """Empty class; the statements that follow instantiate it to print id() values."""

    pass
69+
70+
71+
# Print the id of one instance, delete it, then print the id of a fresh
# instance. The trailing comments record one observed run; the actual numbers
# vary from run to run.
x = A()
print(id(x))  # 2086544423440


# Once x is gone, a new object may be given the same id: ids are only unique
# among objects whose lifetimes overlap.
del x
y = A()
print(id(y))  # 2086544423440 (SAME!)
77+
78+
from id_mapping import IdMapping, IdSet

# An IdMapping keys on id(key), so an unhashable, mutable list works as a key.
d = IdMapping()

my_list = [1, 2, 3]
d[my_list] = "subscribe"
my_list[0] = 0  # mutating the key does not change its identity

s = d[my_list]
assert s == "subscribe"  # SUCCESS!

collection = IdSet()

# Adding the very same object twice leaves a single entry.
n = 257
collection.add(n)
collection.add(n)

# Print the elements explicitly so the shown output does not depend on how
# (or whether) IdSet implements __repr__.
print(list(collection))  # [257]

videos/128_id_mapping/id_mapping.py

+144
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
from collections.abc import MutableMapping, MutableSet
from reprlib import recursive_repr
from typing import TypeVar
3+
4+
KeyT = TypeVar('KeyT')
ValT = TypeVar('ValT')


class IdMapping(MutableMapping[KeyT, ValT]):
    """A mapping that internally stores keys by their identity id(key).

    Keys can be ANYTHING, even non-hashable objects.
    Stores strong references to all keys and values.
    When determining if IdMappings are equal, keys are compared by identity, but values are compared by ==.
    Warning: key1 == key2 does NOT imply id(key1) == id(key2), which may be confusing if your keys are e.g. ints.

    >>> not_hashable = [1, 2, 3]
    >>> mapping = IdMapping()
    >>> mapping[not_hashable] = "hello"
    >>> not_hashable in mapping
    True
    >>> mapping[not_hashable]
    'hello'
    >>> not_hashable[0] = 0  # can even mutate the keys, it doesn't matter!
    >>> mapping[not_hashable]
    'hello'
    >>> mapping
    IdMapping([([0, 2, 3], 'hello')])
    """

    def __init__(self, *args, **kwargs):
        """Create the mapping; arguments are forwarded to ``update()``."""
        # id(key) -> (key, value). Keeping the key object in the entry holds a
        # strong reference to it, so its id stays reserved while it is stored.
        self._data: dict[int, tuple[KeyT, ValT]] = {}

        self.update(*args, **kwargs)

    @recursive_repr()
    def __repr__(self):
        """Show the entries as (key, value) pairs, e.g. ``IdMapping([(k, v)])``.

        Pairs are used rather than dict syntax because keys may be unhashable.
        ``recursive_repr`` prints ``...`` if the mapping contains itself.
        """
        name = type(self).__name__
        if not self._data:
            return f"{name}()"
        return f"{name}({list(self._data.values())!r})"

    def __getitem__(self, item):
        """Return the value stored for the object ``item``.

        Raises KeyError(item) if that exact object is not a key.
        """
        try:
            key, val = self._data[id(item)]
        except KeyError as exc:
            # Report the user's key object rather than the internal integer id.
            raise KeyError(item) from exc
        return val

    def __setitem__(self, key, value):
        """Bind ``value`` to the object ``key``, replacing any previous value."""
        self._data[id(key)] = (key, value)

    def __delitem__(self, key):
        """Remove the entry for the object ``key``; KeyError(key) if absent."""
        try:
            del self._data[id(key)]
        except KeyError as exc:
            raise KeyError(key) from exc

    def __iter__(self):
        """Yield the key objects in insertion order."""
        for key, val in self._data.values():
            yield key

    def __len__(self):
        return len(self._data)

    def __eq__(self, other):
        """Compare with ``other``.

        Against another IdMapping: equal when both hold the same key objects
        (by identity) and the corresponding values compare equal with ``==``.
        Against anything else the comparison is delegated to the inherited
        ``Mapping.__eq__``.
        """
        if not isinstance(other, IdMapping):
            return super().__eq__(other)
        if len(self) != len(other):
            return False
        for key, val in self._data.values():
            try:
                if other[key] != val:
                    return False
            except KeyError:
                # ``other`` does not contain this exact key object.
                return False
        return True

    def equal_keys(self, other):
        """Return whether ``self`` and ``other`` have the same keys.

        Against another IdMapping the sets of key ids are compared (identity);
        otherwise the two ``keys()`` views are compared with ``==``.
        """
        if not isinstance(other, IdMapping):
            return self.keys() == other.keys()
        return self._data.keys() == other._data.keys()

    def clear(self) -> None:
        """Remove every entry."""
        self._data.clear()
77+
78+
79+
class IdSet(MutableSet[ValT]):
    """A set that internally stores values by their identity id(value).

    Values can be ANYTHING, even non-hashable objects.
    Stores strong references to all values.
    When determining if IdSets are equal, values are compared by identity, not by using ==.
    Warning: val1 == val2 does NOT imply id(val1) == id(val2), which may be confusing if your values are e.g. ints.

    >>> not_hashable = [1, 2, 3]
    >>> seen = IdSet()
    >>> seen.add(not_hashable)
    >>> not_hashable in seen
    True
    >>> not_hashable[0] = 0  # can even mutate the values, it doesn't matter!
    >>> not_hashable in seen
    True
    >>> seen
    IdSet([[0, 2, 3]])
    """

    def __init__(self, *others):
        """Create the set, adding every element of each iterable in ``others``."""
        # id(value) -> value. Storing the object itself keeps a strong
        # reference, so its id stays reserved while it is a member.
        self._data: dict[int, ValT] = {}

        self.update(*others)

    @recursive_repr()
    def __repr__(self):
        """Show the members as a list, e.g. ``IdSet([a, b])``.

        A list is used rather than set syntax because members may be
        unhashable. ``recursive_repr`` prints ``...`` if the set contains itself.
        """
        name = type(self).__name__
        if not self._data:
            return f"{name}()"
        return f"{name}({list(self._data.values())!r})"

    def __contains__(self, item):
        """Return whether the exact object ``item`` is a member."""
        return id(item) in self._data

    def add(self, value: ValT) -> None:
        """Add the object ``value``; adding the same object again is a no-op."""
        self._data[id(value)] = value

    def discard(self, value: ValT) -> None:
        """Remove the object ``value`` if it is a member; otherwise do nothing."""
        self._data.pop(id(value), None)

    def __iter__(self):
        """Yield the member objects in insertion order."""
        yield from self._data.values()

    def __len__(self):
        return len(self._data)

    def __le__(self, other):
        """Subset test; against another IdSet it compares the id sets directly."""
        if not isinstance(other, IdSet):
            return super().__le__(other)
        return self._data.keys() <= other._data.keys()

    def __ge__(self, other):
        """Superset test; against another IdSet it compares the id sets directly."""
        if not isinstance(other, IdSet):
            return super().__ge__(other)
        return self._data.keys() >= other._data.keys()

    def clear(self) -> None:
        """Remove every member."""
        self._data.clear()

    # The named bulk operations below mirror the built-in set API and delegate
    # to the in-place operators inherited from MutableSet.

    def update(self, *others):
        """Add the elements of every iterable in ``others`` (in-place union)."""
        for other in others:
            self.__ior__(other)

    def intersection_update(self, *others):
        """Keep only the members also found in every one of ``others``."""
        for other in others:
            self.__iand__(other)

    def difference_update(self, *others):
        """Remove every member that is found in any of ``others``."""
        for other in others:
            self.__isub__(other)

    def symmetric_difference_update(self, other):
        """Keep the elements found in exactly one of ``self`` and ``other``."""
        self.__ixor__(other)
+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import pytest
2+
3+
from id_mapping import IdMapping, IdSet
4+
5+
6+
def test_id_mapping_set_get_list():
    """An unhashable list can be set, looked up and deleted as an IdMapping key."""
    counts = [1, 2, 3]

    # A fresh mapping is empty and falsy.
    mapping = IdMapping()
    assert not mapping
    assert len(mapping) == 0

    mapping[counts] = 1234

    assert mapping
    assert len(mapping) == 1
    assert mapping[counts] == 1234
    assert counts in mapping
    assert counts in mapping.keys()
    # The stored key is the very same object, not a copy.
    assert list(mapping.keys())[0] is counts

    # Deleting the only key leaves the mapping empty again.
    del mapping[counts]
    assert not mapping
    assert len(mapping) == 0
25+
26+
27+
def test_id_set_set_get_list():
    """An unhashable list can be added to, found in and removed from an IdSet."""
    counts = [1, 2, 3]

    id_set = IdSet()

    # A fresh set is empty and falsy.
    assert not id_set
    assert len(id_set) == 0

    id_set.add(counts)

    assert len(id_set) == 1
    assert counts in id_set
    # The stored member is the very same object, not a copy.
    assert list(id_set)[0] is counts

    # Adding the same object a second time changes nothing.
    id_set.add(counts)

    assert len(id_set) == 1
    assert counts in id_set
    assert list(id_set)[0] is counts

    id_set.remove(counts)

    assert counts not in id_set
    assert len(id_set) == 0
    assert not id_set
52+
53+
54+
def test_compare_to_normal_set():
    """Contrast equality-based set membership with identity-based IdSet membership."""
    # Built-in set: two equal frozensets collapse into one element.
    s = set()
    s.add(frozenset({1, 2, 3}))
    assert len(s) == 1
    s.add(frozenset({1, 2, 3}))
    assert len(s) == 1

    # IdSet: each call builds a separate frozenset object, and the first one is
    # kept alive by the set, so the two are stored as distinct members.
    s = IdSet()
    s.add(frozenset({1, 2, 3}))
    assert len(s) == 1
    s.add(frozenset({1, 2, 3}))
    assert len(s) == 2

    # IdSet: adding the same object twice still yields a single member.
    s = IdSet()
    item = frozenset({1, 2, 3})
    s.add(item)
    assert len(s) == 1
    s.add(item)
    assert len(s) == 1
73+
74+
75+
def do_once(stuff, op):
    """Call ``op(x)`` once for each distinct item ``x`` of ``stuff``.

    Items are visited in iteration order; an item equal to one already handled
    is skipped. Items must be hashable because they are tracked in a ``set``.
    """
    seen = set()
    for x in stuff:
        if x in seen:
            continue
        # Record the item before running op; without this the `seen` check
        # above could never succeed and duplicates would be processed again.
        seen.add(x)
        op(x)
81+
82+
83+
def test_ints_trouble():
    """Equal ints that are different objects are different IdMapping keys."""
    mapping = IdMapping()

    mapping[257] = "find me"
    # NOTE(review): relies on int(float(257)) producing a new int object rather
    # than reusing the stored literal -- an interpreter detail, confirm on the
    # Python versions you target.
    with pytest.raises(KeyError):
        mapping[int(float(257))]  # even though the keys are equal, they have different ids!
89+
90+
91+
def test_python_doesnt_let_you_store_things():
    """Show which objects the built-in set accepts and which it rejects."""
    x = set()

    # Lists are unhashable, so they cannot be set members.
    with pytest.raises(TypeError):
        x.add([])

    # A plain class keeps the default hash, so its instances are accepted.
    class A:
        pass

    x.add(A())  # OK

    # Defining __eq__ without __hash__ makes instances unhashable.
    class B:
        def __eq__(self, other):
            return self is other

    with pytest.raises(TypeError):
        x.add(B())

    # Defining both is accepted, even when every instance hashes to the same value.
    class C:
        def __eq__(self, other):
            return self is other

        def __hash__(self):
            return 42  # bad hash!

    x.add(C())  # OK, even though hash is bad

0 commit comments

Comments
 (0)