Python3.6字典实现

前言

在Cpython3.6中,对字典的性能以及内存分布做了很大的优化和改进,以下是优化后的字典的Python实现。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import array
import collections

# Placeholder constants
FREE = -1
DUMMY = -2


class Dict(collections.MutableMapping):
'Space efficient dictionary with fast iteration and cheap resizes.'

@staticmethod
def _gen_probes(hashvalue, mask):
'Same sequence of probes used in the current dictionary design'
PERTURB_SHIFT = 5
if hashvalue < 0:
hashvalue = -hashvalue
i = hashvalue & mask
yield i
perturb = hashvalue
while True:
i = (5 * i + perturb + 1) & 0xFFFFFFFFFFFFFFFF
yield i & mask
perturb >>= PERTURB_SHIFT

def _lookup(self, key, hashvalue):
'Same lookup logic as currently used in real dicts'
assert self.filled < len(self.indices) # At least one open slot
freeslot = None
for i in self._gen_probes(hashvalue, len(self.indices) - 1):
index = self.indices[i]
if index == FREE:
return (FREE, i) if freeslot is None else (DUMMY, freeslot)
elif index == DUMMY:
if freeslot is None:
freeslot = i
elif (self.keylist[index] is key or
self.hashlist[index] == hashvalue
and self.keylist[index] == key):
return (index, i)

@staticmethod
def _make_index(n):
'New sequence of indices using the smallest possible datatype'
if n <= 2**7:
return array.array('b', [FREE]) * n # signed char
if n <= 2**15:
return array.array('h', [FREE]) * n # signed short
if n <= 2**31:
return array.array('l', [FREE]) * n # signed long
# python integers
return [FREE] * n

def _resize(self, n):
'''Reindex the existing hash/key/value entries.
Entries do not get moved, they only get new indices.
No calls are made to hash() or __eq__().

'''
n = 2 ** n.bit_length() # round-up to power-of-two
self.indices = self._make_index(n)
for index, hashvalue in enumerate(self.hashlist):
for i in Dict._gen_probes(hashvalue, n - 1):
if self.indices[i] == FREE:
break
self.indices[i] = index
self.filled = self.used

def clear(self):
self.indices = self._make_index(8)
self.hashlist = []
self.keylist = []
self.valuelist = []
self.used = 0
self.filled = 0 # used + dummies

def __getitem__(self, key):
hashvalue = hash(key)
index, i = self._lookup(key, hashvalue)
if index < 0:
raise KeyError(key)
return self.valuelist[index]

def __setitem__(self, key, value):
hashvalue = hash(key)
index, i = self._lookup(key, hashvalue)
if index < 0:
self.indices[i] = self.used
self.hashlist.append(hashvalue)
self.keylist.append(key)
self.valuelist.append(value)
self.used += 1
if index == FREE:
self.filled += 1
if self.filled * 3 > len(self.indices) * 2:
self._resize(4 * len(self))
else:
self.valuelist[index] = value

def __delitem__(self, key):
hashvalue = hash(key)
index, i = self._lookup(key, hashvalue)
if index < 0:
raise KeyError(key)
self.indices[i] = DUMMY
self.used -= 1
# If needed, swap with the lastmost entry to avoid leaving a "hole"
if index != self.used:
lasthash = self.hashlist[-1]
lastkey = self.keylist[-1]
lastvalue = self.valuelist[-1]
lastindex, j = self._lookup(lastkey, lasthash)
assert lastindex >= 0 and i != j
self.indices[j] = index
self.hashlist[index] = lasthash
self.keylist[index] = lastkey
self.valuelist[index] = lastvalue
# Remove the lastmost entry
self.hashlist.pop()
self.keylist.pop()
self.valuelist.pop()

def __init__(self, *args, **kwds):
if not hasattr(self, 'keylist'):
self.clear()
self.update(*args, **kwds)

def __len__(self):
return self.used

def __iter__(self):
return iter(self.keylist)

def iterkeys(self):
return iter(self.keylist)

def keys(self):
return list(self.keylist)

def itervalues(self):
return iter(self.valuelist)

def values(self):
return list(self.valuelist)

def items(self):
return zip(self.keylist, self.valuelist)

def __contains__(self, key):
index, i = self._lookup(key, hash(key))
return index >= 0

def get(self, key, default=None):
index, i = self._lookup(key, hash(key))
return self.valuelist[index] if index >= 0 else default

def popitem(self):
if not self.keylist:
raise KeyError('popitem(): dictionary is empty')
key = self.keylist[-1]
value = self.valuelist[-1]
del self[key]
return key, value

def __repr__(self):
return f'Dict({list(self.items())})'

def show_structure(self):
'Diagnostic method. Not part of the API.'
print('=' * 50)
print(self)
print('Indices:', self.indices)
for i, row in enumerate(
zip(self.hashlist, self.keylist, self.valuelist)):
print(i, row)
print('-' * 50)


if __name__ == '__main__':
d = Dict([(a,a) for a in range(1)])
d.show_structure()
0%