55
66class Array (object ):
77
8- def __init__ (self , size = 32 ):
8+ def __init__ (self , size = 32 , init = None ):
99 self ._size = size
10- self ._items = [None ] * size
10+ self ._items = [init ] * size
1111
1212 def __getitem__ (self , index ):
1313 return self ._items [index ]
@@ -41,96 +41,88 @@ def __init__(self, key, value):
4141
4242class HashTable (object ):
4343
44- UNUSED = None # 没被使用过的槽,作为该类变量的一个单例,下边都是is 判断
45- EMPTY = Slot (None , None ) # 使用过但是被删除的槽
44+ UNUSED = None # 没被使用过
45+ EMPTY = Slot (None , None ) # 使用却被删除过
4646
4747 def __init__ (self ):
48- self ._table = Array (7 )
48+ self ._table = Array (8 , init = HashTable . UNUSED ) # 保持 2*i 次方
4949 self .length = 0
5050
5151 @property
5252 def _load_factor (self ):
53- # load factor 超过 2/3 就重新分配空间
53+ # load_factor 超过 0.8 重新分配
5454 return self .length / float (len (self ._table ))
5555
5656 def __len__ (self ):
5757 return self .length
5858
59- def _hash1 (self , key ):
60- """ 计算key的hash值"""
59+ def _hash (self , key ):
6160 return abs (hash (key )) % len (self ._table )
6261
63- def _find_slot (self , key , for_insert = False ):
64- """_find_slot
65-
66- :param key:
67- :param for_insert: 是否插入,还是仅仅查询
68- :return: slot index or None
69- """
70- index = self ._hash1 (key )
71- base_index = index
72- hash_times = 1
62+ def _find_key (self , key ):
63+ index = self ._hash (key )
7364 _len = len (self ._table )
74-
75- if not for_insert : # 查找是否存在 key
76- while self . _table [ index ] is not HashTable . UNUSED :
77- if self . _table [ index ] is HashTable . EMPTY :
78- index = ( base_index + hash_times * hash_times ) % _len # 一个简单的二次方探查
79- continue
80- elif self . _table [ index ]. key == key :
81- return index
82- index = ( base_index + hash_times * hash_times ) % _len
83- hash_times += 1
84- return None
85- else :
86- while not self ._slot_can_insert ( index ): # 循环直到找到一个可以插入的槽
87- index = ( base_index + hash_times * hash_times ) % _len
88- hash_times += 1
89- return index
65+ while self . _table [ index ] is not HashTable . UNUSED :
66+ if self . _table [ index ] is HashTable . EMPTY :
67+ index = ( index * 5 + 1 ) % _len
68+ continue
69+ elif self . _table [ index ]. key == key :
70+ return index
71+ else :
72+ index = ( index * 5 + 1 ) % _len
73+ return None
74+
75+ def _find_slot_for_insert ( self , key ):
76+ index = self . _hash ( key )
77+ _len = len ( self ._table )
78+ while not self . _slot_can_insert ( index ):
79+ index = ( index * 5 + 1 ) % _len
80+ return index
9081
9182 def _slot_can_insert (self , index ):
9283 return (self ._table [index ] is HashTable .EMPTY or self ._table [index ] is HashTable .UNUSED )
9384
94- def __contains__ (self , key ): # in operator
95- index = self ._find_slot (key , for_insert = False )
85+ def __contains__ (self , key ): # in operator
86+ index = self ._find_key (key )
9687 return index is not None
9788
9889 def add (self , key , value ):
99- if key in self : # key 相同值不一样的时候,用新的值
100- index = self ._find_slot (key , for_insert = False )
90+ if key in self :
91+ index = self ._find_key (key )
10192 self ._table [index ].value = value
10293 return False
10394 else :
104- index = self ._find_slot (key , for_insert = True )
95+ index = self ._find_slot_for_insert (key )
10596 self ._table [index ] = Slot (key , value )
10697 self .length += 1
107- if self ._load_factor >= 0.8 : # 注意超过了 阈值 rehashing
98+ if self ._load_factor >= 0.8 :
10899 self ._rehash ()
109100 return True
110101
111102 def _rehash (self ):
112103 old_table = self ._table
113- newsize = len (self ._table ) * 2 + 1 # 扩大 2*n + 1
114- self ._table = Array (newsize )
104+ newsize = len (self ._table ) * 2
105+ self ._table = Array (newsize , HashTable . UNUSED )
115106
116107 self .length = 0
117108
118109 for slot in old_table :
119110 if slot is not HashTable .UNUSED and slot is not HashTable .EMPTY :
120- index = self ._find_slot (slot .key , for_insert = True )
111+ index = self ._find_slot_for_insert (slot .key )
121112 self ._table [index ] = slot
122113 self .length += 1
123114
124115 def get (self , key , default = None ):
125- index = self ._find_slot (key , for_insert = False )
116+ index = self ._find_key (key )
126117 if index is None :
127118 return default
128119 else :
129120 return self ._table [index ].value
130121
131122 def remove (self , key ):
132- assert key in self , 'keyerror'
133- index = self ._find_slot (key , for_insert = False )
123+ index = self ._find_key (key )
124+ if index is None :
125+ raise KeyError ()
134126 value = self ._table [index ].value
135127 self .length -= 1
136128 self ._table [index ] = HashTable .EMPTY
@@ -139,28 +131,34 @@ def remove(self, key):
139131 def __iter__ (self ):
140132 for slot in self ._table :
141133 if slot not in (HashTable .EMPTY , HashTable .UNUSED ):
142- yield slot .key # 和 python dict 一样,默认遍历 key,需要value 的话写个 items() 方法
134+ yield slot .key
143135
144136
def test_hash_table():
    """Exercise add/get/remove/iteration and growth past the initial size."""
    h = HashTable()
    h.add('a', 0)
    h.add('b', 1)
    h.add('c', 2)
    assert len(h) == 3
    assert h.get('a') == 0
    assert h.get('b') == 1
    assert h.get('hehe') is None

    h.remove('a')
    assert h.get('a') is None
    assert sorted(h) == ['b', 'c']

    # 50 entries exceed the initial capacity, so this also checks that
    # rehashing keeps every key reachable.
    n = 50
    for i in range(n):
        h.add(i, i)

    for i in range(n):
        assert h.get(i) == i
157+
158+
if __name__ == '__main__':
    # Run the self-test between the two markers. Passing the call as a
    # ``print`` argument would run it before anything is printed and then
    # print its ``None`` return value.
    print('beg')
    test_hash_table()
    print('end')
0 commit comments