# Python与数据结构[4] -> 散列表[2] -> 开放定址法与再散列的 Python 实现

开放定址散列法和再散列

1 开放定址散列法

开放定址法中的探测方法的三种基本方式为，

1. 线性探测法：探测步进为线性增长，最基本的方式为F(i)=i
2. 平方探测法：探测步进为平方增长，最基本的方式为F(i)= i2
3. 双散列：探测方法的步进由一个新的散列函数决定，最基本的方式为F(i)= i*Hash2(i)，通常选择Hash2(i)=R-(X mod R)，其中R为小于TableSize的素数。

2 再散列

1. 装填因子达到一半的时候进行再散列
2. 插入失败时进行再散列
3. 达到某一装填因子时进行散列

3 代码实现

1 from functools import partial as pro 2 from math import ceil, sqrt 3 from hash_table import HashTable, kmt_hashing 4 5 6 class RehashError(Exception): 7 pass 8 9 10 class OpenAddressingHashing(HashTable): 11 def __init__(self, size, hs, pb, fn=None, rf=0.5): 12 self._array = [None for i in range(size)] 13 self._get_hashing = hs 14 self._hashing = hs(size) if not fn else fn 15 self._probing = pb 16 self._rehashing_factor = rf 17 18 def _sniffing(self, item, num, hash_code=None): 19 # Avoid redundant hashing calculation, if hashing calculation is heavy, this would count much. 20 if not hash_code: 21 hash_code = self._hashing(item) 22 return (hash_code + self._probing(num, item, self.size)) % self.size 23 24 def _get_rehashing_size(self): 25 size = self.size * 2 + 1 26 while not is_prime(size): 27 size += 1 28 return size 29 30 def rehashing(self, size=None, fn=None): 31 if not size: 32 size = self._get_rehashing_size() 33 if size <= (self.size * self.load_factor): 34 raise RehashError('Rehash size is too small!') 35 array = self._array 36 self._array = [None for i in range(size)] 37 self._hashing = self._get_hashing(size) if not fn else fn 38 self.insert(filter(lambda x: x is not None, array)) 39 40 def find(self, item): 41 hash_code = ori_hash_code = self._hashing(item) 42 collision_count = 1 43 value = self._array[hash_code] 44 45 # Build up partial function to shorten time consuming when heavy sniffing encountered. 46 collision_handler = pro(self._sniffing, hash_code=ori_hash_code) 47 48 while value is not None and value != item: 49 hash_code = collision_handler(item, collision_count) 50 value = self._array[hash_code] 51 collision_count += 1 52 return value, hash_code 53 54 def _insert(self, item): 55 if item is None: 56 return 57 value, hash_code = self.find(item) 58 if value is None: 59 self._array[hash_code] = item 60 if self.load_factor > self._rehashing_factor: 61 self.rehashing() 62 63 64 def is_prime(num): # O(sqrt(n)) algorithm 65 if num < 2: 66 raise Exception('Invalid number.') 67 if num == 2: 68 return True 69 for i in range(2, ceil(sqrt(num))+1): 70 if num % i == 0: 71 return False 72 return True 73 74 75 def linear_probing(x, *args): 76 return x 77 78 79 def square_probing(x, *args): 80 return x**2 81 82 83 def double_hashing(x, item, size, *args): 84 r = size - 1 85 while not is_prime(r): 86 r -= 1 87 return x * (r - (item % r)) 88 89 90 def test(h): 91 print('\nShow hash table:') 92 h.show() 93 94 print('\nInsert values:') 95 h.insert(range(9)) 96 h.show() 97 98 print('\nInsert value (existed):') 99 h.insert(1) 100 h.show() 101 102 print('\nInsert value (collided):') 103 h.insert(24, 47) 104 h.show() 105 106 print('\nFind value:') 107 print(h.find(7)) 108 print('\nFind value (not existed):') 109 print(h.find(77)) 110 111 print('\nLoad factor is:', h.load_factor) 112 113 114 if __name__ == '__main__': 115 test(OpenAddressingHashing(11, kmt_hashing, linear_probing)) 116 print(30*'-') 117 test(OpenAddressingHashing(11, kmt_hashing, square_probing)) 118 print(30*'-') 119 test(OpenAddressingHashing(11, kmt_hashing, double_hashing))

View Code

```1 from functools import partial as pro
2 from math import ceil, sqrt
3 from hash_table import HashTable, kmt_hashing
4
5
6 class RehashError(Exception):
7     pass```

```1 class OpenAddressingHashing(HashTable):
2     def __init__(self, size, hs, pb, fn=None, rf=0.5):
3         self._array = [None for i in range(size)]
4         self._get_hashing = hs
5         self._hashing = hs(size) if not fn else fn
6         self._probing = pb
7         self._rehashing_factor = rf```

Note: 此处为了避免冗余计算，开放一个参数供散列值传入，当进行同一个插入的不同嗅探时，其原始散列值是不变的，因此这里可以配合后面的偏函数，在多次嗅探中固定这一参数，从而避免多次散列函数的计算。这对于复杂的散列函数来说可以减少嗅探计算时间。

```1     def _sniffing(self, item, num, hash_code=None):
2         # Avoid redundant hashing calculation, if hashing calculation is heavy, this would count much.
3         if not hash_code:
4             hash_code = self._hashing(item)
5         return (hash_code + self._probing(num, item, self.size)) % self.size```

```1     def _get_rehashing_size(self):
2         size = self.size * 2 + 1
3         while not is_prime(size):
4             size += 1
5         return size```

1. 若没有指定再散列大小，则使用默认方式计算，
2. 当传入的再散列大小小于已有元素数量时，引发再散列异常，
3. 保存原始散列表信息，并新建一个散列表，更新散列函数，
4. 利用新的散列函数，遍历原始散列表并插入新的散列表中。
```1     def rehashing(self, size=None, fn=None):
2         if not size:
3             size = self._get_rehashing_size()
4         if size <= (self.size * self.load_factor):
5             raise RehashError('Rehash size is too small!')
6         array = self._array
7         self._array = [None for i in range(size)]
8         self._hashing = self._get_hashing(size) if not fn else fn
9         self.insert(filter(lambda x: x is not None, array))```

Note: 这里使用偏函数处理嗅探函数，减少散列计算，利用嗅探函数循环嗅探新的位置，直到找到目标元素或None，此时返回元素值或None和对应的散列值。

``` 1     def find(self, item):
2         hash_code = ori_hash_code = self._hashing(item)
3         collision_count = 1
4         value = self._array[hash_code]
5
6         # Build up partial function to shorten time consuming when heavy sniffing encountered.
7         collision_handler = pro(self._sniffing, hash_code=ori_hash_code)
8
9         while value is not None and value != item:
10             hash_code = collision_handler(item, collision_count)
11             value = self._array[hash_code]
12             collision_count += 1
13         return value, hash_code```

```1     def _insert(self, item):
2         if item is None:
3             return
4         value, hash_code = self.find(item)
5         if value is None:
6             self._array[hash_code] = item
8             self.rehashing()```

```1 def is_prime(num):  # O(sqrt(n)) algorithm
2     if num < 2:
3         raise Exception('Invalid number.')
4     if num == 2:
5         return True
6     for i in range(2, ceil(sqrt(num))+1):
7         if num % i == 0:
8             return False
9     return True```

``` 1 def linear_probing(x, *args):
2     return x
3
4
5 def square_probing(x, *args):
6     return x**2
7
8
9 def double_hashing(x, item, size, *args):
10     r = size - 1
11     while not is_prime(r):
12         r -= 1
13     return x * (r - (item % r))```

``` 1 def test(h):
2     print('\nShow hash table:')
3     h.show()
4
5     print('\nInsert values:')
6     h.insert(range(9))
7     h.show()
8
9     print('\nInsert value (existed):')
10     h.insert(1)
11     h.show()
12
13     print('\nInsert value (collided):')
14     h.insert(24, 47)
15     h.show()
16
17     print('\nFind value:')
18     print(h.find(7))
19     print('\nFind value (not existed):')
20     print(h.find(77))
21
23
24
25 if __name__ == '__main__':
27     print(30*'-')
29     print(30*'-')

```1     print('\nShow hash table:')
2     h.show()```

```Show hash table:
[0] None
[1] None
[2] None
[3] None
[4] None
[5] None
[6] None
[7] None
[8] None
[9] None
[10] None```

```Insert values:
[0] 0
[1] 1
[2] 2
[3] 3
[4] 4
[5] 5
[6] 6
[7] 7
[8] 8
[9] None
[10] None
[11] None
[12] None
[13] None
[14] None
[15] None
[16] None
[17] None
[18] None
[19] None
[20] None
[21] None
[22] None```

```1     print('\nInsert value (existed):')
2     h.insert(1)
3     h.show()```

```Linear_probing | Square_probing | Double_hashing
[0] 0          | [0] 0          | [0] 0
[1] 1          | [1] 1          | [1] 1
[2] 2          | [2] 2          | [2] 2
[3] 3          | [3] 3          | [3] 3
[4] 4          | [4] 4          | [4] 4
[5] 5          | [5] 5          | [5] 5
[6] 6          | [6] 6          | [6] 6
[7] 7          | [7] 7          | [7] 7
[8] 8          | [8] 8          | [8] 8
[9] 24         | [9] None       | [9] None
[10] 47        | [10] 24        | [10] None
[11] None      | [11] None      | [11] 47
[12] None      | [12] None      | [12] None
[13] None      | [13] None      | [13] None
[14] None      | [14] None      | [14] None
[15] None      | [15] None      | [15] 24
[16] None      | [16] None      | [16] None
[17] None      | [17] 47        | [17] None
[18] None      | [18] None      | [18] None
[19] None      | [19] None      | [19] None
[20] None      | [20] None      | [20] None
[21] None      | [21] None      | [21] None
[22] None      | [22] None      | [22] None```

```--------------------------------------
Linear_probing
--------------------------------------
Find value:
(7, 7)

Find value (not existed):
(None, 11)

--------------------------------------
Square_probing
--------------------------------------
Find value:
(7, 7)

Find value (not existed):
(None, 9)

--------------------------------------
Double_hashing
--------------------------------------
Find value:
(7, 7)

Find value (not existed):
(None, 21)