Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
0% found this document useful (0 votes)
5 views

TheAlgorithms_Python-hashing

Uploaded by

Isidro Arias
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views

TheAlgorithms_Python-hashing

Uploaded by

Isidro Arias
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 16

1

TheAlgorithms/data_structures/hashing

TheAlgorithms_Python-hashing.d .......................................................................
hash_table_with_linked_list.py.....................................................................1
number_theory.........................................................................................
prime_numbers.py .................................................................................. 1
tests......................................................................................................
test_hash_map.py .................................................................................. 2
quadratic_probing.py.................................................................................4
double_hash.py........................................................................................5
bloom_filter.py.......................................................................................6
hash_table.py.........................................................................................8
hash_map.py .......................................................................................... 12

hash_table_with_linked_list.py
1 from collections import deque
2
3 from .hash_table import HashTable
4
5
class HashTableWithLinkedList(HashTable):
    """
    Hash table variant whose slots hold a deque of values instead of a
    single value, so several entries can share one slot.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _set_value(self, key, data):
        """Push ``data`` onto the left end of the deque stored at slot ``key``."""
        if self.values[key] is None:
            # First value for this slot: create its deque lazily.
            self.values[key] = deque([])
        self.values[key].appendleft(data)
        self._keys[key] = self.values[key]

    def balanced_factor(self):
        """
        Return the summed remaining room of all slots, divided by the table
        size and multiplied by ``charge_factor``.

        Slots that were never written are still ``None``; they count as
        holding zero elements instead of raising ``TypeError`` on ``len``.
        """
        remaining_room = sum(
            self.charge_factor - (0 if slot is None else len(slot))
            for slot in self.values
        )
        return remaining_room / self.size_table * self.charge_factor

    def _collision_resolution(self, key, data=None):
        """
        Keep using slot ``key`` unless its deque already holds
        ``charge_factor`` elements and no slot in the table is still ``None``;
        in that case defer to the parent class' probing.
        """
        slot_is_full = len(self.values[key]) == self.charge_factor
        if slot_is_full and self.values.count(None) == 0:
            return super()._collision_resolution(key, data)
        return key

number_theory/prime_numbers.py
1 #!/usr/bin/env python3
2 """
3 Module providing operations on prime numbers.
4 """
5
6 import math
7
8
def is_prime(number: int) -> bool:
    """Check whether ``number`` is prime by trial division in O(sqrt(n)).

    A number is prime if it has exactly two factors: 1 and itself.

    >>> is_prime(0)
    False
    >>> is_prime(1)
    False
    >>> is_prime(2)
    True
    >>> is_prime(3)
    True
    >>> is_prime(27)
    False
    >>> is_prime(87)
    False
    >>> is_prime(563)
    True
    >>> is_prime(2999)
    True
    >>> is_prime(67483)
    False

    Integers too large to be converted to a float are handled as well:

    >>> is_prime(3**700)
    False
    """

    # precondition
    assert isinstance(number, int) and (
        number >= 0
    ), "'number' must been an int and positive"

    if 1 < number < 4:
        # 2 and 3 are primes
        return True
    if number < 2 or number % 2 == 0:
        # 0, 1 and all even numbers are not primes
        return False

    # math.isqrt works on exact integers, so the bound is correct for any
    # size of input; math.sqrt would go through a float and can overflow or
    # round for very large values.
    odd_divisors = range(3, math.isqrt(number) + 1, 2)
    return all(number % divisor for divisor in odd_divisors)
48
49
def next_prime(value, factor=1, **kwargs):
    """
    Search for a prime starting at ``factor * value``.

    The candidate is moved by +1 per step, or by -1 when the keyword argument
    ``desc`` is exactly ``True``, until ``is_prime`` accepts it.  If the
    starting point itself is already prime, the search is restarted from
    ``start + 1`` with the same keyword arguments.
    """
    start = factor * value
    candidate = start
    step = -1 if kwargs.get("desc") is True else 1

    while not is_prime(candidate):
        candidate += step

    if candidate != start:
        return candidate
    # The starting point was prime: look again from the next integer.
    return next_prime(candidate + 1, **kwargs)

tests/test_hash_map.py
1 from operator import delitem, getitem, setitem
2
3 import pytest
4
5 from data_structures.hashing.hash_map import HashMap
6
7
8 def _get(k):
9 return getitem, k
10
11
12 def _set(k, v):
13 return setitem, k, v
14
15
16 def _del(k):
17 return delitem, k
tests/test_hash_map.py 3

18
19
20 def _run_operation(obj, fun, *args):
21 try:
22 return fun(obj, *args), None
23 except Exception as e:
24 return None, e
25
26
# Operation scripts: each one is a sequence of (operator, *args) tuples that
# is replayed against both a HashMap and a built-in dict.

# Two distinct keys.
_add_items = (
    _set("key_a", "val_a"),
    _set("key_b", "val_b"),
)

# Same key written twice.
_overwrite_items = [
    _set("key_a", "val_a"),
    _set("key_a", "val_b"),
]

# Insert, remove everything, then insert and remove again.
_delete_items = [
    _set("key_a", "val_a"),
    _set("key_b", "val_b"),
    _del("key_a"),
    _del("key_b"),
    _set("key_a", "val_a"),
    _del("key_a"),
]

# Reads and deletes of a key that is missing at that moment.
_access_absent_items = [
    _get("key_a"),
    _del("key_a"),
    _set("key_a", "val_a"),
    _del("key_a"),
    _del("key_a"),
    _get("key_a"),
]

_add_with_resize_up = [_set(x, x) for x in range(5)]  # guaranteed upsize

_add_with_resize_down = [
    *(_set(x, x) for x in range(5)),  # guaranteed upsize
    *(_del(x) for x in range(5)),
    _set("key_a", "val_b"),
]
64
65
@pytest.mark.parametrize(
    "operations",
    [
        pytest.param(script, id=label)
        for label, script in (
            ("add items", _add_items),
            ("overwrite items", _overwrite_items),
            ("delete items", _delete_items),
            ("access absent items", _access_absent_items),
            ("add with resize up", _add_with_resize_up),
            ("add with resize down", _add_with_resize_down),
        )
    ],
)
def test_hash_map_is_the_same_as_dict(operations):
    """Replay a script on HashMap and dict and compare them after each step."""
    my = HashMap(initial_block_size=4)
    py = {}
    for fun, *args in operations:
        my_res, my_exc = _run_operation(my, fun, *args)
        py_res, py_exc = _run_operation(py, fun, *args)
        # Same return value and same error text ...
        assert my_res == py_res
        assert str(my_exc) == str(py_exc)
        # ... and the same observable contents.
        assert set(py) == set(my)
        assert len(py) == len(my)
        assert set(my.items()) == set(py.items())
88
4

89
def test_no_new_methods_was_added_to_api():
    """HashMap's public names must be a strict subset of dict's public names."""

    def public_names(obj) -> set[str]:
        # Names without a leading underscore are considered public.
        return {name for name in dir(obj) if not name.startswith("_")}

    dict_public_names = public_names({})
    hash_public_names = public_names(HashMap())

    assert dict_public_names > hash_public_names

quadratic_probing.py
1 #!/usr/bin/env python3
2
3 from .hash_table import HashTable
4
5
class QuadraticProbing(HashTable):
    """
    Basic Hash Table example with open addressing using Quadratic Probing
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _collision_resolution(self, key, data=None):  # noqa: ARG002
        """
        Resolve a collision at ``key`` with quadratic probing.

        Starting from the colliding index, successive squares are added and
        hashed until a usable slot is found:

        Hash + 1², Hash + 2², Hash + 3² .... Hash + n²

        Returns the index found, or ``None`` when the load factor has reached
        ``lim_charge`` while probing (the caller then rehashes).

        reference:
            - https://en.wikipedia.org/wiki/Quadratic_probing
        e.g:
        1. Create hash table with size 7
        >>> qp = QuadraticProbing(7)
        >>> qp.insert_data(90)
        >>> qp.insert_data(340)
        >>> qp.insert_data(24)
        >>> qp.insert_data(45)
        >>> qp.insert_data(99)
        >>> qp.insert_data(73)
        >>> qp.insert_data(7)
        >>> qp.keys()
        {11: 45, 14: 99, 7: 24, 0: 340, 5: 73, 6: 90, 8: 7}

        2. Create hash table with size 8
        >>> qp = QuadraticProbing(8)
        >>> qp.insert_data(0)
        >>> qp.insert_data(999)
        >>> qp.insert_data(111)
        >>> qp.keys()
        {0: 0, 7: 999, 3: 111}

        3. Try to add three data elements when the size is two
        >>> qp = QuadraticProbing(2)
        >>> qp.insert_data(0)
        >>> qp.insert_data(999)
        >>> qp.insert_data(111)
        >>> qp.keys()
        {0: 0, 4: 999, 1: 111}

        4. Try to add three data elements when the size is one
        >>> qp = QuadraticProbing(1)
        >>> qp.insert_data(0)
        >>> qp.insert_data(999)
        >>> qp.insert_data(111)
        >>> qp.keys()
        {4: 999, 1: 111}
        """

        step = 1
        candidate = self.hash_function(key + step * step)

        while self.values[candidate] is not None and self.values[candidate] != key:
            if self.balanced_factor() >= self.lim_charge:
                # Table is considered too loaded: give up so the caller rehashes.
                return None
            step += 1
            candidate = self.hash_function(key + step * step)

        return candidate
79
80
81 if __name__ == "__main__":
82 import doctest
83
84 doctest.testmod()

double_hash.py
1 #!/usr/bin/env python3
2 """
3 Double hashing is a collision resolving technique in Open Addressed Hash tables.
4 Double hashing uses the idea of applying a second hash function to key when a collision
5 occurs. The advantage of double hashing is that it is one of the best forms of probing,
6 producing a uniform distribution of records throughout a hash table. This technique
7 does not yield any clusters. It is one of the effective methods for resolving collisions.
8
9 Double hashing can be done using: (hash1(key) + i * hash2(key)) % TABLE_SIZE
10 Where hash1() and hash2() are hash functions and TABLE_SIZE is size of hash table.
11
12 Reference: https://en.wikipedia.org/wiki/Double_hashing
13 """
14
15 from .hash_table import HashTable
16 from .number_theory.prime_numbers import is_prime, next_prime
17
18
class DoubleHash(HashTable):
    """
    Hash Table example with open addressing and Double Hash
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __hash_function_2(self, value, data):
        """
        Second hash: take ``value % size_table`` (or the result of
        ``next_prime`` on it when it is not prime) as modulus ``p`` and
        return ``p - (data % p)``.
        """
        remainder = value % self.size_table
        if is_prime(remainder):
            modulus = remainder
        else:
            modulus = next_prime(remainder)
        return modulus - (data % modulus)

    def __hash_double_function(self, key, data, increment):
        """Return ``increment`` times the second hash, reduced modulo the table size."""
        offset = increment * self.__hash_function_2(key, data)
        return offset % self.size_table

    def _collision_resolution(self, key, data=None):
        """
        Find a slot for ``data`` after a collision at index ``key``.

        Returns the chosen index, or ``None`` to make the caller rehash.

        Examples:

        1. Try to add three data elements when the size is three
        >>> dh = DoubleHash(3)
        >>> dh.insert_data(10)
        >>> dh.insert_data(20)
        >>> dh.insert_data(30)
        >>> dh.keys()
        {1: 10, 2: 20, 0: 30}

        2. Try to add three data elements when the size is two
        >>> dh = DoubleHash(2)
        >>> dh.insert_data(10)
        >>> dh.insert_data(20)
        >>> dh.insert_data(30)
        >>> dh.keys()
        {10: 10, 9: 20, 8: 30}

        3. Try to add three data elements when the size is four
        >>> dh = DoubleHash(4)
        >>> dh.insert_data(10)
        >>> dh.insert_data(20)
        >>> dh.insert_data(30)
        >>> dh.keys()
        {9: 20, 10: 10, 8: 30}
        """
        attempt = 1
        new_key = self.hash_function(data)

        while self.values[new_key] is not None and self.values[new_key] != key:
            # NOTE(review): probing only continues once the load factor has
            # reached lim_charge, otherwise None is returned. This is the
            # opposite of QuadraticProbing's condition; the doctests above
            # encode the current behaviour. Confirm it is intended.
            if not self.balanced_factor() >= self.lim_charge:
                return None
            new_key = self.__hash_double_function(key, data, attempt)
            attempt += 1

        return new_key
81
82
83 if __name__ == "__main__":
84 import doctest
85
86 doctest.testmod()

bloom_filter.py
1 """
2 See https://en.wikipedia.org/wiki/Bloom_filter
3
4 The use of this data structure is to test membership in a set.
5 Compared to Python's built-in set() it is more space-efficient.
6 In the following example, only 8 bits of memory will be used:
7 >>> bloom = Bloom(size=8)
8
9 Initially, the filter contains all zeros:
10 >>> bloom.bitstring
11 '00000000'
12
13 When an element is added, two bits are set to 1
14 since there are 2 hash functions in this implementation:
15 >>> "Titanic" in bloom
16 False
17 >>> bloom.add("Titanic")
18 >>> bloom.bitstring
19 '01100000'
20 >>> "Titanic" in bloom
21 True
22
bloom_filter.py 7

23 However, sometimes only one bit is added


24 because both hash functions return the same value
25 >>> bloom.add("Avatar")
26 >>> "Avatar" in bloom
27 True
28 >>> bloom.format_hash("Avatar")
29 '00000100'
30 >>> bloom.bitstring
31 '01100100'
32
33 Not added elements should return False ...
34 >>> not_present_films = ("The Godfather", "Interstellar", "Parasite", "Pulp Fiction")
35 >>> {
36 ... film: bloom.format_hash(film) for film in not_present_films
37 ... } # doctest: +NORMALIZE_WHITESPACE
38 {'The Godfather': '00000101',
39 'Interstellar': '00000011',
40 'Parasite': '00010010',
41 'Pulp Fiction': '10000100'}
42 >>> any(film in bloom for film in not_present_films)
43 False
44
45 but sometimes there are false positives:
46 >>> "Ratatouille" in bloom
47 True
48 >>> bloom.format_hash("Ratatouille")
49 '01100000'
50
51 The probability increases with the number of elements added.
52 The probability decreases with the number of bits in the bitarray.
53 >>> bloom.estimated_error_rate
54 0.140625
55 >>> bloom.add("The Godfather")
56 >>> bloom.estimated_error_rate
57 0.25
58 >>> bloom.bitstring
59 '01100101'
60 """
61
62 from hashlib import md5, sha256
63
# Hash constructors used by the filter; each one contributes one bit position.
HASH_FUNCTIONS = (sha256, md5)


class Bloom:
    """Bloom filter whose bits are stored in a single Python integer."""

    def __init__(self, size: int = 8) -> None:
        self.bitarray = 0b0  # all bits cleared
        self.size = size  # number of bit positions in the filter

    def add(self, value: str) -> None:
        """Set the bits selected by ``value`` in the filter."""
        self.bitarray |= self.hash_(value)

    def exists(self, value: str) -> bool:
        """Return True when every bit selected by ``value`` is already set."""
        mask = self.hash_(value)
        return self.bitarray & mask == mask

    def __contains__(self, other: str) -> bool:
        return self.exists(other)

    def format_bin(self, bitarray: int) -> str:
        """Render ``bitarray`` in binary, left-padded with zeros to ``size`` digits."""
        digits = bin(bitarray)[2:]
        return digits.zfill(self.size)

    @property
    def bitstring(self) -> str:
        """Binary rendering of the filter's current bits."""
        return self.format_bin(self.bitarray)

    def hash_(self, value: str) -> int:
        """
        Return an integer mask with one bit set per entry of HASH_FUNCTIONS.

        Each digest is read as a little-endian integer and reduced modulo
        ``size`` to pick the bit position; equal positions collapse into one bit.
        """
        mask = 0b0
        for hasher in HASH_FUNCTIONS:
            digest = hasher(value.encode()).digest()
            position = int.from_bytes(digest, "little") % self.size
            mask |= 2**position
        return mask

    def format_hash(self, value: str) -> str:
        """Binary rendering of the mask produced by ``hash_`` for ``value``."""
        return self.format_bin(self.hash_(value))

    @property
    def estimated_error_rate(self) -> float:
        """Fraction of set bits raised to the number of hash functions."""
        set_bits = bin(self.bitarray).count("1")
        fill_ratio = set_bits / self.size
        return fill_ratio ** len(HASH_FUNCTIONS)

hash_table.py
1 #!/usr/bin/env python3
2 from abc import abstractmethod
3
4 from .number_theory.prime_numbers import next_prime
5
6
7 class HashTable:
8 """
9 Basic Hash Table example with open addressing and linear probing
10 """
11
12 def __init__(
13 self,
14 size_table: int,
15 charge_factor: int | None = None,
16 lim_charge: float | None = None,
17 ) -> None:
18 self.size_table = size_table
19 self.values = [None] * self.size_table
20 self.lim_charge = 0.75 if lim_charge is None else lim_charge
21 self.charge_factor = 1 if charge_factor is None else charge_factor
22 self.__aux_list: list = []
23 self._keys: dict = {}
24
25 def keys(self):
26 """
27 The keys function returns a dictionary containing the key value pairs.
28 key being the index number in hash table and value being the data value.
29
30 Examples:
31 1. creating HashTable with size 10 and inserting 3 elements
32 >>> ht = HashTable(10)
33 >>> ht.insert_data(10)
34 >>> ht.insert_data(20)
35 >>> ht.insert_data(30)
36 >>> ht.keys()
37 {0: 10, 1: 20, 2: 30}
38
39 2. creating HashTable with size 5 and inserting 5 elements
40 >>> ht = HashTable(5)
41 >>> ht.insert_data(5)
42 >>> ht.insert_data(4)
43 >>> ht.insert_data(3)
44 >>> ht.insert_data(2)
45 >>> ht.insert_data(1)
46 >>> ht.keys()
47 {0: 5, 4: 4, 3: 3, 2: 2, 1: 1}
48 """
49 return self._keys
50
51 def balanced_factor(self):
52 return sum(1 for slot in self.values if slot is not None) / (
53 self.size_table * self.charge_factor
54 )
hash_table.py 9

55
56 def hash_function(self, key):
57 """
58 Generates hash for the given key value
59
60 Examples:
61
62 Creating HashTable with size 5
63 >>> ht = HashTable(5)
64 >>> ht.hash_function(10)
65 0
66 >>> ht.hash_function(20)
67 0
68 >>> ht.hash_function(4)
69 4
70 >>> ht.hash_function(18)
71 3
72 >>> ht.hash_function(-18)
73 2
74 >>> ht.hash_function(18.5)
75 3.5
76 >>> ht.hash_function(0)
77 0
78 >>> ht.hash_function(-0)
79 0
80 """
81 return key % self.size_table
82
83 def _step_by_step(self, step_ord):
84 print(f"step {step_ord}")
85 print(list(range(len(self.values))))
86 print(self.values)
87
88 def bulk_insert(self, values):
89 """
90 bulk_insert is used for entering more than one element at a time
91 in the HashTable.
92
93 Examples:
94 1.
95 >>> ht = HashTable(5)
96 >>> ht.bulk_insert((10,20,30))
97 step 1
98 [0, 1, 2, 3, 4]
99 [10, None, None, None, None]
100 step 2
101 [0, 1, 2, 3, 4]
102 [10, 20, None, None, None]
103 step 3
104 [0, 1, 2, 3, 4]
105 [10, 20, 30, None, None]
106
107 2.
108 >>> ht = HashTable(5)
109 >>> ht.bulk_insert([5,4,3,2,1])
110 step 1
111 [0, 1, 2, 3, 4]
112 [5, None, None, None, None]
113 step 2
114 [0, 1, 2, 3, 4]
115 [5, None, None, None, 4]
116 step 3
117 [0, 1, 2, 3, 4]
118 [5, None, None, 3, 4]
119 step 4
120 [0, 1, 2, 3, 4]
121 [5, None, 2, 3, 4]
122 step 5
123 [0, 1, 2, 3, 4]
124 [5, 1, 2, 3, 4]
125 """
hash_table.py 10

126 i = 1
127 self.__aux_list = values
128 for value in values:
129 self.insert_data(value)
130 self._step_by_step(i)
131 i += 1
132
133 def _set_value(self, key, data):
134 """
135 _set_value functions allows to update value at a particular hash
136
137 Examples:
138 1. _set_value in HashTable of size 5
139 >>> ht = HashTable(5)
140 >>> ht.insert_data(10)
141 >>> ht.insert_data(20)
142 >>> ht.insert_data(30)
143 >>> ht._set_value(0,15)
144 >>> ht.keys()
145 {0: 15, 1: 20, 2: 30}
146
147 2. _set_value in HashTable of size 2
148 >>> ht = HashTable(2)
149 >>> ht.insert_data(17)
150 >>> ht.insert_data(18)
151 >>> ht.insert_data(99)
152 >>> ht._set_value(3,15)
153 >>> ht.keys()
154 {3: 15, 2: 17, 4: 99}
155
156 3. _set_value in HashTable when hash is not present
157 >>> ht = HashTable(2)
158 >>> ht.insert_data(17)
159 >>> ht.insert_data(18)
160 >>> ht.insert_data(99)
161 >>> ht._set_value(0,15)
162 >>> ht.keys()
163 {3: 18, 2: 17, 4: 99, 0: 15}
164
165 4. _set_value in HashTable when multiple hash are not present
166 >>> ht = HashTable(2)
167 >>> ht.insert_data(17)
168 >>> ht.insert_data(18)
169 >>> ht.insert_data(99)
170 >>> ht._set_value(0,15)
171 >>> ht._set_value(1,20)
172 >>> ht.keys()
173 {3: 18, 2: 17, 4: 99, 0: 15, 1: 20}
174 """
175 self.values[key] = data
176 self._keys[key] = data
177
178 @abstractmethod
179 def _collision_resolution(self, key, data=None):
180 """
181 This method is a type of open addressing which is used for handling collision.
182
183 In this implementation the concept of linear probing has been used.
184
185 The hash table is searched sequentially from the original location of the
186 hash, if the new hash/location we get is already occupied we check for the next
187 hash/location.
188
189 references:
190 - https://en.wikipedia.org/wiki/Linear_probing
191
192 Examples:
193 1. The collision will be with keys 18 & 99, so new hash will be created for 99
194 >>> ht = HashTable(3)
195 >>> ht.insert_data(17)
196 >>> ht.insert_data(18)
hash_table.py 11

197 >>> ht.insert_data(99)


198 >>> ht.keys()
199 {2: 17, 0: 18, 1: 99}
200
201 2. The collision will be with keys 17 & 101, so new hash
202 will be created for 101
203 >>> ht = HashTable(4)
204 >>> ht.insert_data(17)
205 >>> ht.insert_data(18)
206 >>> ht.insert_data(99)
207 >>> ht.insert_data(101)
208 >>> ht.keys()
209 {1: 17, 2: 18, 3: 99, 0: 101}
210
211 2. The collision will be with all keys, so new hash will be created for all
212 >>> ht = HashTable(1)
213 >>> ht.insert_data(17)
214 >>> ht.insert_data(18)
215 >>> ht.insert_data(99)
216 >>> ht.keys()
217 {2: 17, 3: 18, 4: 99}
218
219 3. Trying to insert float key in hash
220 >>> ht = HashTable(1)
221 >>> ht.insert_data(17)
222 >>> ht.insert_data(18)
223 >>> ht.insert_data(99.99)
224 Traceback (most recent call last):
225 ...
226 TypeError: list indices must be integers or slices, not float
227 """
228 new_key = self.hash_function(key + 1)
229
230 while self.values[new_key] is not None and self.values[new_key] != key:
231 if self.values.count(None) > 0:
232 new_key = self.hash_function(new_key + 1)
233 else:
234 new_key = None
235 break
236
237 return new_key
238
239 def rehashing(self):
240 survivor_values = [value for value in self.values if value is not None]
241 self.size_table = next_prime(self.size_table, factor=2)
242 self._keys.clear()
243 self.values = [None] * self.size_table # hell's pointers D: don't DRY ;/
244 for value in survivor_values:
245 self.insert_data(value)
246
247 def insert_data(self, data):
248 """
249 insert_data is used for inserting a single element at a time in the HashTable.
250
251 Examples:
252
253 >>> ht = HashTable(3)
254 >>> ht.insert_data(5)
255 >>> ht.keys()
256 {2: 5}
257 >>> ht = HashTable(5)
258 >>> ht.insert_data(30)
259 >>> ht.insert_data(50)
260 >>> ht.keys()
261 {0: 30, 1: 50}
262 """
263 key = self.hash_function(data)
264
265 if self.values[key] is None:
266 self._set_value(key, data)
267
12

268 elif self.values[key] == data:


269 pass
270
271 else:
272 collision_resolution = self._collision_resolution(key, data)
273 if collision_resolution is not None:
274 self._set_value(collision_resolution, data)
275 else:
276 self.rehashing()
277 self.insert_data(data)
278
279
280 if __name__ == "__main__":
281 import doctest
282
283 doctest.testmod()

hash_map.py
1 """
2 Hash map with open addressing.
3
4 https://en.wikipedia.org/wiki/Hash_table
5
6 Another hash map implementation, with a good explanation.
7 Modern Dictionaries by Raymond Hettinger
8 https://www.youtube.com/watch?v=p33CVV29OG8
9 """
10
11 from collections.abc import Iterator, MutableMapping
12 from dataclasses import dataclass
13 from typing import Generic, TypeVar
14
15 KEY = TypeVar("KEY")
16 VAL = TypeVar("VAL")
17
18
19 @dataclass(frozen=True, slots=True)
20 class _Item(Generic[KEY, VAL]):
21 key: KEY
22 val: VAL
23
24
class _DeletedItem(_Item):
    """Placeholder item with ``None`` key and value that evaluates as false."""

    def __init__(self) -> None:
        super().__init__(None, None)

    def __bool__(self) -> bool:
        # Falsy, so truthiness checks on a bucket treat it like an unused one.
        return False


# Shared placeholder instance written into buckets whose item was removed.
_deleted = _DeletedItem()
34
35
class HashMap(MutableMapping[KEY, VAL]):
    """
    Hash map with open addressing.

    Removed entries are replaced by the ``_deleted`` placeholder so that
    probe chains running through them stay intact.
    """

    def __init__(
        self, initial_block_size: int = 8, capacity_factor: float = 0.75
    ) -> None:
        self._initial_block_size = initial_block_size
        self._buckets: list[_Item | None] = [None] * initial_block_size
        assert 0.0 < capacity_factor < 1.0
        self._capacity_factor = capacity_factor
        self._len = 0

    def _get_bucket_index(self, key: KEY) -> int:
        """Return the first bucket index to probe for ``key``."""
        return hash(key) % len(self._buckets)

    def _get_next_ind(self, ind: int) -> int:
        """
        Get next index.

        Implements linear open addressing.
        >>> HashMap(5)._get_next_ind(3)
        4
        >>> HashMap(5)._get_next_ind(5)
        1
        >>> HashMap(5)._get_next_ind(6)
        2
        >>> HashMap(5)._get_next_ind(9)
        0
        """
        return (ind + 1) % len(self._buckets)

    def _try_set(self, ind: int, key: KEY, val: VAL) -> bool:
        """
        Try to add value to the bucket.

        If the bucket is unused, holds the deleted placeholder, or holds the
        same key, the item is written there and True is returned (the length
        grows only in the first two cases).

        If the bucket holds another key, nothing is written and False is
        returned, meaning the next bucket has to be checked.
        """
        stored = self._buckets[ind]
        if not stored:
            # Falsy bucket: never used (None) or the deleted placeholder.
            self._buckets[ind] = _Item(key, val)
            self._len += 1
            return True
        elif stored.key == key:
            self._buckets[ind] = _Item(key, val)
            return True
        else:
            return False

    def _find_index(self, key: KEY) -> int | None:
        """
        Return the bucket index currently holding ``key``, or None.

        The probe chain is followed past deleted placeholders and stops at
        the first never-used bucket or after every bucket has been visited.
        """
        for ind in self._iterate_buckets(key):
            item = self._buckets[ind]
            if item is None:
                return None
            if item is not _deleted and item.key == key:
                return ind
        return None

    def _is_full(self) -> bool:
        """
        Return true if we have reached safe capacity.

        So we need to increase the number of buckets to avoid collisions.

        >>> hm = HashMap(2)
        >>> hm._add_item(1, 10)
        >>> hm._add_item(2, 20)
        >>> hm._is_full()
        True
        >>> HashMap(2)._is_full()
        False
        """
        limit = len(self._buckets) * self._capacity_factor
        return len(self) >= int(limit)

    def _is_sparse(self) -> bool:
        """Return true if half as many buckets as we have now would be enough."""
        if len(self._buckets) <= self._initial_block_size:
            # Never shrink below the initial size.
            return False
        limit = len(self._buckets) * self._capacity_factor / 2
        return len(self) < limit

    def _resize(self, new_size: int) -> None:
        """Rebuild the bucket list with ``new_size`` buckets, dropping placeholders."""
        old_buckets = self._buckets
        self._buckets = [None] * new_size
        self._len = 0
        for item in old_buckets:
            if item:
                self._add_item(item.key, item.val)

    def _size_up(self) -> None:
        self._resize(len(self._buckets) * 2)

    def _size_down(self) -> None:
        self._resize(len(self._buckets) // 2)

    def _iterate_buckets(self, key: KEY) -> Iterator[int]:
        """Yield every bucket index once, starting at the home bucket of ``key``."""
        ind = self._get_bucket_index(key)
        for _ in range(len(self._buckets)):
            yield ind
            ind = self._get_next_ind(ind)

    def _add_item(self, key: KEY, val: VAL) -> None:
        """
        Store ``val`` under ``key`` without resizing.

        An existing entry for ``key`` is replaced in place.  Otherwise the
        item goes into the first reusable bucket of the probe chain; if every
        bucket is taken by other keys, the item is not stored.

        Try to add 3 elements when the size is 5
        >>> hm = HashMap(5)
        >>> hm._add_item(1, 10)
        >>> hm._add_item(2, 20)
        >>> hm._add_item(3, 30)
        >>> hm
        HashMap(1: 10, 2: 20, 3: 30)

        Try to add 3 elements when the size is 5
        >>> hm = HashMap(5)
        >>> hm._add_item(-5, 10)
        >>> hm._add_item(6, 30)
        >>> hm._add_item(-7, 20)
        >>> hm
        HashMap(-5: 10, 6: 30, -7: 20)

        Try to add 3 elements when size is 1
        >>> hm = HashMap(1)
        >>> hm._add_item(10, 13.2)
        >>> hm._add_item(6, 5.26)
        >>> hm._add_item(7, 5.155)
        >>> hm
        HashMap(10: 13.2)

        Trying to add an element with a key that is a floating point value
        >>> hm = HashMap(5)
        >>> hm._add_item(1.5, 10)
        >>> hm
        HashMap(1.5: 10)

        5. Trying to add an item with the same key
        >>> hm = HashMap(5)
        >>> hm._add_item(1, 10)
        >>> hm._add_item(1, 20)
        >>> hm
        HashMap(1: 20)

        6. Updating a key that sits behind a deleted placeholder must not
        create a second entry for it
        >>> hm = HashMap(8)
        >>> hm[0] = "a"
        >>> hm[8] = "b"
        >>> del hm[0]
        >>> hm[8] = "c"
        >>> hm
        HashMap(8: c)
        >>> len(hm)
        1
        """
        existing = self._find_index(key)
        if existing is not None:
            # Key already stored (possibly after a deleted placeholder):
            # overwrite it where it is instead of inserting a duplicate.
            self._buckets[existing] = _Item(key, val)
            return

        for ind in self._iterate_buckets(key):
            if self._try_set(ind, key, val):
                break

    def __setitem__(self, key: KEY, val: VAL) -> None:
        """
        1. Changing value of item whose key is present
        >>> hm = HashMap(5)
        >>> hm._add_item(1, 10)
        >>> hm.__setitem__(1, 20)
        >>> hm
        HashMap(1: 20)

        2. Changing value of item whose key is not present
        >>> hm = HashMap(5)
        >>> hm._add_item(1, 10)
        >>> hm.__setitem__(0, 20)
        >>> hm
        HashMap(0: 20, 1: 10)

        3. Changing the value of the same item multiple times
        >>> hm = HashMap(5)
        >>> hm._add_item(1, 10)
        >>> hm.__setitem__(1, 20)
        >>> hm.__setitem__(1, 30)
        >>> hm
        HashMap(1: 30)
        """
        if self._is_full():
            self._size_up()

        self._add_item(key, val)

    def __delitem__(self, key: KEY) -> None:
        """
        Remove ``key``; raise ``KeyError`` if it is not stored.

        >>> hm = HashMap(5)
        >>> hm._add_item(1, 10)
        >>> hm._add_item(2, 20)
        >>> hm._add_item(3, 30)
        >>> hm.__delitem__(3)
        >>> hm
        HashMap(1: 10, 2: 20)
        >>> hm = HashMap(5)
        >>> hm._add_item(-5, 10)
        >>> hm._add_item(6, 30)
        >>> hm._add_item(-7, 20)
        >>> hm.__delitem__(-5)
        >>> hm
        HashMap(6: 30, -7: 20)

        # Trying to remove a non-existing item
        >>> hm = HashMap(5)
        >>> hm._add_item(1, 10)
        >>> hm._add_item(2, 20)
        >>> hm._add_item(3, 30)
        >>> hm.__delitem__(4)
        Traceback (most recent call last):
            ...
        KeyError: 4

        # A missing key is also reported when no bucket is unused
        >>> hm = HashMap(1)
        >>> hm._add_item(10, 13.2)
        >>> hm.__delitem__(6)
        Traceback (most recent call last):
            ...
        KeyError: 6
        """
        ind = self._find_index(key)
        if ind is None:
            raise KeyError(key)

        # Leave a placeholder so probe chains through this bucket still work.
        self._buckets[ind] = _deleted
        self._len -= 1

        if self._is_sparse():
            self._size_down()

    def __getitem__(self, key: KEY) -> VAL:
        """
        Returns the item at the given key

        >>> hm = HashMap(5)
        >>> hm._add_item(1, 10)
        >>> hm.__getitem__(1)
        10

        >>> hm = HashMap(5)
        >>> hm._add_item(10, -10)
        >>> hm._add_item(20, -20)
        >>> hm.__getitem__(20)
        -20

        >>> hm = HashMap(5)
        >>> hm._add_item(-1, 10)
        >>> hm.__getitem__(-1)
        10
        """
        ind = self._find_index(key)
        if ind is None:
            raise KeyError(key)
        return self._buckets[ind].val

    def __len__(self) -> int:
        """
        Returns the number of items present in hashmap

        >>> hm = HashMap(5)
        >>> hm._add_item(1, 10)
        >>> hm._add_item(2, 20)
        >>> hm._add_item(3, 30)
        >>> hm.__len__()
        3

        >>> hm = HashMap(5)
        >>> hm.__len__()
        0
        """
        return self._len

    def __iter__(self) -> Iterator[KEY]:
        # Unused buckets and deleted placeholders are both falsy and skipped.
        yield from (item.key for item in self._buckets if item)

    def __repr__(self) -> str:
        val_string = ", ".join(
            f"{item.key}: {item.val}" for item in self._buckets if item
        )
        return f"HashMap({val_string})"
300
301
302 if __name__ == "__main__":
303 import doctest
304
305 doctest.testmod()

You might also like