TheAlgorithms_Python-hashing

Uploaded by

Isidro Arias

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

5 views

TheAlgorithms_Python-hashing

Uploaded by

Isidro Arias

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 16

1

TheAlgorithms/data_structures/hashing

TheAlgorithms_Python-hashing.d .......................................................................
hash_table_with_linked_list.py.....................................................................1
number_theory.........................................................................................
prime_numbers.py .................................................................................. 1
tests......................................................................................................
test_hash_map.py .................................................................................. 2
quadratic_probing.py.................................................................................4
double_hash.py........................................................................................5
bloom_filter.py.......................................................................................6
hash_table.py.........................................................................................8
hash_map.py .......................................................................................... 12

hash_table_with_linked_list.py
1 from collections import deque
2
3 from .hash_table import HashTable
4
5
class HashTableWithLinkedList(HashTable):
    """
    Hash table variant in which every used slot holds a ``deque`` of values.

    New values are prepended to the deque stored at their slot.  A slot that
    was never written keeps the ``None`` placeholder installed by the base
    class constructor.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _set_value(self, key, data):
        """
        Prepend ``data`` to the chain at slot ``key``.

        The chain is created on first use, and the same deque object is
        mirrored into ``self._keys`` so ``keys()`` exposes the whole chain.
        """
        if self.values[key] is None:
            self.values[key] = deque([])
        self.values[key].appendleft(data)
        self._keys[key] = self.values[key]

    def balanced_factor(self):
        """
        Return ``sum(charge_factor - chain_length) / size_table * charge_factor``.

        Slots that are still ``None`` count as chains of length zero; the
        previous implementation called ``len(None)`` and raised ``TypeError``
        whenever the table had an unused slot.
        """
        free_capacity = sum(
            self.charge_factor - (0 if slot is None else len(slot))
            for slot in self.values
        )
        return free_capacity / self.size_table * self.charge_factor

    def _collision_resolution(self, key, data=None):
        """
        Decide where colliding ``data`` goes.

        The value stays in the chain at ``key`` unless that chain already
        holds exactly ``charge_factor`` items *and* the table has no ``None``
        slot left; only then is the base-class resolution consulted.
        """
        if (
            len(self.values[key]) == self.charge_factor
            and self.values.count(None) == 0
        ):
            return super()._collision_resolution(key, data)
        return key

number_theory/prime_numbers.py
1 #!/usr/bin/env python3
2 """
3 Module providing operations on prime numbers.
4 """
5
6 import math
7
8
def is_prime(number: int) -> bool:
    """Checks to see if a number is a prime in O(sqrt(n)).

    A number is prime if it has exactly two factors: 1 and itself.

    The upper bound of the trial division is computed with ``math.isqrt`` so
    it is exact for arbitrarily large integers (``math.sqrt`` goes through a
    float and raises ``OverflowError`` for very large values).

    :param number: non-negative integer to test
    :return: True if ``number`` is prime, False otherwise
    :raises AssertionError: if ``number`` is not an int or is negative

    >>> is_prime(0)
    False
    >>> is_prime(1)
    False
    >>> is_prime(2)
    True
    >>> is_prime(3)
    True
    >>> is_prime(27)
    False
    >>> is_prime(87)
    False
    >>> is_prime(563)
    True
    >>> is_prime(2999)
    True
    >>> is_prime(67483)
    False
    >>> is_prime(3**700)
    False
    """

    # precondition
    assert isinstance(number, int) and (
        number >= 0
    ), "'number' must been an int and positive"

    if 1 < number < 4:
        # 2 and 3 are primes
        return True
    if number < 2 or number % 2 == 0:
        # 0, 1 and all even numbers are not primes
        return False

    # Only odd divisors up to floor(sqrt(number)) need to be tried.
    odd_divisors = range(3, math.isqrt(number) + 1, 2)
    return all(number % divisor for divisor in odd_divisors)
48
49
def next_prime(value, factor=1, **kwargs):
    """
    Search for a prime starting at ``factor * value``.

    The candidate is moved one step at a time until ``is_prime`` accepts it:
    upwards by default, downwards when the keyword ``desc=True`` is passed.
    If the starting point itself is already prime, the search is restarted
    from ``start + 1`` (with the same keyword arguments and no factor).
    """
    start = factor * value
    step = -1 if kwargs.get("desc") is True else 1

    candidate = start
    while not is_prime(candidate):
        candidate += step

    if candidate != start:
        return candidate
    # The starting point was prime: look for a different one.
    return next_prime(candidate + 1, **kwargs)

tests/test_hash_map.py
1 from operator import delitem, getitem, setitem
2
3 import pytest
4
5 from data_structures.hashing.hash_map import HashMap
6
7
def _get(k):
    """Describe a lookup of ``k`` as an ``(operator, key)`` tuple."""
    operation = (getitem, k)
    return operation
10
11
def _set(k, v):
    """Describe an assignment ``obj[k] = v`` as an ``(operator, key, value)`` tuple."""
    operation = (setitem, k, v)
    return operation
14
15
def _del(k):
    """Describe a deletion of ``k`` as an ``(operator, key)`` tuple."""
    operation = (delitem, k)
    return operation
tests/test_hash_map.py 3

18
19
def _run_operation(obj, fun, *args):
    """
    Apply ``fun(obj, *args)`` and report the outcome as a pair.

    Returns ``(result, None)`` when the call succeeds and ``(None, error)``
    when it raises an ``Exception``.
    """
    try:
        outcome = fun(obj, *args)
    except Exception as error:
        return None, error
    return outcome, None
25
26
# Operation scripts replayed against both HashMap and dict by the test below.

# Two distinct keys.
_add_items = (
    _set("key_a", "val_a"),
    _set("key_b", "val_b"),
)

# The same key written twice with different values.
_overwrite_items = [
    _set("key_a", "val_a"),
    _set("key_a", "val_b"),
]

# Insert, delete everything, then insert and delete again.
_delete_items = [
    _set("key_a", "val_a"),
    _set("key_b", "val_b"),
    _del("key_a"),
    _del("key_b"),
    _set("key_a", "val_a"),
    _del("key_a"),
]

# Reads and deletes of a key that is absent at the time of the call.
_access_absent_items = [
    _get("key_a"),
    _del("key_a"),
    _set("key_a", "val_a"),
    _del("key_a"),
    _del("key_a"),
    _get("key_a"),
]

_add_with_resize_up = [_set(x, x) for x in range(5)]  # guaranteed upsize

_add_with_resize_down = (
    [_set(x, x) for x in range(5)]  # guaranteed upsize
    + [_del(x) for x in range(5)]
    + [_set("key_a", "val_b")]
)
64
65
@pytest.mark.parametrize(
    "operations",
    [
        pytest.param(script, id=label)
        for script, label in (
            (_add_items, "add items"),
            (_overwrite_items, "overwrite items"),
            (_delete_items, "delete items"),
            (_access_absent_items, "access absent items"),
            (_add_with_resize_up, "add with resize up"),
            (_add_with_resize_down, "add with resize down"),
        )
    ],
)
def test_hash_map_is_the_same_as_dict(operations):
    """Replay each operation on a HashMap and a dict and compare them after every step."""
    my = HashMap(initial_block_size=4)
    py = {}
    for fun, *args in operations:
        my_res, my_exc = _run_operation(my, fun, *args)
        py_res, py_exc = _run_operation(py, fun, *args)
        # Same return value and same error text ...
        assert my_res == py_res
        assert str(my_exc) == str(py_exc)
        # ... and the same observable content.
        assert set(py) == set(my)
        assert len(py) == len(my)
        assert set(my.items()) == set(py.items())
88
4

89
def test_no_new_methods_was_added_to_api():
    """The public names of HashMap must be a strict subset of dict's public names."""

    def public_names(obj) -> set:
        return {name for name in dir(obj) if not name.startswith("_")}

    dict_public_names = public_names({})
    hash_public_names = public_names(HashMap())

    assert dict_public_names > hash_public_names

quadratic_probing.py
1 #!/usr/bin/env python3
2
3 from .hash_table import HashTable
4
5
class QuadraticProbing(HashTable):
    """
    Basic Hash Table example with open addressing using Quadratic Probing
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _collision_resolution(self, key, data=None):
        """
        Quadratic probing is an open addressing scheme used for resolving
        collisions in hash table.

        It works by taking the original hash index and adding successive
        values of an arbitrary quadratic polynomial until open slot is found.

        Hash + 1², Hash + 2², Hash + 3² .... Hash + n²

        Probing stops at the first slot that is empty or already holds
        ``data``.  Occupied slots are compared with the value being inserted,
        not with the slot index, so a stored value that happens to equal the
        home index is never overwritten.

        :param key: home slot index of ``data`` (the slot that collided)
        :param data: value being inserted
        :return: index of the slot to use, or None when the load factor has
            reached ``lim_charge`` and the caller should rehash

        reference:
            - https://en.wikipedia.org/wiki/Quadratic_probing
        e.g:
        1. Create hash table with size 7
        >>> qp = QuadraticProbing(7)
        >>> qp.insert_data(90)
        >>> qp.insert_data(340)
        >>> qp.insert_data(24)
        >>> qp.insert_data(45)
        >>> qp.insert_data(99)
        >>> qp.insert_data(73)
        >>> qp.insert_data(7)
        >>> qp.keys()
        {11: 45, 14: 99, 7: 24, 0: 340, 5: 73, 6: 90, 8: 7}

        2. Create hash table with size 8
        >>> qp = QuadraticProbing(8)
        >>> qp.insert_data(0)
        >>> qp.insert_data(999)
        >>> qp.insert_data(111)
        >>> qp.keys()
        {0: 0, 7: 999, 3: 111}

        3. Try to add three data elements when the size is two
        >>> qp = QuadraticProbing(2)
        >>> qp.insert_data(0)
        >>> qp.insert_data(999)
        >>> qp.insert_data(111)
        >>> qp.keys()
        {0: 0, 4: 999, 1: 111}

        4. Try to add three data elements when the size is one
        >>> qp = QuadraticProbing(1)
        >>> qp.insert_data(0)
        >>> qp.insert_data(999)
        >>> qp.insert_data(111)
        >>> qp.keys()
        {0: 0, 4: 999, 1: 111}
        """

        i = 1
        new_key = self.hash_function(key + i * i)

        # Compare the occupant with the inserted value (not the slot index).
        while self.values[new_key] is not None and self.values[new_key] != data:
            i += 1
            if self.balanced_factor() >= self.lim_charge:
                # Table is too loaded to keep probing: ask the caller to rehash.
                return None
            new_key = self.hash_function(key + i * i)

        return new_key
79
80
81 if __name__ == "__main__":
82 import doctest
83
84 doctest.testmod()

double_hash.py
1 #!/usr/bin/env python3
2 """
3 Double hashing is a collision resolution technique for open-addressed hash tables.
4 Double hashing applies a second hash function to the key when a collision
5 occurs. Its advantage is that it is one of the best forms of probing,
6 producing a uniform distribution of records throughout a hash table. This technique
7 does not yield any clusters. It is one of the effective methods for resolving collisions.
8
9 Double hashing can be done using: (hash1(key) + i * hash2(key)) % TABLE_SIZE
10 Where hash1() and hash2() are hash functions and TABLE_SIZE is size of hash table.
11
12 Reference: https://en.wikipedia.org/wiki/Double_hashing
13 """
14
15 from .hash_table import HashTable
16 from .number_theory.prime_numbers import is_prime, next_prime
17
18
class DoubleHash(HashTable):
    """
    Hash Table example with open addressing and Double Hash
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __hash_function_2(self, value, data):
        """
        Second hash: ``p - (data % p)`` where ``p`` is ``value % size_table``
        if that is prime, otherwise the result of ``next_prime`` on it.
        """
        slot = value % self.size_table
        prime = slot if is_prime(slot) else next_prime(slot)  # gt = bigger than
        return prime - (data % prime)

    def __hash_double_function(self, key, data, increment):
        """Return ``increment`` times the second hash, reduced modulo the table size."""
        return (increment * self.__hash_function_2(key, data)) % self.size_table

    def _collision_resolution(self, key, data=None):
        """
        Find a slot for ``data`` after a collision at ``key``.

        Probing stops at the first slot that is empty or already holds
        ``data``.  Occupied slots are compared with the value being inserted,
        not with the slot index, so a value stored in its own home slot
        (e.g. ``1`` at index 1) is never overwritten by a colliding value.

        :param key: home slot index of ``data``
        :param data: value being inserted
        :return: slot index to use, or None to make the caller rehash

        Examples:

        1. Try to add three data elements when the size is three
        >>> dh = DoubleHash(3)
        >>> dh.insert_data(10)
        >>> dh.insert_data(20)
        >>> dh.insert_data(30)
        >>> dh.keys()
        {1: 10, 2: 20, 0: 30}

        2. Try to add three data elements when the size is two
        >>> dh = DoubleHash(2)
        >>> dh.insert_data(10)
        >>> dh.insert_data(20)
        >>> dh.insert_data(30)
        >>> dh.keys()
        {10: 10, 9: 20, 8: 30}

        3. Try to add three data elements when the size is four
        >>> dh = DoubleHash(4)
        >>> dh.insert_data(10)
        >>> dh.insert_data(20)
        >>> dh.insert_data(30)
        >>> dh.keys()
        {9: 20, 10: 10, 8: 30}
        """
        i = 1
        new_key = self.hash_function(data)

        while self.values[new_key] is not None and self.values[new_key] != data:
            # NOTE(review): a second-hash probe is only attempted once the load
            # factor has reached lim_charge; below it the table is rehashed on
            # every collision.  This looks inverted relative to
            # QuadraticProbing, but the doctests above depend on it -- confirm
            # before changing.
            if not (self.balanced_factor() >= self.lim_charge):
                return None
            new_key = self.__hash_double_function(key, data, i)
            i += 1

        return new_key
81
82
83 if __name__ == "__main__":
84 import doctest
85
86 doctest.testmod()

bloom_filter.py
1 """
2 See https://en.wikipedia.org/wiki/Bloom_filter
3
4 The use of this data structure is to test membership in a set.
5 Compared to Python's built-in set() it is more space-efficient.
6 In the following example, only 8 bits of memory will be used:
7 >>> bloom = Bloom(size=8)
8
9 Initially, the filter contains all zeros:
10 >>> bloom.bitstring
11 '00000000'
12
13 When an element is added, two bits are set to 1
14 since there are 2 hash functions in this implementation:
15 >>> "Titanic" in bloom
16 False
17 >>> bloom.add("Titanic")
18 >>> bloom.bitstring
19 '01100000'
20 >>> "Titanic" in bloom
21 True
22
bloom_filter.py 7

23 However, sometimes only one bit is added

24 because both hash functions return the same value
25 >>> bloom.add("Avatar")
26 >>> "Avatar" in bloom
27 True
28 >>> bloom.format_hash("Avatar")
29 '00000100'
30 >>> bloom.bitstring
31 '01100100'
32
33 Not added elements should return False ...
34 >>> not_present_films = ("The Godfather", "Interstellar", "Parasite", "Pulp Fiction")
35 >>> {
36 ... film: bloom.format_hash(film) for film in not_present_films
37 ... } # doctest: +NORMALIZE_WHITESPACE
38 {'The Godfather': '00000101',
39 'Interstellar': '00000011',
40 'Parasite': '00010010',
41 'Pulp Fiction': '10000100'}
42 >>> any(film in bloom for film in not_present_films)
43 False
44
45 but sometimes there are false positives:
46 >>> "Ratatouille" in bloom
47 True
48 >>> bloom.format_hash("Ratatouille")
49 '01100000'
50
51 The probability increases with the number of elements added.
52 The probability decreases with the number of bits in the bitarray.
53 >>> bloom.estimated_error_rate
54 0.140625
55 >>> bloom.add("The Godfather")
56 >>> bloom.estimated_error_rate
57 0.25
58 >>> bloom.bitstring
59 '01100101'
60 """
61
62 from hashlib import md5, sha256
63
# Hash constructors applied to every value; each one selects one bit position.
HASH_FUNCTIONS = (sha256, md5)


class Bloom:
    """
    Bloom filter backed by a single Python int used as a bit field.

    ``size`` is the number of bit positions; ``bitarray`` holds the bits that
    have been set by ``add``.
    """

    def __init__(self, size: int = 8) -> None:
        self.size = size
        self.bitarray = 0b0

    def add(self, value: str) -> None:
        """Set the bits selected by the hash functions for ``value``."""
        self.bitarray |= self.hash_(value)

    def exists(self, value: str) -> bool:
        """Return True when every bit selected for ``value`` is already set."""
        mask = self.hash_(value)
        return mask == (mask & self.bitarray)

    def __contains__(self, other: str) -> bool:
        return self.exists(other)

    def format_bin(self, bitarray: int) -> str:
        """Render ``bitarray`` in binary, left-padded with zeros to ``size`` digits."""
        digits = bin(bitarray)[2:]
        return digits.zfill(self.size)

    @property
    def bitstring(self) -> str:
        """Binary rendering of the filter's current bits."""
        return self.format_bin(self.bitarray)

    def hash_(self, value: str) -> int:
        """
        Return an int with one bit set per hash function.

        Each digest is read as a little-endian integer and reduced modulo
        ``size`` to obtain the bit position; two functions may select the
        same position.
        """
        mask = 0b0
        for make_hash in HASH_FUNCTIONS:
            digest = make_hash(value.encode()).digest()
            position = int.from_bytes(digest, "little") % self.size
            mask |= 2**position
        return mask

    def format_hash(self, value: str) -> str:
        """Binary rendering of the bits ``value`` would set."""
        return self.format_bin(self.hash_(value))

    @property
    def estimated_error_rate(self) -> float:
        """Fraction of set bits raised to the number of hash functions."""
        n_ones = bin(self.bitarray).count("1")
        fill_ratio = n_ones / self.size
        return fill_ratio ** len(HASH_FUNCTIONS)

hash_table.py
1 #!/usr/bin/env python3
2 from abc import abstractmethod
3
4 from .number_theory.prime_numbers import next_prime
5
6
7 class HashTable:
8 """
9 Basic Hash Table example with open addressing and linear probing
10 """
11
12 def __init__(
13 self,
14 size_table: int,
15 charge_factor: int | None = None,
16 lim_charge: float | None = None,
17 ) -> None:
18 self.size_table = size_table
19 self.values = [None] * self.size_table
20 self.lim_charge = 0.75 if lim_charge is None else lim_charge
21 self.charge_factor = 1 if charge_factor is None else charge_factor
22 self.__aux_list: list = []
23 self._keys: dict = {}
24
25 def keys(self):
26 """
27 The keys function returns a dictionary containing the key value pairs.
28 key being the index number in hash table and value being the data value.
29
30 Examples:
31 1. creating HashTable with size 10 and inserting 3 elements
32 >>> ht = HashTable(10)
33 >>> ht.insert_data(10)
34 >>> ht.insert_data(20)
35 >>> ht.insert_data(30)
36 >>> ht.keys()
37 {0: 10, 1: 20, 2: 30}
38
39 2. creating HashTable with size 5 and inserting 5 elements
40 >>> ht = HashTable(5)
41 >>> ht.insert_data(5)
42 >>> ht.insert_data(4)
43 >>> ht.insert_data(3)
44 >>> ht.insert_data(2)
45 >>> ht.insert_data(1)
46 >>> ht.keys()
47 {0: 5, 4: 4, 3: 3, 2: 2, 1: 1}
48 """
49 return self._keys
50
51 def balanced_factor(self):
52 return sum(1 for slot in self.values if slot is not None) / (
53 self.size_table * self.charge_factor
54 )
hash_table.py 9

55
56 def hash_function(self, key):
57 """
58 Generates hash for the given key value
59
60 Examples:
61
62 Creating HashTable with size 5
63 >>> ht = HashTable(5)
64 >>> ht.hash_function(10)
65 0
66 >>> ht.hash_function(20)
67 0
68 >>> ht.hash_function(4)
69 4
70 >>> ht.hash_function(18)
71 3
72 >>> ht.hash_function(-18)
73 2
74 >>> ht.hash_function(18.5)
75 3.5
76 >>> ht.hash_function(0)
77 0
78 >>> ht.hash_function(-0)
79 0
80 """
81 return key % self.size_table
82
83 def _step_by_step(self, step_ord):
84 print(f"step {step_ord}")
85 print(list(range(len(self.values))))
86 print(self.values)
87
88 def bulk_insert(self, values):
89 """
90 bulk_insert is used for entering more than one element at a time
91 in the HashTable.
92
93 Examples:
94 1.
95 >>> ht = HashTable(5)
96 >>> ht.bulk_insert((10,20,30))
97 step 1
98 [0, 1, 2, 3, 4]
99 [10, None, None, None, None]
100 step 2
101 [0, 1, 2, 3, 4]
102 [10, 20, None, None, None]
103 step 3
104 [0, 1, 2, 3, 4]
105 [10, 20, 30, None, None]
106
107 2.
108 >>> ht = HashTable(5)
109 >>> ht.bulk_insert([5,4,3,2,1])
110 step 1
111 [0, 1, 2, 3, 4]
112 [5, None, None, None, None]
113 step 2
114 [0, 1, 2, 3, 4]
115 [5, None, None, None, 4]
116 step 3
117 [0, 1, 2, 3, 4]
118 [5, None, None, 3, 4]
119 step 4
120 [0, 1, 2, 3, 4]
121 [5, None, 2, 3, 4]
122 step 5
123 [0, 1, 2, 3, 4]
124 [5, 1, 2, 3, 4]
125 """
hash_table.py 10

126 i = 1
127 self.__aux_list = values
128 for value in values:
129 self.insert_data(value)
130 self._step_by_step(i)
131 i += 1
132
133 def _set_value(self, key, data):
134 """
135 _set_value functions allows to update value at a particular hash
136
137 Examples:
138 1. _set_value in HashTable of size 5
139 >>> ht = HashTable(5)
140 >>> ht.insert_data(10)
141 >>> ht.insert_data(20)
142 >>> ht.insert_data(30)
143 >>> ht._set_value(0,15)
144 >>> ht.keys()
145 {0: 15, 1: 20, 2: 30}
146
147 2. _set_value in HashTable of size 2
148 >>> ht = HashTable(2)
149 >>> ht.insert_data(17)
150 >>> ht.insert_data(18)
151 >>> ht.insert_data(99)
152 >>> ht._set_value(3,15)
153 >>> ht.keys()
154 {3: 15, 2: 17, 4: 99}
155
156 3. _set_value in HashTable when hash is not present
157 >>> ht = HashTable(2)
158 >>> ht.insert_data(17)
159 >>> ht.insert_data(18)
160 >>> ht.insert_data(99)
161 >>> ht._set_value(0,15)
162 >>> ht.keys()
163 {3: 18, 2: 17, 4: 99, 0: 15}
164
165 4. _set_value in HashTable when multiple hash are not present
166 >>> ht = HashTable(2)
167 >>> ht.insert_data(17)
168 >>> ht.insert_data(18)
169 >>> ht.insert_data(99)
170 >>> ht._set_value(0,15)
171 >>> ht._set_value(1,20)
172 >>> ht.keys()
173 {3: 18, 2: 17, 4: 99, 0: 15, 1: 20}
174 """
175 self.values[key] = data
176 self._keys[key] = data
177
178 @abstractmethod
179 def _collision_resolution(self, key, data=None):
180 """
181 This method is a type of open addressing which is used for handling collision.
182
183 In this implementation the concept of linear probing has been used.
184
185 The hash table is searched sequentially from the original location of the
186 hash, if the new hash/location we get is already occupied we check for the next
187 hash/location.
188
189 references:
190 - https://en.wikipedia.org/wiki/Linear_probing
191
192 Examples:
193 1. The collision will be with keys 18 & 99, so new hash will be created for 99
194 >>> ht = HashTable(3)
195 >>> ht.insert_data(17)
196 >>> ht.insert_data(18)
hash_table.py 11

197 >>> ht.insert_data(99)

198 >>> ht.keys()
199 {2: 17, 0: 18, 1: 99}
200
201 2. The collision will be with keys 17 & 101, so new hash
202 will be created for 101
203 >>> ht = HashTable(4)
204 >>> ht.insert_data(17)
205 >>> ht.insert_data(18)
206 >>> ht.insert_data(99)
207 >>> ht.insert_data(101)
208 >>> ht.keys()
209 {1: 17, 2: 18, 3: 99, 0: 101}
210
211 2. The collision will be with all keys, so new hash will be created for all
212 >>> ht = HashTable(1)
213 >>> ht.insert_data(17)
214 >>> ht.insert_data(18)
215 >>> ht.insert_data(99)
216 >>> ht.keys()
217 {2: 17, 3: 18, 4: 99}
218
219 3. Trying to insert float key in hash
220 >>> ht = HashTable(1)
221 >>> ht.insert_data(17)
222 >>> ht.insert_data(18)
223 >>> ht.insert_data(99.99)
224 Traceback (most recent call last):
225 ...
226 TypeError: list indices must be integers or slices, not float
227 """
228 new_key = self.hash_function(key + 1)
229
230 while self.values[new_key] is not None and self.values[new_key] != key:
231 if self.values.count(None) > 0:
232 new_key = self.hash_function(new_key + 1)
233 else:
234 new_key = None
235 break
236
237 return new_key
238
239 def rehashing(self):
240 survivor_values = [value for value in self.values if value is not None]
241 self.size_table = next_prime(self.size_table, factor=2)
242 self._keys.clear()
243 self.values = [None] * self.size_table # hell's pointers D: don't DRY ;/
244 for value in survivor_values:
245 self.insert_data(value)
246
247 def insert_data(self, data):
248 """
249 insert_data is used for inserting a single element at a time in the HashTable.
250
251 Examples:
252
253 >>> ht = HashTable(3)
254 >>> ht.insert_data(5)
255 >>> ht.keys()
256 {2: 5}
257 >>> ht = HashTable(5)
258 >>> ht.insert_data(30)
259 >>> ht.insert_data(50)
260 >>> ht.keys()
261 {0: 30, 1: 50}
262 """
263 key = self.hash_function(data)
264
265 if self.values[key] is None:
266 self._set_value(key, data)
267
12

268 elif self.values[key] == data:

269 pass
270
271 else:
272 collision_resolution = self._collision_resolution(key, data)
273 if collision_resolution is not None:
274 self._set_value(collision_resolution, data)
275 else:
276 self.rehashing()
277 self.insert_data(data)
278
279
280 if __name__ == "__main__":
281 import doctest
282
283 doctest.testmod()

hash_map.py
1 """
2 Hash map with open addressing.
3
4 https://en.wikipedia.org/wiki/Hash_table
5
6 Another hash map implementation, with a good explanation.
7 Modern Dictionaries by Raymond Hettinger
8 https://www.youtube.com/watch?v=p33CVV29OG8
9 """
10
11 from collections.abc import Iterator, MutableMapping
12 from dataclasses import dataclass
13 from typing import Generic, TypeVar
14
15 KEY = TypeVar("KEY")
16 VAL = TypeVar("VAL")
17
18
19 @dataclass(frozen=True, slots=True)
20 class _Item(Generic[KEY, VAL]):
21 key: KEY
22 val: VAL
23
24
25 class _DeletedItem(_Item):
26 def __init__(self) -> None:
27 super().__init__(None, None)
28
29 def __bool__(self) -> bool:
30 return False
31
32
33 _deleted = _DeletedItem()
34
35
36 class HashMap(MutableMapping[KEY, VAL]):
37 """
38 Hash map with open addressing.
39 """
40
41 def __init__(
42 self, initial_block_size: int = 8, capacity_factor: float = 0.75
43 ) -> None:
44 self._initial_block_size = initial_block_size
45 self._buckets: list[_Item | None] = [None] * initial_block_size
46 assert 0.0 < capacity_factor < 1.0
47 self._capacity_factor = capacity_factor
48 self._len = 0
49
50 def _get_bucket_index(self, key: KEY) -> int:
51 return hash(key) % len(self._buckets)
hash_map.py 13

52
53 def _get_next_ind(self, ind: int) -> int:
54 """
55 Get next index.
56
57 Implements linear open addressing.
58 >>> HashMap(5)._get_next_ind(3)
59 4
60 >>> HashMap(5)._get_next_ind(5)
61 1
62 >>> HashMap(5)._get_next_ind(6)
63 2
64 >>> HashMap(5)._get_next_ind(9)
65 0
66 """
67 return (ind + 1) % len(self._buckets)
68
69 def _try_set(self, ind: int, key: KEY, val: VAL) -> bool:
70 """
71 Try to add value to the bucket.
72
73 If bucket is empty or key is the same, does insert and return True.
74
75 If bucket has another key or deleted placeholder,
76 that means that we need to check next bucket.
77 """
78 stored = self._buckets[ind]
79 if not stored:
80 self._buckets[ind] = _Item(key, val)
81 self._len += 1
82 return True
83 elif stored.key == key:
84 self._buckets[ind] = _Item(key, val)
85 return True
86 else:
87 return False
88
89 def _is_full(self) -> bool:
90 """
91 Return true if we have reached safe capacity.
92
93 So we need to increase the number of buckets to avoid collisions.
94
95 >>> hm = HashMap(2)
96 >>> hm._add_item(1, 10)
97 >>> hm._add_item(2, 20)
98 >>> hm._is_full()
99 True
100 >>> HashMap(2)._is_full()
101 False
102 """
103 limit = len(self._buckets) * self._capacity_factor
104 return len(self) >= int(limit)
105
106 def _is_sparse(self) -> bool:
107 """Return true if we need twice fewer buckets when we have now."""
108 if len(self._buckets) <= self._initial_block_size:
109 return False
110 limit = len(self._buckets) * self._capacity_factor / 2
111 return len(self) < limit
112
113 def _resize(self, new_size: int) -> None:
114 old_buckets = self._buckets
115 self._buckets = [None] * new_size
116 self._len = 0
117 for item in old_buckets:
118 if item:
119 self._add_item(item.key, item.val)
120
121 def _size_up(self) -> None:
122 self._resize(len(self._buckets) * 2)
hash_map.py 14

123
124 def _size_down(self) -> None:
125 self._resize(len(self._buckets) // 2)
126
127 def _iterate_buckets(self, key: KEY) -> Iterator[int]:
128 ind = self._get_bucket_index(key)
129 for _ in range(len(self._buckets)):
130 yield ind
131 ind = self._get_next_ind(ind)
132
133 def _add_item(self, key: KEY, val: VAL) -> None:
134 """
135 Try to add 3 elements when the size is 5
136 >>> hm = HashMap(5)
137 >>> hm._add_item(1, 10)
138 >>> hm._add_item(2, 20)
139 >>> hm._add_item(3, 30)
140 >>> hm
141 HashMap(1: 10, 2: 20, 3: 30)
142
143 Try to add 3 elements when the size is 5
144 >>> hm = HashMap(5)
145 >>> hm._add_item(-5, 10)
146 >>> hm._add_item(6, 30)
147 >>> hm._add_item(-7, 20)
148 >>> hm
149 HashMap(-5: 10, 6: 30, -7: 20)
150
151 Try to add 3 elements when size is 1
152 >>> hm = HashMap(1)
153 >>> hm._add_item(10, 13.2)
154 >>> hm._add_item(6, 5.26)
155 >>> hm._add_item(7, 5.155)
156 >>> hm
157 HashMap(10: 13.2)
158
159 Trying to add an element with a key that is a floating point value
160 >>> hm = HashMap(5)
161 >>> hm._add_item(1.5, 10)
162 >>> hm
163 HashMap(1.5: 10)
164
165 5. Trying to add an item with the same key
166 >>> hm = HashMap(5)
167 >>> hm._add_item(1, 10)
168 >>> hm._add_item(1, 20)
169 >>> hm
170 HashMap(1: 20)
171 """
172 for ind in self._iterate_buckets(key):
173 if self._try_set(ind, key, val):
174 break
175
176 def __setitem__(self, key: KEY, val: VAL) -> None:
177 """
178 1. Changing value of item whose key is present
179 >>> hm = HashMap(5)
180 >>> hm._add_item(1, 10)
181 >>> hm.__setitem__(1, 20)
182 >>> hm
183 HashMap(1: 20)
184
185 2. Changing value of item whose key is not present
186 >>> hm = HashMap(5)
187 >>> hm._add_item(1, 10)
188 >>> hm.__setitem__(0, 20)
189 >>> hm
190 HashMap(0: 20, 1: 10)
191
192 3. Changing the value of the same item multiple times
193 >>> hm = HashMap(5)
hash_map.py 15

194 >>> hm._add_item(1, 10)

195 >>> hm.__setitem__(1, 20)
196 >>> hm.__setitem__(1, 30)
197 >>> hm
198 HashMap(1: 30)
199 """
200 if self._is_full():
201 self._size_up()
202
203 self._add_item(key, val)
204
205 def __delitem__(self, key: KEY) -> None:
206 """
207 >>> hm = HashMap(5)
208 >>> hm._add_item(1, 10)
209 >>> hm._add_item(2, 20)
210 >>> hm._add_item(3, 30)
211 >>> hm.__delitem__(3)
212 >>> hm
213 HashMap(1: 10, 2: 20)
214 >>> hm = HashMap(5)
215 >>> hm._add_item(-5, 10)
216 >>> hm._add_item(6, 30)
217 >>> hm._add_item(-7, 20)
218 >>> hm.__delitem__(-5)
219 >>> hm
220 HashMap(6: 30, -7: 20)
221
222 # Trying to remove a non-existing item
223 >>> hm = HashMap(5)
224 >>> hm._add_item(1, 10)
225 >>> hm._add_item(2, 20)
226 >>> hm._add_item(3, 30)
227 >>> hm.__delitem__(4)
228 Traceback (most recent call last):
229 ...
230 KeyError: 4
231 """
232 for ind in self._iterate_buckets(key):
233 item = self._buckets[ind]
234 if item is None:
235 raise KeyError(key)
236 if item is _deleted:
237 continue
238 if item.key == key:
239 self._buckets[ind] = _deleted
240 self._len -= 1
241 break
242 if self._is_sparse():
243 self._size_down()
244
245 def __getitem__(self, key: KEY) -> VAL:
246 """
247 Returns the item at the given key
248
249 >>> hm = HashMap(5)
250 >>> hm._add_item(1, 10)
251 >>> hm.__getitem__(1)
252 10
253
254 >>> hm = HashMap(5)
255 >>> hm._add_item(10, -10)
256 >>> hm._add_item(20, -20)
257 >>> hm.__getitem__(20)
258 -20
259
260 >>> hm = HashMap(5)
261 >>> hm._add_item(-1, 10)
262 >>> hm.__getitem__(-1)
263 10
264 """
hash_map.py 16

265 for ind in self._iterate_buckets(key):

266 item = self._buckets[ind]
267 if item is None:
268 break
269 if item is _deleted:
270 continue
271 if item.key == key:
272 return item.val
273 raise KeyError(key)
274
275 def __len__(self) -> int:
276 """
277 Returns the number of items present in hashmap
278
279 >>> hm = HashMap(5)
280 >>> hm._add_item(1, 10)
281 >>> hm._add_item(2, 20)
282 >>> hm._add_item(3, 30)
283 >>> hm.__len__()
284 3
285
286 >>> hm = HashMap(5)
287 >>> hm.__len__()
288 0
289 """
290 return self._len
291
292 def __iter__(self) -> Iterator[KEY]:
293 yield from (item.key for item in self._buckets if item)
294
295 def __repr__(self) -> str:
296 val_string = ", ".join(
297 f"{item.key}: {item.val}" for item in self._buckets if item
298 )
299 return f"HashMap({val_string})"
300
301
302 if __name__ == "__main__":
303 import doctest
304
305 doctest.testmod()

Blind 75 LeetCode Questions
No ratings yet
Blind 75 LeetCode Questions
46 pages
Heap Sort Algorithm: Relationship Between Array Indexes and Tree Elements
No ratings yet
Heap Sort Algorithm: Relationship Between Array Indexes and Tree Elements
15 pages
khushidsf2
No ratings yet
khushidsf2
8 pages
Data Sturcture and Algorithm Week 15
No ratings yet
Data Sturcture and Algorithm Week 15
4 pages
DSA_01_A
No ratings yet
DSA_01_A
1 page
HASH_TABLE[1]
No ratings yet
HASH_TABLE[1]
22 pages
Micro - Project DSP 2024.pdf Prasen Vishal Pratik
No ratings yet
Micro - Project DSP 2024.pdf Prasen Vishal Pratik
18 pages
TL06 - AVL and Hashing
No ratings yet
TL06 - AVL and Hashing
4 pages
CS5800 Assignment 6
No ratings yet
CS5800 Assignment 6
10 pages
dsa1
No ratings yet
dsa1
4 pages
CSE220 Lab 4-Hashing_
No ratings yet
CSE220 Lab 4-Hashing_
7 pages
ADS M TECH MID 2
No ratings yet
ADS M TECH MID 2
26 pages
Chapter 11 Hashing
No ratings yet
Chapter 11 Hashing
42 pages
Hashing - Datastructures and Algorithms
No ratings yet
Hashing - Datastructures and Algorithms
32 pages
Data+Structures+and+Algorithms+Bootcamp+in+Python+slides+Remaster (1) - Part-3
No ratings yet
Data+Structures+and+Algorithms+Bootcamp+in+Python+slides+Remaster (1) - Part-3
22 pages
Chapter 8 - Hashing
No ratings yet
Chapter 8 - Hashing
78 pages
CHAPTER 8 Hashing: Instructors: C. Y. Tang and J. S. Roger Jang
No ratings yet
CHAPTER 8 Hashing: Instructors: C. Y. Tang and J. S. Roger Jang
78 pages
ADS Unit 3
No ratings yet
ADS Unit 3
14 pages
Assignment No 1
No ratings yet
Assignment No 1
8 pages
Lab5 Hashing Algos
No ratings yet
Lab5 Hashing Algos
10 pages
Dsa 1
No ratings yet
Dsa 1
11 pages
Updated PDAssignment6
No ratings yet
Updated PDAssignment6
15 pages
Hashing: An Ideal Hash Table
No ratings yet
Hashing: An Ideal Hash Table
11 pages
Ds 2nd Long
No ratings yet
Ds 2nd Long
43 pages
5 Hash_new
No ratings yet
5 Hash_new
24 pages
Hashing
No ratings yet
Hashing
30 pages
3 Hashing
No ratings yet
3 Hashing
20 pages
Practical No._02
No ratings yet
Practical No._02
2 pages
Hashing
No ratings yet
Hashing
38 pages
Report (2020 EE 395) .
No ratings yet
Report (2020 EE 395) .
7 pages
Week13 1
No ratings yet
Week13 1
16 pages
HASHING
No ratings yet
HASHING
21 pages
vision_cs_2023_algorithm_chapter_2_hashing_85
No ratings yet
vision_cs_2023_algorithm_chapter_2_hashing_85
12 pages
DATA STRUCTURES DIGITAL NOTES-111-120
No ratings yet
DATA STRUCTURES DIGITAL NOTES-111-120
10 pages
HASHING
No ratings yet
HASHING
63 pages
Hashing
No ratings yet
Hashing
30 pages
Maps and Hashing - Final
No ratings yet
Maps and Hashing - Final
51 pages
Study_Material_on_Hashing
No ratings yet
Study_Material_on_Hashing
4 pages
DSA Hashing Notes
No ratings yet
DSA Hashing Notes
2 pages
Lec 11 Hash Table
No ratings yet
Lec 11 Hash Table
43 pages
11. Hafta. (3)
No ratings yet
11. Hafta. (3)
34 pages
Hashing
No ratings yet
Hashing
35 pages
Hashing
No ratings yet
Hashing
35 pages
Lab 2
No ratings yet
Lab 2
10 pages
Chapter 8 - Hashing
No ratings yet
Chapter 8 - Hashing
78 pages
Collision Resolution Techniques
No ratings yet
Collision Resolution Techniques
15 pages
Hashing
No ratings yet
Hashing
20 pages
dsa_pr1-2
No ratings yet
dsa_pr1-2
5 pages
Lecture 12
No ratings yet
Lecture 12
33 pages
Collision
No ratings yet
Collision
24 pages
CSE 326: Data Structures Hash Tables: Autumn 2007
No ratings yet
CSE 326: Data Structures Hash Tables: Autumn 2007
29 pages
Prasad Gade PR-10
No ratings yet
Prasad Gade PR-10
14 pages
CS 04
No ratings yet
CS 04
24 pages
Collision Resolution
No ratings yet
Collision Resolution
19 pages
Hashing
No ratings yet
Hashing
12 pages
Dsa Merged
No ratings yet
Dsa Merged
339 pages
Lecture 13 - Hash Tables
No ratings yet
Lecture 13 - Hash Tables
51 pages
Unit-5
No ratings yet
Unit-5
50 pages
Hashing in Data Structure
No ratings yet
Hashing in Data Structure
25 pages
Module 5 UQ
No ratings yet
Module 5 UQ
15 pages
Hashing PDF
No ratings yet
Hashing PDF
65 pages
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet
CS301 Mcqs MidTerm by Vu Topper RM
No ratings yet
CS301 Mcqs MidTerm by Vu Topper RM
30 pages
07 Kdtrees
No ratings yet
07 Kdtrees
17 pages
Chapter-8. Heap
No ratings yet
Chapter-8. Heap
11 pages
Singly Linked List: Node Next Node Start Temp Temp Start Temp Next
No ratings yet
Singly Linked List: Node Next Node Start Temp Temp Start Temp Next
5 pages
DAA - Unit-2
No ratings yet
DAA - Unit-2
156 pages
UE19CS202: Data Structures and Its Applications (4-0-0-4-4) : Course Objectives
No ratings yet
UE19CS202: Data Structures and Its Applications (4-0-0-4-4) : Course Objectives
2 pages
Subhajit Das Data Structures and Algorithms
No ratings yet
Subhajit Das Data Structures and Algorithms
4 pages
Lect 10 - 2024
No ratings yet
Lect 10 - 2024
12 pages
Circular Singly Linked List: Data Structures Using C Satish 8886503423
No ratings yet
Circular Singly Linked List: Data Structures Using C Satish 8886503423
32 pages
BCS304
No ratings yet
BCS304
3 pages
Unit Ii
No ratings yet
Unit Ii
87 pages
Data Structure and Algorithm
No ratings yet
Data Structure and Algorithm
3 pages
DS Unit Iii QB
No ratings yet
DS Unit Iii QB
4 pages
Dsa Practical 5
No ratings yet
Dsa Practical 5
14 pages
Algorithms and Data Structures For Big Data (BDA 5101)
No ratings yet
Algorithms and Data Structures For Big Data (BDA 5101)
1 page
Exercises Final Exam Part1
No ratings yet
Exercises Final Exam Part1
31 pages
Skip Graph in Distributed Environments: A Review: 1.1 Skiplist
No ratings yet
Skip Graph in Distributed Environments: A Review: 1.1 Skiplist
5 pages
Data Structure and Algorithm CS-102/CS2005
No ratings yet
Data Structure and Algorithm CS-102/CS2005
43 pages
Myhashtable
No ratings yet
Myhashtable
4 pages
I Semester BCA Examination (NEP - SCHEME) : Subject: Computer Science
No ratings yet
I Semester BCA Examination (NEP - SCHEME) : Subject: Computer Science
2 pages
IT245 - Module 8
No ratings yet
IT245 - Module 8
41 pages
DSF Combined Notes
No ratings yet
DSF Combined Notes
328 pages
B+ Tree: What Is A B+ Tree Searching Insertion Deletion
No ratings yet
B+ Tree: What Is A B+ Tree Searching Insertion Deletion
24 pages
Lab Lesson Plan
No ratings yet
Lab Lesson Plan
2 pages
Sample Questions
No ratings yet
Sample Questions
6 pages
CS301-P Mcqs FinalTerm by Vu Topper RM
No ratings yet
CS301-P Mcqs FinalTerm by Vu Topper RM
11 pages
Exp 6 - Dsa New1
No ratings yet
Exp 6 - Dsa New1
7 pages
Heaps, Heap Sort, and Priority Queues
No ratings yet
Heaps, Heap Sort, and Priority Queues
35 pages