Adding hash algorithms
This commit is contained in:
150
tests/test_hash_functions.py
Normal file
150
tests/test_hash_functions.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""
|
||||
Tests for hash functions.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
from src.hash_functions import (
|
||||
division_hash,
|
||||
multiplication_hash,
|
||||
universal_hash,
|
||||
string_hash_simple,
|
||||
string_hash_polynomial,
|
||||
string_hash_djb2,
|
||||
md5_hash,
|
||||
bad_hash_clustering
|
||||
)
|
||||
|
||||
|
||||
class TestDivisionHash:
    """Checks for ``division_hash``: fixed examples, a key sweep, a negative key."""

    def test_basic_division_hash(self):
        """Three known keys map to their expected slots in a table of size 7."""
        expected_slots = [(10, 3), (22, 1), (31, 3)]
        for key, slot in expected_slots:
            assert division_hash(key, 7) == slot

    def test_hash_range(self):
        """Every key in 0..99 hashes into ``[0, table_size)``."""
        table_size = 11
        for key in range(100):
            slot = division_hash(key, table_size)
            assert 0 <= slot < table_size

    def test_negative_keys(self):
        """A negative key hashes to the same value as Python's ``%`` operator."""
        # In Python, -10 % 7 evaluates to 4 (the result takes the divisor's sign).
        expected = -10 % 7
        assert division_hash(-10, 7) == expected
|
||||
|
||||
|
||||
class TestMultiplicationHash:
    """Range checks for ``multiplication_hash``."""

    def test_basic_multiplication_hash(self):
        """Key 10 hashes into ``[0, 8)`` for a table of size 8."""
        slot = multiplication_hash(10, 8)
        assert 0 <= slot < 8

    def test_hash_range(self):
        """Keys 0..49 all hash into ``[0, table_size)`` for a table of size 16."""
        table_size = 16
        for key in range(50):
            slot = multiplication_hash(key, table_size)
            assert 0 <= slot < table_size
|
||||
|
||||
|
||||
class TestUniversalHash:
    """Range checks for ``universal_hash`` with fixed ``a``, ``b`` and prime ``p``."""

    def test_basic_universal_hash(self):
        """Key 10 with (a, b, p) = (3, 7, 101) hashes into ``[0, 11)``."""
        prime = 101  # Prime larger than max key
        multiplier, offset = 3, 7
        slot = universal_hash(10, 11, multiplier, offset, prime)
        assert 0 <= slot < 11

    def test_hash_range(self):
        """Keys 0..49 with (a, b, p) = (5, 11, 101) stay inside a table of size 13."""
        table_size = 13
        prime = 101
        multiplier, offset = 5, 11
        for key in range(50):
            slot = universal_hash(key, table_size, multiplier, offset, prime)
            assert 0 <= slot < table_size
|
||||
|
||||
|
||||
class TestStringHashFunctions:
    """Range and spread checks for the string-keyed hash functions."""

    def test_string_hash_simple(self):
        """``string_hash_simple`` maps "hello" into ``[0, 11)``."""
        slot = string_hash_simple("hello", 11)
        assert 0 <= slot < 11

    def test_string_hash_polynomial(self):
        """``string_hash_polynomial`` maps "hello" into ``[0, 11)``."""
        slot = string_hash_polynomial("hello", 11)
        assert 0 <= slot < 11

    def test_string_hash_djb2(self):
        """``string_hash_djb2`` maps "hello" into ``[0, 11)``."""
        slot = string_hash_djb2("hello", 11)
        assert 0 <= slot < 11

    def test_string_hash_collisions(self):
        """Five different words must not all share one polynomial hash value."""
        table_size = 100
        words = ["hello", "world", "test", "hash", "table"]
        distinct_slots = {string_hash_polynomial(word, table_size) for word in words}
        # Only requires more than one distinct value; full uniqueness is not
        # guaranteed and is not asserted.
        assert len(distinct_slots) > 1

    def test_md5_hash(self):
        """``md5_hash`` maps "test" into ``[0, 11)``."""
        slot = md5_hash("test", 11)
        assert 0 <= slot < 11
|
||||
|
||||
|
||||
class TestBadHashFunctions:
    """Checks for the intentionally poor hash function."""

    def test_bad_hash_clustering(self):
        """``bad_hash_clustering`` sends every key in 0..19 to slot 0."""
        table_size = 10
        slots = [bad_hash_clustering(key, table_size) for key in range(20)]
        # Every key landing in slot 0 is the clustering this test demonstrates.
        for slot in slots:
            assert slot == 0
|
||||
|
||||
|
||||
class TestHashFunctionProperties:
    """Property-style checks using ``division_hash``."""

    def test_deterministic(self):
        """Hashing the same key and table size twice gives the same value."""
        key, table_size = 42, 11
        first = division_hash(key, table_size)
        second = division_hash(key, table_size)
        assert first == second

    def test_distribution(self):
        """Keys 0..99 occupy more than half of the 20 buckets."""
        table_size = 20
        # The number of distinct hash values equals the number of buckets used.
        occupied = {division_hash(key, table_size) for key in range(100)}
        buckets_used = len(occupied)
        assert buckets_used > table_size * 0.5  # At least 50% of buckets used
|
||||
|
||||
203
tests/test_hash_tables.py
Normal file
203
tests/test_hash_tables.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""
|
||||
Tests for hash table implementations.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
from src.hash_tables import (
|
||||
DirectAddressTable,
|
||||
HashTableOpenAddressing,
|
||||
HashTableSeparateChaining
|
||||
)
|
||||
from src.hash_functions import division_hash
|
||||
|
||||
|
||||
class TestDirectAddressTable:
    """Checks for ``DirectAddressTable`` insert, search, delete and key bounds."""

    def test_insert_and_search(self):
        """Inserted keys return their values; a key never inserted returns None."""
        table = DirectAddressTable(100)
        stored = {5: "value1", 42: "value2"}
        for key, value in stored.items():
            table.insert(key, value)

        for key, value in stored.items():
            assert table.search(key) == value
        assert table.search(10) is None

    def test_delete(self):
        """A deleted key is no longer found by search."""
        table = DirectAddressTable(100)
        table.insert(5, "value1")
        table.delete(5)
        assert table.search(5) is None

    def test_out_of_range_key(self):
        """Inserting key 100 into a table built with 100 raises ValueError."""
        table = DirectAddressTable(100)
        with pytest.raises(ValueError):
            table.insert(100, "value")  # Out of range
        # Kept outside the ``raises`` block: a statement placed after the
        # raising call inside that block would never execute.
        assert table.search(100) is None
|
||||
|
||||
|
||||
class TestHashTableOpenAddressing:
    """Checks for ``HashTableOpenAddressing`` across its three probe types."""

    def test_insert_and_search_linear(self):
        """Linear probing: inserted keys are found, an absent key returns None."""
        ht = HashTableOpenAddressing(10, probe_type='linear')
        entries = {10: "value1", 22: "value2", 31: "value3"}
        for key, value in entries.items():
            ht.insert(key, value)

        for key, value in entries.items():
            assert ht.search(key) == value
        assert ht.search(99) is None

    def test_insert_and_search_quadratic(self):
        """Quadratic probing: inserted keys are found."""
        ht = HashTableOpenAddressing(10, probe_type='quadratic')
        entries = {10: "value1", 22: "value2"}
        for key, value in entries.items():
            ht.insert(key, value)

        for key, value in entries.items():
            assert ht.search(key) == value

    def test_insert_and_search_double(self):
        """Double hashing: inserted keys are found."""
        ht = HashTableOpenAddressing(10, probe_type='double')
        entries = {10: "value1", 22: "value2"}
        for key, value in entries.items():
            ht.insert(key, value)

        for key, value in entries.items():
            assert ht.search(key) == value

    def test_delete(self):
        """Delete returns True for a present key and False for an absent one."""
        ht = HashTableOpenAddressing(10, probe_type='linear')
        ht.insert(10, "value1")
        ht.insert(22, "value2")

        removed = ht.delete(10)
        assert removed is True
        assert ht.search(10) is None
        # The other entry must still be reachable after the deletion.
        assert ht.search(22) == "value2"
        assert ht.delete(99) is False

    def test_update_existing_key(self):
        """Inserting an existing key again replaces its value."""
        ht = HashTableOpenAddressing(10, probe_type='linear')
        for value in ("value1", "value2"):  # second insert is the update
            ht.insert(10, value)
        assert ht.search(10) == "value2"

    def test_resize(self):
        """Ten inserts into a table created with size 5 all remain searchable."""
        ht = HashTableOpenAddressing(5, probe_type='linear', load_factor_threshold=0.7)
        keys = range(10)
        # Insert enough to trigger resize
        for key in keys:
            ht.insert(key, f"value{key}")

        # All should still be searchable
        for key in keys:
            assert ht.search(key) == f"value{key}"
|
||||
|
||||
|
||||
class TestHashTableSeparateChaining:
    """Checks for ``HashTableSeparateChaining``: CRUD, collisions, chains, resizing."""

    def test_insert_and_search(self):
        """Inserted keys are found; an absent key returns None."""
        ht = HashTableSeparateChaining(10)
        entries = {10: "value1", 22: "value2", 31: "value3"}
        for key, value in entries.items():
            ht.insert(key, value)

        for key, value in entries.items():
            assert ht.search(key) == value
        assert ht.search(99) is None

    def test_delete(self):
        """Delete returns True for a present key and False for an absent one."""
        ht = HashTableSeparateChaining(10)
        ht.insert(10, "value1")
        ht.insert(22, "value2")

        assert ht.delete(10) is True
        assert ht.search(10) is None
        # The other entry must still be reachable after the deletion.
        assert ht.search(22) == "value2"
        assert ht.delete(99) is False

    def test_update_existing_key(self):
        """Inserting an existing key again replaces its value."""
        ht = HashTableSeparateChaining(10)
        ht.insert(10, "value1")
        ht.insert(10, "value2")  # Update
        assert ht.search(10) == "value2"

    def test_collision_handling(self):
        """Keys inserted into a small table all remain searchable."""
        ht = HashTableSeparateChaining(5)  # Small table to force collisions
        keys = [10, 15, 20, 25, 30]
        for key in keys:
            ht.insert(key, f"value{key}")

        # All should be searchable
        for key in keys:
            assert ht.search(key) == f"value{key}"

    def test_chain_lengths(self):
        """``get_chain_lengths`` reports one entry per bucket, summing to the item count."""
        ht = HashTableSeparateChaining(5)
        for i in range(10):
            ht.insert(i, f"value{i}")

        chain_lengths = ht.get_chain_lengths()
        # After inserting 10 items, table will resize (load factor > 1.0)
        # So chain lengths should match current table size, not initial size
        assert len(chain_lengths) == ht.size
        assert sum(chain_lengths) == 10

    def test_resize(self):
        """Exceeding the load-factor threshold grows the table and keeps every entry."""
        initial_size = 5
        ht = HashTableSeparateChaining(initial_size, load_factor_threshold=1.0)
        # Insert enough to trigger resize
        for i in range(20):
            ht.insert(i, f"value{i}")

        # 20 items in 5 buckets is well past a threshold of 1.0, so the bucket
        # count must have grown; without this check the test would still pass
        # if resizing never happened.
        assert ht.size > initial_size

        # All should still be searchable
        for i in range(20):
            assert ht.search(i) == f"value{i}"
|
||||
|
||||
|
||||
class TestHashTableComparison:
    """Runs one workload against both hash table implementations."""

    def test_same_operations_different_implementations(self):
        """Open addressing and separate chaining agree on insert, search and delete."""
        keys = [10, 22, 31, 4, 15, 28, 17, 88, 59]

        ht_oa = HashTableOpenAddressing(20, probe_type='linear')
        ht_sc = HashTableSeparateChaining(20)
        tables = (ht_oa, ht_sc)

        # Insert same keys
        for key in keys:
            for table in tables:
                table.insert(key, f"value{key}")

        # Both should find all keys
        for key in keys:
            for table in tables:
                assert table.search(key) == f"value{key}"

        # Both should delete successfully
        for key in keys[:5]:
            for table in tables:
                assert table.delete(key) is True
            for table in tables:
                assert table.search(key) is None
|
||||
|
||||
Reference in New Issue
Block a user