Adding hash algorithms

This commit is contained in:
Carlos Gutierrez
2025-11-24 17:11:51 -05:00
commit 7416dfed38
25 changed files with 10315 additions and 0 deletions

View File

@@ -0,0 +1,150 @@
"""
Tests for hash functions.
"""
import pytest
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from src.hash_functions import (
division_hash,
multiplication_hash,
universal_hash,
string_hash_simple,
string_hash_polynomial,
string_hash_djb2,
md5_hash,
bad_hash_clustering
)
class TestDivisionHash:
    """Checks for ``division_hash(key, table_size)``."""

    def test_basic_division_hash(self):
        """Known keys land on the expected slots of a 7-slot table."""
        expected_slots = {10: 3, 22: 1, 31: 3}
        for key, slot in expected_slots.items():
            assert division_hash(key, 7) == slot

    def test_hash_range(self):
        """Keys 0..99 always produce a valid index for an 11-slot table."""
        slots = 11
        for key in range(100):
            assert 0 <= division_hash(key, slots) < slots

    def test_negative_keys(self):
        """A negative key must agree with Python's own ``%`` result."""
        key, slots = -10, 7
        # Python's modulo is non-negative for a positive divisor, so the
        # expected value here is a valid table index.
        assert division_hash(key, slots) == key % slots
class TestMultiplicationHash:
    """Checks for ``multiplication_hash(key, table_size)``."""

    def test_basic_multiplication_hash(self):
        """Key 10 maps to a valid index of an 8-slot table."""
        slot = multiplication_hash(10, 8)
        assert 0 <= slot < 8

    def test_hash_range(self):
        """Keys 0..49 always produce a valid index for a 16-slot table."""
        slots = 16
        for key in range(50):
            slot = multiplication_hash(key, slots)
            assert 0 <= slot < slots
class TestUniversalHash:
    """Checks for ``universal_hash(key, table_size, a, b, p)``."""

    def test_basic_universal_hash(self):
        """Key 10 maps to a valid index of an 11-slot table."""
        prime = 101  # Prime larger than max key
        coeff_a, coeff_b = 3, 7
        slot = universal_hash(10, 11, coeff_a, coeff_b, prime)
        assert 0 <= slot < 11

    def test_hash_range(self):
        """Keys 0..49 always produce a valid index for a 13-slot table."""
        slots = 13
        prime = 101
        coeff_a, coeff_b = 5, 11
        for key in range(50):
            slot = universal_hash(key, slots, coeff_a, coeff_b, prime)
            assert 0 <= slot < slots
class TestStringHashFunctions:
    """Checks for the string-keyed hash functions."""

    def test_string_hash_simple(self):
        """``string_hash_simple`` yields a valid index for an 11-slot table."""
        slot = string_hash_simple("hello", 11)
        assert 0 <= slot < 11

    def test_string_hash_polynomial(self):
        """``string_hash_polynomial`` yields a valid index for an 11-slot table."""
        slot = string_hash_polynomial("hello", 11)
        assert 0 <= slot < 11

    def test_string_hash_djb2(self):
        """``string_hash_djb2`` yields a valid index for an 11-slot table."""
        slot = string_hash_djb2("hello", 11)
        assert 0 <= slot < 11

    def test_string_hash_collisions(self):
        """Five distinct words must not all collapse onto a single slot."""
        slots = 100
        words = ["hello", "world", "test", "hash", "table"]
        distinct_slots = {string_hash_polynomial(word, slots) for word in words}
        # Collisions are allowed; only total collapse to one value is a failure.
        assert len(distinct_slots) > 1

    def test_md5_hash(self):
        """``md5_hash`` yields a valid index for an 11-slot table."""
        slot = md5_hash("test", 11)
        assert 0 <= slot < 11
class TestBadHashFunctions:
    """Checks for the deliberately poor hash functions."""

    def test_bad_hash_clustering(self):
        """Every key 0..19 is expected to pile up in slot 0 of a 10-slot table."""
        slots = 10
        for key in range(20):
            # A single shared slot is the clustering this function demonstrates.
            assert bad_hash_clustering(key, slots) == 0
class TestHashFunctionProperties:
    """Checks for general properties a hash function should have."""

    def test_deterministic(self):
        """Hashing the same key twice gives the same slot."""
        key, slots = 42, 11
        first = division_hash(key, slots)
        second = division_hash(key, slots)
        assert first == second

    def test_distribution(self):
        """Keys 0..99 must occupy more than half of a 20-slot table."""
        slots = 20
        # Only the number of distinct buckets matters, so a set is enough.
        occupied = {division_hash(key, slots) for key in range(100)}
        assert len(occupied) > slots * 0.5  # At least 50% of buckets used

203
tests/test_hash_tables.py Normal file
View File

@@ -0,0 +1,203 @@
"""
Tests for hash table implementations.
"""
import pytest
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from src.hash_tables import (
DirectAddressTable,
HashTableOpenAddressing,
HashTableSeparateChaining
)
from src.hash_functions import division_hash
class TestDirectAddressTable:
    """Checks for ``DirectAddressTable``."""

    def test_insert_and_search(self):
        """Stored keys return their values; an unused key returns ``None``."""
        table = DirectAddressTable(100)
        entries = {5: "value1", 42: "value2"}
        for key, value in entries.items():
            table.insert(key, value)
        for key, value in entries.items():
            assert table.search(key) == value
        assert table.search(10) is None

    def test_delete(self):
        """A deleted key is no longer found."""
        table = DirectAddressTable(100)
        table.insert(5, "value1")
        table.delete(5)
        assert table.search(5) is None

    def test_out_of_range_key(self):
        """Inserting at an index equal to the table size raises ``ValueError``."""
        table = DirectAddressTable(100)
        with pytest.raises(ValueError):
            table.insert(100, "value")  # Out of range
        # Kept after the ``with`` block so it runs once the expected error
        # has been raised and caught.
        assert table.search(100) is None
class TestHashTableOpenAddressing:
    """Checks for ``HashTableOpenAddressing`` under each probing strategy."""

    def test_insert_and_search_linear(self):
        """Linear probing: stored keys are found, a missing key gives ``None``."""
        table = HashTableOpenAddressing(10, probe_type='linear')
        entries = {10: "value1", 22: "value2", 31: "value3"}
        for key, value in entries.items():
            table.insert(key, value)
        for key, value in entries.items():
            assert table.search(key) == value
        assert table.search(99) is None

    def test_insert_and_search_quadratic(self):
        """Quadratic probing: stored keys are found."""
        table = HashTableOpenAddressing(10, probe_type='quadratic')
        entries = {10: "value1", 22: "value2"}
        for key, value in entries.items():
            table.insert(key, value)
        for key, value in entries.items():
            assert table.search(key) == value

    def test_insert_and_search_double(self):
        """Double hashing: stored keys are found."""
        table = HashTableOpenAddressing(10, probe_type='double')
        entries = {10: "value1", 22: "value2"}
        for key, value in entries.items():
            table.insert(key, value)
        for key, value in entries.items():
            assert table.search(key) == value

    def test_delete(self):
        """Delete reports success, removes only its key, and fails on a miss."""
        table = HashTableOpenAddressing(10, probe_type='linear')
        table.insert(10, "value1")
        table.insert(22, "value2")
        removed = table.delete(10)
        assert removed is True
        assert table.search(10) is None
        # The other entry must still be reachable after the removal.
        assert table.search(22) == "value2"
        assert table.delete(99) is False

    def test_update_existing_key(self):
        """Inserting an existing key replaces its value."""
        table = HashTableOpenAddressing(10, probe_type='linear')
        for value in ("value1", "value2"):  # Second insert is the update
            table.insert(10, value)
        assert table.search(10) == "value2"

    def test_resize(self):
        """Ten inserts into a 5-slot table leave every key searchable."""
        table = HashTableOpenAddressing(
            5, probe_type='linear', load_factor_threshold=0.7
        )
        keys = range(10)
        # More entries than initial slots, so the table has to grow.
        for key in keys:
            table.insert(key, f"value{key}")
        for key in keys:
            assert table.search(key) == f"value{key}"
class TestHashTableSeparateChaining:
    """Checks for ``HashTableSeparateChaining``."""

    def test_insert_and_search(self):
        """Stored keys are found; a missing key gives ``None``."""
        table = HashTableSeparateChaining(10)
        entries = {10: "value1", 22: "value2", 31: "value3"}
        for key, value in entries.items():
            table.insert(key, value)
        for key, value in entries.items():
            assert table.search(key) == value
        assert table.search(99) is None

    def test_delete(self):
        """Delete reports success, removes only its key, and fails on a miss."""
        table = HashTableSeparateChaining(10)
        table.insert(10, "value1")
        table.insert(22, "value2")
        removed = table.delete(10)
        assert removed is True
        assert table.search(10) is None
        # The other entry must still be reachable after the removal.
        assert table.search(22) == "value2"
        assert table.delete(99) is False

    def test_update_existing_key(self):
        """Inserting an existing key replaces its value."""
        table = HashTableSeparateChaining(10)
        for value in ("value1", "value2"):  # Second insert is the update
            table.insert(10, value)
        assert table.search(10) == "value2"

    def test_collision_handling(self):
        """Five keys in a 5-slot table all remain searchable."""
        table = HashTableSeparateChaining(5)  # Small table to force collisions
        keys = (10, 15, 20, 25, 30)
        for key in keys:
            table.insert(key, f"value{key}")
        for key in keys:
            assert table.search(key) == f"value{key}"

    def test_chain_lengths(self):
        """Chain lengths cover every bucket and add up to the item count."""
        table = HashTableSeparateChaining(5)
        item_count = 10
        for key in range(item_count):
            table.insert(key, f"value{key}")
        lengths = table.get_chain_lengths()
        # The table may have resized during the inserts, so compare against
        # its current size rather than the initial 5.
        assert len(lengths) == table.size
        assert sum(lengths) == item_count

    def test_resize(self):
        """Twenty inserts into a 5-slot table leave every key searchable."""
        table = HashTableSeparateChaining(5, load_factor_threshold=1.0)
        keys = range(20)
        # Four times more entries than initial slots, to exceed the threshold.
        for key in keys:
            table.insert(key, f"value{key}")
        for key in keys:
            assert table.search(key) == f"value{key}"
class TestHashTableComparison:
    """Cross-checks the open-addressing and separate-chaining tables."""

    def test_same_operations_different_implementations(self):
        """Both tables give the same results for one sequence of operations."""
        keys = [10, 22, 31, 4, 15, 28, 17, 88, 59]
        open_addr = HashTableOpenAddressing(20, probe_type='linear')
        chained = HashTableSeparateChaining(20)

        # Load the same entries into both tables.
        for key in keys:
            value = f"value{key}"
            open_addr.insert(key, value)
            chained.insert(key, value)

        # Every key must be retrievable from both.
        for key in keys:
            value = f"value{key}"
            assert open_addr.search(key) == value
            assert chained.search(key) == value

        # Removing the first five keys must succeed in both and leave no trace.
        for key in keys[:5]:
            assert open_addr.delete(key) is True
            assert chained.delete(key) is True
            assert open_addr.search(key) is None
            assert chained.search(key) is None