Adding hash algorithms

2025-11-24 17:11:51 -05:00
commit 7416dfed38
25 changed files with 10315 additions and 0 deletions
--- a/examples/generate_plots.py
+++ b/examples/generate_plots.py
@@ -0,0 +1,389 @@
+"""
+Generate visualization plots for hash table performance analysis.
+"""
+
+import sys
+import os
+import matplotlib.pyplot as plt
+import numpy as np
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from src.benchmark import (
+    benchmark_hash_functions,
+    benchmark_open_addressing_vs_chaining,
+    benchmark_load_factor_impact,
+    benchmark_load_factor_impact_probes,
+    generate_test_data
+)
+from src.hash_functions import (
+    division_hash,
+    multiplication_hash,
+    string_hash_simple,
+    string_hash_polynomial,
+    string_hash_djb2,
+    bad_hash_clustering
+)
+
+
+def plot_hash_function_comparison():
+    """Compare different hash functions."""
+    print("Generating hash function comparison plot...")
+    
+    keys = generate_test_data(1000)
+    table_size = 100
+    
+    hash_funcs = {
+        'Division': division_hash,
+        'Multiplication': lambda k, s: multiplication_hash(k, s),
+        'Simple String': lambda k, s: string_hash_simple(str(k), s),
+        'Polynomial String': lambda k, s: string_hash_polynomial(str(k), s),
+        'DJB2': lambda k, s: string_hash_djb2(str(k), s),
+        'Bad Clustering': bad_hash_clustering,
+    }
+    
+    results = benchmark_hash_functions(hash_funcs, keys, table_size)
+    
+    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+    fig.suptitle('Hash Function Performance Comparison', fontsize=16, fontweight='bold')
+    
+    names = list(results.keys())
+    collision_rates = [results[n]['collision_rate'] * 100 for n in names]
+    variances = [results[n]['variance'] for n in names]
+    times = [results[n]['time'] * 1000 for n in names]  # Convert to ms
+    max_chains = [results[n]['max_chain_length'] for n in names]
+    
+    # Collision rate
+    axes[0, 0].bar(names, collision_rates, color='steelblue')
+    axes[0, 0].set_title('Collision Rate (%)', fontweight='bold')
+    axes[0, 0].set_ylabel('Collision Rate (%)')
+    axes[0, 0].tick_params(axis='x', rotation=45)
+    axes[0, 0].grid(axis='y', alpha=0.3)
+    
+    # Variance (distribution quality)
+    axes[0, 1].bar(names, variances, color='coral')
+    axes[0, 1].set_title('Distribution Variance (Lower is Better)', fontweight='bold')
+    axes[0, 1].set_ylabel('Variance')
+    axes[0, 1].tick_params(axis='x', rotation=45)
+    axes[0, 1].grid(axis='y', alpha=0.3)
+    
+    # Execution time
+    axes[1, 0].bar(names, times, color='mediumseagreen')
+    axes[1, 0].set_title('Execution Time', fontweight='bold')
+    axes[1, 0].set_ylabel('Time (ms)')
+    axes[1, 0].tick_params(axis='x', rotation=45)
+    axes[1, 0].grid(axis='y', alpha=0.3)
+    
+    # Max chain length
+    axes[1, 1].bar(names, max_chains, color='plum')
+    axes[1, 1].set_title('Maximum Chain Length', fontweight='bold')
+    axes[1, 1].set_ylabel('Max Chain Length')
+    axes[1, 1].tick_params(axis='x', rotation=45)
+    axes[1, 1].grid(axis='y', alpha=0.3)
+    
+    plt.tight_layout()
+    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'hash_function_comparison.png')
+    plt.savefig(output_path, dpi=300, bbox_inches='tight')
+    print(f"Saved: {output_path}")
+    plt.close()
+
+
+def plot_open_addressing_vs_chaining():
+    """Compare open addressing vs separate chaining."""
+    print("Generating open addressing vs separate chaining comparison plot...")
+    
+    sizes = [100, 500, 1000, 5000, 10000]
+    results = benchmark_open_addressing_vs_chaining(sizes)
+    
+    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+    fig.suptitle('Open Addressing vs Separate Chaining Performance', fontsize=16, fontweight='bold')
+    
+    sizes_arr = np.array(sizes)
+    
+    # Insert time
+    ax = axes[0, 0]
+    for probe_type in ['linear', 'quadratic', 'double']:
+        insert_times = [r['insert_time'] for r in results['open_addressing'][probe_type]]
+        ax.plot(sizes_arr, insert_times, marker='o', label=f'Open Addressing ({probe_type})', linewidth=2)
+    
+    insert_times_sc = [r['insert_time'] for r in results['separate_chaining']]
+    ax.plot(sizes_arr, insert_times_sc, marker='s', label='Separate Chaining', linewidth=2, linestyle='--')
+    ax.set_xlabel('Number of Elements')
+    ax.set_ylabel('Insert Time (seconds)')
+    ax.set_title('Insert Performance', fontweight='bold')
+    ax.legend()
+    ax.grid(alpha=0.3)
+    ax.set_xscale('log')
+    ax.set_yscale('log')
+    
+    # Search time
+    ax = axes[0, 1]
+    for probe_type in ['linear', 'quadratic', 'double']:
+        search_times = [r['search_time'] for r in results['open_addressing'][probe_type]]
+        ax.plot(sizes_arr, search_times, marker='o', label=f'Open Addressing ({probe_type})', linewidth=2)
+    
+    search_times_sc = [r['search_time'] for r in results['separate_chaining']]
+    ax.plot(sizes_arr, search_times_sc, marker='s', label='Separate Chaining', linewidth=2, linestyle='--')
+    ax.set_xlabel('Number of Elements')
+    ax.set_ylabel('Search Time (seconds)')
+    ax.set_title('Search Performance', fontweight='bold')
+    ax.legend()
+    ax.grid(alpha=0.3)
+    ax.set_xscale('log')
+    ax.set_yscale('log')
+    
+    # Delete time
+    ax = axes[1, 0]
+    for probe_type in ['linear', 'quadratic', 'double']:
+        delete_times = [r['delete_time'] for r in results['open_addressing'][probe_type]]
+        ax.plot(sizes_arr, delete_times, marker='o', label=f'Open Addressing ({probe_type})', linewidth=2)
+    
+    delete_times_sc = [r['delete_time'] for r in results['separate_chaining']]
+    ax.plot(sizes_arr, delete_times_sc, marker='s', label='Separate Chaining', linewidth=2, linestyle='--')
+    ax.set_xlabel('Number of Elements')
+    ax.set_ylabel('Delete Time (seconds)')
+    ax.set_title('Delete Performance', fontweight='bold')
+    ax.legend()
+    ax.grid(alpha=0.3)
+    ax.set_xscale('log')
+    ax.set_yscale('log')
+    
+    # Load factors
+    ax = axes[1, 1]
+    for probe_type in ['linear', 'quadratic', 'double']:
+        load_factors = [r['load_factor'] for r in results['open_addressing'][probe_type]]
+        ax.plot(sizes_arr, load_factors, marker='o', label=f'Open Addressing ({probe_type})', linewidth=2)
+    
+    load_factors_sc = [r['load_factor'] for r in results['separate_chaining']]
+    ax.plot(sizes_arr, load_factors_sc, marker='s', label='Separate Chaining', linewidth=2, linestyle='--')
+    ax.set_xlabel('Number of Elements')
+    ax.set_ylabel('Load Factor')
+    ax.set_title('Load Factor', fontweight='bold')
+    ax.legend()
+    ax.grid(alpha=0.3)
+    ax.set_xscale('log')
+    
+    plt.tight_layout()
+    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'open_addressing_vs_chaining.png')
+    plt.savefig(output_path, dpi=300, bbox_inches='tight')
+    print(f"Saved: {output_path}")
+    plt.close()
+
+
+def plot_load_factor_impact():
+    """Plot performance at different load factors with statistical smoothing."""
+    print("Generating load factor impact plot...")
+    
+    results = benchmark_load_factor_impact(initial_size=100, max_elements=1000, probe_type='linear', num_runs=30)
+    
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    fig.suptitle('Performance Impact of Load Factor', fontsize=16, fontweight='bold')
+    
+    # Extract data
+    oa_data = results['open_addressing']
+    sc_data = results['separate_chaining']
+    
+    # Sort by load factor to avoid zig-zag lines
+    oa_sorted = sorted(oa_data, key=lambda x: x['load_factor'])
+    sc_sorted = sorted(sc_data, key=lambda x: x['load_factor'])
+    
+    oa_load_factors = [r['load_factor'] for r in oa_sorted]
+    oa_insert_times = [r['insert_time'] for r in oa_sorted]
+    oa_insert_stds = [r.get('insert_time_std', 0) for r in oa_sorted]
+    oa_search_times = [r['search_time'] for r in oa_sorted]
+    oa_search_stds = [r.get('search_time_std', 0) for r in oa_sorted]
+    
+    sc_load_factors = [r['load_factor'] for r in sc_sorted]
+    sc_insert_times = [r['insert_time'] for r in sc_sorted]
+    sc_insert_stds = [r.get('insert_time_std', 0) for r in sc_sorted]
+    sc_search_times = [r['search_time'] for r in sc_sorted]
+    sc_search_stds = [r.get('search_time_std', 0) for r in sc_sorted]
+    sc_chain_lengths = [r['avg_chain_length'] for r in sc_sorted]
+    
+    # Insert time vs load factor (per element) with error bars
+    ax = axes[0]
+    ax.errorbar(oa_load_factors, oa_insert_times, yerr=oa_insert_stds, 
+                marker='o', label='Open Addressing (Linear)', linewidth=2, 
+                capsize=3, capthick=1.5, alpha=0.8)
+    ax.errorbar(sc_load_factors, sc_insert_times, yerr=sc_insert_stds,
+                marker='s', label='Separate Chaining', linewidth=2, linestyle='--',
+                capsize=3, capthick=1.5, alpha=0.8)
+    ax.set_xlabel('Load Factor')
+    ax.set_ylabel('Insert Time per Element (seconds)')
+    ax.set_title('Insert Time vs Load Factor', fontweight='bold')
+    ax.legend()
+    ax.grid(alpha=0.3)
+    
+    # Search time vs load factor (per element) with error bars
+    ax = axes[1]
+    ax.errorbar(oa_load_factors, oa_search_times, yerr=oa_search_stds,
+                marker='o', label='Open Addressing (Linear)', linewidth=2,
+                capsize=3, capthick=1.5, alpha=0.8)
+    ax.errorbar(sc_load_factors, sc_search_times, yerr=sc_search_stds,
+                marker='s', label='Separate Chaining', linewidth=2, linestyle='--',
+                capsize=3, capthick=1.5, alpha=0.8)
+    ax2 = ax.twinx()
+    # Chain length is smooth and accurate, so use line plot
+    ax2.plot(sc_load_factors, sc_chain_lengths, marker='^', 
+             label='Avg Chain Length (SC)', color='green', linestyle=':', linewidth=2)
+    ax.set_xlabel('Load Factor')
+    ax.set_ylabel('Search Time per Element (seconds)', color='blue')
+    ax2.set_ylabel('Average Chain Length', color='green')
+    ax.set_title('Search Time vs Load Factor', fontweight='bold')
+    ax.legend(loc='upper left')
+    ax2.legend(loc='upper right')
+    ax.grid(alpha=0.3)
+    
+    plt.tight_layout()
+    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'load_factor_impact.png')
+    plt.savefig(output_path, dpi=300, bbox_inches='tight')
+    print(f"Saved: {output_path}")
+    plt.close()
+
+
+def plot_load_factor_impact_probes():
+    """Plot probe counts and comparisons at different load factors.
+    
+    Uses deterministic metrics (probe counts, comparisons) instead of timing
+    to produce smooth theoretical curves without measurement noise.
+    """
+    print("Generating load factor impact plot (probe counts)...")
+    
+    results = benchmark_load_factor_impact_probes(initial_size=100, max_elements=1000, probe_type='linear', num_runs=10)
+    
+    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+    fig.suptitle('Performance Impact of Load Factor (Probe Counts & Comparisons)', fontsize=16, fontweight='bold')
+    
+    # Extract data
+    oa_data = results['open_addressing']
+    sc_data = results['separate_chaining']
+    
+    # Sort by load factor
+    oa_sorted = sorted(oa_data, key=lambda x: x['load_factor'])
+    sc_sorted = sorted(sc_data, key=lambda x: x['load_factor'])
+    
+    oa_load_factors = [r['load_factor'] for r in oa_sorted]
+    oa_insert_probes = [r['insert_probes_per_element'] for r in oa_sorted]
+    oa_search_probes = [r['search_probes_per_element'] for r in oa_sorted]
+    oa_insert_comparisons = [r['insert_comparisons_per_element'] for r in oa_sorted]
+    oa_search_comparisons = [r['search_comparisons_per_element'] for r in oa_sorted]
+    
+    sc_load_factors = [r['load_factor'] for r in sc_sorted]
+    sc_insert_comparisons = [r['insert_comparisons_per_element'] for r in sc_sorted]
+    sc_search_comparisons = [r['search_comparisons_per_element'] for r in sc_sorted]
+    sc_chain_lengths = [r['avg_chain_length'] for r in sc_sorted]
+    
+    # Insert probes per element (Open Addressing)
+    ax = axes[0, 0]
+    ax.plot(oa_load_factors, oa_insert_probes, marker='o', label='Open Addressing (Linear)', 
+            linewidth=2, color='blue', markersize=6)
+    ax.set_xlabel('Load Factor')
+    ax.set_ylabel('Probes per Element')
+    ax.set_title('Insert: Probes per Element (Open Addressing)', fontweight='bold')
+    ax.legend()
+    ax.grid(alpha=0.3)
+    
+    # Search probes per element (Open Addressing)
+    ax = axes[0, 1]
+    ax.plot(oa_load_factors, oa_search_probes, marker='o', label='Open Addressing (Linear)', 
+            linewidth=2, color='blue', markersize=6)
+    ax.set_xlabel('Load Factor')
+    ax.set_ylabel('Probes per Element')
+    ax.set_title('Search: Probes per Element (Open Addressing)', fontweight='bold')
+    ax.legend()
+    ax.grid(alpha=0.3)
+    
+    # Comparisons per element (both methods)
+    ax = axes[1, 0]
+    ax.plot(oa_load_factors, oa_insert_comparisons, marker='o', label='Open Addressing (Linear)', 
+            linewidth=2, color='blue', markersize=6)
+    ax.plot(sc_load_factors, sc_insert_comparisons, marker='s', label='Separate Chaining', 
+            linewidth=2, linestyle='--', color='orange', markersize=6)
+    ax.set_xlabel('Load Factor')
+    ax.set_ylabel('Comparisons per Element')
+    ax.set_title('Insert: Comparisons per Element', fontweight='bold')
+    ax.legend()
+    ax.grid(alpha=0.3)
+    
+    # Search comparisons per element and chain length
+    ax = axes[1, 1]
+    ax.plot(oa_load_factors, oa_search_comparisons, marker='o', label='Open Addressing (Linear)', 
+            linewidth=2, color='blue', markersize=6)
+    ax.plot(sc_load_factors, sc_search_comparisons, marker='s', label='Separate Chaining', 
+            linewidth=2, linestyle='--', color='orange', markersize=6)
+    ax2 = ax.twinx()
+    ax2.plot(sc_load_factors, sc_chain_lengths, marker='^', label='Avg Chain Length (SC)', 
+             color='green', linestyle=':', linewidth=2, markersize=6)
+    ax.set_xlabel('Load Factor')
+    ax.set_ylabel('Comparisons per Element', color='blue')
+    ax2.set_ylabel('Average Chain Length', color='green')
+    ax.set_title('Search: Comparisons per Element', fontweight='bold')
+    ax.legend(loc='upper left')
+    ax2.legend(loc='upper right')
+    ax.grid(alpha=0.3)
+    
+    plt.tight_layout()
+    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'load_factor_impact_probes.png')
+    plt.savefig(output_path, dpi=300, bbox_inches='tight')
+    print(f"Saved: {output_path}")
+    plt.close()
+
+
+def plot_collision_analysis():
+    """Plot collision analysis for different hash functions."""
+    print("Generating collision analysis plot...")
+    
+    keys = generate_test_data(500)
+    table_size = 100
+    
+    hash_funcs = {
+        'Division': division_hash,
+        'Multiplication': lambda k, s: multiplication_hash(k, s),
+        'Simple String': lambda k, s: string_hash_simple(str(k), s),
+        'Polynomial': lambda k, s: string_hash_polynomial(str(k), s),
+        'Bad Clustering': bad_hash_clustering,
+    }
+    
+    results = benchmark_hash_functions(hash_funcs, keys, table_size)
+    
+    fig, ax = plt.subplots(figsize=(12, 6))
+    
+    names = list(results.keys())
+    collision_counts = [results[n]['collisions'] for n in names]
+    colors = ['steelblue' if 'Bad' not in n else 'coral' for n in names]
+    
+    bars = ax.bar(names, collision_counts, color=colors)
+    ax.set_xlabel('Hash Function', fontweight='bold')
+    ax.set_ylabel('Number of Collisions', fontweight='bold')
+    ax.set_title('Collision Analysis: Good vs Bad Hash Functions', fontsize=14, fontweight='bold')
+    ax.grid(axis='y', alpha=0.3)
+    ax.tick_params(axis='x', rotation=45)
+    
+    # Add value labels on bars
+    for bar in bars:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height,
+                f'{int(height)}',
+                ha='center', va='bottom', fontweight='bold')
+    
+    plt.tight_layout()
+    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'collision_analysis.png')
+    plt.savefig(output_path, dpi=300, bbox_inches='tight')
+    print(f"Saved: {output_path}")
+    plt.close()
+
+
+if __name__ == "__main__":
+    # Ensure docs directory exists
+    os.makedirs(os.path.join(os.path.dirname(__file__), '..', 'docs'), exist_ok=True)
+    
+    print("Generating visualization plots...")
+    print("This may take a few minutes...\n")
+    
+    plot_hash_function_comparison()
+    plot_open_addressing_vs_chaining()
+    plot_load_factor_impact()
+    plot_load_factor_impact_probes()
+    plot_collision_analysis()
+    
+    print("\nAll plots generated successfully!")
+
--- a/examples/hash_tables_demo.py
+++ b/examples/hash_tables_demo.py
@@ -0,0 +1,207 @@
+"""
+Demonstration of hash table implementations and their usage.
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from src.hash_tables import (
+    DirectAddressTable,
+    HashTableOpenAddressing,
+    HashTableSeparateChaining
+)
+from src.hash_functions import (
+    division_hash,
+    string_hash_polynomial,
+    string_hash_simple
+)
+
+
+def demo_direct_address_table():
+    """Demonstrate direct-address table."""
+    print("=" * 60)
+    print("Direct-Address Table Demonstration")
+    print("=" * 60)
+    
+    # Create table for keys in range [0, 99]
+    table = DirectAddressTable(100)
+    
+    # Insert some values
+    table.insert(5, "Alice")
+    table.insert(42, "Bob")
+    table.insert(99, "Charlie")
+    
+    print("\nInserted key-value pairs:")
+    print("  Key 5 ->", table.search(5))
+    print("  Key 42 ->", table.search(42))
+    print("  Key 99 ->", table.search(99))
+    
+    # Search
+    print("\nSearching for key 42:", table.search(42))
+    print("Searching for key 10:", table.search(10))  # Not found
+    
+    # Delete
+    table.delete(42)
+    print("\nAfter deleting key 42:")
+    print("  Key 42 ->", table.search(42))  # None
+    print()
+
+
+def demo_open_addressing():
+    """Demonstrate open addressing hash table."""
+    print("=" * 60)
+    print("Open Addressing Hash Table Demonstration")
+    print("=" * 60)
+    
+    # Test with linear probing
+    print("\n--- Linear Probing ---")
+    ht_linear = HashTableOpenAddressing(10, probe_type='linear')
+    
+    keys = [10, 22, 31, 4, 15, 28, 17, 88, 59]
+    for key in keys:
+        ht_linear.insert(key, f"Value_{key}")
+    
+    print(f"Inserted {len(keys)} keys")
+    print(f"Load factor: {ht_linear._load_factor():.2f}")
+    
+    print("\nSearching for keys:")
+    for key in [10, 22, 88, 99]:
+        result = ht_linear.search(key)
+        print(f"  Key {key}: {'Found' if result else 'Not found'}")
+    
+    # Test with quadratic probing
+    print("\n--- Quadratic Probing ---")
+    # Use larger table size for quadratic probing to avoid probe sequence issues
+    ht_quad = HashTableOpenAddressing(20, probe_type='quadratic')
+    
+    for key in keys:
+        ht_quad.insert(key, f"Value_{key}")
+    
+    print(f"Inserted {len(keys)} keys")
+    print(f"Load factor: {ht_quad._load_factor():.2f}")
+    
+    # Test with double hashing
+    print("\n--- Double Hashing ---")
+    # Use larger table size for double hashing to ensure all keys can be inserted
+    ht_double = HashTableOpenAddressing(20, probe_type='double')
+    
+    for key in keys:
+        ht_double.insert(key, f"Value_{key}")
+    
+    print(f"Inserted {len(keys)} keys")
+    print(f"Load factor: {ht_double._load_factor():.2f}")
+    print()
+
+
+def demo_separate_chaining():
+    """Demonstrate separate chaining hash table."""
+    print("=" * 60)
+    print("Separate Chaining Hash Table Demonstration")
+    print("=" * 60)
+    
+    ht = HashTableSeparateChaining(10)
+    
+    keys = [10, 22, 31, 4, 15, 28, 17, 88, 59, 71]
+    for key in keys:
+        ht.insert(key, f"Value_{key}")
+    
+    print(f"\nInserted {len(keys)} keys")
+    print(f"Load factor: {ht._load_factor():.2f}")
+    
+    chain_lengths = ht.get_chain_lengths()
+    print(f"Chain lengths: {chain_lengths}")
+    print(f"Average chain length: {sum(chain_lengths) / len(chain_lengths):.2f}")
+    print(f"Maximum chain length: {max(chain_lengths)}")
+    
+    print("\nSearching for keys:")
+    for key in [10, 22, 88, 99]:
+        result = ht.search(key)
+        print(f"  Key {key}: {'Found' if result else 'Not found'}")
+    
+    # Delete some keys
+    print("\nDeleting keys 22 and 88:")
+    ht.delete(22)
+    ht.delete(88)
+    print(f"  Key 22: {'Found' if ht.search(22) else 'Not found'}")
+    print(f"  Key 88: {'Found' if ht.search(88) else 'Not found'}")
+    print()
+
+
+def demo_hash_functions():
+    """Demonstrate different hash functions."""
+    print("=" * 60)
+    print("Hash Function Demonstration")
+    print("=" * 60)
+    
+    keys = [10, 22, 31, 4, 15, 28, 17, 88, 59, 71]
+    table_size = 11
+    
+    print(f"\nKeys: {keys}")
+    print(f"Table size: {table_size}\n")
+    
+    # Division method
+    print("Division method (h(k) = k mod m):")
+    for key in keys[:5]:
+        hash_val = division_hash(key, table_size)
+        print(f"  h({key}) = {hash_val}")
+    
+    # String hashing
+    print("\nString hash functions:")
+    string_keys = ["hello", "world", "hash", "table", "test"]
+    
+    print("Simple string hash (BAD - prone to collisions):")
+    for key in string_keys:
+        hash_val = string_hash_simple(key, table_size)
+        print(f"  h('{key}') = {hash_val}")
+    
+    print("\nPolynomial string hash (GOOD - better distribution):")
+    for key in string_keys:
+        hash_val = string_hash_polynomial(key, table_size)
+        print(f"  h('{key}') = {hash_val}")
+    print()
+
+
+def demo_collision_comparison():
+    """Demonstrate collision behavior with different hash functions."""
+    print("=" * 60)
+    print("Collision Comparison Demonstration")
+    print("=" * 60)
+    
+    # Generate test keys
+    keys = list(range(100, 200))
+    table_size = 50
+    
+    from src.hash_functions import (
+        division_hash,
+        multiplication_hash,
+        string_hash_simple,
+        string_hash_polynomial
+    )
+    
+    hash_funcs = {
+        'Division': division_hash,
+        'Multiplication': lambda k, s: multiplication_hash(k, s),
+    }
+    
+    print(f"\nTesting with {len(keys)} keys and table size {table_size}\n")
+    
+    for name, hash_func in hash_funcs.items():
+        hash_values = [hash_func(k, table_size) for k in keys]
+        collisions = len(keys) - len(set(hash_values))
+        collision_rate = collisions / len(keys) * 100
+        
+        print(f"{name} method:")
+        print(f"  Collisions: {collisions}")
+        print(f"  Collision rate: {collision_rate:.2f}%")
+        print(f"  Buckets used: {len(set(hash_values))}/{table_size}")
+        print()
+
+
+if __name__ == "__main__":
+    demo_direct_address_table()
+    demo_open_addressing()
+    demo_separate_chaining()
+    demo_hash_functions()
+    demo_collision_comparison()
+