Adding hash algorithms
This commit is contained in:
389
examples/generate_plots.py
Normal file
389
examples/generate_plots.py
Normal file
@@ -0,0 +1,389 @@
|
||||
"""
|
||||
Generate visualization plots for hash table performance analysis.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
from src.benchmark import (
|
||||
benchmark_hash_functions,
|
||||
benchmark_open_addressing_vs_chaining,
|
||||
benchmark_load_factor_impact,
|
||||
benchmark_load_factor_impact_probes,
|
||||
generate_test_data
|
||||
)
|
||||
from src.hash_functions import (
|
||||
division_hash,
|
||||
multiplication_hash,
|
||||
string_hash_simple,
|
||||
string_hash_polynomial,
|
||||
string_hash_djb2,
|
||||
bad_hash_clustering
|
||||
)
|
||||
|
||||
|
||||
def plot_hash_function_comparison():
    """Benchmark several hash functions and save a 2x2 bar-chart comparison.

    Runs ``benchmark_hash_functions`` over the keys returned by
    ``generate_test_data(1000)`` with a table size of 100, then draws one
    bar chart per metric: collision rate (as a percentage), distribution
    variance, execution time (converted to milliseconds) and maximum chain
    length.  The figure is saved as ``docs/hash_function_comparison.png``
    relative to the parent of this script's directory; the ``docs``
    directory is created if it does not exist yet.
    """
    print("Generating hash function comparison plot...")

    keys = generate_test_data(1000)
    table_size = 100

    # String hashes receive str(k) so that every function can be fed the
    # same generated keys.
    hash_funcs = {
        'Division': division_hash,
        'Multiplication': lambda k, s: multiplication_hash(k, s),
        'Simple String': lambda k, s: string_hash_simple(str(k), s),
        'Polynomial String': lambda k, s: string_hash_polynomial(str(k), s),
        'DJB2': lambda k, s: string_hash_djb2(str(k), s),
        'Bad Clustering': bad_hash_clustering,
    }

    results = benchmark_hash_functions(hash_funcs, keys, table_size)
    names = list(results.keys())

    # One entry per panel, in row-major order:
    # (title, y-axis label, bar colour, values).
    panels = [
        ('Collision Rate (%)', 'Collision Rate (%)', 'steelblue',
         [results[n]['collision_rate'] * 100 for n in names]),
        ('Distribution Variance (Lower is Better)', 'Variance', 'coral',
         [results[n]['variance'] for n in names]),
        ('Execution Time', 'Time (ms)', 'mediumseagreen',
         [results[n]['time'] * 1000 for n in names]),  # seconds -> ms
        ('Maximum Chain Length', 'Max Chain Length', 'plum',
         [results[n]['max_chain_length'] for n in names]),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Hash Function Performance Comparison', fontsize=16, fontweight='bold')

    for ax, (title, ylabel, color, values) in zip(axes.flat, panels):
        ax.bar(names, values, color=color)
        ax.set_title(title, fontweight='bold')
        ax.set_ylabel(ylabel)
        ax.tick_params(axis='x', rotation=45)
        ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'hash_function_comparison.png')
    # savefig does not create missing directories; make the function usable
    # when it is called on its own rather than through the script entry point.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    print(f"Saved: {output_path}")
    plt.close()
|
||||
|
||||
|
||||
def plot_open_addressing_vs_chaining():
    """Plot open addressing (three probe types) against separate chaining.

    Calls ``benchmark_open_addressing_vs_chaining`` for element counts
    100, 500, 1000, 5000 and 10000 and draws a 2x2 grid: insert, search and
    delete time (log-log axes) plus load factor (log x-axis only).  The
    figure is saved as ``docs/open_addressing_vs_chaining.png`` relative to
    the parent of this script's directory; the ``docs`` directory is
    created if it does not exist yet.
    """
    print("Generating open addressing vs separate chaining comparison plot...")

    sizes = [100, 500, 1000, 5000, 10000]
    results = benchmark_open_addressing_vs_chaining(sizes)

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Open Addressing vs Separate Chaining Performance', fontsize=16, fontweight='bold')

    sizes_arr = np.array(sizes)
    probe_types = ['linear', 'quadratic', 'double']

    # One entry per panel, in row-major order:
    # (result key, y-axis label, title, whether the y-axis is logarithmic).
    panels = [
        ('insert_time', 'Insert Time (seconds)', 'Insert Performance', True),
        ('search_time', 'Search Time (seconds)', 'Search Performance', True),
        ('delete_time', 'Delete Time (seconds)', 'Delete Performance', True),
        ('load_factor', 'Load Factor', 'Load Factor', False),
    ]

    for ax, (metric, ylabel, title, log_y) in zip(axes.flat, panels):
        for probe_type in probe_types:
            values = [r[metric] for r in results['open_addressing'][probe_type]]
            ax.plot(sizes_arr, values, marker='o', label=f'Open Addressing ({probe_type})', linewidth=2)

        chaining_values = [r[metric] for r in results['separate_chaining']]
        ax.plot(sizes_arr, chaining_values, marker='s', label='Separate Chaining', linewidth=2, linestyle='--')

        ax.set_xlabel('Number of Elements')
        ax.set_ylabel(ylabel)
        ax.set_title(title, fontweight='bold')
        ax.legend()
        ax.grid(alpha=0.3)
        ax.set_xscale('log')
        if log_y:
            ax.set_yscale('log')

    plt.tight_layout()
    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'open_addressing_vs_chaining.png')
    # savefig does not create missing directories; make the function usable
    # when it is called on its own rather than through the script entry point.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    print(f"Saved: {output_path}")
    plt.close()
|
||||
|
||||
|
||||
def plot_load_factor_impact():
    """Plot per-element insert and search time against load factor.

    Uses ``benchmark_load_factor_impact`` (initial size 100, up to 1000
    elements, linear probing, 30 runs).  Both panels show open addressing
    and separate chaining with error bars taken from the ``*_time_std``
    fields when a result row has them (0 otherwise).  The search panel adds
    the average chain length of separate chaining on a secondary y-axis.
    The figure is saved as ``docs/load_factor_impact.png`` relative to the
    parent of this script's directory; the ``docs`` directory is created if
    it does not exist yet.
    """
    print("Generating load factor impact plot...")

    results = benchmark_load_factor_impact(initial_size=100, max_elements=1000, probe_type='linear', num_runs=30)

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    fig.suptitle('Performance Impact of Load Factor', fontsize=16, fontweight='bold')

    # Sort by load factor to avoid zig-zag lines.
    oa_sorted = sorted(results['open_addressing'], key=lambda x: x['load_factor'])
    sc_sorted = sorted(results['separate_chaining'], key=lambda x: x['load_factor'])

    def column(rows, key):
        """Return the required field ``key`` from every row."""
        return [r[key] for r in rows]

    def std_column(rows, key):
        """Return the optional std-dev field ``key``, defaulting to 0."""
        return [r.get(key, 0) for r in rows]

    oa_load_factors = column(oa_sorted, 'load_factor')
    sc_load_factors = column(sc_sorted, 'load_factor')
    sc_chain_lengths = column(sc_sorted, 'avg_chain_length')

    # Keyword arguments shared by both panels for each table type.
    oa_style = dict(marker='o', label='Open Addressing (Linear)', linewidth=2,
                    capsize=3, capthick=1.5, alpha=0.8)
    sc_style = dict(marker='s', label='Separate Chaining', linewidth=2, linestyle='--',
                    capsize=3, capthick=1.5, alpha=0.8)

    # Insert time vs load factor (per element) with error bars.
    ax = axes[0]
    ax.errorbar(oa_load_factors, column(oa_sorted, 'insert_time'),
                yerr=std_column(oa_sorted, 'insert_time_std'), **oa_style)
    ax.errorbar(sc_load_factors, column(sc_sorted, 'insert_time'),
                yerr=std_column(sc_sorted, 'insert_time_std'), **sc_style)
    ax.set_xlabel('Load Factor')
    ax.set_ylabel('Insert Time per Element (seconds)')
    ax.set_title('Insert Time vs Load Factor', fontweight='bold')
    ax.legend()
    ax.grid(alpha=0.3)

    # Search time vs load factor (per element) with error bars.
    ax = axes[1]
    ax.errorbar(oa_load_factors, column(oa_sorted, 'search_time'),
                yerr=std_column(oa_sorted, 'search_time_std'), **oa_style)
    ax.errorbar(sc_load_factors, column(sc_sorted, 'search_time'),
                yerr=std_column(sc_sorted, 'search_time_std'), **sc_style)

    # Chain length is drawn as a plain line (no error bars) on a twin axis
    # because it uses a different unit than the timings.
    ax2 = ax.twinx()
    ax2.plot(sc_load_factors, sc_chain_lengths, marker='^',
             label='Avg Chain Length (SC)', color='green', linestyle=':', linewidth=2)
    ax.set_xlabel('Load Factor')
    ax.set_ylabel('Search Time per Element (seconds)', color='blue')
    ax2.set_ylabel('Average Chain Length', color='green')
    ax.set_title('Search Time vs Load Factor', fontweight='bold')
    # Each axis keeps its own legend, placed in opposite corners.
    ax.legend(loc='upper left')
    ax2.legend(loc='upper right')
    ax.grid(alpha=0.3)

    plt.tight_layout()
    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'load_factor_impact.png')
    # savefig does not create missing directories; make the function usable
    # when it is called on its own rather than through the script entry point.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    print(f"Saved: {output_path}")
    plt.close()
|
||||
|
||||
|
||||
def plot_load_factor_impact_probes():
    """Plot probe counts and comparisons at different load factors.

    Uses counted metrics (probes, comparisons) from
    ``benchmark_load_factor_impact_probes`` (initial size 100, up to 1000
    elements, linear probing, 10 runs) instead of wall-clock timings, so
    the curves are not affected by timing noise.

    The 2x2 figure shows insert probes and search probes for open
    addressing (top row) and insert/search comparisons for both table
    types (bottom row); the last panel also shows the average chain length
    of separate chaining on a secondary y-axis.  The figure is saved as
    ``docs/load_factor_impact_probes.png`` relative to the parent of this
    script's directory; the ``docs`` directory is created if it does not
    exist yet.
    """
    print("Generating load factor impact plot (probe counts)...")

    results = benchmark_load_factor_impact_probes(initial_size=100, max_elements=1000, probe_type='linear', num_runs=10)

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Performance Impact of Load Factor (Probe Counts & Comparisons)', fontsize=16, fontweight='bold')

    # Sort by load factor so the lines are drawn left to right.
    oa_sorted = sorted(results['open_addressing'], key=lambda x: x['load_factor'])
    sc_sorted = sorted(results['separate_chaining'], key=lambda x: x['load_factor'])

    def column(rows, key):
        """Return the field ``key`` from every row."""
        return [r[key] for r in rows]

    oa_load_factors = column(oa_sorted, 'load_factor')
    sc_load_factors = column(sc_sorted, 'load_factor')

    # Keyword arguments shared by every line of the same table type.
    oa_style = dict(marker='o', label='Open Addressing (Linear)',
                    linewidth=2, color='blue', markersize=6)
    sc_style = dict(marker='s', label='Separate Chaining',
                    linewidth=2, linestyle='--', color='orange', markersize=6)

    # Top row: probes per element, open addressing only.
    probe_panels = [
        (axes[0, 0], 'insert_probes_per_element', 'Insert: Probes per Element (Open Addressing)'),
        (axes[0, 1], 'search_probes_per_element', 'Search: Probes per Element (Open Addressing)'),
    ]
    for ax, metric, title in probe_panels:
        ax.plot(oa_load_factors, column(oa_sorted, metric), **oa_style)
        ax.set_xlabel('Load Factor')
        ax.set_ylabel('Probes per Element')
        ax.set_title(title, fontweight='bold')
        ax.legend()
        ax.grid(alpha=0.3)

    # Insert comparisons per element (both methods).
    ax = axes[1, 0]
    ax.plot(oa_load_factors, column(oa_sorted, 'insert_comparisons_per_element'), **oa_style)
    ax.plot(sc_load_factors, column(sc_sorted, 'insert_comparisons_per_element'), **sc_style)
    ax.set_xlabel('Load Factor')
    ax.set_ylabel('Comparisons per Element')
    ax.set_title('Insert: Comparisons per Element', fontweight='bold')
    ax.legend()
    ax.grid(alpha=0.3)

    # Search comparisons per element, plus chain length on a twin axis.
    ax = axes[1, 1]
    ax.plot(oa_load_factors, column(oa_sorted, 'search_comparisons_per_element'), **oa_style)
    ax.plot(sc_load_factors, column(sc_sorted, 'search_comparisons_per_element'), **sc_style)
    ax2 = ax.twinx()
    ax2.plot(sc_load_factors, column(sc_sorted, 'avg_chain_length'), marker='^', label='Avg Chain Length (SC)',
             color='green', linestyle=':', linewidth=2, markersize=6)
    ax.set_xlabel('Load Factor')
    ax.set_ylabel('Comparisons per Element', color='blue')
    ax2.set_ylabel('Average Chain Length', color='green')
    ax.set_title('Search: Comparisons per Element', fontweight='bold')
    # Each axis keeps its own legend, placed in opposite corners.
    ax.legend(loc='upper left')
    ax2.legend(loc='upper right')
    ax.grid(alpha=0.3)

    plt.tight_layout()
    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'load_factor_impact_probes.png')
    # savefig does not create missing directories; make the function usable
    # when it is called on its own rather than through the script entry point.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    print(f"Saved: {output_path}")
    plt.close()
|
||||
|
||||
|
||||
def plot_collision_analysis():
    """Plot the collision count of each hash function as a labelled bar chart.

    Runs ``benchmark_hash_functions`` over the keys returned by
    ``generate_test_data(500)`` with a table size of 100.  Functions whose
    display name contains ``'Bad'`` are drawn in coral, all others in steel
    blue, and each bar is annotated with its integer collision count.  The
    figure is saved as ``docs/collision_analysis.png`` relative to the
    parent of this script's directory; the ``docs`` directory is created if
    it does not exist yet.
    """
    print("Generating collision analysis plot...")

    keys = generate_test_data(500)
    table_size = 100

    # String hashes receive str(k) so that every function can be fed the
    # same generated keys.
    hash_funcs = {
        'Division': division_hash,
        'Multiplication': lambda k, s: multiplication_hash(k, s),
        'Simple String': lambda k, s: string_hash_simple(str(k), s),
        'Polynomial': lambda k, s: string_hash_polynomial(str(k), s),
        'Bad Clustering': bad_hash_clustering,
    }

    results = benchmark_hash_functions(hash_funcs, keys, table_size)

    fig, ax = plt.subplots(figsize=(12, 6))

    names = list(results.keys())
    collision_counts = [results[n]['collisions'] for n in names]
    # Highlight the deliberately poor hash function(s).
    colors = ['coral' if 'Bad' in n else 'steelblue' for n in names]

    bars = ax.bar(names, collision_counts, color=colors)
    ax.set_xlabel('Hash Function', fontweight='bold')
    ax.set_ylabel('Number of Collisions', fontweight='bold')
    ax.set_title('Collision Analysis: Good vs Bad Hash Functions', fontsize=14, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

    # Add value labels centred just above each bar.
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height,
                f'{int(height)}',
                ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    output_path = os.path.join(os.path.dirname(__file__), '..', 'docs', 'collision_analysis.png')
    # savefig does not create missing directories; make the function usable
    # when it is called on its own rather than through the script entry point.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    print(f"Saved: {output_path}")
    plt.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # All plots are written into ../docs relative to this script; create it
    # up front so savefig has somewhere to write.
    docs_dir = os.path.join(os.path.dirname(__file__), '..', 'docs')
    os.makedirs(docs_dir, exist_ok=True)

    print("Generating visualization plots...")
    print("This may take a few minutes...\n")

    # Run every plot generator in a fixed order.
    plot_generators = (
        plot_hash_function_comparison,
        plot_open_addressing_vs_chaining,
        plot_load_factor_impact,
        plot_load_factor_impact_probes,
        plot_collision_analysis,
    )
    for generate in plot_generators:
        generate()

    print("\nAll plots generated successfully!")
|
||||
|
||||
207
examples/hash_tables_demo.py
Normal file
207
examples/hash_tables_demo.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""
|
||||
Demonstration of hash table implementations and their usage.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
from src.hash_tables import (
|
||||
DirectAddressTable,
|
||||
HashTableOpenAddressing,
|
||||
HashTableSeparateChaining
|
||||
)
|
||||
from src.hash_functions import (
|
||||
division_hash,
|
||||
string_hash_polynomial,
|
||||
string_hash_simple
|
||||
)
|
||||
|
||||
|
||||
def demo_direct_address_table():
    """Show insert, search and delete on a ``DirectAddressTable``.

    Inserts three key/value pairs, prints them back, searches for a present
    and an absent key, then deletes one key and prints the lookup result
    again.
    """
    rule = "=" * 60
    print(rule)
    print("Direct-Address Table Demonstration")
    print(rule)

    # Table constructed with 100; every key used below lies in [0, 99].
    table = DirectAddressTable(100)

    # Insert some values.
    entries = ((5, "Alice"), (42, "Bob"), (99, "Charlie"))
    for slot, person in entries:
        table.insert(slot, person)

    print("\nInserted key-value pairs:")
    for slot, _ in entries:
        print(f" Key {slot} ->", table.search(slot))

    # Search: 42 was inserted above, 10 never was.
    print("\nSearching for key 42:", table.search(42))
    print("Searching for key 10:", table.search(10))

    # Delete, then look the key up again to show it is gone.
    table.delete(42)
    print("\nAfter deleting key 42:")
    print(" Key 42 ->", table.search(42))
    print()
|
||||
|
||||
|
||||
def demo_open_addressing():
    """Show ``HashTableOpenAddressing`` with linear, quadratic and double probing.

    The same nine keys are inserted into one table per probe type and the
    resulting load factor is printed.  The linear-probing table is also
    searched for three inserted keys and one key that was never inserted.
    """
    rule = "=" * 60
    print(rule)
    print("Open Addressing Hash Table Demonstration")
    print(rule)

    keys = [10, 22, 31, 4, 15, 28, 17, 88, 59]

    def build_table(heading, size, probe_type):
        """Print the section heading, fill a new table and report on it."""
        print(heading)
        table = HashTableOpenAddressing(size, probe_type=probe_type)
        for k in keys:
            table.insert(k, f"Value_{k}")
        print(f"Inserted {len(keys)} keys")
        print(f"Load factor: {table._load_factor():.2f}")
        return table

    # Linear probing: also demonstrate lookups (99 was never inserted).
    linear_table = build_table("\n--- Linear Probing ---", 10, 'linear')

    print("\nSearching for keys:")
    for wanted in [10, 22, 88, 99]:
        status = 'Found' if linear_table.search(wanted) else 'Not found'
        print(f" Key {wanted}: {status}")

    # Quadratic probing and double hashing use a larger table (20 slots)
    # so that every key can be placed by their probe sequences.
    build_table("\n--- Quadratic Probing ---", 20, 'quadratic')
    build_table("\n--- Double Hashing ---", 20, 'double')
    print()
|
||||
|
||||
|
||||
def demo_separate_chaining():
    """Show insert, chain statistics, search and delete on a chained table.

    Inserts ten keys into a ``HashTableSeparateChaining(10)``, prints the
    load factor and chain-length statistics, searches for four keys, then
    deletes two keys and searches for them again.
    """
    rule = "=" * 60
    print(rule)
    print("Separate Chaining Hash Table Demonstration")
    print(rule)

    ht = HashTableSeparateChaining(10)

    keys = [10, 22, 31, 4, 15, 28, 17, 88, 59, 71]
    for k in keys:
        ht.insert(k, f"Value_{k}")

    print(f"\nInserted {len(keys)} keys")
    print(f"Load factor: {ht._load_factor():.2f}")

    # Chain statistics as reported by the table itself.
    lengths = ht.get_chain_lengths()
    mean_length = sum(lengths) / len(lengths)
    print(f"Chain lengths: {lengths}")
    print(f"Average chain length: {mean_length:.2f}")
    print(f"Maximum chain length: {max(lengths)}")

    def status(k):
        """Return the display text for a lookup of ``k``."""
        return 'Found' if ht.search(k) else 'Not found'

    print("\nSearching for keys:")
    for wanted in [10, 22, 88, 99]:
        print(f" Key {wanted}: {status(wanted)}")

    # Delete some keys, then look them up again.
    print("\nDeleting keys 22 and 88:")
    removed = (22, 88)
    for k in removed:
        ht.delete(k)
    for k in removed:
        print(f" Key {k}: {status(k)}")
    print()
|
||||
|
||||
|
||||
def demo_hash_functions():
    """Print hash values from the division method and two string hashes.

    Uses a table size of 11.  The division method is applied to the first
    five integer keys; the simple and polynomial string hashes are applied
    to five sample words.
    """
    rule = "=" * 60
    print(rule)
    print("Hash Function Demonstration")
    print(rule)

    keys = [10, 22, 31, 4, 15, 28, 17, 88, 59, 71]
    table_size = 11

    print(f"\nKeys: {keys}")
    print(f"Table size: {table_size}\n")

    # Division method, shown for the first five keys only.
    print("Division method (h(k) = k mod m):")
    for k in keys[:5]:
        print(f" h({k}) = {division_hash(k, table_size)}")

    # String hashing: same words through both functions for comparison.
    print("\nString hash functions:")
    words = ["hello", "world", "hash", "table", "test"]

    string_demos = (
        ("Simple string hash (BAD - prone to collisions):", string_hash_simple),
        ("\nPolynomial string hash (GOOD - better distribution):", string_hash_polynomial),
    )
    for heading, hash_func in string_demos:
        print(heading)
        for word in words:
            print(f" h('{word}') = {hash_func(word, table_size)}")
    print()
|
||||
|
||||
|
||||
def demo_collision_comparison():
    """Compare collisions of the division and multiplication hash methods.

    Hashes the integer keys 100..199 into a table of size 50 with each
    method and prints, per method, the number of collisions (keys minus
    distinct hash values), the collision rate as a percentage of all keys,
    and how many distinct buckets were used.
    """
    # multiplication_hash is not among the module-level imports of this
    # script, so it is imported here; division_hash comes from module scope.
    from src.hash_functions import multiplication_hash

    print("=" * 60)
    print("Collision Comparison Demonstration")
    print("=" * 60)

    # Generate test keys.
    keys = list(range(100, 200))
    table_size = 50

    hash_funcs = {
        'Division': division_hash,
        'Multiplication': lambda k, s: multiplication_hash(k, s),
    }

    print(f"\nTesting with {len(keys)} keys and table size {table_size}\n")

    for name, hash_func in hash_funcs.items():
        hash_values = [hash_func(k, table_size) for k in keys]
        # Build the set once; it drives both the collision count and the
        # bucket usage figure.
        buckets_used = len(set(hash_values))
        collisions = len(keys) - buckets_used
        collision_rate = collisions / len(keys) * 100

        print(f"{name} method:")
        print(f" Collisions: {collisions}")
        print(f" Collision rate: {collision_rate:.2f}%")
        print(f" Buckets used: {buckets_used}/{table_size}")
        print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every demonstration in a fixed order.
    demos = (
        demo_direct_address_table,
        demo_open_addressing,
        demo_separate_chaining,
        demo_hash_functions,
        demo_collision_comparison,
    )
    for run_demo in demos:
        run_demo()
|
||||
|
||||
Reference in New Issue
Block a user