Initial commit: Randomized Quicksort and Hash Table with Chaining implementation
- Implemented the Randomized Quicksort algorithm with performance analysis. - Implemented a Hash Table with Chaining for collision resolution. - Added a comprehensive test suite (30+ test cases). - Created a test runner script with multiple test options. - Added a detailed README with architecture diagrams and documentation. - Added the MIT License. - Includes examples and comprehensive documentation.
This commit is contained in:
10
src/__init__.py
Normal file
10
src/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
MSCS532 Assignment 3: Understanding Algorithm Efficiency and Scalability
|
||||
|
||||
This package contains implementations of:
|
||||
- Randomized Quicksort algorithm
|
||||
- Hashing with Chaining data structure
|
||||
"""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
|
||||
148
src/examples.py
Normal file
148
src/examples.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
Example usage of Randomized Quicksort and Hash Table implementations.
|
||||
|
||||
This module demonstrates how to use the algorithms and data structures
|
||||
implemented in this project.
|
||||
"""
|
||||
|
||||
import random
|
||||
from src.quicksort import (
|
||||
randomized_quicksort,
|
||||
compare_with_builtin,
|
||||
analyze_performance
|
||||
)
|
||||
from src.hash_table import HashTable
|
||||
|
||||
|
||||
def example_quicksort():
    """Demonstrate randomized quicksort usage."""
    banner = "=" * 60
    print(banner)
    print("Randomized Quicksort Example")
    print(banner)

    # Example 1: sort a small fixed array.
    print("\n1. Basic Sorting:")
    sample = [64, 34, 25, 12, 22, 11, 90, 5]
    print(f"Original array: {sample}")
    sample_sorted = randomized_quicksort(sample)
    print(f"Sorted array: {sample_sorted}")

    # Example 2: sort a randomly generated array.
    print("\n2. Large Random Array:")
    random_sample = [random.randint(1, 1000) for _ in range(20)]
    print(f"Original array (first 20 elements): {random_sample[:20]}")
    random_sorted = randomized_quicksort(random_sample)
    print(f"Sorted array (first 20 elements): {random_sorted[:20]}")

    # Example 3: time quicksort against Python's built-in sorted().
    print("\n3. Performance Comparison with Built-in Sort:")
    bench_data = [random.randint(1, 100000) for _ in range(10000)]
    stats = compare_with_builtin(bench_data)
    print(f"Array length: {stats['array_length']}")
    print(f"Quicksort time: {stats['quicksort_time']:.6f} seconds")
    print(f"Built-in sort time: {stats['builtin_time']:.6f} seconds")
    print(f"Speedup ratio: {stats['speedup']:.2f}x")
    print(f"Results match: {stats['is_correct']}")
def example_hash_table():
    """Demonstrate hash table with chaining usage.

    Walks through insert/search/delete operations, the `in` operator,
    item enumeration, and collision behavior on a deliberately small table.
    """
    print("\n" + "=" * 60)
    print("Hash Table with Chaining Example")
    print("=" * 60)

    # Create hash table (small so resizing/chaining is easy to observe).
    ht = HashTable(initial_size=8)

    # Example 1: Insert operations
    print("\n1. Insert Operations:")
    keys_values = [
        (1, "apple"),
        (2, "banana"),
        (3, "cherry"),
        (10, "date"),
        (11, "elderberry"),
        (18, "fig"),
        (19, "grape"),
        (26, "honeydew")
    ]

    for key, value in keys_values:
        ht.insert(key, value)
        print(f"Inserted ({key}, {value}) - Load factor: {ht.get_load_factor():.2f}")

    print(f"\nHash table size: {ht.size}")
    print(f"Number of elements: {len(ht)}")
    print(f"Load factor: {ht.get_load_factor():.2f}")

    # Example 2: Search operations
    print("\n2. Search Operations:")
    search_keys = [1, 3, 11, 99]
    for key in search_keys:
        value = ht.get(key)
        # Compare against None explicitly: a falsy stored value (e.g. "" or 0)
        # would otherwise be misreported as "not found".
        if value is not None:
            print(f"Key {key} found: {value}")
        else:
            print(f"Key {key} not found")

    # Example 3: Contains operator
    print("\n3. Using 'in' Operator:")
    test_keys = [2, 5, 18]
    for key in test_keys:
        print(f"{key} in hash table: {key in ht}")

    # Example 4: Delete operations
    print("\n4. Delete Operations:")
    delete_key = 3
    print(f"Deleting key {delete_key}...")
    deleted = ht.delete(delete_key)
    print(f"Delete successful: {deleted}")
    print(f"Key {delete_key} still exists: {delete_key in ht}")
    print(f"Updated count: {len(ht)}")

    # Example 5: Get all items
    print("\n5. All Items in Hash Table:")
    all_items = ht.get_all_items()
    for key, value in all_items:
        print(f"  Key: {key}, Value: {value}")

    # Example 6: Collision demonstration — keys congruent mod the table size
    # tend to land in the same bucket, exercising the chains.
    print("\n6. Collision Resolution (Chaining):")
    collision_ht = HashTable(initial_size=5)
    collision_keys = [1, 6, 11, 16, 21]
    for key in collision_keys:
        collision_ht.insert(key, f"value_{key}")
    print("Hash table with collisions:")
    print(f"  Size: {collision_ht.size}")
    print(f"  Count: {len(collision_ht)}")
    print(f"  Load factor: {collision_ht.get_load_factor():.2f}")
    print(f"  Items: {collision_ht.get_all_items()}")
def example_performance_analysis():
    """Demonstrate performance analysis of quicksort."""
    print("\n" + "=" * 60)
    print("Performance Analysis Example")
    print("=" * 60)

    print("\nAnalyzing quicksort performance across different array sizes:")
    results = analyze_performance([100, 1000, 10000])

    # Fixed-width columns so the numbers line up as a table.
    print("\nResults:")
    print(f"{'Size':<10} {'Quicksort Time':<18} {'Built-in Time':<18} {'Speedup':<10} {'Correct':<10}")
    print("-" * 70)
    for row in results:
        line = (
            f"{row['array_length']:<10} "
            f"{row['quicksort_time']:<18.6f} "
            f"{row['builtin_time']:<18.6f} "
            f"{row['speedup']:<10.2f} "
            f"{str(row['is_correct']):<10}"
        )
        print(line)
if __name__ == "__main__":
|
||||
# Run all examples
|
||||
example_quicksort()
|
||||
example_hash_table()
|
||||
example_performance_analysis()
|
||||
|
||||
204
src/hash_table.py
Normal file
204
src/hash_table.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""
|
||||
Hash Table with Chaining Implementation
|
||||
|
||||
This module provides a hash table implementation using chaining
|
||||
for collision resolution.
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Tuple, Iterator
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class HashNode:
    """Singly linked list node holding one key-value pair in a bucket chain."""
    key: int
    # `object` rather than the builtin `any` (a function, not a type).
    value: object
    next: Optional['HashNode'] = None


class HashTable:
    """
    Hash Table implementation using chaining for collision resolution.

    Each bucket holds a singly linked chain of HashNode objects; colliding
    keys are prepended to their bucket's chain. The table doubles in size
    (rehashing every element) once the load factor reaches the configured
    threshold.

    Time Complexity:
        - Average: O(1) for insert, search, delete
        - Worst: O(n) when all keys hash to the same bucket

    Space Complexity: O(n + m) where n is number of elements, m is table size
    """

    def __init__(self, initial_size: int = 16, load_factor_threshold: float = 0.75):
        """
        Initialize hash table.

        Args:
            initial_size: Initial number of buckets
            load_factor_threshold: Load factor (count/size) that triggers a resize
        """
        self.size = initial_size
        self.load_factor_threshold = load_factor_threshold
        self.count = 0
        self.buckets: List[Optional[HashNode]] = [None] * self.size

    def _hash(self, key: int) -> int:
        """
        Map a key to a bucket index using the multiplication method.

        h(k) = floor(m * (k * A mod 1)) with A = (sqrt(5) - 1) / 2 ~ 0.618,
        the constant Knuth recommends for good key dispersion.

        Args:
            key: Key to hash

        Returns:
            Bucket index in [0, self.size)
        """
        A = 0.6180339887498949
        # Python's % yields a non-negative result for a positive modulus,
        # so negative keys also map into [0, size).
        return int(self.size * ((key * A) % 1))

    def _resize(self) -> None:
        """Double the bucket count and rehash every stored element."""
        old_buckets = self.buckets

        # Double the size; insert() below re-counts each rehashed element.
        self.size *= 2
        self.count = 0
        self.buckets = [None] * self.size

        for bucket in old_buckets:
            current = bucket
            while current is not None:
                self.insert(current.key, current.value)
                current = current.next

    def insert(self, key: int, value: object) -> None:
        """
        Insert a key-value pair, updating the value if the key already exists.

        Args:
            key: Key to insert
            value: Value associated with the key
        """
        # Grow before inserting so chains stay short on average. The doubled
        # size halves the load factor, so rehashing cannot re-trigger this.
        if self.count / self.size >= self.load_factor_threshold:
            self._resize()

        bucket_index = self._hash(key)

        # Existing key: overwrite its value in place.
        current = self.buckets[bucket_index]
        while current is not None:
            if current.key == key:
                current.value = value
                return
            current = current.next

        # New key: prepend to the chain (O(1)).
        self.buckets[bucket_index] = HashNode(key, value, self.buckets[bucket_index])
        self.count += 1

    def get(self, key: int) -> Optional[object]:
        """
        Retrieve the value associated with a key.

        Args:
            key: Key to search for

        Returns:
            The stored value, or None if the key is absent. A stored value
            of None is indistinguishable from a missing key here; use
            contains() / the `in` operator to test membership.
        """
        current = self.buckets[self._hash(key)]
        while current is not None:
            if current.key == key:
                return current.value
            current = current.next
        return None

    def delete(self, key: int) -> bool:
        """
        Delete a key-value pair from the hash table.

        Args:
            key: Key to delete

        Returns:
            True if key was found and deleted, False otherwise
        """
        bucket_index = self._hash(key)
        current = self.buckets[bucket_index]
        prev = None

        while current is not None:
            if current.key == key:
                if prev is None:
                    # Unlink the head of the chain.
                    self.buckets[bucket_index] = current.next
                else:
                    # Unlink from the middle or end of the chain.
                    prev.next = current.next
                self.count -= 1
                return True
            prev = current
            current = current.next

        return False

    def contains(self, key: int) -> bool:
        """
        Check if a key exists in the hash table.

        Walks the chain directly (instead of testing get() against None) so
        that keys whose stored value is None are still reported as present.

        Args:
            key: Key to check

        Returns:
            True if key exists, False otherwise
        """
        current = self.buckets[self._hash(key)]
        while current is not None:
            if current.key == key:
                return True
            current = current.next
        return False

    def get_load_factor(self) -> float:
        """
        Get current load factor of the hash table.

        Returns:
            Load factor (count / size), or 0.0 for a zero-sized table
        """
        return self.count / self.size if self.size > 0 else 0.0

    def get_all_items(self) -> List[Tuple[int, object]]:
        """
        Get all key-value pairs in the hash table.

        Returns:
            List of (key, value) tuples in bucket/chain order (unsorted)
        """
        items = []
        for bucket in self.buckets:
            current = bucket
            while current is not None:
                items.append((current.key, current.value))
                current = current.next
        return items

    def __len__(self) -> int:
        """Return the number of stored key-value pairs."""
        return self.count

    def __contains__(self, key: int) -> bool:
        """Support the 'in' operator via contains()."""
        return self.contains(key)

    def __repr__(self) -> str:
        """Debug-friendly summary including every stored item."""
        items = self.get_all_items()
        return f"HashTable(size={self.size}, count={self.count}, load_factor={self.get_load_factor():.2f}, items={items})"
178
src/quicksort.py
Normal file
178
src/quicksort.py
Normal file
@@ -0,0 +1,178 @@
|
||||
"""
|
||||
Randomized Quicksort Implementation
|
||||
|
||||
This module provides a randomized quicksort algorithm implementation
|
||||
along with utilities for performance analysis and comparison.
|
||||
"""
|
||||
|
||||
import random
|
||||
from typing import List, Callable, Tuple
|
||||
import time
|
||||
|
||||
|
||||
def randomized_quicksort(arr: List[int], low: int = None, high: int = None) -> List[int]:
    """
    Sort a list of integers with randomized quicksort and return a new list.

    The input is copied, so the caller's list is never mutated. Random pivot
    selection makes the quadratic worst case vanishingly unlikely.

    Time Complexity:
        - Average / Best: O(n log n)
        - Worst: O(n²) (rarely occurs due to randomization)

    Space Complexity: O(log n) expected, for the work stack

    Args:
        arr: List of integers to sort
        low: Starting index (default: 0)
        high: Ending index (default: len(arr) - 1)

    Returns:
        Sorted list of integers
    """
    start = 0 if low is None else low
    end = len(arr) - 1 if high is None else high

    # Work on a copy so the caller's list is untouched.
    result = arr.copy()

    # Iterative depth-first traversal of the partition tree. The left
    # subrange is pushed last so it is popped first, matching the order
    # of the recursive formulation exactly.
    pending = [(start, end)]
    while pending:
        lo, hi = pending.pop()
        if lo < hi:
            split = randomized_partition(result, lo, hi)
            pending.append((split + 1, hi))
            pending.append((lo, split - 1))

    return result
def randomized_partition(arr: List[int], low: int, high: int) -> int:
    """
    Partition arr[low..high] in place around a uniformly random pivot.

    Randomizing the pivot choice keeps quicksort's expected running time at
    O(n log n) even on adversarial inputs (e.g. already-sorted arrays) that
    would force the deterministic last-element pivot into O(n²).

    Args:
        arr: List to partition
        low: Starting index
        high: Ending index

    Returns:
        Final position of pivot element
    """
    # Move a uniformly chosen element into the pivot slot (index high),
    # then defer to the deterministic Lomuto partition.
    chosen = random.randint(low, high)
    arr[high], arr[chosen] = arr[chosen], arr[high]
    return partition(arr, low, high)
def partition(arr: List[int], low: int, high: int) -> int:
    """
    Lomuto partition of arr[low..high] around the pivot arr[high].

    After the call, every element left of the returned index is <= pivot
    and every element right of it is > pivot. The list is modified in place.

    Args:
        arr: List to partition
        low: Starting index
        high: Ending index (pivot position)

    Returns:
        Final position of pivot element
    """
    pivot_value = arr[high]
    boundary = low  # next free slot in the <= pivot region

    for cursor in range(low, high):
        if arr[cursor] <= pivot_value:
            # Grow the <= region by swapping the current element into it.
            arr[boundary], arr[cursor] = arr[cursor], arr[boundary]
            boundary += 1

    # Drop the pivot between the two regions.
    arr[boundary], arr[high] = arr[high], arr[boundary]
    return boundary
def measure_time(func: Callable, *args, **kwargs) -> Tuple[float, object]:
    """
    Measure the wall-clock execution time of a single function call.

    Uses time.perf_counter(), a monotonic clock with the highest available
    resolution, so even short calls are timed as accurately as possible.

    Args:
        func: Function to measure
        *args: Positional arguments for function
        **kwargs: Keyword arguments for function

    Returns:
        Tuple of (execution_time_in_seconds, function_result)
    """
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    end_time = time.perf_counter()
    return end_time - start_time, result
def compare_with_builtin(arr: List[int]) -> dict:
    """
    Compare randomized quicksort with Python's built-in sort on one input.

    Args:
        arr: List to sort

    Returns:
        Dictionary with keys 'quicksort_time', 'builtin_time', 'speedup'
        (quicksort time over built-in time — values above 1 mean quicksort
        was slower), 'is_correct', and 'array_length'
    """
    qs_elapsed, qs_result = measure_time(randomized_quicksort, arr)
    builtin_elapsed, builtin_result = measure_time(sorted, arr)

    ratio = qs_elapsed / builtin_elapsed if builtin_elapsed > 0 else float('inf')

    return {
        'quicksort_time': qs_elapsed,
        'builtin_time': builtin_elapsed,
        'speedup': ratio,
        'is_correct': qs_result == builtin_result,
        'array_length': len(arr),
    }
def analyze_performance(array_sizes: List[int] = None) -> List[dict]:
    """
    Analyze quicksort performance across different array sizes.

    Args:
        array_sizes: List of array sizes to test (default: [100, 1000, 10000, 100000])

    Returns:
        List of performance metrics (one compare_with_builtin() dict per size)
    """
    sizes = [100, 1000, 10000, 100000] if array_sizes is None else array_sizes

    # One random array per requested size, benchmarked immediately.
    return [
        compare_with_builtin([random.randint(1, 1000000) for _ in range(size)])
        for size in sizes
    ]
Reference in New Issue
Block a user