Initial commit: Randomized Quicksort and Hash Table with Chaining implementation

- Implemented Randomized Quicksort algorithm with performance analysis
- Implemented Hash Table with Chaining for collision resolution
- Added comprehensive test suite (30+ test cases)
- Created test runner script with multiple test options
- Added detailed README with architecture diagrams and documentation
- Added MIT License
- Includes examples and comprehensive documentation
This commit is contained in:
Carlos Gutierrez
2025-11-04 21:35:02 -05:00
commit a7fe11fd74
12 changed files with 2024 additions and 0 deletions

10
src/__init__.py Normal file
View File

@@ -0,0 +1,10 @@
"""
MSCS532 Assignment 3: Understanding Algorithm Efficiency and Scalability
This package contains implementations of:
- Randomized Quicksort algorithm
- Hashing with Chaining data structure
"""
__version__ = "1.0.0"

148
src/examples.py Normal file
View File

@@ -0,0 +1,148 @@
"""
Example usage of Randomized Quicksort and Hash Table implementations.
This module demonstrates how to use the algorithms and data structures
implemented in this project.
"""
import random
from src.quicksort import (
randomized_quicksort,
compare_with_builtin,
analyze_performance
)
from src.hash_table import HashTable
def example_quicksort():
    """Walk through three randomized-quicksort demos: basic, large, timed."""
    banner = "=" * 60
    print(banner)
    print("Randomized Quicksort Example")
    print(banner)

    # Demo 1: sort a small fixed array.
    print("\n1. Basic Sorting:")
    sample = [64, 34, 25, 12, 22, 11, 90, 5]
    print(f"Original array: {sample}")
    ordered = randomized_quicksort(sample)
    print(f"Sorted array: {ordered}")

    # Demo 2: sort a larger randomly generated array.
    print("\n2. Large Random Array:")
    big_sample = [random.randint(1, 1000) for _ in range(20)]
    print(f"Original array (first 20 elements): {big_sample[:20]}")
    big_ordered = randomized_quicksort(big_sample)
    print(f"Sorted array (first 20 elements): {big_ordered[:20]}")

    # Demo 3: time quicksort against the built-in sorted().
    print("\n3. Performance Comparison with Built-in Sort:")
    workload = [random.randint(1, 100000) for _ in range(10000)]
    timing = compare_with_builtin(workload)
    print(f"Array length: {timing['array_length']}")
    print(f"Quicksort time: {timing['quicksort_time']:.6f} seconds")
    print(f"Built-in sort time: {timing['builtin_time']:.6f} seconds")
    print(f"Speedup ratio: {timing['speedup']:.2f}x")
    print(f"Results match: {timing['is_correct']}")
def example_hash_table():
    """Demonstrate hash table with chaining usage.

    Covers insert/search/delete, the 'in' operator, full-item listing,
    and collision behavior in a deliberately undersized table.
    """
    print("\n" + "=" * 60)
    print("Hash Table with Chaining Example")
    print("=" * 60)

    # Create hash table (small so load-factor growth is visible per insert)
    ht = HashTable(initial_size=8)

    # Example 1: Insert operations
    print("\n1. Insert Operations:")
    keys_values = [
        (1, "apple"),
        (2, "banana"),
        (3, "cherry"),
        (10, "date"),
        (11, "elderberry"),
        (18, "fig"),
        (19, "grape"),
        (26, "honeydew")
    ]
    for key, value in keys_values:
        ht.insert(key, value)
        print(f"Inserted ({key}, {value}) - Load factor: {ht.get_load_factor():.2f}")
    print(f"\nHash table size: {ht.size}")
    print(f"Number of elements: {len(ht)}")
    print(f"Load factor: {ht.get_load_factor():.2f}")

    # Example 2: Search operations
    print("\n2. Search Operations:")
    search_keys = [1, 3, 11, 99]
    for key in search_keys:
        value = ht.get(key)
        # Compare against None explicitly: a falsy stored value (e.g. "" or 0)
        # would otherwise be misreported as "not found".
        if value is not None:
            print(f"Key {key} found: {value}")
        else:
            print(f"Key {key} not found")

    # Example 3: Contains operator
    print("\n3. Using 'in' Operator:")
    test_keys = [2, 5, 18]
    for key in test_keys:
        print(f"{key} in hash table: {key in ht}")

    # Example 4: Delete operations
    print("\n4. Delete Operations:")
    delete_key = 3
    print(f"Deleting key {delete_key}...")
    deleted = ht.delete(delete_key)
    print(f"Delete successful: {deleted}")
    print(f"Key {delete_key} still exists: {delete_key in ht}")
    print(f"Updated count: {len(ht)}")

    # Example 5: Get all items
    print("\n5. All Items in Hash Table:")
    all_items = ht.get_all_items()
    for key, value in all_items:
        print(f"  Key: {key}, Value: {value}")

    # Example 6: Collision demonstration — keys 1, 6, 11, ... in a 5-slot
    # table force chained buckets.
    print("\n6. Collision Resolution (Chaining):")
    collision_ht = HashTable(initial_size=5)
    # Keys that will likely collide
    collision_keys = [1, 6, 11, 16, 21]
    for key in collision_keys:
        collision_ht.insert(key, f"value_{key}")
    print(f"Hash table with collisions:")
    print(f"  Size: {collision_ht.size}")
    print(f"  Count: {len(collision_ht)}")
    print(f"  Load factor: {collision_ht.get_load_factor():.2f}")
    print(f"  Items: {collision_ht.get_all_items()}")
def example_performance_analysis():
    """Render analyze_performance() output as an aligned text table."""
    print("\n" + "=" * 60)
    print("Performance Analysis Example")
    print("=" * 60)
    print("\nAnalyzing quicksort performance across different array sizes:")
    rows = analyze_performance([100, 1000, 10000])
    print("\nResults:")
    # Fixed-width columns so the numbers line up under the headers.
    header = f"{'Size':<10} {'Quicksort Time':<18} {'Built-in Time':<18} {'Speedup':<10} {'Correct':<10}"
    print(header)
    print("-" * 70)
    for row in rows:
        line = (f"{row['array_length']:<10} "
                f"{row['quicksort_time']:<18.6f} "
                f"{row['builtin_time']:<18.6f} "
                f"{row['speedup']:<10.2f} "
                f"{str(row['is_correct']):<10}")
        print(line)
if __name__ == "__main__":
# Run all examples
example_quicksort()
example_hash_table()
example_performance_analysis()

204
src/hash_table.py Normal file
View File

@@ -0,0 +1,204 @@
"""
Hash Table with Chaining Implementation
This module provides a hash table implementation using chaining
for collision resolution.
"""
from dataclasses import dataclass
from typing import Any, Iterator, List, Optional, Tuple
@dataclass
class HashNode:
    """Singly linked node storing one key-value pair in a hash table chain."""
    key: int                            # integer key, as required by HashTable._hash
    value: Any                          # was `any` (the builtin function) — typing.Any is the correct annotation
    next: Optional['HashNode'] = None   # next node in the chain, or None at the tail
class HashTable:
    """
    Hash table using chaining (singly linked HashNode lists) per bucket.

    Colliding keys are prepended to the bucket's chain. The table doubles
    its bucket count whenever the load factor (count / size) reaches
    ``load_factor_threshold``.

    Time Complexity:
        - Average: O(1) for insert, search, delete
        - Worst: O(n) when all keys hash to the same bucket
    Space Complexity: O(n + m) where n is number of elements, m is table size
    """

    def __init__(self, initial_size: int = 16, load_factor_threshold: float = 0.75):
        """
        Initialize an empty hash table.

        Args:
            initial_size: Initial number of buckets.
            load_factor_threshold: Load factor that triggers resizing
                (default: 0.75).
        """
        self.size = initial_size
        self.load_factor_threshold = load_factor_threshold
        self.count = 0
        self.buckets: List[Optional[HashNode]] = [None] * self.size

    def _hash(self, key: int) -> int:
        """
        Map an integer key to a bucket index via the multiplication method.

        h(k) = floor(m * (k * A mod 1)), with A ≈ (√5 - 1) / 2 ≈ 0.618
        (Knuth's recommended constant).

        Args:
            key: Integer key to hash.

        Returns:
            Bucket index in [0, self.size).
        """
        A = 0.6180339887498949
        # Python's % always yields a non-negative result for a positive
        # modulus, so negative keys also map into range.
        return int(self.size * ((key * A) % 1))

    def _resize(self) -> None:
        """Double the bucket count and rehash every stored element."""
        old_buckets = self.buckets
        # Double the size; insert() below re-counts each rehashed element,
        # so reset the counter first.
        self.size *= 2
        self.count = 0
        self.buckets = [None] * self.size
        for bucket in old_buckets:
            current = bucket
            while current is not None:
                self.insert(current.key, current.value)
                current = current.next

    def insert(self, key: int, value: Any) -> None:
        """
        Insert a key-value pair, overwriting the value if the key exists.

        Args:
            key: Key to insert.
            value: Value associated with the key.
        """
        # Grow first so the chain we are about to extend stays short.
        load_factor = self.count / self.size
        if load_factor >= self.load_factor_threshold:
            self._resize()
        bucket_index = self._hash(key)
        # Update in place when the key is already chained in this bucket.
        current = self.buckets[bucket_index]
        while current is not None:
            if current.key == key:
                current.value = value  # Update existing key
                return
            current = current.next
        # Otherwise prepend a new node to the chain (O(1)).
        new_node = HashNode(key, value, self.buckets[bucket_index])
        self.buckets[bucket_index] = new_node
        self.count += 1

    def get(self, key: int) -> Optional[Any]:
        """
        Retrieve the value stored under ``key``.

        Note: None is ambiguous when None itself was stored as a value;
        use ``contains``/``in`` to test membership unambiguously.

        Args:
            key: Key to search for.

        Returns:
            Value associated with key, or None if not found.
        """
        bucket_index = self._hash(key)
        current = self.buckets[bucket_index]
        while current is not None:
            if current.key == key:
                return current.value
            current = current.next
        return None

    def delete(self, key: int) -> bool:
        """
        Remove the node holding ``key`` from its bucket chain.

        Args:
            key: Key to delete.

        Returns:
            True if key was found and deleted, False otherwise.
        """
        bucket_index = self._hash(key)
        current = self.buckets[bucket_index]
        prev = None
        while current is not None:
            if current.key == key:
                if prev is None:
                    # Node to delete is the head: bucket points past it.
                    self.buckets[bucket_index] = current.next
                else:
                    # Node is mid-chain or at the end: bypass it.
                    prev.next = current.next
                self.count -= 1
                return True
            prev = current
            current = current.next
        return False

    def contains(self, key: int) -> bool:
        """
        Check whether ``key`` exists in the hash table.

        Walks the chain comparing keys directly rather than testing
        ``get(key) is not None``, so keys stored with a None value are
        still correctly reported as present.

        Args:
            key: Key to check.

        Returns:
            True if key exists, False otherwise.
        """
        current = self.buckets[self._hash(key)]
        while current is not None:
            if current.key == key:
                return True
            current = current.next
        return False

    def get_load_factor(self) -> float:
        """
        Get current load factor of the hash table.

        Returns:
            Load factor (count / size), or 0.0 for a zero-sized table.
        """
        return self.count / self.size if self.size > 0 else 0.0

    def get_all_items(self) -> List[Tuple[int, Any]]:
        """
        Get all key-value pairs in the hash table.

        Returns:
            List of (key, value) tuples, in bucket/chain order.
        """
        items = []
        for bucket in self.buckets:
            current = bucket
            while current is not None:
                items.append((current.key, current.value))
                current = current.next
        return items

    def __len__(self) -> int:
        """Return the number of elements in the hash table."""
        return self.count

    def __contains__(self, key: int) -> bool:
        """Check if key exists in hash table using 'in' operator."""
        return self.contains(key)

    def __repr__(self) -> str:
        """String representation of the hash table."""
        items = self.get_all_items()
        return f"HashTable(size={self.size}, count={self.count}, load_factor={self.get_load_factor():.2f}, items={items})"

178
src/quicksort.py Normal file
View File

@@ -0,0 +1,178 @@
"""
Randomized Quicksort Implementation
This module provides a randomized quicksort algorithm implementation
along with utilities for performance analysis and comparison.
"""
import random
from typing import List, Callable, Tuple
import time
def randomized_quicksort(arr: List[int], low: int = None, high: int = None) -> List[int]:
    """
    Return a sorted copy of ``arr`` using randomized quicksort.

    Time Complexity:
        - Average/Best: O(n log n)
        - Worst: O(n²), made very unlikely by random pivot selection
    Space Complexity: O(log n) expected for the explicit work stack.

    Args:
        arr: List of integers to sort
        low: Starting index (default: 0)
        high: Ending index (default: len(arr) - 1)

    Returns:
        Sorted list of integers; the caller's list is never mutated.
    """
    low = 0 if low is None else low
    high = len(arr) - 1 if high is None else high
    # Work on a copy so the original array is untouched.
    arr = arr.copy()

    # Iterative depth-first traversal of subranges; pushing the right half
    # first means the left half is popped (and partitioned) first, mirroring
    # the usual recursive order.
    pending = [(low, high)]
    while pending:
        lo, hi = pending.pop()
        if lo < hi:
            split = randomized_partition(arr, lo, hi)
            pending.append((split + 1, hi))  # right subrange, handled later
            pending.append((lo, split - 1))  # left subrange, handled next
    return arr
def randomized_partition(arr: List[int], low: int, high: int) -> int:
    """
    Partition arr[low..high] around a uniformly random pivot.

    Swapping a random element into the pivot slot before partitioning makes
    the adversarial orderings that cause O(n²) behavior vanishingly unlikely.

    Args:
        arr: List to partition (modified in place)
        low: Starting index
        high: Ending index

    Returns:
        Final position of pivot element
    """
    chosen = random.randint(low, high)
    # Park the randomly chosen pivot at the end, where partition() expects it.
    arr[high], arr[chosen] = arr[chosen], arr[high]
    return partition(arr, low, high)
def partition(arr: List[int], low: int, high: int) -> int:
    """
    Lomuto partition of arr[low..high] around the pivot at arr[high].

    After the call, every element left of the returned index is <= pivot
    and every element right of it is > pivot.

    Args:
        arr: List to partition (modified in place)
        low: Starting index
        high: Ending index (pivot position)

    Returns:
        Final position of pivot element
    """
    pivot = arr[high]
    # `boundary` is the slot where the next element <= pivot will land.
    boundary = low
    for scan in range(low, high):
        if arr[scan] <= pivot:
            arr[scan], arr[boundary] = arr[boundary], arr[scan]
            boundary += 1
    # Drop the pivot into its final sorted position.
    arr[high], arr[boundary] = arr[boundary], arr[high]
    return boundary
def measure_time(func: Callable, *args, **kwargs) -> Tuple[float, any]:
    """
    Time a single call of ``func`` with the given arguments.

    Uses time.perf_counter(), the highest-resolution monotonic clock
    available for benchmarking.

    Args:
        func: Function to measure
        *args: Positional arguments for function
        **kwargs: Keyword arguments for function

    Returns:
        Tuple of (execution_time_in_seconds, function_result)
    """
    started = time.perf_counter()
    outcome = func(*args, **kwargs)
    elapsed = time.perf_counter() - started
    return elapsed, outcome
def compare_with_builtin(arr: List[int]) -> dict:
    """
    Time randomized quicksort against Python's built-in sorted().

    Args:
        arr: List to sort

    Returns:
        Dict with keys 'quicksort_time', 'builtin_time', 'speedup'
        (quicksort_time / builtin_time, so values > 1 mean quicksort was
        slower), 'is_correct', and 'array_length'.
    """
    qs_time, qs_result = measure_time(randomized_quicksort, arr)
    py_time, py_result = measure_time(sorted, arr)
    # Guard the ratio against a zero-duration built-in run.
    ratio = qs_time / py_time if py_time > 0 else float('inf')
    return {
        'quicksort_time': qs_time,
        'builtin_time': py_time,
        'speedup': ratio,
        'is_correct': qs_result == py_result,
        'array_length': len(arr)
    }
def analyze_performance(array_sizes: List[int] = None) -> List[dict]:
    """
    Benchmark quicksort vs built-in sort across several array sizes.

    Args:
        array_sizes: List of array sizes to test
            (default: [100, 1000, 10000, 100000])

    Returns:
        One compare_with_builtin() result dict per array size.
    """
    sizes = [100, 1000, 10000, 100000] if array_sizes is None else array_sizes
    # Each trial sorts a fresh uniformly random array of the requested size.
    return [
        compare_with_builtin([random.randint(1, 1000000) for _ in range(n)])
        for n in sizes
    ]