Initial commit: Randomized Quicksort and Hash Table with Chaining implementation

- Implemented Randomized Quicksort algorithm with performance analysis
- Implemented Hash Table with Chaining for collision resolution
- Added comprehensive test suite (30+ test cases)
- Created test runner script with multiple test options
- Added detailed README with architecture diagrams and documentation
- Added MIT License
- Includes examples and comprehensive documentation
This commit is contained in:
Carlos Gutierrez
2025-11-04 21:35:02 -05:00
commit a7fe11fd74
12 changed files with 2024 additions and 0 deletions

10
src/__init__.py Normal file
View File

@@ -0,0 +1,10 @@
"""
MSCS532 Assignment 3: Understanding Algorithm Efficiency and Scalability
This package contains implementations of:
- Randomized Quicksort algorithm
- Hashing with Chaining data structure
"""
__version__ = "1.0.0"

148
src/examples.py Normal file
View File

@@ -0,0 +1,148 @@
"""
Example usage of Randomized Quicksort and Hash Table implementations.
This module demonstrates how to use the algorithms and data structures
implemented in this project.
"""
import random
from src.quicksort import (
randomized_quicksort,
compare_with_builtin,
analyze_performance
)
from src.hash_table import HashTable
def example_quicksort():
    """Walk through three randomized-quicksort demos: basic, large, timed."""
    banner = "=" * 60
    print(banner)
    print("Randomized Quicksort Example")
    print(banner)

    # Demo 1: sort a small fixed array.
    print("\n1. Basic Sorting:")
    sample = [64, 34, 25, 12, 22, 11, 90, 5]
    print(f"Original array: {sample}")
    ordered = randomized_quicksort(sample)
    print(f"Sorted array: {ordered}")

    # Demo 2: sort a larger randomly generated array.
    print("\n2. Large Random Array:")
    big_sample = [random.randint(1, 1000) for _ in range(20)]
    print(f"Original array (first 20 elements): {big_sample[:20]}")
    big_ordered = randomized_quicksort(big_sample)
    print(f"Sorted array (first 20 elements): {big_ordered[:20]}")

    # Demo 3: time quicksort against the built-in sorted().
    print("\n3. Performance Comparison with Built-in Sort:")
    workload = [random.randint(1, 100000) for _ in range(10000)]
    timing = compare_with_builtin(workload)
    print(f"Array length: {timing['array_length']}")
    print(f"Quicksort time: {timing['quicksort_time']:.6f} seconds")
    print(f"Built-in sort time: {timing['builtin_time']:.6f} seconds")
    print(f"Speedup ratio: {timing['speedup']:.2f}x")
    print(f"Results match: {timing['is_correct']}")
def example_hash_table():
    """Demonstrate hash table with chaining usage.

    Covers insert/search/delete, the 'in' operator, full-item listing,
    and collision behavior in a deliberately undersized table.
    """
    print("\n" + "=" * 60)
    print("Hash Table with Chaining Example")
    print("=" * 60)

    # Create hash table (small so load-factor growth is visible per insert)
    ht = HashTable(initial_size=8)

    # Example 1: Insert operations
    print("\n1. Insert Operations:")
    keys_values = [
        (1, "apple"),
        (2, "banana"),
        (3, "cherry"),
        (10, "date"),
        (11, "elderberry"),
        (18, "fig"),
        (19, "grape"),
        (26, "honeydew")
    ]
    for key, value in keys_values:
        ht.insert(key, value)
        print(f"Inserted ({key}, {value}) - Load factor: {ht.get_load_factor():.2f}")
    print(f"\nHash table size: {ht.size}")
    print(f"Number of elements: {len(ht)}")
    print(f"Load factor: {ht.get_load_factor():.2f}")

    # Example 2: Search operations
    print("\n2. Search Operations:")
    search_keys = [1, 3, 11, 99]
    for key in search_keys:
        value = ht.get(key)
        # Compare against None explicitly: a falsy stored value (e.g. "" or 0)
        # would otherwise be misreported as "not found".
        if value is not None:
            print(f"Key {key} found: {value}")
        else:
            print(f"Key {key} not found")

    # Example 3: Contains operator
    print("\n3. Using 'in' Operator:")
    test_keys = [2, 5, 18]
    for key in test_keys:
        print(f"{key} in hash table: {key in ht}")

    # Example 4: Delete operations
    print("\n4. Delete Operations:")
    delete_key = 3
    print(f"Deleting key {delete_key}...")
    deleted = ht.delete(delete_key)
    print(f"Delete successful: {deleted}")
    print(f"Key {delete_key} still exists: {delete_key in ht}")
    print(f"Updated count: {len(ht)}")

    # Example 5: Get all items
    print("\n5. All Items in Hash Table:")
    all_items = ht.get_all_items()
    for key, value in all_items:
        print(f"  Key: {key}, Value: {value}")

    # Example 6: Collision demonstration — keys 1, 6, 11, ... in a 5-slot
    # table force chained buckets.
    print("\n6. Collision Resolution (Chaining):")
    collision_ht = HashTable(initial_size=5)
    # Keys that will likely collide
    collision_keys = [1, 6, 11, 16, 21]
    for key in collision_keys:
        collision_ht.insert(key, f"value_{key}")
    print(f"Hash table with collisions:")
    print(f"  Size: {collision_ht.size}")
    print(f"  Count: {len(collision_ht)}")
    print(f"  Load factor: {collision_ht.get_load_factor():.2f}")
    print(f"  Items: {collision_ht.get_all_items()}")
def example_performance_analysis():
    """Render analyze_performance() output as an aligned text table."""
    print("\n" + "=" * 60)
    print("Performance Analysis Example")
    print("=" * 60)
    print("\nAnalyzing quicksort performance across different array sizes:")
    rows = analyze_performance([100, 1000, 10000])
    print("\nResults:")
    # Fixed-width columns so the numbers line up under the headers.
    header = f"{'Size':<10} {'Quicksort Time':<18} {'Built-in Time':<18} {'Speedup':<10} {'Correct':<10}"
    print(header)
    print("-" * 70)
    for row in rows:
        line = (f"{row['array_length']:<10} "
                f"{row['quicksort_time']:<18.6f} "
                f"{row['builtin_time']:<18.6f} "
                f"{row['speedup']:<10.2f} "
                f"{str(row['is_correct']):<10}")
        print(line)
if __name__ == "__main__":
# Run all examples
example_quicksort()
example_hash_table()
example_performance_analysis()

204
src/hash_table.py Normal file
View File

@@ -0,0 +1,204 @@
"""
Hash Table with Chaining Implementation
This module provides a hash table implementation using chaining
for collision resolution.
"""
from dataclasses import dataclass
from typing import Any, Iterator, List, Optional, Tuple
@dataclass
class HashNode:
    """Singly linked node storing one key-value pair in a hash table chain."""
    key: int                            # integer key, as required by HashTable._hash
    value: Any                          # was `any` (the builtin function) — typing.Any is the correct annotation
    next: Optional['HashNode'] = None   # next node in the chain, or None at the tail
class HashTable:
    """
    Hash table using chaining (singly linked HashNode lists) per bucket.

    Colliding keys are prepended to the bucket's chain. The table doubles
    its bucket count whenever the load factor (count / size) reaches
    ``load_factor_threshold``.

    Time Complexity:
        - Average: O(1) for insert, search, delete
        - Worst: O(n) when all keys hash to the same bucket
    Space Complexity: O(n + m) where n is number of elements, m is table size
    """

    def __init__(self, initial_size: int = 16, load_factor_threshold: float = 0.75):
        """
        Initialize an empty hash table.

        Args:
            initial_size: Initial number of buckets.
            load_factor_threshold: Load factor that triggers resizing
                (default: 0.75).
        """
        self.size = initial_size
        self.load_factor_threshold = load_factor_threshold
        self.count = 0
        self.buckets: List[Optional[HashNode]] = [None] * self.size

    def _hash(self, key: int) -> int:
        """
        Map an integer key to a bucket index via the multiplication method.

        h(k) = floor(m * (k * A mod 1)), with A ≈ (√5 - 1) / 2 ≈ 0.618
        (Knuth's recommended constant).

        Args:
            key: Integer key to hash.

        Returns:
            Bucket index in [0, self.size).
        """
        A = 0.6180339887498949
        # Python's % always yields a non-negative result for a positive
        # modulus, so negative keys also map into range.
        return int(self.size * ((key * A) % 1))

    def _resize(self) -> None:
        """Double the bucket count and rehash every stored element."""
        old_buckets = self.buckets
        # Double the size; insert() below re-counts each rehashed element,
        # so reset the counter first.
        self.size *= 2
        self.count = 0
        self.buckets = [None] * self.size
        for bucket in old_buckets:
            current = bucket
            while current is not None:
                self.insert(current.key, current.value)
                current = current.next

    def insert(self, key: int, value: Any) -> None:
        """
        Insert a key-value pair, overwriting the value if the key exists.

        Args:
            key: Key to insert.
            value: Value associated with the key.
        """
        # Grow first so the chain we are about to extend stays short.
        load_factor = self.count / self.size
        if load_factor >= self.load_factor_threshold:
            self._resize()
        bucket_index = self._hash(key)
        # Update in place when the key is already chained in this bucket.
        current = self.buckets[bucket_index]
        while current is not None:
            if current.key == key:
                current.value = value  # Update existing key
                return
            current = current.next
        # Otherwise prepend a new node to the chain (O(1)).
        new_node = HashNode(key, value, self.buckets[bucket_index])
        self.buckets[bucket_index] = new_node
        self.count += 1

    def get(self, key: int) -> Optional[Any]:
        """
        Retrieve the value stored under ``key``.

        Note: None is ambiguous when None itself was stored as a value;
        use ``contains``/``in`` to test membership unambiguously.

        Args:
            key: Key to search for.

        Returns:
            Value associated with key, or None if not found.
        """
        bucket_index = self._hash(key)
        current = self.buckets[bucket_index]
        while current is not None:
            if current.key == key:
                return current.value
            current = current.next
        return None

    def delete(self, key: int) -> bool:
        """
        Remove the node holding ``key`` from its bucket chain.

        Args:
            key: Key to delete.

        Returns:
            True if key was found and deleted, False otherwise.
        """
        bucket_index = self._hash(key)
        current = self.buckets[bucket_index]
        prev = None
        while current is not None:
            if current.key == key:
                if prev is None:
                    # Node to delete is the head: bucket points past it.
                    self.buckets[bucket_index] = current.next
                else:
                    # Node is mid-chain or at the end: bypass it.
                    prev.next = current.next
                self.count -= 1
                return True
            prev = current
            current = current.next
        return False

    def contains(self, key: int) -> bool:
        """
        Check whether ``key`` exists in the hash table.

        Walks the chain comparing keys directly rather than testing
        ``get(key) is not None``, so keys stored with a None value are
        still correctly reported as present.

        Args:
            key: Key to check.

        Returns:
            True if key exists, False otherwise.
        """
        current = self.buckets[self._hash(key)]
        while current is not None:
            if current.key == key:
                return True
            current = current.next
        return False

    def get_load_factor(self) -> float:
        """
        Get current load factor of the hash table.

        Returns:
            Load factor (count / size), or 0.0 for a zero-sized table.
        """
        return self.count / self.size if self.size > 0 else 0.0

    def get_all_items(self) -> List[Tuple[int, Any]]:
        """
        Get all key-value pairs in the hash table.

        Returns:
            List of (key, value) tuples, in bucket/chain order.
        """
        items = []
        for bucket in self.buckets:
            current = bucket
            while current is not None:
                items.append((current.key, current.value))
                current = current.next
        return items

    def __len__(self) -> int:
        """Return the number of elements in the hash table."""
        return self.count

    def __contains__(self, key: int) -> bool:
        """Check if key exists in hash table using 'in' operator."""
        return self.contains(key)

    def __repr__(self) -> str:
        """String representation of the hash table."""
        items = self.get_all_items()
        return f"HashTable(size={self.size}, count={self.count}, load_factor={self.get_load_factor():.2f}, items={items})"

178
src/quicksort.py Normal file
View File

@@ -0,0 +1,178 @@
"""
Randomized Quicksort Implementation
This module provides a randomized quicksort algorithm implementation
along with utilities for performance analysis and comparison.
"""
import random
from typing import List, Callable, Tuple
import time
def randomized_quicksort(arr: List[int], low: int = None, high: int = None) -> List[int]:
    """
    Return a sorted copy of ``arr`` using randomized quicksort.

    Time Complexity:
        - Average/Best: O(n log n)
        - Worst: O(n²), made very unlikely by random pivot selection
    Space Complexity: O(log n) expected for the explicit work stack.

    Args:
        arr: List of integers to sort
        low: Starting index (default: 0)
        high: Ending index (default: len(arr) - 1)

    Returns:
        Sorted list of integers; the caller's list is never mutated.
    """
    low = 0 if low is None else low
    high = len(arr) - 1 if high is None else high
    # Work on a copy so the original array is untouched.
    arr = arr.copy()

    # Iterative depth-first traversal of subranges; pushing the right half
    # first means the left half is popped (and partitioned) first, mirroring
    # the usual recursive order.
    pending = [(low, high)]
    while pending:
        lo, hi = pending.pop()
        if lo < hi:
            split = randomized_partition(arr, lo, hi)
            pending.append((split + 1, hi))  # right subrange, handled later
            pending.append((lo, split - 1))  # left subrange, handled next
    return arr
def randomized_partition(arr: List[int], low: int, high: int) -> int:
    """
    Partition arr[low..high] around a uniformly random pivot.

    Swapping a random element into the pivot slot before partitioning makes
    the adversarial orderings that cause O(n²) behavior vanishingly unlikely.

    Args:
        arr: List to partition (modified in place)
        low: Starting index
        high: Ending index

    Returns:
        Final position of pivot element
    """
    chosen = random.randint(low, high)
    # Park the randomly chosen pivot at the end, where partition() expects it.
    arr[high], arr[chosen] = arr[chosen], arr[high]
    return partition(arr, low, high)
def partition(arr: List[int], low: int, high: int) -> int:
    """
    Lomuto partition of arr[low..high] around the pivot at arr[high].

    After the call, every element left of the returned index is <= pivot
    and every element right of it is > pivot.

    Args:
        arr: List to partition (modified in place)
        low: Starting index
        high: Ending index (pivot position)

    Returns:
        Final position of pivot element
    """
    pivot = arr[high]
    # `boundary` is the slot where the next element <= pivot will land.
    boundary = low
    for scan in range(low, high):
        if arr[scan] <= pivot:
            arr[scan], arr[boundary] = arr[boundary], arr[scan]
            boundary += 1
    # Drop the pivot into its final sorted position.
    arr[high], arr[boundary] = arr[boundary], arr[high]
    return boundary
def measure_time(func: Callable, *args, **kwargs) -> Tuple[float, any]:
    """
    Time a single call of ``func`` with the given arguments.

    Uses time.perf_counter(), the highest-resolution monotonic clock
    available for benchmarking.

    Args:
        func: Function to measure
        *args: Positional arguments for function
        **kwargs: Keyword arguments for function

    Returns:
        Tuple of (execution_time_in_seconds, function_result)
    """
    started = time.perf_counter()
    outcome = func(*args, **kwargs)
    elapsed = time.perf_counter() - started
    return elapsed, outcome
def compare_with_builtin(arr: List[int]) -> dict:
    """
    Time randomized quicksort against Python's built-in sorted().

    Args:
        arr: List to sort

    Returns:
        Dict with keys 'quicksort_time', 'builtin_time', 'speedup'
        (quicksort_time / builtin_time, so values > 1 mean quicksort was
        slower), 'is_correct', and 'array_length'.
    """
    qs_time, qs_result = measure_time(randomized_quicksort, arr)
    py_time, py_result = measure_time(sorted, arr)
    # Guard the ratio against a zero-duration built-in run.
    ratio = qs_time / py_time if py_time > 0 else float('inf')
    return {
        'quicksort_time': qs_time,
        'builtin_time': py_time,
        'speedup': ratio,
        'is_correct': qs_result == py_result,
        'array_length': len(arr)
    }
def analyze_performance(array_sizes: List[int] = None) -> List[dict]:
    """
    Benchmark quicksort vs built-in sort across several array sizes.

    Args:
        array_sizes: List of array sizes to test
            (default: [100, 1000, 10000, 100000])

    Returns:
        One compare_with_builtin() result dict per array size.
    """
    sizes = [100, 1000, 10000, 100000] if array_sizes is None else array_sizes
    # Each trial sorts a fresh uniformly random array of the requested size.
    return [
        compare_with_builtin([random.randint(1, 1000000) for _ in range(n)])
        for n in sizes
    ]