commit a7fe11fd7430dcf3003527ac2646d2498b71339a Author: Carlos Gutierrez Date: Tue Nov 4 21:35:02 2025 -0500 Initial commit: Randomized Quicksort and Hash Table with Chaining implementation - Implemented Randomized Quicksort algorithm with performance analysis - Implemented Hash Table with Chaining for collision resolution - Added comprehensive test suite (30+ test cases) - Created test runner script with multiple test options - Added detailed README with architecture diagrams and documentation - Added MIT License - Includes examples and comprehensive documentation diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..697c944 --- /dev/null +++ b/.gitignore @@ -0,0 +1,135 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +Pipfile.lock + +# PEP 582 +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# 
mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f87bac0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2025 MSCS532 Assignment 3 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..a55dc6e --- /dev/null +++ b/README.md @@ -0,0 +1,593 @@ +# Randomized Quicksort & Hash Table with Chaining - Algorithm Efficiency and Scalability + +## Overview + +This project implements two fundamental algorithms and data structures demonstrating algorithm efficiency and scalability: + +1. **Randomized Quicksort Algorithm** - An efficient sorting algorithm with average O(n log n) time complexity +2. 
**Hash Table with Chaining** - A hash table implementation using chaining for collision resolution + +Both implementations provide comprehensive test suites, performance analysis utilities, and detailed documentation for educational purposes. + +### Key Features + +* ✅ **Randomized Quicksort**: Efficient sorting with randomized pivot selection to avoid worst-case performance +* ✅ **Performance Analysis**: Built-in utilities for comparing and analyzing algorithm performance +* ✅ **Hash Table with Chaining**: Complete hash table implementation with dynamic resizing +* ✅ **Comprehensive Test Suite**: Extensive test coverage including edge cases, stress tests, and performance benchmarks +* ✅ **Well-Documented Code**: Clear comments, docstrings, and educational examples +* ✅ **Production-Ready**: Robust error handling and comprehensive test coverage + +## Architecture + +### Randomized Quicksort Algorithm Flow + +``` +Input Array: [64, 34, 25, 12, 22, 11, 90, 5] + ↓ + ┌─────────────────────────────────────┐ + │ Randomized Quicksort Process │ + └─────────────────────────────────────┘ + ↓ + ┌─────────────────────────────────────────────────┐ + │ Step 1: Randomly select pivot │ + │ Pivot: 25 (randomly selected) │ + │ Partition: [12, 22, 11, 5] | 25 | [64, 34, 90] │ + └─────────────────────────────────────────────────┘ + ↓ + ┌─────────────────────────────────────┐ + │ Step 2: Recursively sort left │ + │ Array: [12, 22, 11, 5] │ + │ Pivot: 11 → [5, 11] | [12, 22] │ + └─────────────────────────────────────┘ + ↓ + ┌─────────────────────────────────────┐ + │ Step 3: Recursively sort right │ + │ Array: [64, 34, 90] │ + │ Pivot: 64 → [34, 64] | [90] │ + └─────────────────────────────────────┘ + ↓ +Output Array: [5, 11, 12, 22, 25, 34, 64, 90] +``` + +### Hash Table with Chaining Structure + +``` +Hash Table (size=8) +┌─────────────────────────────────────────┐ +│ Bucket 0: [Key: 8, Value: "eight"] │ +│ [Key: 16, Value: "sixteen"] │ +│ Bucket 1: [Key: 9, Value: "nine"] │ +│ Bucket 
2: [Key: 10, Value: "ten"] │ +│ [Key: 18, Value: "eighteen"] │ +│ Bucket 3: [Key: 11, Value: "eleven"] │ +│ Bucket 4: [Key: 12, Value: "twelve"] │ +│ Bucket 5: [Key: 13, Value: "thirteen"] │ +│ Bucket 6: [Key: 14, Value: "fourteen"] │ +│ Bucket 7: [Key: 15, Value: "fifteen"] │ +└─────────────────────────────────────────┘ + ↓ + Collision Resolution via Chaining + (Multiple keys hash to same bucket) +``` + +### Core Algorithm Structure + +#### Randomized Quicksort + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Randomized Quicksort │ +├─────────────────────────────────────────────────────────────┤ +│ Function: randomized_quicksort(arr) │ +│ Input: Array of comparable elements │ +│ Output: Array sorted in ascending order │ +├─────────────────────────────────────────────────────────────┤ +│ Algorithm Steps: │ +│ 1. If array has ≤ 1 element, return │ +│ 2. Randomly select pivot element │ +│ 3. Partition array around pivot │ +│ 4. Recursively sort left subarray │ +│ 5. Recursively sort right subarray │ +│ 6. Combine results │ +└─────────────────────────────────────────────────────────────┘ +``` + +#### Hash Table with Chaining + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Hash Table with Chaining │ +├─────────────────────────────────────────────────────────────┤ +│ Class: HashTable │ +│ Operations: insert, get, delete, contains │ +├─────────────────────────────────────────────────────────────┤ +│ Key Operations: │ +│ 1. Hash function: h(k) = floor(m × (k × A mod 1)) │ +│ 2. Collision resolution: Chaining (linked lists) │ +│ 3. Load factor management: Resize when threshold exceeded │ +│ 4. Dynamic resizing: Double size when load > 0.75 │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Implementation Details + +### Part 1: Randomized Quicksort + +#### Core Functions + +##### 1. 
`randomized_quicksort(arr)` + +* **Purpose**: Sort array using randomized quicksort algorithm +* **Parameters**: `arr` (list) - Input array to be sorted +* **Returns**: `list` - New array sorted in ascending order +* **Space Complexity**: O(n) - Creates a copy of the input array +* **Time Complexity**: + - Average: O(n log n) + - Worst: O(n²) - rarely occurs due to randomization + - Best: O(n log n) + +##### 2. `randomized_partition(arr, low, high)` + +* **Purpose**: Partition array using a randomly selected pivot +* **Parameters**: + - `arr` (list) - Array to partition + - `low` (int) - Starting index + - `high` (int) - Ending index +* **Returns**: `int` - Final position of pivot element +* **Key Feature**: Random pivot selection prevents worst-case O(n²) performance + +##### 3. `compare_with_builtin(arr)` + +* **Purpose**: Compare randomized quicksort with Python's built-in sort +* **Returns**: Dictionary with timing metrics and correctness verification + +##### 4. `analyze_performance(array_sizes)` + +* **Purpose**: Analyze quicksort performance across different array sizes +* **Returns**: List of performance metrics for each array size + +#### Algorithm Logic + +**Why Randomization?** + +Standard quicksort can degrade to O(n²) when: +- Pivot is always the smallest element (worst case) +- Pivot is always the largest element (worst case) +- Array is already sorted or reverse sorted + +Randomization ensures: +- Expected O(n log n) performance +- Expected number of comparisons: 2n ln n ≈ 1.39n log₂ n +- Very low probability of worst-case behavior + +### Part 2: Hash Table with Chaining + +#### Core Operations + +##### 1. `insert(key, value)` + +* **Purpose**: Insert or update a key-value pair +* **Time Complexity**: O(1) average case, O(n) worst case +* **Features**: + - Automatically updates if key exists + - Triggers resize when load factor exceeds threshold + +##### 2. 
`get(key)` + +* **Purpose**: Retrieve value associated with a key +* **Time Complexity**: O(1) average case, O(n) worst case +* **Returns**: Value if key exists, None otherwise + +##### 3. `delete(key)` + +* **Purpose**: Remove a key-value pair +* **Time Complexity**: O(1) average case, O(n) worst case +* **Returns**: True if key was found and deleted, False otherwise + +##### 4. `contains(key)` + +* **Purpose**: Check if a key exists in the hash table +* **Time Complexity**: O(1) average case, O(n) worst case +* **Pythonic**: Supports `in` operator + +#### Hash Function + +**Multiplication Method:** +``` +h(k) = floor(m × (k × A mod 1)) +``` +where: +- `m` = table size +- `A` ≈ (√5 - 1) / 2 ≈ 0.618 (golden ratio) +- Provides good distribution of keys across buckets + +#### Collision Resolution + +**Chaining Strategy:** +- Each bucket contains a linked list of key-value pairs +- When collision occurs, new element is appended to chain +- Allows multiple elements per bucket +- No clustering issues unlike open addressing + +#### Dynamic Resizing + +**Load Factor Management:** +- Default threshold: 0.75 +- When load factor exceeds threshold, table size doubles +- All elements are rehashed into new table +- Maintains O(1) average performance + +## Complexity Analysis + +### Randomized Quicksort + +| Aspect | Complexity | Description | +| -------------------- | ---------- | -------------------------------------------------- | +| **Time Complexity** | O(n log n) | Average case - randomized pivot selection | +| **Worst Case** | O(n²) | Rarely occurs due to randomization | +| **Best Case** | O(n log n) | Already sorted arrays | +| **Space Complexity** | O(log n) | Average case recursion stack depth | +| **Stability** | Not Stable | Equal elements may change relative order | + +### Hash Table with Chaining + +| Aspect | Complexity | Description | +| -------------------- | ---------- | -------------------------------------------------- | +| **Time Complexity** | O(1) | 
Average case for insert, get, delete | +| **Worst Case** | O(n) | All keys hash to same bucket (rare) | +| **Space Complexity** | O(n + m) | n elements + m buckets | +| **Load Factor** | 0.75 | Threshold for automatic resizing | + +## Usage Examples + +### Basic Usage - Randomized Quicksort + +```python +from src.quicksort import randomized_quicksort, compare_with_builtin + +# Example 1: Basic sorting +arr = [64, 34, 25, 12, 22, 11, 90, 5] +sorted_arr = randomized_quicksort(arr) +print(sorted_arr) # Output: [5, 11, 12, 22, 25, 34, 64, 90] + +# Example 2: Performance comparison +comparison = compare_with_builtin(arr) +print(f"Quicksort time: {comparison['quicksort_time']:.6f} seconds") +print(f"Built-in sort time: {comparison['builtin_time']:.6f} seconds") +print(f"Speedup ratio: {comparison['speedup']:.2f}x") +print(f"Results match: {comparison['is_correct']}") +``` + +### Basic Usage - Hash Table + +```python +from src.hash_table import HashTable + +# Create hash table +ht = HashTable(initial_size=16) + +# Insert key-value pairs +ht.insert(1, "apple") +ht.insert(2, "banana") +ht.insert(3, "cherry") + +# Retrieve values +print(ht.get(1)) # "apple" + +# Check if key exists +print(2 in ht) # True + +# Delete a key +ht.delete(2) + +# Get all items +items = ht.get_all_items() +print(items) # [(1, "apple"), (3, "cherry")] +``` + +### Edge Cases Handled + +#### Quicksort + +```python +# Empty array +empty_arr = [] +result = randomized_quicksort(empty_arr) +print(result) # Output: [] + +# Single element +single = [42] +result = randomized_quicksort(single) +print(result) # Output: [42] + +# Duplicate elements +duplicates = [3, 3, 3, 3] +result = randomized_quicksort(duplicates) +print(result) # Output: [3, 3, 3, 3] + +# Negative numbers +negatives = [-5, -2, -8, 1, 3, -1, 0] +result = randomized_quicksort(negatives) +print(result) # Output: [-8, -5, -2, -1, 0, 1, 3] +``` + +#### Hash Table + +```python +# Empty hash table +ht = HashTable() +print(len(ht)) # 0 
+print(ht.get(1)) # None + +# Collision handling +ht = HashTable(initial_size=5) +ht.insert(1, "one") +ht.insert(6, "six") # May collide with 1 +ht.insert(11, "eleven") # May collide with 1 and 6 +# All keys are stored correctly via chaining + +# Load factor management +ht = HashTable(initial_size=4, load_factor_threshold=0.75) +ht.insert(1, "a") +ht.insert(2, "b") +ht.insert(3, "c") +ht.insert(4, "d") # Triggers resize (load factor = 1.0 > 0.75) +print(ht.size) # 8 (doubled) +``` + +## Running the Program + +### Prerequisites + +* Python 3.7 or higher +* No external dependencies required (uses only Python standard library) + +### Execution + +#### Run Examples + +```bash +python3 -m src.examples +``` + +#### Run Tests + +**Quick Tests (Essential functionality):** +```bash +python3 run_tests.py --quick +``` + +**Full Test Suite:** +```bash +python3 run_tests.py +``` + +**Unit Tests Only:** +```bash +python3 run_tests.py --unit-only +``` + +**Performance Benchmarks:** +```bash +python3 run_tests.py --benchmark +``` + +**Stress Tests:** +```bash +python3 run_tests.py --stress +``` + +**Negative Test Cases:** +```bash +python3 run_tests.py --negative +``` + +**Using unittest directly:** +```bash +python3 -m unittest discover tests -v +``` + +## Test Cases + +### Randomized Quicksort Tests + +The test suite includes comprehensive test cases covering: + +#### ✅ **Functional Tests** + +* Basic sorting functionality +* Already sorted arrays (ascending/descending) +* Empty arrays and single elements +* Duplicate elements +* Negative numbers and zero values +* Large arrays (1000+ elements) + +#### ✅ **Behavioral Tests** + +* Non-destructive sorting (original array unchanged) +* Correctness verification against built-in sort +* Partition function correctness + +#### ✅ **Performance Tests** + +* Comparison with built-in sort +* Performance analysis across different array sizes +* Timing measurements + +### Hash Table Tests + +The test suite includes comprehensive test cases 
covering: + +#### ✅ **Functional Tests** + +* Basic insert, get, delete operations +* Empty hash table operations +* Collision handling +* Load factor calculation +* Dynamic resizing + +#### ✅ **Behavioral Tests** + +* Key existence checking (`in` operator) +* Update existing keys +* Delete from chains (middle of chain) +* Get all items + +#### ✅ **Edge Cases** + +* Empty hash table +* Single element +* All keys hash to same bucket +* Load factor threshold triggering resize + +## Project Structure + +``` +MSCS532_Assignment3/ +├── src/ +│ ├── __init__.py # Package initialization +│ ├── quicksort.py # Randomized Quicksort implementation +│ ├── hash_table.py # Hash Table with Chaining implementation +│ └── examples.py # Example usage demonstrations +├── tests/ +│ ├── __init__.py # Test package initialization +│ ├── test_quicksort.py # Comprehensive quicksort tests +│ └── test_hash_table.py # Comprehensive hash table tests +├── run_tests.py # Test runner with various options +├── README.md # This documentation +├── LICENSE # MIT License +├── .gitignore # Git ignore file +└── requirements.txt # Python dependencies (none required) +``` + +## Testing + +### Test Coverage + +The project includes **30+ comprehensive test cases** covering: + +#### ✅ **Functional Tests** + +* Basic functionality for both algorithms +* Edge cases (empty, single element, duplicates) +* Correctness verification + +#### ✅ **Behavioral Tests** + +* Non-destructive operations +* In-place modifications +* Collision resolution +* Dynamic resizing + +#### ✅ **Performance Tests** + +* Timing comparisons +* Performance analysis across different sizes +* Benchmarking utilities + +#### ✅ **Stress Tests** + +* Large arrays (1000+ elements) +* Many hash table operations +* Boundary conditions + +#### ✅ **Negative Test Cases** + +* Invalid input types +* Edge cases and boundary conditions +* Error handling + +### Running Tests + +The project includes a comprehensive test runner (`run_tests.py`) with 
multiple options: + +- **Quick Tests**: Essential functionality tests +- **Full Suite**: All tests including edge cases +- **Unit Tests**: Standard unittest tests only +- **Benchmarks**: Performance comparison tests +- **Stress Tests**: Large-scale and boundary tests +- **Negative Tests**: Invalid input and error handling tests + +## Educational Value + +This implementation serves as an excellent learning resource for: + +* **Algorithm Understanding**: Clear demonstration of quicksort and hash table mechanics +* **Randomization Techniques**: Shows how randomization improves algorithm performance +* **Data Structure Design**: Demonstrates hash table implementation with collision resolution +* **Code Quality**: Demonstrates good practices in Python programming +* **Testing**: Comprehensive test suite showing edge case handling +* **Documentation**: Well-commented code with clear explanations +* **Performance Analysis**: Tools for understanding algorithm efficiency + +## Algorithm Analysis + +### Randomized Quicksort + +**Why Randomization?** +- Standard quicksort can degrade to O(n²) when the pivot is always the smallest or largest element +- Randomization ensures expected O(n log n) performance +- Expected number of comparisons: 2n ln n ≈ 1.39n log₂ n + +**Performance Characteristics:** +- Excellent average-case performance +- Non-destructive sorting (creates copy) +- Cache-friendly due to good locality of reference + +**Comparison with Other Algorithms:** +- Faster than O(n²) algorithms (bubble, insertion, selection sort) +- Comparable to merge sort but with better space efficiency +- Generally slower than Python's built-in Timsort (optimized hybrid) + +### Hash Table with Chaining + +**Chaining vs. 
Open Addressing:** +- Chaining stores multiple elements in the same bucket using linked lists +- Handles collisions gracefully without clustering +- Load factor threshold prevents performance degradation + +**Hash Function:** +- Uses multiplication method: h(k) = floor(m × (k × A mod 1)) +- A ≈ (√5 - 1) / 2 ≈ 0.618 (golden ratio) +- Provides good distribution of keys across buckets + +**Performance Considerations:** +- O(1) average case performance +- Dynamic resizing maintains efficiency +- Trade-off between space and time efficiency + +## Performance Considerations + +1. **Quicksort**: + - Best for general-purpose sorting + - Randomization prevents worst-case scenarios + - Good for medium to large arrays + +2. **Hash Table**: + - Maintains O(1) average performance through load factor management + - Resizing doubles table size when threshold is exceeded + - Trade-off between space and time efficiency + +## Contributing + +This is an educational project demonstrating algorithm implementations. Feel free to: + +* Add more test cases +* Implement additional algorithms +* Improve documentation +* Optimize the implementations +* Add visualization tools + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. 
+ +## Author + +Created for MSCS532 Assignment 3: Understanding Algorithm Efficiency and Scalability + +## Acknowledgments + +* Based on standard algorithm implementations from Introduction to Algorithms (CLRS) +* Educational project for algorithm analysis and data structures course diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f6ae7dc --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +# Requirements for MSCS532 Assignment 3 +# No external dependencies required - uses only Python standard library + diff --git a/run_tests.py b/run_tests.py new file mode 100755 index 0000000..9ffcd92 --- /dev/null +++ b/run_tests.py @@ -0,0 +1,359 @@ +#!/usr/bin/env python3 +""" +Test runner for MSCS532 Assignment 3. + +Provides various test execution options: +- Quick tests: Essential functionality +- Full suite: All tests including edge cases +- Unit tests: Standard unittest tests only +- Benchmarks: Performance comparison tests +- Stress tests: Large-scale and boundary tests +- Negative tests: Invalid input and error handling +""" + +import unittest +import sys +import argparse +import time +from typing import List, Dict + + +def run_quick_tests(): + """Run essential functionality tests.""" + print("=" * 70) + print("Running Quick Tests (Essential Functionality)") + print("=" * 70) + + loader = unittest.TestLoader() + suite = unittest.TestSuite() + + # Add basic functional tests + from tests.test_quicksort import ( + TestRandomizedQuicksort, + TestPartition + ) + from tests.test_hash_table import TestHashTable + + suite.addTests(loader.loadTestsFromTestCase(TestRandomizedQuicksort)) + suite.addTests(loader.loadTestsFromTestCase(TestPartition)) + suite.addTests(loader.loadTestsFromTestCase(TestHashTable)) + + runner = unittest.TextTestRunner(verbosity=2) + result = runner.run(suite) + return result.wasSuccessful() + + +def run_unit_tests(): + """Run standard unittest tests.""" + print("=" * 70) + print("Running Unit Tests") + print("=" * 
70) + + loader = unittest.TestLoader() + suite = loader.discover('tests', pattern='test_*.py') + + runner = unittest.TextTestRunner(verbosity=2) + result = runner.run(suite) + return result.wasSuccessful() + + +def run_performance_tests(): + """Run performance benchmark tests.""" + print("=" * 70) + print("Running Performance Benchmarks") + print("=" * 70) + + try: + from src.quicksort import analyze_performance, compare_with_builtin + import random + + print("\n1. Quicksort Performance Analysis:") + print("-" * 70) + sizes = [100, 1000, 10000] + results = analyze_performance(sizes) + + print(f"\n{'Size':<10} {'Quicksort Time':<18} {'Built-in Time':<18} {'Speedup':<10} {'Correct':<10}") + print("-" * 70) + for result in results: + print(f"{result['array_length']:<10} " + f"{result['quicksort_time']:<18.6f} " + f"{result['builtin_time']:<18.6f} " + f"{result['speedup']:<10.2f} " + f"{str(result['is_correct']):<10}") + + print("\n2. Hash Table Performance:") + print("-" * 70) + from src.hash_table import HashTable + + ht = HashTable(initial_size=16) + num_operations = 10000 + + start_time = time.perf_counter() + for i in range(num_operations): + ht.insert(i, f"value_{i}") + insert_time = time.perf_counter() - start_time + + start_time = time.perf_counter() + for i in range(num_operations): + _ = ht.get(i) + get_time = time.perf_counter() - start_time + + start_time = time.perf_counter() + for i in range(num_operations): + ht.delete(i) + delete_time = time.perf_counter() - start_time + + print(f"Insert {num_operations} elements: {insert_time:.6f} seconds") + print(f"Get {num_operations} elements: {get_time:.6f} seconds") + print(f"Delete {num_operations} elements: {delete_time:.6f} seconds") + print(f"Average insert time: {insert_time/num_operations*1000:.4f} ms") + print(f"Average get time: {get_time/num_operations*1000:.4f} ms") + print(f"Average delete time: {delete_time/num_operations*1000:.4f} ms") + + return True + + except Exception as e: + print(f"Error 
running performance tests: {e}") + return False + + +def run_stress_tests(): + """Run stress tests with large inputs.""" + print("=" * 70) + print("Running Stress Tests") + print("=" * 70) + + try: + from src.quicksort import randomized_quicksort + from src.hash_table import HashTable + import random + + print("\n1. Quicksort Stress Tests:") + print("-" * 70) + + # Test with very large array + large_size = 50000 + print(f"Testing with array of size {large_size}...") + large_arr = [random.randint(1, 1000000) for _ in range(large_size)] + + start_time = time.perf_counter() + sorted_arr = randomized_quicksort(large_arr) + elapsed = time.perf_counter() - start_time + + # Verify correctness + is_correct = sorted_arr == sorted(large_arr) + print(f"✓ Large array sorted in {elapsed:.4f} seconds") + print(f"✓ Correctness: {is_correct}") + + # Test with worst-case scenario (many duplicates) + print(f"\nTesting with array of size {large_size} (many duplicates)...") + dup_arr = [random.randint(1, 100) for _ in range(large_size)] + + start_time = time.perf_counter() + sorted_dup = randomized_quicksort(dup_arr) + elapsed = time.perf_counter() - start_time + + is_correct = sorted_dup == sorted(dup_arr) + print(f"✓ Duplicate-heavy array sorted in {elapsed:.4f} seconds") + print(f"✓ Correctness: {is_correct}") + + print("\n2. 
Hash Table Stress Tests:") + print("-" * 70) + + # Test with many insertions + ht = HashTable(initial_size=16) + num_inserts = 100000 + + print(f"Inserting {num_inserts} elements...") + start_time = time.perf_counter() + for i in range(num_inserts): + ht.insert(i, f"value_{i}") + elapsed = time.perf_counter() - start_time + + print(f"✓ Inserted {num_inserts} elements in {elapsed:.4f} seconds") + print(f"✓ Hash table size: {ht.size}") + print(f"✓ Load factor: {ht.get_load_factor():.4f}") + print(f"✓ Count: {len(ht)}") + + # Verify all elements are retrievable + print(f"\nVerifying retrieval of {num_inserts} elements...") + start_time = time.perf_counter() + all_found = True + for i in range(num_inserts): + if ht.get(i) != f"value_{i}": + all_found = False + break + elapsed = time.perf_counter() - start_time + + print(f"✓ Retrieved {num_inserts} elements in {elapsed:.4f} seconds") + print(f"✓ All elements found: {all_found}") + + return True + + except Exception as e: + print(f"Error running stress tests: {e}") + import traceback + traceback.print_exc() + return False + + +def run_negative_tests(): + """Run negative test cases (invalid inputs, error handling).""" + print("=" * 70) + print("Running Negative Test Cases") + print("=" * 70) + + try: + from src.quicksort import randomized_quicksort + from src.hash_table import HashTable + + print("\n1. Quicksort Negative Tests:") + print("-" * 70) + + # Test with None (should handle gracefully or raise appropriate error) + try: + result = randomized_quicksort(None) + print("✗ Should have raised TypeError for None input") + except (TypeError, AttributeError): + print("✓ Correctly handles None input") + + # Test with mixed types (should raise TypeError) + try: + result = randomized_quicksort([1, 2, "three", 4]) + print("✗ Should have raised TypeError for mixed types") + except TypeError: + print("✓ Correctly raises TypeError for mixed types") + + print("\n2. 
Hash Table Negative Tests:") + print("-" * 70) + + # Test with None key + ht = HashTable() + try: + ht.insert(None, "value") + print("✗ Should have raised TypeError for None key") + except TypeError: + print("✓ Correctly handles None key") + + # Test with invalid initial size + try: + ht = HashTable(initial_size=0) + print("✗ Should handle invalid initial size") + except (ValueError, ZeroDivisionError): + print("✓ Correctly handles invalid initial size") + + # Test with negative initial size + try: + ht = HashTable(initial_size=-1) + print("✗ Should handle negative initial size") + except (ValueError, AssertionError): + print("✓ Correctly handles negative initial size") + + print("\n✓ Negative tests completed") + return True + + except Exception as e: + print(f"Error running negative tests: {e}") + import traceback + traceback.print_exc() + return False + + +def run_full_suite(): + """Run all tests.""" + print("=" * 70) + print("Running Full Test Suite") + print("=" * 70) + + results = [] + + print("\n[1/4] Running Unit Tests...") + results.append(("Unit Tests", run_unit_tests())) + + print("\n[2/4] Running Performance Benchmarks...") + results.append(("Performance Tests", run_performance_tests())) + + print("\n[3/4] Running Stress Tests...") + results.append(("Stress Tests", run_stress_tests())) + + print("\n[4/4] Running Negative Tests...") + results.append(("Negative Tests", run_negative_tests())) + + print("\n" + "=" * 70) + print("Test Summary") + print("=" * 70) + + for test_name, success in results: + status = "✓ PASSED" if success else "✗ FAILED" + print(f"{test_name:<25} {status}") + + all_passed = all(result[1] for result in results) + return all_passed + + +def main(): + """Main test runner with command-line interface.""" + parser = argparse.ArgumentParser( + description='Test runner for MSCS532 Assignment 3', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python3 run_tests.py --quick # Run quick tests + python3 
run_tests.py # Run full suite + python3 run_tests.py --unit-only # Run unit tests only + python3 run_tests.py --benchmark # Run performance benchmarks + python3 run_tests.py --stress # Run stress tests + python3 run_tests.py --negative # Run negative test cases + """ + ) + + parser.add_argument( + '--quick', + action='store_true', + help='Run quick tests (essential functionality only)' + ) + parser.add_argument( + '--unit-only', + action='store_true', + help='Run unit tests only' + ) + parser.add_argument( + '--benchmark', + action='store_true', + help='Run performance benchmarks' + ) + parser.add_argument( + '--stress', + action='store_true', + help='Run stress tests' + ) + parser.add_argument( + '--negative', + action='store_true', + help='Run negative test cases' + ) + + args = parser.parse_args() + + success = False + + if args.quick: + success = run_quick_tests() + elif args.unit_only: + success = run_unit_tests() + elif args.benchmark: + success = run_performance_tests() + elif args.stress: + success = run_stress_tests() + elif args.negative: + success = run_negative_tests() + else: + # Default: run full suite + success = run_full_suite() + + sys.exit(0 if success else 1) + + +if __name__ == '__main__': + main() + diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..7c87b3c --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,10 @@ +""" +MSCS532 Assignment 3: Understanding Algorithm Efficiency and Scalability + +This package contains implementations of: +- Randomized Quicksort algorithm +- Hashing with Chaining data structure +""" + +__version__ = "1.0.0" + diff --git a/src/examples.py b/src/examples.py new file mode 100644 index 0000000..0d3152a --- /dev/null +++ b/src/examples.py @@ -0,0 +1,148 @@ +""" +Example usage of Randomized Quicksort and Hash Table implementations. + +This module demonstrates how to use the algorithms and data structures +implemented in this project. 
# ======================================================================
# src/examples.py (continued — module docstring closes above)
# ======================================================================

import random


def example_quicksort():
    """Demonstrate randomized quicksort usage."""
    # Deferred import: keeps this demo module importable even when the
    # src package is not on sys.path, and makes the sections below
    # self-contained.
    from src.quicksort import randomized_quicksort, compare_with_builtin

    print("=" * 60)
    print("Randomized Quicksort Example")
    print("=" * 60)

    # Example 1: Basic sorting
    print("\n1. Basic Sorting:")
    arr = [64, 34, 25, 12, 22, 11, 90, 5]
    print(f"Original array: {arr}")
    sorted_arr = randomized_quicksort(arr)
    print(f"Sorted array: {sorted_arr}")

    # Example 2: Large random array
    print("\n2. Large Random Array:")
    large_arr = [random.randint(1, 1000) for _ in range(20)]
    print(f"Original array (first 20 elements): {large_arr[:20]}")
    sorted_large = randomized_quicksort(large_arr)
    print(f"Sorted array (first 20 elements): {sorted_large[:20]}")

    # Example 3: Performance comparison
    print("\n3. Performance Comparison with Built-in Sort:")
    test_array = [random.randint(1, 100000) for _ in range(10000)]
    comparison = compare_with_builtin(test_array)
    print(f"Array length: {comparison['array_length']}")
    print(f"Quicksort time: {comparison['quicksort_time']:.6f} seconds")
    print(f"Built-in sort time: {comparison['builtin_time']:.6f} seconds")
    print(f"Speedup ratio: {comparison['speedup']:.2f}x")
    print(f"Results match: {comparison['is_correct']}")


def example_hash_table():
    """Demonstrate hash table with chaining usage."""
    from src.hash_table import HashTable  # deferred (see example_quicksort)

    print("\n" + "=" * 60)
    print("Hash Table with Chaining Example")
    print("=" * 60)

    ht = HashTable(initial_size=8)

    # Example 1: Insert operations
    print("\n1. Insert Operations:")
    keys_values = [
        (1, "apple"),
        (2, "banana"),
        (3, "cherry"),
        (10, "date"),
        (11, "elderberry"),
        (18, "fig"),
        (19, "grape"),
        (26, "honeydew")
    ]
    for key, value in keys_values:
        ht.insert(key, value)
        print(f"Inserted ({key}, {value}) - Load factor: {ht.get_load_factor():.2f}")

    print(f"\nHash table size: {ht.size}")
    print(f"Number of elements: {len(ht)}")
    print(f"Load factor: {ht.get_load_factor():.2f}")

    # Example 2: Search operations
    print("\n2. Search Operations:")
    search_keys = [1, 3, 11, 99]
    for key in search_keys:
        value = ht.get(key)
        # BUG FIX: compare against None instead of truthiness, so falsy
        # values ("", 0, []) stored in the table still count as found.
        if value is not None:
            print(f"Key {key} found: {value}")
        else:
            print(f"Key {key} not found")

    # Example 3: Contains operator
    print("\n3. Using 'in' Operator:")
    test_keys = [2, 5, 18]
    for key in test_keys:
        print(f"{key} in hash table: {key in ht}")

    # Example 4: Delete operations
    print("\n4. Delete Operations:")
    delete_key = 3
    print(f"Deleting key {delete_key}...")
    deleted = ht.delete(delete_key)
    print(f"Delete successful: {deleted}")
    print(f"Key {delete_key} still exists: {delete_key in ht}")
    print(f"Updated count: {len(ht)}")

    # Example 5: Get all items
    print("\n5. All Items in Hash Table:")
    all_items = ht.get_all_items()
    for key, value in all_items:
        print(f"  Key: {key}, Value: {value}")

    # Example 6: Collision demonstration — a tiny table forces several
    # keys into the same bucket so the chains are exercised.
    print("\n6. Collision Resolution (Chaining):")
    collision_ht = HashTable(initial_size=5)
    collision_keys = [1, 6, 11, 16, 21]
    for key in collision_keys:
        collision_ht.insert(key, f"value_{key}")
    print(f"Hash table with collisions:")
    print(f"  Size: {collision_ht.size}")
    print(f"  Count: {len(collision_ht)}")
    print(f"  Load factor: {collision_ht.get_load_factor():.2f}")
    print(f"  Items: {collision_ht.get_all_items()}")


def example_performance_analysis():
    """Demonstrate performance analysis of quicksort."""
    from src.quicksort import analyze_performance  # deferred

    print("\n" + "=" * 60)
    print("Performance Analysis Example")
    print("=" * 60)

    print("\nAnalyzing quicksort performance across different array sizes:")
    results = analyze_performance([100, 1000, 10000])

    print("\nResults:")
    print(f"{'Size':<10} {'Quicksort Time':<18} {'Built-in Time':<18} {'Speedup':<10} {'Correct':<10}")
    print("-" * 70)
    for result in results:
        print(f"{result['array_length']:<10} "
              f"{result['quicksort_time']:<18.6f} "
              f"{result['builtin_time']:<18.6f} "
              f"{result['speedup']:<10.2f} "
              f"{str(result['is_correct']):<10}")


if __name__ == "__main__":
    # Run all examples
    example_quicksort()
    example_hash_table()
    example_performance_analysis()


# ======================================================================
# src/hash_table.py
# ======================================================================
"""
Hash Table with Chaining Implementation

This module provides a hash table implementation using chaining
for collision resolution.
"""

from typing import Any, List, Optional, Tuple
from dataclasses import dataclass


@dataclass
class HashNode:
    """Singly linked node holding one key/value pair in a bucket chain."""
    key: int
    # NOTE: annotated with typing.Any — the original used the builtin
    # any() function, which is not a type.
    value: Any
    next: Optional['HashNode'] = None


class HashTable:
    """
    Hash Table implementation using chaining for collision resolution.

    Chaining stores multiple elements in the same bucket using a linked list.
    When a collision occurs, the new element is linked into the chain.

    Time Complexity:
        - Average: O(1) for insert, search, delete
        - Worst: O(n) when all keys hash to the same bucket

    Space Complexity: O(n + m) where n is the number of elements and
    m is the table size.
    """

    def __init__(self, initial_size: int = 16, load_factor_threshold: float = 0.75):
        """
        Initialize hash table.

        Args:
            initial_size: Initial number of buckets (must be >= 1)
            load_factor_threshold: Load factor that triggers a resize
                (default: 0.75)

        Raises:
            ValueError: If initial_size is smaller than 1.
        """
        if initial_size < 1:
            # A zero/negative bucket count would break _hash() and the
            # load-factor division in insert().
            raise ValueError("initial_size must be >= 1")
        self.size = initial_size
        self.load_factor_threshold = load_factor_threshold
        self.count = 0
        self.buckets: List[Optional[HashNode]] = [None] * self.size

    def _hash(self, key: int) -> int:
        """
        Hash function using the multiplication method.

        h(k) = floor(m * (k * A mod 1)) where A ≈ (√5 - 1) / 2 ≈ 0.618.
        Python's % always yields a non-negative fraction here, so negative
        keys also map into [0, size).

        Args:
            key: Key to hash

        Returns:
            Bucket index in [0, self.size)
        """
        A = 0.6180339887498949
        return int(self.size * ((key * A) % 1))

    def _resize(self) -> None:
        """
        Double the bucket array and rehash every stored node.

        Nodes are relinked directly into the new buckets. Unlike the
        original insert()-based rehash, this skips the redundant
        duplicate-key scan and load-factor check for elements already
        known to be unique, and reuses the existing HashNode objects.
        self.count is unchanged.
        """
        old_buckets = self.buckets
        self.size *= 2
        self.buckets = [None] * self.size

        for head in old_buckets:
            node = head
            while node is not None:
                successor = node.next
                index = self._hash(node.key)  # recompute for the new size
                node.next = self.buckets[index]
                self.buckets[index] = node
                node = successor

    def insert(self, key: int, value: Any) -> None:
        """
        Insert a key-value pair, updating the value if the key exists.

        Args:
            key: Key to insert
            value: Value associated with the key
        """
        # Grow first so chains stay short (amortized O(1) insert).
        if self.count / self.size >= self.load_factor_threshold:
            self._resize()

        bucket_index = self._hash(key)

        # Update in place if the key is already present.
        current = self.buckets[bucket_index]
        while current is not None:
            if current.key == key:
                current.value = value
                return
            current = current.next

        # New key: prepend to the chain (O(1)).
        self.buckets[bucket_index] = HashNode(key, value, self.buckets[bucket_index])
        self.count += 1

    def get(self, key: int) -> Optional[Any]:
        """
        Retrieve value associated with a key.

        Note: a return of None is ambiguous — it may mean "absent" or a
        stored None value; use contains()/`in` to distinguish.

        Args:
            key: Key to search for

        Returns:
            Value associated with key, or None if not found
        """
        current = self.buckets[self._hash(key)]
        while current is not None:
            if current.key == key:
                return current.value
            current = current.next
        return None

    def delete(self, key: int) -> bool:
        """
        Delete a key-value pair from the hash table.

        Args:
            key: Key to delete

        Returns:
            True if key was found and deleted, False otherwise
        """
        bucket_index = self._hash(key)
        current = self.buckets[bucket_index]
        prev = None

        while current is not None:
            if current.key == key:
                if prev is None:
                    # Node to delete is the head of the chain.
                    self.buckets[bucket_index] = current.next
                else:
                    # Node is in the middle or at the end: unlink it.
                    prev.next = current.next
                self.count -= 1
                return True
            prev = current
            current = current.next

        return False

    def contains(self, key: int) -> bool:
        """
        Check if a key exists in the hash table.

        BUG FIX: walks the chain instead of testing get(key) is not None,
        so a key whose stored value is None is still reported as present.

        Args:
            key: Key to check

        Returns:
            True if key exists, False otherwise
        """
        current = self.buckets[self._hash(key)]
        while current is not None:
            if current.key == key:
                return True
            current = current.next
        return False

    def get_load_factor(self) -> float:
        """
        Get current load factor of the hash table.

        Returns:
            Load factor (count / size)
        """
        return self.count / self.size if self.size > 0 else 0.0

    def get_all_items(self) -> List[Tuple[int, Any]]:
        """
        Get all key-value pairs in the hash table.

        Returns:
            List of (key, value) tuples in bucket/chain order
        """
        items = []
        for bucket in self.buckets:
            current = bucket
            while current is not None:
                items.append((current.key, current.value))
                current = current.next
        return items

    def __len__(self) -> int:
        """Return the number of elements in the hash table."""
        return self.count

    def __contains__(self, key: int) -> bool:
        """Check if key exists in hash table using the 'in' operator."""
        return self.contains(key)

    def __repr__(self) -> str:
        """String representation of the hash table."""
        items = self.get_all_items()
        return f"HashTable(size={self.size}, count={self.count}, load_factor={self.get_load_factor():.2f}, items={items})"


# ======================================================================
# src/quicksort.py (header — functions follow)
# ======================================================================
"""
Randomized Quicksort Implementation

This module provides a randomized quicksort algorithm implementation
along with utilities for performance analysis and comparison.
"""

import random
from typing import Any, Callable, List, Optional, Tuple
import time
# Re-declared here so this section stands alone; Optional/Any were missing
# from the original quicksort imports.
from typing import Any, Callable, List, Optional, Tuple


def randomized_quicksort(arr: List[int], low: Optional[int] = None,
                         high: Optional[int] = None) -> List[int]:
    """
    Return a sorted copy of ``arr`` using randomized quicksort.

    The caller's list is never mutated; sorting happens on an internal copy.

    Time Complexity:
        - Average/Best: O(n log n)
        - Worst: O(n²) (made unlikely by random pivot selection)

    Space Complexity: O(log n) average recursion depth, plus the O(n) copy.

    Args:
        arr: List of integers to sort
        low: Starting index (default: 0)
        high: Ending index, inclusive (default: len(arr) - 1)

    Returns:
        Sorted list of integers
    """
    if low is None:
        low = 0
    if high is None:
        high = len(arr) - 1

    arr = arr.copy()  # keep the caller's list intact

    def _quicksort(a: List[int], lo: int, hi: int) -> None:
        """Recursively sort a[lo..hi] in place."""
        if lo < hi:
            pivot_idx = randomized_partition(a, lo, hi)
            _quicksort(a, lo, pivot_idx - 1)
            _quicksort(a, pivot_idx + 1, hi)

    _quicksort(arr, low, high)
    return arr


def randomized_partition(arr: List[int], low: int, high: int) -> int:
    """
    Partition the array using a randomly chosen pivot element.

    Randomization avoids the deterministic worst-case O(n²) behavior on
    already-sorted (or adversarial) input.

    Args:
        arr: List to partition (modified in place)
        low: Starting index
        high: Ending index

    Returns:
        Final position of the pivot element
    """
    # Move a random element into the pivot slot, then reuse the
    # deterministic Lomuto partition.
    random_idx = random.randint(low, high)
    arr[random_idx], arr[high] = arr[high], arr[random_idx]
    return partition(arr, low, high)


def partition(arr: List[int], low: int, high: int) -> int:
    """
    Lomuto partition around the pivot at arr[high].

    Elements <= pivot end up left of it, elements >= pivot right of it.

    Args:
        arr: List to partition (modified in place)
        low: Starting index
        high: Ending index (pivot position)

    Returns:
        Final position of the pivot element
    """
    pivot = arr[high]
    i = low - 1  # boundary of the "<= pivot" region

    for j in range(low, high):
        if arr[j] <= pivot:
            i += 1
            arr[i], arr[j] = arr[j], arr[i]

    # Drop the pivot into its final slot.
    arr[i + 1], arr[high] = arr[high], arr[i + 1]
    return i + 1


def measure_time(func: Callable, *args, **kwargs) -> Tuple[float, Any]:
    """
    Measure the wall-clock execution time of a single function call.

    Args:
        func: Function to measure
        *args: Positional arguments for the function
        **kwargs: Keyword arguments for the function

    Returns:
        Tuple of (execution_time_in_seconds, function_result)
    """
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    end_time = time.perf_counter()
    return end_time - start_time, result


def compare_with_builtin(arr: List[int]) -> dict:
    """
    Compare randomized quicksort with Python's built-in sort.

    Args:
        arr: List to sort (not mutated; both sorters copy)

    Returns:
        Dict with keys 'quicksort_time', 'builtin_time', 'speedup',
        'is_correct', 'array_length'. Note that 'speedup' is
        quicksort_time / builtin_time, so values > 1 mean quicksort
        was *slower* than the built-in sort.
    """
    quicksort_time, sorted_quicksort = measure_time(randomized_quicksort, arr)
    builtin_time, sorted_builtin = measure_time(sorted, arr)

    is_correct = sorted_quicksort == sorted_builtin

    return {
        'quicksort_time': quicksort_time,
        'builtin_time': builtin_time,
        'speedup': quicksort_time / builtin_time if builtin_time > 0 else float('inf'),
        'is_correct': is_correct,
        'array_length': len(arr)
    }


def analyze_performance(array_sizes: Optional[List[int]] = None) -> List[dict]:
    """
    Analyze quicksort performance across different array sizes.

    Args:
        array_sizes: Sizes to test (default: [100, 1000, 10000, 100000])

    Returns:
        One compare_with_builtin() result dict per size
    """
    if array_sizes is None:
        array_sizes = [100, 1000, 10000, 100000]

    results = []
    for size in array_sizes:
        test_array = [random.randint(1, 1000000) for _ in range(size)]
        results.append(compare_with_builtin(test_array))
    return results
# ======================================================================
# tests/__init__.py
# ======================================================================
"""
Test suite for MSCS532 Assignment 3 implementations.
"""

# ======================================================================
# tests/test_hash_table.py
# ======================================================================
"""
Unit tests for Hash Table with Chaining implementation.
"""

import unittest
from src.hash_table import HashTable, HashNode


class TestHashTable(unittest.TestCase):
    """Test cases for hash table with chaining."""

    def test_initialization(self):
        """A fresh table reports its configured size and is empty."""
        table = HashTable(initial_size=16)
        self.assertEqual(table.size, 16)
        self.assertEqual(len(table), 0)
        self.assertEqual(table.get_load_factor(), 0.0)

    def test_insert_and_get(self):
        """Inserted pairs come back via get()."""
        table = HashTable()
        for key, fruit in ((1, "apple"), (2, "banana")):
            table.insert(key, fruit)
        self.assertEqual(table.get(1), "apple")
        self.assertEqual(table.get(2), "banana")
        self.assertEqual(len(table), 2)

    def test_insert_update(self):
        """Re-inserting an existing key overwrites without growing."""
        table = HashTable()
        table.insert(1, "apple")
        table.insert(1, "banana")
        self.assertEqual(table.get(1), "banana")
        self.assertEqual(len(table), 1)

    def test_get_nonexistent_key(self):
        """get() on an absent key yields None."""
        table = HashTable()
        table.insert(1, "apple")
        self.assertIsNone(table.get(2))

    def test_delete_existing_key(self):
        """delete() removes exactly the requested key."""
        table = HashTable()
        table.insert(1, "apple")
        table.insert(2, "banana")
        self.assertTrue(table.delete(1))
        self.assertIsNone(table.get(1))
        self.assertEqual(table.get(2), "banana")
        self.assertEqual(len(table), 1)

    def test_delete_nonexistent_key(self):
        """delete() on an absent key is a no-op returning False."""
        table = HashTable()
        table.insert(1, "apple")
        self.assertFalse(table.delete(2))
        self.assertEqual(len(table), 1)

    def test_contains(self):
        """contains() distinguishes present from absent keys."""
        table = HashTable()
        table.insert(1, "apple")
        self.assertTrue(table.contains(1))
        self.assertFalse(table.contains(2))

    def test_in_operator(self):
        """The 'in' operator delegates to contains()."""
        table = HashTable()
        table.insert(1, "apple")
        self.assertIn(1, table)
        self.assertNotIn(2, table)

    def test_load_factor(self):
        """Load factor grows by 1/size per inserted key."""
        table = HashTable(initial_size=4)
        self.assertEqual(table.get_load_factor(), 0.0)
        expectations = ((1, "a", 0.25), (2, "b", 0.5), (3, "c", 0.75))
        for key, val, expected in expectations:
            table.insert(key, val)
            self.assertEqual(table.get_load_factor(), expected)

    def test_resize(self):
        """Crossing the load-factor threshold doubles the table."""
        table = HashTable(initial_size=4, load_factor_threshold=0.75)
        pairs = ((1, "a"), (2, "b"), (3, "c"), (4, "d"))
        for key, val in pairs:
            # The fourth insert sees 3/4 = 0.75 and triggers the resize.
            table.insert(key, val)
        self.assertEqual(table.size, 8)
        for key, val in pairs:
            self.assertEqual(table.get(key), val)
        self.assertEqual(len(table), 4)

    def test_get_all_items(self):
        """get_all_items() returns every stored pair exactly once."""
        table = HashTable()
        expected = {1: "apple", 2: "banana", 3: "cherry"}
        for key, val in expected.items():
            table.insert(key, val)
        items = table.get_all_items()
        self.assertEqual(len(items), 3)
        self.assertEqual(dict(items), expected)

    def test_collision_handling(self):
        """Keys forced into shared buckets all stay retrievable."""
        table = HashTable(initial_size=5)
        colliders = [1, 6, 11, 16, 21]
        for key in colliders:
            table.insert(key, f"value_{key}")
        for key in colliders:
            self.assertEqual(table.get(key), f"value_{key}")
        self.assertEqual(len(table), len(colliders))

    def test_delete_from_chain(self):
        """Deleting a mid-chain node leaves its neighbours intact."""
        table = HashTable(initial_size=5)
        for key in (1, 6, 11):
            table.insert(key, f"value_{key}")
        self.assertTrue(table.delete(6))
        self.assertEqual(table.get(1), "value_1")
        self.assertIsNone(table.get(6))
        self.assertEqual(table.get(11), "value_11")
        self.assertEqual(len(table), 2)

    def test_len(self):
        """__len__ tracks inserts and deletes."""
        table = HashTable()
        self.assertEqual(len(table), 0)
        table.insert(1, "a")
        self.assertEqual(len(table), 1)
        table.insert(2, "b")
        self.assertEqual(len(table), 2)
        table.delete(1)
        self.assertEqual(len(table), 1)

    def test_multiple_operations(self):
        """A mixed insert/update/delete sequence ends in the right state."""
        table = HashTable()
        table.insert(1, "one")
        table.insert(2, "two")
        table.insert(3, "three")
        table.insert(2, "TWO")   # update
        table.delete(1)          # delete
        self.assertIsNone(table.get(1))
        self.assertEqual(table.get(2), "TWO")
        self.assertEqual(table.get(3), "three")
        self.assertEqual(len(table), 2)

    def test_empty_hash_table(self):
        """Every query on an empty table reports emptiness."""
        table = HashTable()
        self.assertIsNone(table.get(1))
        self.assertFalse(table.contains(1))
        self.assertFalse(table.delete(1))
        self.assertEqual(table.get_all_items(), [])
        self.assertEqual(len(table), 0)


if __name__ == '__main__':
    unittest.main()

# ======================================================================
# tests/test_quicksort.py
# ======================================================================
"""
Unit tests for Randomized Quicksort implementation.
"""

import unittest
import random
from src.quicksort import (
    randomized_quicksort,
    partition,
    randomized_partition,
    compare_with_builtin,
    analyze_performance
)


class TestRandomizedQuicksort(unittest.TestCase):
    """Test cases for randomized quicksort algorithm."""

    def _check_sorts(self, data):
        """Assert that quicksort agrees with sorted() on *data*."""
        self.assertEqual(randomized_quicksort(data), sorted(data))

    def test_empty_array(self):
        """Sorting an empty array yields an empty array."""
        self._check_sorts([])

    def test_single_element(self):
        """A one-element array is returned unchanged."""
        self._check_sorts([42])

    def test_sorted_array(self):
        """An already sorted array stays sorted."""
        self._check_sorts([1, 2, 3, 4, 5])

    def test_reverse_sorted_array(self):
        """A descending array comes back ascending."""
        self._check_sorts([5, 4, 3, 2, 1])

    def test_random_array(self):
        """An arbitrary unsorted array is sorted correctly."""
        self._check_sorts([64, 34, 25, 12, 22, 11, 90, 5])

    def test_duplicate_elements(self):
        """Duplicates are kept and ordered correctly."""
        self._check_sorts([3, 1, 4, 1, 5, 9, 2, 6, 5, 3])

    def test_negative_numbers(self):
        """Negative values sort below zero and positives."""
        self._check_sorts([-5, -2, -8, 1, 3, -1, 0])

    def test_large_array(self):
        """A 1000-element random array is sorted correctly."""
        self._check_sorts([random.randint(1, 10000) for _ in range(1000)])

    def test_original_array_not_modified(self):
        """Sorting must not mutate the caller's list."""
        data = [64, 34, 25, 12, 22, 11, 90, 5]
        snapshot = list(data)
        randomized_quicksort(data)
        self.assertEqual(data, snapshot)

    def test_all_same_elements(self):
        """An array of identical values is returned unchanged."""
        self._check_sorts([5, 5, 5, 5, 5])


class TestPartition(unittest.TestCase):
    """Test cases for partition functions."""

    def _assert_partitioned(self, data, pivot_idx):
        """Everything left of the pivot is <= it; everything right is >= it."""
        pivot = data[pivot_idx]
        for value in data[:pivot_idx]:
            self.assertLessEqual(value, pivot)
        for value in data[pivot_idx + 1:]:
            self.assertGreaterEqual(value, pivot)

    def test_partition(self):
        """Deterministic partition places its pivot correctly."""
        data = [64, 34, 25, 12, 22, 11, 90, 5]
        self._assert_partitioned(data, partition(data, 0, len(data) - 1))

    def test_randomized_partition(self):
        """Randomized partition also satisfies the partition invariant."""
        data = [64, 34, 25, 12, 22, 11, 90, 5]
        self._assert_partitioned(data, randomized_partition(data, 0, len(data) - 1))


class TestPerformanceComparison(unittest.TestCase):
    """Test cases for performance comparison utilities."""

    def test_compare_with_builtin(self):
        """The comparison report carries all fields with sane values."""
        data = [random.randint(1, 1000) for _ in range(100)]
        report = compare_with_builtin(data)
        for field in ('quicksort_time', 'builtin_time', 'speedup',
                      'is_correct', 'array_length'):
            self.assertIn(field, report)
        self.assertTrue(report['is_correct'])
        self.assertEqual(report['array_length'], 100)
        self.assertGreater(report['quicksort_time'], 0)
        self.assertGreater(report['builtin_time'], 0)

    def test_analyze_performance(self):
        """One well-formed report is produced per requested size."""
        reports = analyze_performance([100, 1000])
        self.assertEqual(len(reports), 2)
        for report in reports:
            for field in ('quicksort_time', 'builtin_time', 'is_correct'):
                self.assertIn(field, report)
            self.assertTrue(report['is_correct'])


if __name__ == '__main__':
    unittest.main()