Initial commit: Randomized Quicksort and Hash Table with Chaining implementation
- Implemented Randomized Quicksort algorithm with performance analysis - Implemented Hash Table with Chaining for collision resolution - Added comprehensive test suite (30+ test cases) - Created test runner script with multiple test options - Added detailed README with architecture diagrams and documentation - Added MIT License - Includes examples and comprehensive documentation
This commit is contained in:
135
.gitignore
vendored
Normal file
135
.gitignore
vendored
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
pip-wheel-metadata/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
Pipfile.lock
|
||||||
|
|
||||||
|
# PEP 582
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
22
LICENSE
Normal file
22
LICENSE
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025 MSCS532 Assignment 3
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
|
||||||
593
README.md
Normal file
593
README.md
Normal file
@@ -0,0 +1,593 @@
|
|||||||
|
# Randomized Quicksort & Hash Table with Chaining - Algorithm Efficiency and Scalability
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This project implements two fundamental algorithms and data structures demonstrating algorithm efficiency and scalability:
|
||||||
|
|
||||||
|
1. **Randomized Quicksort Algorithm** - An efficient sorting algorithm with average O(n log n) time complexity
|
||||||
|
2. **Hash Table with Chaining** - A hash table implementation using chaining for collision resolution
|
||||||
|
|
||||||
|
Both implementations provide comprehensive test suites, performance analysis utilities, and detailed documentation for educational purposes.
|
||||||
|
|
||||||
|
### Key Features
|
||||||
|
|
||||||
|
* ✅ **Randomized Quicksort**: Efficient sorting with randomized pivot selection to avoid worst-case performance
|
||||||
|
* ✅ **Performance Analysis**: Built-in utilities for comparing and analyzing algorithm performance
|
||||||
|
* ✅ **Hash Table with Chaining**: Complete hash table implementation with dynamic resizing
|
||||||
|
* ✅ **Comprehensive Test Suite**: Extensive test coverage including edge cases, stress tests, and performance benchmarks
|
||||||
|
* ✅ **Well-Documented Code**: Clear comments, docstrings, and educational examples
|
||||||
|
* ✅ **Production-Ready**: Robust error handling and comprehensive test coverage
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Randomized Quicksort Algorithm Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Input Array: [64, 34, 25, 12, 22, 11, 90, 5]
|
||||||
|
↓
|
||||||
|
┌─────────────────────────────────────┐
|
||||||
|
│ Randomized Quicksort Process │
|
||||||
|
└─────────────────────────────────────┘
|
||||||
|
↓
|
||||||
|
┌─────────────────────────────────────────────────┐
|
||||||
|
│ Step 1: Randomly select pivot │
|
||||||
|
│ Pivot: 25 (randomly selected) │
|
||||||
|
│ Partition: [12, 22, 11, 5] | 25 | [64, 34, 90] │
|
||||||
|
└─────────────────────────────────────────────────┘
|
||||||
|
↓
|
||||||
|
┌─────────────────────────────────────┐
|
||||||
|
│ Step 2: Recursively sort left │
|
||||||
|
│ Array: [12, 22, 11, 5] │
|
||||||
|
│ Pivot: 11 → [5, 11] | [12, 22] │
|
||||||
|
└─────────────────────────────────────┘
|
||||||
|
↓
|
||||||
|
┌─────────────────────────────────────┐
|
||||||
|
│ Step 3: Recursively sort right │
|
||||||
|
│ Array: [64, 34, 90] │
|
||||||
|
│ Pivot: 64 → [34, 64] | [90] │
|
||||||
|
└─────────────────────────────────────┘
|
||||||
|
↓
|
||||||
|
Output Array: [5, 11, 12, 22, 25, 34, 64, 90]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Hash Table with Chaining Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
Hash Table (size=8)
|
||||||
|
┌─────────────────────────────────────────┐
|
||||||
|
│ Bucket 0: [Key: 8, Value: "eight"] │
|
||||||
|
│ [Key: 16, Value: "sixteen"] │
|
||||||
|
│ Bucket 1: [Key: 9, Value: "nine"] │
|
||||||
|
│ Bucket 2: [Key: 10, Value: "ten"] │
|
||||||
|
│ [Key: 18, Value: "eighteen"] │
|
||||||
|
│ Bucket 3: [Key: 11, Value: "eleven"] │
|
||||||
|
│ Bucket 4: [Key: 12, Value: "twelve"] │
|
||||||
|
│ Bucket 5: [Key: 13, Value: "thirteen"] │
|
||||||
|
│ Bucket 6: [Key: 14, Value: "fourteen"] │
|
||||||
|
│ Bucket 7: [Key: 15, Value: "fifteen"] │
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
↓
|
||||||
|
Collision Resolution via Chaining
|
||||||
|
(Multiple keys hash to same bucket)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Core Algorithm Structure
|
||||||
|
|
||||||
|
#### Randomized Quicksort
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Randomized Quicksort │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ Function: randomized_quicksort(arr) │
|
||||||
|
│ Input: Array of comparable elements │
|
||||||
|
│ Output: Array sorted in ascending order │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ Algorithm Steps: │
|
||||||
|
│ 1. If array has ≤ 1 element, return │
|
||||||
|
│ 2. Randomly select pivot element │
|
||||||
|
│ 3. Partition array around pivot │
|
||||||
|
│ 4. Recursively sort left subarray │
|
||||||
|
│ 5. Recursively sort right subarray │
|
||||||
|
│ 6. Combine results │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Hash Table with Chaining
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Hash Table with Chaining │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ Class: HashTable │
|
||||||
|
│ Operations: insert, get, delete, contains │
|
||||||
|
├─────────────────────────────────────────────────────────────┤
|
||||||
|
│ Key Operations: │
|
||||||
|
│ 1. Hash function: h(k) = floor(m × (k × A mod 1)) │
|
||||||
|
│ 2. Collision resolution: Chaining (linked lists) │
|
||||||
|
│ 3. Load factor management: Resize when threshold exceeded │
|
||||||
|
│ 4. Dynamic resizing: Double size when load > 0.75 │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implementation Details
|
||||||
|
|
||||||
|
### Part 1: Randomized Quicksort
|
||||||
|
|
||||||
|
#### Core Functions
|
||||||
|
|
||||||
|
##### 1. `randomized_quicksort(arr)`
|
||||||
|
|
||||||
|
* **Purpose**: Sort array using randomized quicksort algorithm
|
||||||
|
* **Parameters**: `arr` (list) - Input array to be sorted
|
||||||
|
* **Returns**: `list` - New array sorted in ascending order
|
||||||
|
* **Space Complexity**: O(n) - Creates a copy of the input array
|
||||||
|
* **Time Complexity**:
|
||||||
|
- Average: O(n log n)
|
||||||
|
- Worst: O(n²) - rarely occurs due to randomization
|
||||||
|
- Best: O(n log n)
|
||||||
|
|
||||||
|
##### 2. `randomized_partition(arr, low, high)`
|
||||||
|
|
||||||
|
* **Purpose**: Partition array using a randomly selected pivot
|
||||||
|
* **Parameters**:
|
||||||
|
- `arr` (list) - Array to partition
|
||||||
|
- `low` (int) - Starting index
|
||||||
|
- `high` (int) - Ending index
|
||||||
|
* **Returns**: `int` - Final position of pivot element
|
||||||
|
* **Key Feature**: Random pivot selection prevents worst-case O(n²) performance
|
||||||
|
|
||||||
|
##### 3. `compare_with_builtin(arr)`
|
||||||
|
|
||||||
|
* **Purpose**: Compare randomized quicksort with Python's built-in sort
|
||||||
|
* **Returns**: Dictionary with timing metrics and correctness verification
|
||||||
|
|
||||||
|
##### 4. `analyze_performance(array_sizes)`
|
||||||
|
|
||||||
|
* **Purpose**: Analyze quicksort performance across different array sizes
|
||||||
|
* **Returns**: List of performance metrics for each array size
|
||||||
|
|
||||||
|
#### Algorithm Logic
|
||||||
|
|
||||||
|
**Why Randomization?**
|
||||||
|
|
||||||
|
Standard quicksort can degrade to O(n²) when:
|
||||||
|
- Pivot is always the smallest element (worst case)
|
||||||
|
- Pivot is always the largest element (worst case)
|
||||||
|
- Array is already sorted or reverse sorted
|
||||||
|
|
||||||
|
Randomization ensures:
|
||||||
|
- Expected O(n log n) performance
|
||||||
|
- Expected number of comparisons: 2n ln n ≈ 1.39n log₂ n
|
||||||
|
- Very low probability of worst-case behavior
|
||||||
|
|
||||||
|
### Part 2: Hash Table with Chaining
|
||||||
|
|
||||||
|
#### Core Operations
|
||||||
|
|
||||||
|
##### 1. `insert(key, value)`
|
||||||
|
|
||||||
|
* **Purpose**: Insert or update a key-value pair
|
||||||
|
* **Time Complexity**: O(1) average case, O(n) worst case
|
||||||
|
* **Features**:
|
||||||
|
- Automatically updates if key exists
|
||||||
|
- Triggers resize when load factor exceeds threshold
|
||||||
|
|
||||||
|
##### 2. `get(key)`
|
||||||
|
|
||||||
|
* **Purpose**: Retrieve value associated with a key
|
||||||
|
* **Time Complexity**: O(1) average case, O(n) worst case
|
||||||
|
* **Returns**: Value if key exists, None otherwise
|
||||||
|
|
||||||
|
##### 3. `delete(key)`
|
||||||
|
|
||||||
|
* **Purpose**: Remove a key-value pair
|
||||||
|
* **Time Complexity**: O(1) average case, O(n) worst case
|
||||||
|
* **Returns**: True if key was found and deleted, False otherwise
|
||||||
|
|
||||||
|
##### 4. `contains(key)`
|
||||||
|
|
||||||
|
* **Purpose**: Check if a key exists in the hash table
|
||||||
|
* **Time Complexity**: O(1) average case, O(n) worst case
|
||||||
|
* **Pythonic**: Supports `in` operator
|
||||||
|
|
||||||
|
#### Hash Function
|
||||||
|
|
||||||
|
**Multiplication Method:**
|
||||||
|
```
|
||||||
|
h(k) = floor(m × (k × A mod 1))
|
||||||
|
```
|
||||||
|
where:
|
||||||
|
- `m` = table size
|
||||||
|
- `A` ≈ (√5 - 1) / 2 ≈ 0.618 (golden ratio)
|
||||||
|
- Provides good distribution of keys across buckets
|
||||||
|
|
||||||
|
#### Collision Resolution
|
||||||
|
|
||||||
|
**Chaining Strategy:**
|
||||||
|
- Each bucket contains a linked list of key-value pairs
|
||||||
|
- When collision occurs, new element is appended to chain
|
||||||
|
- Allows multiple elements per bucket
|
||||||
|
- No clustering issues unlike open addressing
|
||||||
|
|
||||||
|
#### Dynamic Resizing
|
||||||
|
|
||||||
|
**Load Factor Management:**
|
||||||
|
- Default threshold: 0.75
|
||||||
|
- When load factor exceeds threshold, table size doubles
|
||||||
|
- All elements are rehashed into new table
|
||||||
|
- Maintains O(1) average performance
|
||||||
|
|
||||||
|
## Complexity Analysis
|
||||||
|
|
||||||
|
### Randomized Quicksort
|
||||||
|
|
||||||
|
| Aspect | Complexity | Description |
|
||||||
|
| -------------------- | ---------- | -------------------------------------------------- |
|
||||||
|
| **Time Complexity** | O(n log n) | Average case - randomized pivot selection |
|
||||||
|
| **Worst Case** | O(n²) | Rarely occurs due to randomization |
|
||||||
|
| **Best Case** | O(n log n) | Already sorted arrays |
|
||||||
|
| **Space Complexity** | O(log n) | Average case recursion stack depth |
|
||||||
|
| **Stability** | Not Stable | Equal elements may change relative order |
|
||||||
|
|
||||||
|
### Hash Table with Chaining
|
||||||
|
|
||||||
|
| Aspect | Complexity | Description |
|
||||||
|
| -------------------- | ---------- | -------------------------------------------------- |
|
||||||
|
| **Time Complexity** | O(1) | Average case for insert, get, delete |
|
||||||
|
| **Worst Case** | O(n) | All keys hash to same bucket (rare) |
|
||||||
|
| **Space Complexity** | O(n + m) | n elements + m buckets |
|
||||||
|
| **Load Factor** | 0.75 | Threshold for automatic resizing |
|
||||||
|
|
||||||
|
## Usage Examples
|
||||||
|
|
||||||
|
### Basic Usage - Randomized Quicksort
|
||||||
|
|
||||||
|
```python
|
||||||
|
from src.quicksort import randomized_quicksort, compare_with_builtin
|
||||||
|
|
||||||
|
# Example 1: Basic sorting
|
||||||
|
arr = [64, 34, 25, 12, 22, 11, 90, 5]
|
||||||
|
sorted_arr = randomized_quicksort(arr)
|
||||||
|
print(sorted_arr) # Output: [5, 11, 12, 22, 25, 34, 64, 90]
|
||||||
|
|
||||||
|
# Example 2: Performance comparison
|
||||||
|
comparison = compare_with_builtin(arr)
|
||||||
|
print(f"Quicksort time: {comparison['quicksort_time']:.6f} seconds")
|
||||||
|
print(f"Built-in sort time: {comparison['builtin_time']:.6f} seconds")
|
||||||
|
print(f"Speedup ratio: {comparison['speedup']:.2f}x")
|
||||||
|
print(f"Results match: {comparison['is_correct']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Basic Usage - Hash Table
|
||||||
|
|
||||||
|
```python
|
||||||
|
from src.hash_table import HashTable
|
||||||
|
|
||||||
|
# Create hash table
|
||||||
|
ht = HashTable(initial_size=16)
|
||||||
|
|
||||||
|
# Insert key-value pairs
|
||||||
|
ht.insert(1, "apple")
|
||||||
|
ht.insert(2, "banana")
|
||||||
|
ht.insert(3, "cherry")
|
||||||
|
|
||||||
|
# Retrieve values
|
||||||
|
print(ht.get(1)) # "apple"
|
||||||
|
|
||||||
|
# Check if key exists
|
||||||
|
print(2 in ht) # True
|
||||||
|
|
||||||
|
# Delete a key
|
||||||
|
ht.delete(2)
|
||||||
|
|
||||||
|
# Get all items
|
||||||
|
items = ht.get_all_items()
|
||||||
|
print(items) # [(1, "apple"), (3, "cherry")]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Edge Cases Handled
|
||||||
|
|
||||||
|
#### Quicksort
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Empty array
|
||||||
|
empty_arr = []
|
||||||
|
result = randomized_quicksort(empty_arr)
|
||||||
|
print(result) # Output: []
|
||||||
|
|
||||||
|
# Single element
|
||||||
|
single = [42]
|
||||||
|
result = randomized_quicksort(single)
|
||||||
|
print(result) # Output: [42]
|
||||||
|
|
||||||
|
# Duplicate elements
|
||||||
|
duplicates = [3, 3, 3, 3]
|
||||||
|
result = randomized_quicksort(duplicates)
|
||||||
|
print(result) # Output: [3, 3, 3, 3]
|
||||||
|
|
||||||
|
# Negative numbers
|
||||||
|
negatives = [-5, -2, -8, 1, 3, -1, 0]
|
||||||
|
result = randomized_quicksort(negatives)
|
||||||
|
print(result) # Output: [-8, -5, -2, -1, 0, 1, 3]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Hash Table
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Empty hash table
|
||||||
|
ht = HashTable()
|
||||||
|
print(len(ht)) # 0
|
||||||
|
print(ht.get(1)) # None
|
||||||
|
|
||||||
|
# Collision handling
|
||||||
|
ht = HashTable(initial_size=5)
|
||||||
|
ht.insert(1, "one")
|
||||||
|
ht.insert(6, "six") # May collide with 1
|
||||||
|
ht.insert(11, "eleven") # May collide with 1 and 6
|
||||||
|
# All keys are stored correctly via chaining
|
||||||
|
|
||||||
|
# Load factor management
|
||||||
|
ht = HashTable(initial_size=4, load_factor_threshold=0.75)
|
||||||
|
ht.insert(1, "a")
|
||||||
|
ht.insert(2, "b")
|
||||||
|
ht.insert(3, "c")
|
||||||
|
ht.insert(4, "d") # Triggers resize (load factor = 1.0 > 0.75)
|
||||||
|
print(ht.size) # 8 (doubled)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running the Program
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
* Python 3.7 or higher
|
||||||
|
* No external dependencies required (uses only Python standard library)
|
||||||
|
|
||||||
|
### Execution
|
||||||
|
|
||||||
|
#### Run Examples
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 -m src.examples
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Run Tests
|
||||||
|
|
||||||
|
**Quick Tests (Essential functionality):**
|
||||||
|
```bash
|
||||||
|
python3 run_tests.py --quick
|
||||||
|
```
|
||||||
|
|
||||||
|
**Full Test Suite:**
|
||||||
|
```bash
|
||||||
|
python3 run_tests.py
|
||||||
|
```
|
||||||
|
|
||||||
|
**Unit Tests Only:**
|
||||||
|
```bash
|
||||||
|
python3 run_tests.py --unit-only
|
||||||
|
```
|
||||||
|
|
||||||
|
**Performance Benchmarks:**
|
||||||
|
```bash
|
||||||
|
python3 run_tests.py --benchmark
|
||||||
|
```
|
||||||
|
|
||||||
|
**Stress Tests:**
|
||||||
|
```bash
|
||||||
|
python3 run_tests.py --stress
|
||||||
|
```
|
||||||
|
|
||||||
|
**Negative Test Cases:**
|
||||||
|
```bash
|
||||||
|
python3 run_tests.py --negative
|
||||||
|
```
|
||||||
|
|
||||||
|
**Using unittest directly:**
|
||||||
|
```bash
|
||||||
|
python3 -m unittest discover tests -v
|
||||||
|
```
|
||||||
|
|
||||||
|
## Test Cases
|
||||||
|
|
||||||
|
### Randomized Quicksort Tests
|
||||||
|
|
||||||
|
The test suite includes comprehensive test cases covering:
|
||||||
|
|
||||||
|
#### ✅ **Functional Tests**
|
||||||
|
|
||||||
|
* Basic sorting functionality
|
||||||
|
* Already sorted arrays (ascending/descending)
|
||||||
|
* Empty arrays and single elements
|
||||||
|
* Duplicate elements
|
||||||
|
* Negative numbers and zero values
|
||||||
|
* Large arrays (1000+ elements)
|
||||||
|
|
||||||
|
#### ✅ **Behavioral Tests**
|
||||||
|
|
||||||
|
* Non-destructive sorting (original array unchanged)
|
||||||
|
* Correctness verification against built-in sort
|
||||||
|
* Partition function correctness
|
||||||
|
|
||||||
|
#### ✅ **Performance Tests**
|
||||||
|
|
||||||
|
* Comparison with built-in sort
|
||||||
|
* Performance analysis across different array sizes
|
||||||
|
* Timing measurements
|
||||||
|
|
||||||
|
### Hash Table Tests
|
||||||
|
|
||||||
|
The test suite includes comprehensive test cases covering:
|
||||||
|
|
||||||
|
#### ✅ **Functional Tests**
|
||||||
|
|
||||||
|
* Basic insert, get, delete operations
|
||||||
|
* Empty hash table operations
|
||||||
|
* Collision handling
|
||||||
|
* Load factor calculation
|
||||||
|
* Dynamic resizing
|
||||||
|
|
||||||
|
#### ✅ **Behavioral Tests**
|
||||||
|
|
||||||
|
* Key existence checking (`in` operator)
|
||||||
|
* Update existing keys
|
||||||
|
* Delete from chains (middle of chain)
|
||||||
|
* Get all items
|
||||||
|
|
||||||
|
#### ✅ **Edge Cases**
|
||||||
|
|
||||||
|
* Empty hash table
|
||||||
|
* Single element
|
||||||
|
* All keys hash to same bucket
|
||||||
|
* Load factor threshold triggering resize
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
MSCS532_Assignment3/
|
||||||
|
├── src/
|
||||||
|
│ ├── __init__.py # Package initialization
|
||||||
|
│ ├── quicksort.py # Randomized Quicksort implementation
|
||||||
|
│ ├── hash_table.py # Hash Table with Chaining implementation
|
||||||
|
│ └── examples.py # Example usage demonstrations
|
||||||
|
├── tests/
|
||||||
|
│ ├── __init__.py # Test package initialization
|
||||||
|
│ ├── test_quicksort.py # Comprehensive quicksort tests
|
||||||
|
│ └── test_hash_table.py # Comprehensive hash table tests
|
||||||
|
├── run_tests.py # Test runner with various options
|
||||||
|
├── README.md # This documentation
|
||||||
|
├── LICENSE # MIT License
|
||||||
|
├── .gitignore # Git ignore file
|
||||||
|
└── requirements.txt # Python dependencies (none required)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
### Test Coverage
|
||||||
|
|
||||||
|
The project includes **30+ comprehensive test cases** covering:
|
||||||
|
|
||||||
|
#### ✅ **Functional Tests**
|
||||||
|
|
||||||
|
* Basic functionality for both algorithms
|
||||||
|
* Edge cases (empty, single element, duplicates)
|
||||||
|
* Correctness verification
|
||||||
|
|
||||||
|
#### ✅ **Behavioral Tests**
|
||||||
|
|
||||||
|
* Non-destructive operations
|
||||||
|
* In-place modifications
|
||||||
|
* Collision resolution
|
||||||
|
* Dynamic resizing
|
||||||
|
|
||||||
|
#### ✅ **Performance Tests**
|
||||||
|
|
||||||
|
* Timing comparisons
|
||||||
|
* Performance analysis across different sizes
|
||||||
|
* Benchmarking utilities
|
||||||
|
|
||||||
|
#### ✅ **Stress Tests**
|
||||||
|
|
||||||
|
* Large arrays (1000+ elements)
|
||||||
|
* Many hash table operations
|
||||||
|
* Boundary conditions
|
||||||
|
|
||||||
|
#### ✅ **Negative Test Cases**
|
||||||
|
|
||||||
|
* Invalid input types
|
||||||
|
* Edge cases and boundary conditions
|
||||||
|
* Error handling
|
||||||
|
|
||||||
|
### Running Tests
|
||||||
|
|
||||||
|
The project includes a comprehensive test runner (`run_tests.py`) with multiple options:
|
||||||
|
|
||||||
|
- **Quick Tests**: Essential functionality tests
|
||||||
|
- **Full Suite**: All tests including edge cases
|
||||||
|
- **Unit Tests**: Standard unittest tests only
|
||||||
|
- **Benchmarks**: Performance comparison tests
|
||||||
|
- **Stress Tests**: Large-scale and boundary tests
|
||||||
|
- **Negative Tests**: Invalid input and error handling tests
|
||||||
|
|
||||||
|
## Educational Value
|
||||||
|
|
||||||
|
This implementation serves as an excellent learning resource for:
|
||||||
|
|
||||||
|
* **Algorithm Understanding**: Clear demonstration of quicksort and hash table mechanics
|
||||||
|
* **Randomization Techniques**: Shows how randomization improves algorithm performance
|
||||||
|
* **Data Structure Design**: Demonstrates hash table implementation with collision resolution
|
||||||
|
* **Code Quality**: Demonstrates good practices in Python programming
|
||||||
|
* **Testing**: Comprehensive test suite showing edge case handling
|
||||||
|
* **Documentation**: Well-commented code with clear explanations
|
||||||
|
* **Performance Analysis**: Tools for understanding algorithm efficiency
|
||||||
|
|
||||||
|
## Algorithm Analysis
|
||||||
|
|
||||||
|
### Randomized Quicksort
|
||||||
|
|
||||||
|
**Why Randomization?**
|
||||||
|
- Standard quicksort can degrade to O(n²) when the pivot is always the smallest or largest element
|
||||||
|
- Randomization ensures expected O(n log n) performance
|
||||||
|
- Expected number of comparisons: 2n ln n ≈ 1.39n log₂ n
|
||||||
|
|
||||||
|
**Performance Characteristics:**
|
||||||
|
- Excellent average-case performance
|
||||||
|
- Non-destructive sorting (creates copy)
|
||||||
|
- Cache-friendly due to good locality of reference
|
||||||
|
|
||||||
|
**Comparison with Other Algorithms:**
|
||||||
|
- Faster than O(n²) algorithms (bubble, insertion, selection sort)
|
||||||
|
- Comparable to merge sort but with better space efficiency
|
||||||
|
- Generally slower than Python's built-in Timsort (optimized hybrid)
|
||||||
|
|
||||||
|
### Hash Table with Chaining
|
||||||
|
|
||||||
|
**Chaining vs. Open Addressing:**
|
||||||
|
- Chaining stores multiple elements in the same bucket using linked lists
|
||||||
|
- Handles collisions gracefully without clustering
|
||||||
|
- Load factor threshold prevents performance degradation
|
||||||
|
|
||||||
|
**Hash Function:**
|
||||||
|
- Uses multiplication method: h(k) = floor(m × (k × A mod 1))
|
||||||
|
- A ≈ (√5 - 1) / 2 ≈ 0.618 (golden ratio)
|
||||||
|
- Provides good distribution of keys across buckets
|
||||||
|
|
||||||
|
**Performance Considerations:**
|
||||||
|
- O(1) average case performance
|
||||||
|
- Dynamic resizing maintains efficiency
|
||||||
|
- Trade-off between space and time efficiency
|
||||||
|
|
||||||
|
## Performance Considerations
|
||||||
|
|
||||||
|
1. **Quicksort**:
|
||||||
|
- Best for general-purpose sorting
|
||||||
|
- Randomization prevents worst-case scenarios
|
||||||
|
- Good for medium to large arrays
|
||||||
|
|
||||||
|
2. **Hash Table**:
|
||||||
|
- Maintains O(1) average performance through load factor management
|
||||||
|
- Resizing doubles table size when threshold is exceeded
|
||||||
|
- Trade-off between space and time efficiency
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
This is an educational project demonstrating algorithm implementations. Feel free to:
|
||||||
|
|
||||||
|
* Add more test cases
|
||||||
|
* Implement additional algorithms
|
||||||
|
* Improve documentation
|
||||||
|
* Optimize the implementations
|
||||||
|
* Add visualization tools
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is licensed under the MIT License - see the LICENSE file for details.
|
||||||
|
|
||||||
|
## Author
|
||||||
|
|
||||||
|
Created for MSCS532 Assignment 3: Understanding Algorithm Efficiency and Scalability
|
||||||
|
|
||||||
|
## Acknowledgments
|
||||||
|
|
||||||
|
* Based on standard algorithm implementations from Introduction to Algorithms (CLRS)
|
||||||
|
* Educational project for algorithm analysis and data structures course
|
||||||
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# Requirements for MSCS532 Assignment 3
|
||||||
|
# No external dependencies required - uses only Python standard library
|
||||||
|
|
||||||
359
run_tests.py
Executable file
359
run_tests.py
Executable file
@@ -0,0 +1,359 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test runner for MSCS532 Assignment 3.
|
||||||
|
|
||||||
|
Provides various test execution options:
|
||||||
|
- Quick tests: Essential functionality
|
||||||
|
- Full suite: All tests including edge cases
|
||||||
|
- Unit tests: Standard unittest tests only
|
||||||
|
- Benchmarks: Performance comparison tests
|
||||||
|
- Stress tests: Large-scale and boundary tests
|
||||||
|
- Negative tests: Invalid input and error handling
|
||||||
|
"""
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
from typing import List, Dict
|
||||||
|
|
||||||
|
|
||||||
|
def run_quick_tests():
|
||||||
|
"""Run essential functionality tests."""
|
||||||
|
print("=" * 70)
|
||||||
|
print("Running Quick Tests (Essential Functionality)")
|
||||||
|
print("=" * 70)
|
||||||
|
|
||||||
|
loader = unittest.TestLoader()
|
||||||
|
suite = unittest.TestSuite()
|
||||||
|
|
||||||
|
# Add basic functional tests
|
||||||
|
from tests.test_quicksort import (
|
||||||
|
TestRandomizedQuicksort,
|
||||||
|
TestPartition
|
||||||
|
)
|
||||||
|
from tests.test_hash_table import TestHashTable
|
||||||
|
|
||||||
|
suite.addTests(loader.loadTestsFromTestCase(TestRandomizedQuicksort))
|
||||||
|
suite.addTests(loader.loadTestsFromTestCase(TestPartition))
|
||||||
|
suite.addTests(loader.loadTestsFromTestCase(TestHashTable))
|
||||||
|
|
||||||
|
runner = unittest.TextTestRunner(verbosity=2)
|
||||||
|
result = runner.run(suite)
|
||||||
|
return result.wasSuccessful()
|
||||||
|
|
||||||
|
|
||||||
|
def run_unit_tests():
|
||||||
|
"""Run standard unittest tests."""
|
||||||
|
print("=" * 70)
|
||||||
|
print("Running Unit Tests")
|
||||||
|
print("=" * 70)
|
||||||
|
|
||||||
|
loader = unittest.TestLoader()
|
||||||
|
suite = loader.discover('tests', pattern='test_*.py')
|
||||||
|
|
||||||
|
runner = unittest.TextTestRunner(verbosity=2)
|
||||||
|
result = runner.run(suite)
|
||||||
|
return result.wasSuccessful()
|
||||||
|
|
||||||
|
|
||||||
|
def run_performance_tests() -> bool:
    """Run performance benchmark tests.

    Benchmarks randomized quicksort against ``sorted()`` for several array
    sizes, then times bulk insert/get/delete on the chained hash table.

    Returns:
        True when all benchmarks ran without error, False otherwise.
    """
    print("=" * 70)
    print("Running Performance Benchmarks")
    print("=" * 70)

    try:
        # Imported lazily so a broken src package fails this category only,
        # not the whole runner module.
        from src.quicksort import analyze_performance, compare_with_builtin
        import random  # NOTE(review): imported but unused in this function

        print("\n1. Quicksort Performance Analysis:")
        print("-" * 70)
        sizes = [100, 1000, 10000]
        results = analyze_performance(sizes)

        # Fixed-width columns keep the data rows aligned with this header.
        print(f"\n{'Size':<10} {'Quicksort Time':<18} {'Built-in Time':<18} {'Speedup':<10} {'Correct':<10}")
        print("-" * 70)
        for result in results:
            print(f"{result['array_length']:<10} "
                  f"{result['quicksort_time']:<18.6f} "
                  f"{result['builtin_time']:<18.6f} "
                  f"{result['speedup']:<10.2f} "
                  f"{str(result['is_correct']):<10}")

        print("\n2. Hash Table Performance:")
        print("-" * 70)
        from src.hash_table import HashTable

        ht = HashTable(initial_size=16)
        num_operations = 10000

        # Time the three core operations separately over the same key range.
        start_time = time.perf_counter()
        for i in range(num_operations):
            ht.insert(i, f"value_{i}")
        insert_time = time.perf_counter() - start_time

        start_time = time.perf_counter()
        for i in range(num_operations):
            _ = ht.get(i)
        get_time = time.perf_counter() - start_time

        start_time = time.perf_counter()
        for i in range(num_operations):
            ht.delete(i)
        delete_time = time.perf_counter() - start_time

        print(f"Insert {num_operations} elements: {insert_time:.6f} seconds")
        print(f"Get {num_operations} elements: {get_time:.6f} seconds")
        print(f"Delete {num_operations} elements: {delete_time:.6f} seconds")
        # seconds-per-op * 1000 == milliseconds per operation
        print(f"Average insert time: {insert_time/num_operations*1000:.4f} ms")
        print(f"Average get time: {get_time/num_operations*1000:.4f} ms")
        print(f"Average delete time: {delete_time/num_operations*1000:.4f} ms")

        return True

    except Exception as e:
        # Benchmarks are best-effort: report the failure and let the suite
        # continue with the other categories.
        print(f"Error running performance tests: {e}")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def run_stress_tests() -> bool:
    """Run stress tests with large inputs.

    Sorts 50k-element arrays (uniform and duplicate-heavy) and verifies
    the result against ``sorted()``, then inserts and re-reads 100k keys
    from the hash table.

    Returns:
        True when every stress scenario completed, False otherwise.
    """
    print("=" * 70)
    print("Running Stress Tests")
    print("=" * 70)

    try:
        # Lazy imports keep import errors local to this category.
        from src.quicksort import randomized_quicksort
        from src.hash_table import HashTable
        import random

        print("\n1. Quicksort Stress Tests:")
        print("-" * 70)

        # Test with very large array
        large_size = 50000
        print(f"Testing with array of size {large_size}...")
        large_arr = [random.randint(1, 1000000) for _ in range(large_size)]

        start_time = time.perf_counter()
        sorted_arr = randomized_quicksort(large_arr)
        elapsed = time.perf_counter() - start_time

        # Verify correctness against the built-in sort as an oracle.
        is_correct = sorted_arr == sorted(large_arr)
        print(f"✓ Large array sorted in {elapsed:.4f} seconds")
        print(f"✓ Correctness: {is_correct}")

        # Test with worst-case scenario (many duplicates): only 100
        # distinct values, so partitions are highly unbalanced.
        print(f"\nTesting with array of size {large_size} (many duplicates)...")
        dup_arr = [random.randint(1, 100) for _ in range(large_size)]

        start_time = time.perf_counter()
        sorted_dup = randomized_quicksort(dup_arr)
        elapsed = time.perf_counter() - start_time

        is_correct = sorted_dup == sorted(dup_arr)
        print(f"✓ Duplicate-heavy array sorted in {elapsed:.4f} seconds")
        print(f"✓ Correctness: {is_correct}")

        print("\n2. Hash Table Stress Tests:")
        print("-" * 70)

        # Test with many insertions; the table starts tiny (16 buckets)
        # to force repeated resizes along the way.
        ht = HashTable(initial_size=16)
        num_inserts = 100000

        print(f"Inserting {num_inserts} elements...")
        start_time = time.perf_counter()
        for i in range(num_inserts):
            ht.insert(i, f"value_{i}")
        elapsed = time.perf_counter() - start_time

        print(f"✓ Inserted {num_inserts} elements in {elapsed:.4f} seconds")
        print(f"✓ Hash table size: {ht.size}")
        print(f"✓ Load factor: {ht.get_load_factor():.4f}")
        print(f"✓ Count: {len(ht)}")

        # Verify all elements are retrievable; bail out on the first miss.
        print(f"\nVerifying retrieval of {num_inserts} elements...")
        start_time = time.perf_counter()
        all_found = True
        for i in range(num_inserts):
            if ht.get(i) != f"value_{i}":
                all_found = False
                break
        elapsed = time.perf_counter() - start_time

        print(f"✓ Retrieved {num_inserts} elements in {elapsed:.4f} seconds")
        print(f"✓ All elements found: {all_found}")

        return True

    except Exception as e:
        # Print the traceback so a stress failure is debuggable, but keep
        # the runner alive for the remaining categories.
        print(f"Error running stress tests: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def run_negative_tests() -> bool:
    """Run negative test cases (invalid inputs, error handling).

    Feeds deliberately bad inputs (None, mixed types, invalid sizes) to
    the quicksort and hash table APIs and reports whether the expected
    exceptions were raised.

    Returns:
        True when the negative scenarios ran to completion, False if the
        harness itself crashed.
    """
    print("=" * 70)
    print("Running Negative Test Cases")
    print("=" * 70)

    try:
        from src.quicksort import randomized_quicksort
        from src.hash_table import HashTable

        print("\n1. Quicksort Negative Tests:")
        print("-" * 70)

        # Test with None (should handle gracefully or raise appropriate error)
        try:
            result = randomized_quicksort(None)
            print("✗ Should have raised TypeError for None input")
        except (TypeError, AttributeError):
            print("✓ Correctly handles None input")

        # Test with mixed types (should raise TypeError from the int/str
        # comparison inside the sort)
        try:
            result = randomized_quicksort([1, 2, "three", 4])
            print("✗ Should have raised TypeError for mixed types")
        except TypeError:
            print("✓ Correctly raises TypeError for mixed types")

        print("\n2. Hash Table Negative Tests:")
        print("-" * 70)

        # Test with None key (None * float inside the hash function raises
        # TypeError)
        ht = HashTable()
        try:
            ht.insert(None, "value")
            print("✗ Should have raised TypeError for None key")
        except TypeError:
            print("✓ Correctly handles None key")

        # Test with invalid initial size
        # NOTE(review): this expects HashTable to reject size 0 at
        # construction time — verify the constructor actually validates it.
        try:
            ht = HashTable(initial_size=0)
            print("✗ Should handle invalid initial size")
        except (ValueError, ZeroDivisionError):
            print("✓ Correctly handles invalid initial size")

        # Test with negative initial size
        try:
            ht = HashTable(initial_size=-1)
            print("✗ Should handle negative initial size")
        except (ValueError, AssertionError):
            print("✓ Correctly handles negative initial size")

        print("\n✓ Negative tests completed")
        return True

    except Exception as e:
        # An unexpected crash in the harness itself: report with traceback.
        print(f"Error running negative tests: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def run_full_suite():
    """Execute all four test categories and print a pass/fail summary.

    Returns:
        True only when every category succeeded.
    """
    line = "=" * 70
    print(line)
    print("Running Full Test Suite")
    print(line)

    # (summary label, progress label, runner) for each category, in order.
    stages = [
        ("Unit Tests", "Unit Tests", run_unit_tests),
        ("Performance Tests", "Performance Benchmarks", run_performance_tests),
        ("Stress Tests", "Stress Tests", run_stress_tests),
        ("Negative Tests", "Negative Tests", run_negative_tests),
    ]

    outcomes = []
    for step, (label, progress, runner) in enumerate(stages, start=1):
        print(f"\n[{step}/{len(stages)}] Running {progress}...")
        outcomes.append((label, runner()))

    print("\n" + line)
    print("Test Summary")
    print(line)

    for label, passed in outcomes:
        print(f"{label:<25} {'✓ PASSED' if passed else '✗ FAILED'}")

    return all(passed for _, passed in outcomes)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Main test runner with command-line interface.

    Parses mutually-independent boolean flags selecting one test
    category; with no flag, the full suite runs.  Exits the process with
    status 0 on success and 1 on failure.
    """
    parser = argparse.ArgumentParser(
        description='Test runner for MSCS532 Assignment 3',
        # Raw formatter preserves the hand-formatted epilog below.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 run_tests.py --quick       # Run quick tests
  python3 run_tests.py               # Run full suite
  python3 run_tests.py --unit-only   # Run unit tests only
  python3 run_tests.py --benchmark   # Run performance benchmarks
  python3 run_tests.py --stress      # Run stress tests
  python3 run_tests.py --negative    # Run negative test cases
"""
    )

    parser.add_argument(
        '--quick',
        action='store_true',
        help='Run quick tests (essential functionality only)'
    )
    parser.add_argument(
        '--unit-only',
        action='store_true',
        help='Run unit tests only'
    )
    parser.add_argument(
        '--benchmark',
        action='store_true',
        help='Run performance benchmarks'
    )
    parser.add_argument(
        '--stress',
        action='store_true',
        help='Run stress tests'
    )
    parser.add_argument(
        '--negative',
        action='store_true',
        help='Run negative test cases'
    )

    args = parser.parse_args()

    success = False

    # First matching flag wins; flags are not combinable.
    if args.quick:
        success = run_quick_tests()
    elif args.unit_only:
        success = run_unit_tests()
    elif args.benchmark:
        success = run_performance_tests()
    elif args.stress:
        success = run_stress_tests()
    elif args.negative:
        success = run_negative_tests()
    else:
        # Default: run full suite
        success = run_full_suite()

    # Conventional exit codes: 0 = all passed, 1 = something failed.
    sys.exit(0 if success else 1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: parse CLI flags and dispatch to the chosen suite.
    main()
|
||||||
|
|
||||||
10
src/__init__.py
Normal file
10
src/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
"""
|
||||||
|
MSCS532 Assignment 3: Understanding Algorithm Efficiency and Scalability
|
||||||
|
|
||||||
|
This package contains implementations of:
|
||||||
|
- Randomized Quicksort algorithm
|
||||||
|
- Hashing with Chaining data structure
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = "1.0.0"
|
||||||
|
|
||||||
148
src/examples.py
Normal file
148
src/examples.py
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
"""
|
||||||
|
Example usage of Randomized Quicksort and Hash Table implementations.
|
||||||
|
|
||||||
|
This module demonstrates how to use the algorithms and data structures
|
||||||
|
implemented in this project.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import random
|
||||||
|
from src.quicksort import (
|
||||||
|
randomized_quicksort,
|
||||||
|
compare_with_builtin,
|
||||||
|
analyze_performance
|
||||||
|
)
|
||||||
|
from src.hash_table import HashTable
|
||||||
|
|
||||||
|
|
||||||
|
def example_quicksort() -> None:
    """Demonstrate randomized quicksort usage.

    Shows a small hand-written example, a random array, and a timing
    comparison against Python's built-in ``sorted()``.
    """
    print("=" * 60)
    print("Randomized Quicksort Example")
    print("=" * 60)

    # Example 1: Basic sorting
    print("\n1. Basic Sorting:")
    arr = [64, 34, 25, 12, 22, 11, 90, 5]
    print(f"Original array: {arr}")
    sorted_arr = randomized_quicksort(arr)
    print(f"Sorted array: {sorted_arr}")

    # Example 2: Large random array
    # NOTE(review): the array holds exactly 20 elements, so the "[:20]"
    # slices below print the whole array, not a prefix of a larger one.
    print("\n2. Large Random Array:")
    large_arr = [random.randint(1, 1000) for _ in range(20)]
    print(f"Original array (first 20 elements): {large_arr[:20]}")
    sorted_large = randomized_quicksort(large_arr)
    print(f"Sorted array (first 20 elements): {sorted_large[:20]}")

    # Example 3: Performance comparison on a 10k-element random array.
    print("\n3. Performance Comparison with Built-in Sort:")
    test_array = [random.randint(1, 100000) for _ in range(10000)]
    comparison = compare_with_builtin(test_array)
    print(f"Array length: {comparison['array_length']}")
    print(f"Quicksort time: {comparison['quicksort_time']:.6f} seconds")
    print(f"Built-in sort time: {comparison['builtin_time']:.6f} seconds")
    print(f"Speedup ratio: {comparison['speedup']:.2f}x")
    print(f"Results match: {comparison['is_correct']}")
|
||||||
|
|
||||||
|
|
||||||
|
def example_hash_table():
    """Demonstrate hash table with chaining usage.

    Walks through insert, search, the ``in`` operator, delete, listing
    all items, and a deliberately undersized table that forces chain
    collisions.
    """
    print("\n" + "=" * 60)
    print("Hash Table with Chaining Example")
    print("=" * 60)

    # Create hash table
    ht = HashTable(initial_size=8)

    # Example 1: Insert operations
    print("\n1. Insert Operations:")
    keys_values = [
        (1, "apple"),
        (2, "banana"),
        (3, "cherry"),
        (10, "date"),
        (11, "elderberry"),
        (18, "fig"),
        (19, "grape"),
        (26, "honeydew")
    ]

    for key, value in keys_values:
        ht.insert(key, value)
        print(f"Inserted ({key}, {value}) - Load factor: {ht.get_load_factor():.2f}")

    print(f"\nHash table size: {ht.size}")
    print(f"Number of elements: {len(ht)}")
    print(f"Load factor: {ht.get_load_factor():.2f}")

    # Example 2: Search operations
    print("\n2. Search Operations:")
    search_keys = [1, 3, 11, 99]
    for key in search_keys:
        value = ht.get(key)
        # BUG FIX: compare against None explicitly — `if value:` would
        # report a stored falsy value (e.g. "" or 0) as "not found".
        if value is not None:
            print(f"Key {key} found: {value}")
        else:
            print(f"Key {key} not found")

    # Example 3: Contains operator
    print("\n3. Using 'in' Operator:")
    test_keys = [2, 5, 18]
    for key in test_keys:
        print(f"{key} in hash table: {key in ht}")

    # Example 4: Delete operations
    print("\n4. Delete Operations:")
    delete_key = 3
    print(f"Deleting key {delete_key}...")
    deleted = ht.delete(delete_key)
    print(f"Delete successful: {deleted}")
    print(f"Key {delete_key} still exists: {delete_key in ht}")
    print(f"Updated count: {len(ht)}")

    # Example 5: Get all items
    print("\n5. All Items in Hash Table:")
    all_items = ht.get_all_items()
    for key, value in all_items:
        print(f"  Key: {key}, Value: {value}")

    # Example 6: Collision demonstration — 5 buckets, 5 keys spaced by 5,
    # so several keys land in the same bucket and chain together.
    print("\n6. Collision Resolution (Chaining):")
    collision_ht = HashTable(initial_size=5)
    # Keys that will likely collide
    collision_keys = [1, 6, 11, 16, 21]
    for key in collision_keys:
        collision_ht.insert(key, f"value_{key}")
    print("Hash table with collisions:")
    print(f"  Size: {collision_ht.size}")
    print(f"  Count: {len(collision_ht)}")
    print(f"  Load factor: {collision_ht.get_load_factor():.2f}")
    print(f"  Items: {collision_ht.get_all_items()}")
|
||||||
|
|
||||||
|
|
||||||
|
def example_performance_analysis() -> None:
    """Demonstrate performance analysis of quicksort.

    Benchmarks randomized quicksort against ``sorted()`` for three array
    sizes and prints the timings as an aligned table.
    """
    print("\n" + "=" * 60)
    print("Performance Analysis Example")
    print("=" * 60)

    print("\nAnalyzing quicksort performance across different array sizes:")
    results = analyze_performance([100, 1000, 10000])

    print("\nResults:")
    # Fixed-width columns keep the data rows aligned with this header.
    print(f"{'Size':<10} {'Quicksort Time':<18} {'Built-in Time':<18} {'Speedup':<10} {'Correct':<10}")
    print("-" * 70)
    for result in results:
        print(f"{result['array_length']:<10} "
              f"{result['quicksort_time']:<18.6f} "
              f"{result['builtin_time']:<18.6f} "
              f"{result['speedup']:<10.2f} "
              f"{str(result['is_correct']):<10}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run all examples when this module is executed directly.
    example_quicksort()
    example_hash_table()
    example_performance_analysis()
|
||||||
|
|
||||||
204
src/hash_table.py
Normal file
204
src/hash_table.py
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
"""
|
||||||
|
Hash Table with Chaining Implementation
|
||||||
|
|
||||||
|
This module provides a hash table implementation using chaining
|
||||||
|
for collision resolution.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
from typing import Any, Iterator, List, Optional, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class HashNode:
    """Singly linked node holding one key/value pair in a bucket chain."""
    key: int                            # integer key (hashed by HashTable._hash)
    value: Any                          # arbitrary payload for the key
    next: Optional['HashNode'] = None   # next node in this bucket's chain


class HashTable:
    """
    Hash Table implementation using chaining for collision resolution.

    Chaining stores multiple elements in the same bucket using a linked list.
    When a collision occurs, the new element is prepended to the chain.

    Time Complexity:
        - Average: O(1) for insert, search, delete
        - Worst: O(n) when all keys hash to the same bucket

    Space Complexity: O(n + m) where n is number of elements, m is table size
    """

    def __init__(self, initial_size: int = 16, load_factor_threshold: float = 0.75):
        """
        Initialize hash table.

        Args:
            initial_size: Initial number of buckets (must be >= 1)
            load_factor_threshold: Threshold for resizing (default: 0.75,
                must be > 0)

        Raises:
            ValueError: If initial_size < 1 or load_factor_threshold <= 0.
        """
        # Validate up front so bad sizes fail at construction time rather
        # than surfacing later as a ZeroDivisionError (size == 0) or a
        # nonsensical table (negative size) on the first insert.
        if initial_size < 1:
            raise ValueError(f"initial_size must be >= 1, got {initial_size}")
        if load_factor_threshold <= 0:
            raise ValueError(
                f"load_factor_threshold must be > 0, got {load_factor_threshold}"
            )
        self.size = initial_size
        self.load_factor_threshold = load_factor_threshold
        self.count = 0
        self.buckets: List[Optional[HashNode]] = [None] * self.size

    def _hash(self, key: int) -> int:
        """
        Hash function using the multiplication method.

        Args:
            key: Key to hash (a non-int such as None raises TypeError on
                the multiplication below)

        Returns:
            Bucket index in [0, self.size).
        """
        # Multiplication method: h(k) = floor(m * (k * A mod 1))
        # with A = (sqrt(5) - 1) / 2, the Knuth-recommended constant.
        A = 0.6180339887498949
        return int(self.size * ((key * A) % 1))

    def _resize(self) -> None:
        """Double the table size and rehash every stored node.

        Relinks the existing nodes directly instead of calling insert():
        keys are already unique, so insert()'s duplicate-key scan is
        unnecessary, and self.count is unchanged by a resize.
        """
        old_buckets = self.buckets
        self.size *= 2
        self.buckets = [None] * self.size

        for bucket in old_buckets:
            current = bucket
            while current is not None:
                successor = current.next
                idx = self._hash(current.key)
                # Prepend the node into its new bucket.
                current.next = self.buckets[idx]
                self.buckets[idx] = current
                current = successor

    def insert(self, key: int, value: Any) -> None:
        """
        Insert a key-value pair, updating the value if the key exists.

        Args:
            key: Key to insert
            value: Value associated with the key

        Raises:
            TypeError: If key is not a number (raised by _hash).
        """
        # Grow *before* inserting so the post-insert load factor stays
        # below the threshold.
        if self.count / self.size >= self.load_factor_threshold:
            self._resize()

        bucket_index = self._hash(key)

        # Update in place if the key is already present.
        current = self.buckets[bucket_index]
        while current is not None:
            if current.key == key:
                current.value = value
                return
            current = current.next

        # Otherwise prepend a new node to the chain (O(1)).
        self.buckets[bucket_index] = HashNode(key, value, self.buckets[bucket_index])
        self.count += 1

    def get(self, key: int) -> Optional[Any]:
        """
        Retrieve value associated with a key.

        Args:
            key: Key to search for

        Returns:
            Value associated with key, or None if not found.  Note that a
            stored value of None is indistinguishable from "missing" here;
            use contains()/`in` to test membership.
        """
        current = self.buckets[self._hash(key)]
        while current is not None:
            if current.key == key:
                return current.value
            current = current.next
        return None

    def delete(self, key: int) -> bool:
        """
        Delete a key-value pair from the hash table.

        Args:
            key: Key to delete

        Returns:
            True if key was found and deleted, False otherwise
        """
        bucket_index = self._hash(key)
        current = self.buckets[bucket_index]
        prev = None

        while current is not None:
            if current.key == key:
                if prev is None:
                    # Node to delete is at the head of the chain.
                    self.buckets[bucket_index] = current.next
                else:
                    # Node is in the middle or at the end: splice it out.
                    prev.next = current.next
                self.count -= 1
                return True
            prev = current
            current = current.next

        return False

    def contains(self, key: int) -> bool:
        """
        Check if a key exists in the hash table.

        Walks the chain comparing keys directly (rather than testing
        ``get(key) is not None``) so that a key whose stored value is
        None is still reported as present.

        Args:
            key: Key to check

        Returns:
            True if key exists, False otherwise
        """
        current = self.buckets[self._hash(key)]
        while current is not None:
            if current.key == key:
                return True
            current = current.next
        return False

    def get_load_factor(self) -> float:
        """
        Get current load factor of the hash table.

        Returns:
            Load factor (count / size); size is always >= 1 (enforced by
            __init__), so this never divides by zero.
        """
        return self.count / self.size

    def get_all_items(self) -> List[Tuple[int, Any]]:
        """
        Get all key-value pairs in the hash table.

        Returns:
            List of (key, value) tuples, in bucket/chain order.
        """
        items = []
        for bucket in self.buckets:
            current = bucket
            while current is not None:
                items.append((current.key, current.value))
                current = current.next
        return items

    def __len__(self) -> int:
        """Return the number of elements in the hash table."""
        return self.count

    def __contains__(self, key: int) -> bool:
        """Check if key exists in hash table using the 'in' operator."""
        return self.contains(key)

    def __repr__(self) -> str:
        """String representation of the hash table."""
        items = self.get_all_items()
        return f"HashTable(size={self.size}, count={self.count}, load_factor={self.get_load_factor():.2f}, items={items})"
|
||||||
|
|
||||||
178
src/quicksort.py
Normal file
178
src/quicksort.py
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
"""
|
||||||
|
Randomized Quicksort Implementation
|
||||||
|
|
||||||
|
This module provides a randomized quicksort algorithm implementation
|
||||||
|
along with utilities for performance analysis and comparison.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import random
import time
from typing import Callable, List, Optional, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
def randomized_quicksort(arr: List[int], low: Optional[int] = None, high: Optional[int] = None) -> List[int]:
    """
    Sort an array using the randomized quicksort algorithm.

    The input is copied, so the caller's list is never mutated; only the
    index range [low, high] of the copy is sorted, and the copy is
    returned.

    Time Complexity:
        - Average/Best: O(n log n)
        - Worst: O(n²) (rarely occurs due to random pivot selection)

    Space Complexity: O(log n) average recursion depth.

    Args:
        arr: List of integers to sort
        low: Starting index of the range to sort (default: 0)
        high: Ending index of the range to sort (default: len(arr) - 1)

    Returns:
        A new list, sorted within [low, high].

    Raises:
        TypeError: If arr is None (len/copy fail) or contains values
            that cannot be compared with each other (e.g. int vs str).
    """
    if low is None:
        low = 0
    if high is None:
        high = len(arr) - 1

    # Create a copy to avoid mutating the original array
    arr = arr.copy()

    def _quicksort(arr: List[int], low: int, high: int) -> None:
        """Recursively sort arr[low..high] in place."""
        if low < high:
            # Partition around a random pivot, then recurse on both sides.
            pivot_idx = randomized_partition(arr, low, high)
            _quicksort(arr, low, pivot_idx - 1)
            _quicksort(arr, pivot_idx + 1, high)

    _quicksort(arr, low, high)
    return arr
|
||||||
|
|
||||||
|
|
||||||
|
def randomized_partition(arr: List[int], low: int, high: int) -> int:
    """
    Partition arr[low..high] around a uniformly random pivot.

    Choosing the pivot at random (instead of always arr[high]) defeats
    adversarial orderings that would otherwise drive quicksort into its
    O(n²) worst case.

    Args:
        arr: List to partition (modified in place)
        low: First index of the range
        high: Last index of the range

    Returns:
        Final position of the pivot element
    """
    # Pick a random position in [low, high] and move that element into
    # the conventional pivot slot at the end of the range.
    chosen = random.randint(low, high)
    arr[chosen], arr[high] = arr[high], arr[chosen]

    # Delegate to the deterministic Lomuto partition.
    return partition(arr, low, high)
|
||||||
|
|
||||||
|
|
||||||
|
def partition(arr: List[int], low: int, high: int) -> int:
    """
    Lomuto partition of arr[low..high] using arr[high] as the pivot.

    After the call, every element before the returned index is <= the
    pivot and every element after it is > the pivot.

    Args:
        arr: List to partition (modified in place)
        low: First index of the range
        high: Last index of the range; arr[high] is the pivot

    Returns:
        Final position of the pivot element
    """
    pivot_value = arr[high]
    # `boundary` marks the last index of the "<= pivot" region seen so far.
    boundary = low - 1

    for cursor in range(low, high):
        if arr[cursor] <= pivot_value:
            boundary += 1
            arr[boundary], arr[cursor] = arr[cursor], arr[boundary]

    # Drop the pivot just past the "<= pivot" region.
    pivot_pos = boundary + 1
    arr[pivot_pos], arr[high] = arr[high], arr[pivot_pos]
    return pivot_pos
|
||||||
|
|
||||||
|
|
||||||
|
def measure_time(func: Callable, *args, **kwargs) -> Tuple[float, any]:
    """
    Time a single call to *func*.

    Args:
        func: Callable to invoke
        *args: Positional arguments forwarded to the callable
        **kwargs: Keyword arguments forwarded to the callable

    Returns:
        Tuple of (execution_time_in_seconds, function_result)
    """
    # perf_counter is a monotonic high-resolution clock, so the
    # difference between two readings is a reliable duration.
    began = time.perf_counter()
    outcome = func(*args, **kwargs)
    elapsed = time.perf_counter() - began
    return elapsed, outcome
|
||||||
|
|
||||||
|
|
||||||
|
def compare_with_builtin(arr: List[int]) -> dict:
    """
    Benchmark randomized quicksort against Python's built-in sorted().

    Args:
        arr: List to sort (left unmodified by both sorts)

    Returns:
        dict with keys 'quicksort_time', 'builtin_time', 'speedup'
        (quicksort_time / builtin_time), 'is_correct' (both outputs
        agree), and 'array_length'.
    """
    qs_seconds, qs_output = measure_time(randomized_quicksort, arr)
    builtin_seconds, builtin_output = measure_time(sorted, arr)

    return {
        'quicksort_time': qs_seconds,
        'builtin_time': builtin_seconds,
        # Guard against a zero-duration built-in run.
        'speedup': qs_seconds / builtin_seconds if builtin_seconds > 0 else float('inf'),
        'is_correct': qs_output == builtin_output,
        'array_length': len(arr),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_performance(array_sizes: List[int] = None) -> List[dict]:
    """
    Benchmark quicksort over several randomly generated arrays.

    Args:
        array_sizes: Sizes to test (default: [100, 1000, 10000, 100000])

    Returns:
        One compare_with_builtin() result dict per size, in order.
    """
    sizes = [100, 1000, 10000, 100000] if array_sizes is None else array_sizes

    # Each size gets a fresh array of uniform random ints in [1, 1000000].
    return [
        compare_with_builtin([random.randint(1, 1000000) for _ in range(n)])
        for n in sizes
    ]
|
||||||
|
|
||||||
4
tests/__init__.py
Normal file
4
tests/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
"""
|
||||||
|
Test suite for MSCS532 Assignment 3 implementations.
|
||||||
|
"""
|
||||||
|
|
||||||
218
tests/test_hash_table.py
Normal file
218
tests/test_hash_table.py
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
"""
|
||||||
|
Unit tests for Hash Table with Chaining implementation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
from src.hash_table import HashTable, HashNode
|
||||||
|
|
||||||
|
|
||||||
|
class TestHashTable(unittest.TestCase):
|
||||||
|
"""Test cases for hash table with chaining."""
|
||||||
|
|
||||||
|
def test_initialization(self):
|
||||||
|
"""Test hash table initialization."""
|
||||||
|
ht = HashTable(initial_size=16)
|
||||||
|
self.assertEqual(ht.size, 16)
|
||||||
|
self.assertEqual(len(ht), 0)
|
||||||
|
self.assertEqual(ht.get_load_factor(), 0.0)
|
||||||
|
|
||||||
|
def test_insert_and_get(self):
|
||||||
|
"""Test basic insert and get operations."""
|
||||||
|
ht = HashTable()
|
||||||
|
ht.insert(1, "apple")
|
||||||
|
ht.insert(2, "banana")
|
||||||
|
|
||||||
|
self.assertEqual(ht.get(1), "apple")
|
||||||
|
self.assertEqual(ht.get(2), "banana")
|
||||||
|
self.assertEqual(len(ht), 2)
|
||||||
|
|
||||||
|
def test_insert_update(self):
|
||||||
|
"""Test that inserting same key updates value."""
|
||||||
|
ht = HashTable()
|
||||||
|
ht.insert(1, "apple")
|
||||||
|
ht.insert(1, "banana")
|
||||||
|
|
||||||
|
self.assertEqual(ht.get(1), "banana")
|
||||||
|
self.assertEqual(len(ht), 1)
|
||||||
|
|
||||||
|
def test_get_nonexistent_key(self):
|
||||||
|
"""Test getting a key that doesn't exist."""
|
||||||
|
ht = HashTable()
|
||||||
|
ht.insert(1, "apple")
|
||||||
|
|
||||||
|
self.assertIsNone(ht.get(2))
|
||||||
|
|
||||||
|
def test_delete_existing_key(self):
|
||||||
|
"""Test deleting an existing key."""
|
||||||
|
ht = HashTable()
|
||||||
|
ht.insert(1, "apple")
|
||||||
|
ht.insert(2, "banana")
|
||||||
|
|
||||||
|
deleted = ht.delete(1)
|
||||||
|
self.assertTrue(deleted)
|
||||||
|
self.assertIsNone(ht.get(1))
|
||||||
|
self.assertEqual(ht.get(2), "banana")
|
||||||
|
self.assertEqual(len(ht), 1)
|
||||||
|
|
||||||
|
def test_delete_nonexistent_key(self):
|
||||||
|
"""Test deleting a key that doesn't exist."""
|
||||||
|
ht = HashTable()
|
||||||
|
ht.insert(1, "apple")
|
||||||
|
|
||||||
|
deleted = ht.delete(2)
|
||||||
|
self.assertFalse(deleted)
|
||||||
|
self.assertEqual(len(ht), 1)
|
||||||
|
|
||||||
|
def test_contains(self):
|
||||||
|
"""Test contains method."""
|
||||||
|
ht = HashTable()
|
||||||
|
ht.insert(1, "apple")
|
||||||
|
|
||||||
|
self.assertTrue(ht.contains(1))
|
||||||
|
self.assertFalse(ht.contains(2))
|
||||||
|
|
||||||
|
def test_in_operator(self):
    """Membership via the `in` operator mirrors contains()."""
    table = HashTable()
    table.insert(1, "apple")

    self.assertIn(1, table)
    self.assertNotIn(2, table)
|
||||||
|
|
||||||
|
def test_load_factor(self):
    """The load factor tracks entries / buckets as elements are inserted."""
    table = HashTable(initial_size=4)

    # A fresh table holds nothing.
    self.assertEqual(table.get_load_factor(), 0.0)

    # With 4 buckets, each insertion raises the load factor by 0.25.
    for key, value, expected_load in [(1, "a", 0.25),
                                      (2, "b", 0.5),
                                      (3, "c", 0.75)]:
        table.insert(key, value)
        self.assertEqual(table.get_load_factor(), expected_load)
|
||||||
|
|
||||||
|
def test_resize(self):
    """Crossing the load-factor threshold doubles the bucket count."""
    table = HashTable(initial_size=4, load_factor_threshold=0.75)

    # Four insertions push the table to/past the 0.75 threshold,
    # which should trigger one resize along the way.
    entries = {1: "a", 2: "b", 3: "c", 4: "d"}
    for key, value in entries.items():
        table.insert(key, value)

    # Capacity doubled from 4 to 8.
    self.assertEqual(table.size, 8)

    # Rehashing must preserve every stored mapping.
    for key, value in entries.items():
        self.assertEqual(table.get(key), value)
    self.assertEqual(len(table), 4)
|
||||||
|
|
||||||
|
def test_get_all_items(self):
    """get_all_items() returns every stored (key, value) pair."""
    table = HashTable()
    fruits = {1: "apple", 2: "banana", 3: "cherry"}
    for key, value in fruits.items():
        table.insert(key, value)

    items = table.get_all_items()
    self.assertEqual(len(items), 3)

    # Order is unspecified, so compare as a mapping.
    as_dict = dict(items)
    self.assertEqual(as_dict[1], "apple")
    self.assertEqual(as_dict[2], "banana")
    self.assertEqual(as_dict[3], "cherry")
|
||||||
|
|
||||||
|
def test_collision_handling(self):
    """Keys landing in the same bucket are all stored and retrievable."""
    table = HashTable(initial_size=5)

    # 1, 6, 11, ... are congruent mod 5, so with modular hashing these
    # likely share a bucket — exercising the collision chain.
    colliding_keys = [1, 6, 11, 16, 21]
    for key in colliding_keys:
        table.insert(key, f"value_{key}")

    # Every key must survive the collisions (and any resize they trigger).
    for key in colliding_keys:
        self.assertEqual(table.get(key), f"value_{key}")

    self.assertEqual(len(table), len(colliding_keys))
|
||||||
|
|
||||||
|
def test_delete_from_chain(self):
    """Removing a middle node of a collision chain keeps its neighbours reachable."""
    table = HashTable(initial_size=5)

    # Keys congruent mod 5 — likely one chain under modular hashing.
    for key in (1, 6, 11):
        table.insert(key, f"value_{key}")

    # Drop the middle element of the chain.
    self.assertTrue(table.delete(6))

    # The chain must be relinked correctly around the removed node.
    self.assertEqual(table.get(1), "value_1")
    self.assertIsNone(table.get(6))
    self.assertEqual(table.get(11), "value_11")
    self.assertEqual(len(table), 2)
|
||||||
|
|
||||||
|
def test_len(self):
    """len() reflects every insertion and deletion."""
    table = HashTable()
    self.assertEqual(len(table), 0)

    table.insert(1, "a")
    self.assertEqual(len(table), 1)

    table.insert(2, "b")
    self.assertEqual(len(table), 2)

    table.delete(1)
    self.assertEqual(len(table), 1)
|
||||||
|
|
||||||
|
def test_multiple_operations(self):
    """Insert, update, and delete compose into the expected final state."""
    table = HashTable()

    table.insert(1, "one")
    table.insert(2, "two")
    table.insert(3, "three")

    table.insert(2, "TWO")  # overwrite key 2
    table.delete(1)         # remove key 1

    # Final state: key 1 gone, key 2 updated, key 3 untouched.
    self.assertIsNone(table.get(1))
    self.assertEqual(table.get(2), "TWO")
    self.assertEqual(table.get(3), "three")
    self.assertEqual(len(table), 2)
|
||||||
|
|
||||||
|
def test_empty_hash_table(self):
    """All read/delete operations behave sanely on a fresh, empty table."""
    table = HashTable()

    self.assertIsNone(table.get(1))
    self.assertFalse(table.contains(1))
    self.assertFalse(table.delete(1))
    self.assertEqual(table.get_all_items(), [])
    self.assertEqual(len(table), 0)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Allow running this test module directly with `python <file>`.
    unittest.main()
|
||||||
|
|
||||||
150
tests/test_quicksort.py
Normal file
150
tests/test_quicksort.py
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
"""
|
||||||
|
Unit tests for Randomized Quicksort implementation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import random
|
||||||
|
from src.quicksort import (
|
||||||
|
randomized_quicksort,
|
||||||
|
partition,
|
||||||
|
randomized_partition,
|
||||||
|
compare_with_builtin,
|
||||||
|
analyze_performance
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRandomizedQuicksort(unittest.TestCase):
    """Behavioural tests for randomized_quicksort across input shapes."""

    def _check_sorts(self, data):
        """Assert that quicksort agrees with Python's built-in sort for *data*."""
        self.assertEqual(randomized_quicksort(data), sorted(data))

    def test_empty_array(self):
        """An empty input yields an empty output."""
        self.assertEqual(randomized_quicksort([]), [])

    def test_single_element(self):
        """A one-element list is returned as-is."""
        self.assertEqual(randomized_quicksort([42]), [42])

    def test_sorted_array(self):
        """An already-sorted input stays sorted."""
        self.assertEqual(randomized_quicksort([1, 2, 3, 4, 5]), [1, 2, 3, 4, 5])

    def test_reverse_sorted_array(self):
        """A descending input is fully reversed into ascending order."""
        self.assertEqual(randomized_quicksort([5, 4, 3, 2, 1]), [1, 2, 3, 4, 5])

    def test_random_array(self):
        """An arbitrary unordered input is sorted correctly."""
        self._check_sorts([64, 34, 25, 12, 22, 11, 90, 5])

    def test_duplicate_elements(self):
        """Repeated values are all kept and placed correctly."""
        self._check_sorts([3, 1, 4, 1, 5, 9, 2, 6, 5, 3])

    def test_negative_numbers(self):
        """Negative values, zero, and positives sort together correctly."""
        self._check_sorts([-5, -2, -8, 1, 3, -1, 0])

    def test_large_array(self):
        """A 1000-element random input matches the built-in sort."""
        data = [random.randint(1, 10000) for _ in range(1000)]
        self._check_sorts(data)

    def test_original_array_not_modified(self):
        """Sorting returns a new list and leaves the input untouched."""
        data = [64, 34, 25, 12, 22, 11, 90, 5]
        snapshot = data.copy()
        randomized_quicksort(data)
        self.assertEqual(data, snapshot)

    def test_all_same_elements(self):
        """A constant list is returned unchanged."""
        self.assertEqual(randomized_quicksort([5, 5, 5, 5, 5]), [5, 5, 5, 5, 5])
|
||||||
|
|
||||||
|
|
||||||
|
class TestPartition(unittest.TestCase):
    """Test cases for the deterministic and randomized partition functions.

    Both tests verify the same partition invariant, so the duplicated
    verification loops are factored into a single private helper.
    """

    def _assert_partitioned(self, arr, pivot_idx):
        """Assert the partition invariant around arr[pivot_idx].

        Every element left of the pivot must be <= the pivot value, and
        every element to its right must be >= the pivot value.
        """
        pivot_value = arr[pivot_idx]
        for i in range(pivot_idx):
            self.assertLessEqual(arr[i], pivot_value)
        for i in range(pivot_idx + 1, len(arr)):
            self.assertGreaterEqual(arr[i], pivot_value)

    def test_partition(self):
        """partition() places its pivot so the invariant holds."""
        arr = [64, 34, 25, 12, 22, 11, 90, 5]
        pivot_idx = partition(arr, 0, len(arr) - 1)
        self._assert_partitioned(arr, pivot_idx)

    def test_randomized_partition(self):
        """randomized_partition() preserves the same invariant."""
        arr = [64, 34, 25, 12, 22, 11, 90, 5]
        pivot_idx = randomized_partition(arr, 0, len(arr) - 1)
        self._assert_partitioned(arr, pivot_idx)
|
||||||
|
|
||||||
|
|
||||||
|
class TestPerformanceComparison(unittest.TestCase):
    """Sanity checks for the benchmarking helper functions."""

    def test_compare_with_builtin(self):
        """compare_with_builtin() reports timings plus a correctness flag."""
        data = [random.randint(1, 1000) for _ in range(100)]
        report = compare_with_builtin(data)

        # The report must expose every documented field.
        for field in ('quicksort_time', 'builtin_time', 'speedup',
                      'is_correct', 'array_length'):
            self.assertIn(field, report)

        self.assertTrue(report['is_correct'])
        self.assertEqual(report['array_length'], 100)
        # Both timings are positive wall-clock durations.
        self.assertGreater(report['quicksort_time'], 0)
        self.assertGreater(report['builtin_time'], 0)

    def test_analyze_performance(self):
        """analyze_performance() returns one correct result per input size."""
        results = analyze_performance([100, 1000])

        self.assertEqual(len(results), 2)
        for entry in results:
            self.assertIn('quicksort_time', entry)
            self.assertIn('builtin_time', entry)
            self.assertIn('is_correct', entry)
            self.assertTrue(entry['is_correct'])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Allow running this test module directly with `python <file>`.
    unittest.main()
|
||||||
|
|
||||||
Reference in New Issue
Block a user