commit e68377d6c842bdf3dd33a6740434da542696e9fc Author: Carlos Gutierrez Date: Sun Nov 16 13:45:43 2025 -0500 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4cdfba4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,44 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +ENV/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Project specific +*.pyc +.pytest_cache/ +.coverage +htmlcov/ + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5d03f9f --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2025 Carlos Gutierrez + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/README.md b/README.md new file mode 100644 index 0000000..70450c2 --- /dev/null +++ b/README.md @@ -0,0 +1,172 @@ +# MSCS532 Assignment 5: Quicksort Implementation and Analysis + +**Author:** Carlos Gutierrez +**Email:** cgutierrez44833@ucumberlands.edu +**Course:** MSCS532 – Data Structures and Algorithms +**Assignment:** Quicksort Implementation and Analysis + +## Overview + +This assignment delivers a comprehensive study of Quicksort. It includes deterministic, randomized, and three-way Quicksort implementations, theoretical complexity analysis, empirical benchmarking across multiple data distributions, test coverage, and reproducible visualization assets. + +## Repository Structure + +``` +MSCS532_Assignment5/ +├── docs/ +│ ├── quicksort_comparison.png # Comparative performance (line plots) +│ ├── quicksort_comparison_bar.png # Deterministic vs randomized comparison +│ ├── quicksort_scalability.png # Scalability on random inputs +│ └── quicksort_worst_case.png # Worst-case behavior on sorted inputs +├── examples/ +│ ├── quicksort_demo.py # Usage demonstrations +│ ├── comparison_demo.py # Benchmark walkthrough +│ └── generate_plots.py # Script to reproduce plots +├── src/ +│ ├── quicksort.py # Deterministic, randomized, and 3-way Quicksort +│ └── comparison.py # Benchmarking and data generation utilities +├── tests/ +│ ├── test_quicksort.py # Unit tests for sorting algorithms +│ └── test_comparison.py # Unit tests for benchmarking helpers +├── requirements.txt # Python dependencies (NumPy, Matplotlib) +└── README.md # Project documentation (this file) +``` + +## 1. Quicksort Implementation + +- `quicksort()` implements the deterministic version using Lomuto partitioning and the last element as pivot. +- `randomized_quicksort()` selects pivots uniformly at random to mitigate adverse distributions. +- `quicksort_3way()` leverages a Dutch National Flag strategy to optimize inputs with many duplicates. 
+- All functions support in-place and non-in-place usage, plus optional `key` functions for custom comparison. + +### API Highlights + +- `quicksort(arr, in_place=True, key=None)` + - Returns `None` when sorting in place; otherwise returns a new sorted list. +- `randomized_quicksort(arr, in_place=True, key=None, seed=None)` + - Optional `seed` for reproducible experiments. +- `quicksort_3way(arr, in_place=True, key=None)` + - Efficient for datasets containing repeated elements. + +## 2. Theoretical Performance Analysis + +| Scenario | Deterministic Quicksort | Randomized Quicksort | Notes | +|---------------|-------------------------|----------------------|-------| +| Best Case | \(O(n \log n)\) | \(O(n \log n)\) | Balanced partitions from median pivots | +| Average Case | \(O(n \log n)\) | \(O(n \log n)\) | Expected logarithmic recursion depth | +| Worst Case | \(O(n^2)\) | \(O(n^2)\) | Occurs with highly unbalanced splits | + +- **Average-case intuition:** Balanced partitions of size \(n/2\) produce the recurrence \(T(n) = 2T(n/2) + O(n)\), which resolves to \(O(n \log n)\). +- **Worst-case intuition:** Consistently poor pivots reduce the problem by one element, yielding \(T(n) = T(n - 1) + O(n)\) and \(O(n^2)\) behavior. +- **Space complexity:** \(O(\log n)\) expected stack depth for balanced recursion, \(O(n)\) in the worst case. Randomized pivot selection significantly decreases the probability of worst-case depth on adversarial inputs. + +## 3. Randomized Quicksort + +- Randomization chooses pivots uniformly at random, ensuring that any specific pivot ordering is unlikely. +- While the theoretical worst case remains \(O(n^2)\), the probability of encountering it drops exponentially with input size. +- The implementation exposes an optional `seed` to guarantee repeatable experimental runs while retaining stochastic behavior by default. + +## 4. Empirical Analysis + +### Experimental Setup + +- **Input sizes:** 100, 500, 1,000, 2,000, 5,000, 10,000 elements. 
+- **Distributions:** random, sorted, reverse sorted, nearly sorted, and high-duplicate arrays. +- **Iterations:** Each benchmark averages three independent runs per algorithm/input combination. +- **Environment:** Python 3.11+, NumPy, Matplotlib. + +### Key Observations + +- Randomized Quicksort consistently outperforms deterministic Quicksort on sorted and reverse-sorted arrays by avoiding degenerate partitions. +- Both versions exhibit \(O(n \log n)\) scaling on random inputs, aligning with theoretical expectations. +- Deterministic Quicksort degrades toward quadratic performance as inputs approach worst-case ordering; randomization flattens this curve. +- Three-way Quicksort (explored in examples/tests) provides strong performance on datasets with heavy duplication. + +### Missing Data in Visualizations + +**Important Note on Infinite Execution Times:** + +In some visualizations (particularly `quicksort_worst_case.png` and `quicksort_comparison_bar.png`), you may notice that certain bars or data points are missing. This occurs when deterministic Quicksort encounters its worst-case scenario on sorted or reverse-sorted arrays. + +**Why execution times are recorded as infinite:** + +1. **Worst-case complexity:** On sorted or reverse-sorted inputs, deterministic Quicksort (using the last element as pivot) creates highly unbalanced partitions, resulting in \(O(n^2)\) time complexity. + +2. **Recursion depth:** For large arrays (typically ≥ 1,000 elements), the recursion reaches \(O(n)\) depth, which can exceed Python's default recursion limit (usually 1,000) and raise a `RecursionError`. + +3. **Quadratic running time:** Even when the recursion limit is raised, the worst case performs on the order of \(n^2/2\) comparisons (about \(5 \times 10^7\) for \(n = 10{,}000\), versus roughly \(n \log_2 n \approx 1.3 \times 10^5\) for balanced partitions), so these runs are orders of magnitude slower than the randomized variant and slow to benchmark. The benchmarking framework does not enforce a timeout; such runs simply take longer. + +4.
**Error handling:** When a benchmark raises an exception (for example a `RecursionError`, or a failed correctness check on the sorted output), the benchmarking framework records the mean, median, minimum, and maximum times as `float('inf')` to mark that run as failed; the plotting script then leaves these non-finite values out of the figures. + +**What this means for the plots:** + +- Missing bars in bar charts indicate that deterministic Quicksort failed to complete for that input size/distribution combination. +- Randomized Quicksort, by contrast, handles sorted and reverse-sorted inputs efficiently due to random pivot selection, avoiding the worst-case scenario. +- This visualization effectively demonstrates why randomized pivot selection is crucial for practical Quicksort implementations. + +### Visualization Highlights + +![Quicksort Performance Comparison](docs/quicksort_comparison.png) + +*Figure 1. Mean execution time vs. input size across distributions for deterministic and randomized Quicksort.* + +![Deterministic vs Randomized Comparison](docs/quicksort_comparison_bar.png) + +*Figure 2. Runtime comparison on random, sorted, and reverse-sorted arrays (n = 5,000). Missing bars for deterministic Quicksort on sorted/reverse-sorted inputs indicate execution failures due to worst-case \(O(n^2)\) performance.* + +![Scalability Analysis](docs/quicksort_scalability.png) + +*Figure 3. Log-log visualization of scalability on random inputs with \(O(n \log n)\) reference.* + +![Worst-Case Behavior](docs/quicksort_worst_case.png) + +*Figure 4. Worst-case analysis contrasting sorted and reverse-sorted distributions.
Missing bars for deterministic Quicksort at larger sizes (≥1,000) indicate execution failures due to recursion limits and quadratic time complexity.* + +## Getting Started + +### Prerequisites + +- Python 3.10 or later +- Recommended to use a virtual environment + +### Installation + +```bash +python -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate +pip install -r requirements.txt +``` + +## Running the Examples + +```bash +python examples/quicksort_demo.py # Deterministic, randomized, and 3-way demos +python examples/comparison_demo.py # Console-based benchmarking summary +python examples/generate_plots.py # Regenerate all figures in docs/ +``` + +## Running Tests + +```bash +python -m pytest +``` + +The test suite verifies correctness for deterministic, randomized, and three-way Quicksort, along with array generator and benchmarking utilities. + +## Reproducing the Empirical Study + +1. Activate your environment and install dependencies. +2. Run `python examples/generate_plots.py`. + - Benchmarks may take several minutes depending on hardware. +3. Generated figures will be written to the `docs/` directory and referenced automatically by this README. + +## Academic Integrity Statement + +This project is submitted for academic evaluation in MSCS532 – Data Structures and Algorithms. All code, analysis, and documentation were authored by Carlos Gutierrez for the specific purpose of this assignment. + +--- + +© 2025 Carlos Gutierrez. All rights reserved. 
+ + + diff --git a/docs/quicksort_comparison.png b/docs/quicksort_comparison.png new file mode 100644 index 0000000..2752c3e Binary files /dev/null and b/docs/quicksort_comparison.png differ diff --git a/docs/quicksort_comparison_bar.png b/docs/quicksort_comparison_bar.png new file mode 100644 index 0000000..c4a2248 Binary files /dev/null and b/docs/quicksort_comparison_bar.png differ diff --git a/docs/quicksort_scalability.png b/docs/quicksort_scalability.png new file mode 100644 index 0000000..58b72e3 Binary files /dev/null and b/docs/quicksort_scalability.png differ diff --git a/docs/quicksort_worst_case.png b/docs/quicksort_worst_case.png new file mode 100644 index 0000000..b77fd51 Binary files /dev/null and b/docs/quicksort_worst_case.png differ diff --git a/examples/comparison_demo.py b/examples/comparison_demo.py new file mode 100644 index 0000000..5b90f3f --- /dev/null +++ b/examples/comparison_demo.py @@ -0,0 +1,101 @@ +""" +Demonstration of Quicksort performance comparison. +""" + +import sys +import os +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from src.comparison import ( + generate_random_array, + generate_sorted_array, + generate_reverse_sorted_array, + generate_nearly_sorted_array, + generate_array_with_duplicates, + compare_algorithms, + format_results_table +) +from src.quicksort import quicksort, randomized_quicksort + + +def demo_performance_comparison(): + """Demonstrate performance comparison between algorithms.""" + print("=" * 80) + print("QUICKSORT PERFORMANCE COMPARISON") + print("=" * 80) + + # Define algorithms to compare + algorithms = { + 'Deterministic Quicksort': lambda arr: quicksort(arr), + 'Randomized Quicksort': lambda arr: randomized_quicksort(arr, seed=42) + } + + # Define array generators + array_generators = { + 'Random': generate_random_array, + 'Sorted': generate_sorted_array, + 'Reverse Sorted': generate_reverse_sorted_array, + 'Nearly Sorted': lambda size: 
def demo_specific_scenarios():
    """Benchmark both Quicksort variants on a fixed set of named scenarios.

    Each scenario array is generated once and handed to every algorithm, so
    the two variants are timed on identical input. One table row is printed
    per (scenario, algorithm) pair with the mean and median of five runs.

    A benchmark that fails (for example a ``RecursionError`` from the
    deterministic variant on sorted input, or a ``ValueError`` from the
    benchmark's correctness check) is reported as a ``FAILED`` row instead of
    aborting the remaining scenarios.
    """
    print("\n" + "=" * 80)
    print("SPECIFIC SCENARIO ANALYSIS")
    print("=" * 80)

    from src.comparison import benchmark_sorting_algorithm

    scenarios = {
        'Small Random (100)': generate_random_array(100),
        'Medium Random (1000)': generate_random_array(1000),
        'Large Random (10000)': generate_random_array(10000),
        'Sorted (1000)': generate_sorted_array(1000),
        'Reverse Sorted (1000)': generate_reverse_sorted_array(1000),
        'Nearly Sorted (1000)': generate_nearly_sorted_array(1000, 10),
        'Many Duplicates (1000)': generate_array_with_duplicates(1000, 10),
    }

    algorithms = {
        'Deterministic': quicksort,
        'Randomized': lambda arr: randomized_quicksort(arr, seed=42),
    }

    print(f"\n{'Scenario':<30} {'Algorithm':<20} {'Mean Time (s)':<15} {'Median Time (s)':<15}")
    print("-" * 80)

    for scenario_name, test_array in scenarios.items():
        for algo_name, algo_func in algorithms.items():
            try:
                stats = benchmark_sorting_algorithm(algo_func, test_array, iterations=5)
            except (RecursionError, ValueError) as exc:
                # Keep going: one degenerate input must not hide the other rows.
                print(f"{scenario_name:<30} {algo_name:<20} FAILED ({type(exc).__name__}: {exc})")
                continue
            print(f"{scenario_name:<30} {algo_name:<20} {stats['mean']:<15.6f} {stats['median']:<15.6f}")
_DET = 'Deterministic Quicksort'
_RAND = 'Randomized Quicksort'


def _finite_mean(results, algo_name, dist, size):
    """Return the recorded mean time, or NaN when it is missing or not finite."""
    try:
        mean_val = results[algo_name][dist][size]['mean']
    except KeyError:
        return np.nan
    return mean_val if np.isfinite(mean_val) else np.nan


def _save_figure(output_dir, filename):
    """Lay out, save and close the current figure, printing the written path."""
    path = f"{output_dir}/{filename}"
    plt.tight_layout()
    plt.savefig(path, dpi=300, bbox_inches='tight')
    print(f"Saved: {path}")
    plt.close()


def _add_note(ax, note_text):
    """Draw a boxed annotation centred along the bottom edge of ``ax``."""
    ax.text(0.5, 0.02, note_text, transform=ax.transAxes,
            fontsize=9, ha='center', va='bottom',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))


def _plot_distribution_lines(results, algo_names, distributions, output_dir):
    """Plot 1: one log-log time-vs-size panel per input distribution."""
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('Quicksort Performance Comparison', fontsize=16, fontweight='bold')
    colors = ['#1f77b4', '#ff7f0e']

    for idx, dist in enumerate(distributions):
        ax = axes[idx // 3, idx % 3]
        plotted = False

        for algo_name, color in zip(algo_names, colors):
            per_size = results[algo_name].get(dist, {})
            # Failed runs are stored as inf and cannot be drawn on a log axis.
            points = [(s, per_size[s]['mean']) for s in sorted(per_size)
                      if np.isfinite(per_size[s]['mean'])]
            if points:
                valid_sizes, valid_times = zip(*points)
                ax.plot(valid_sizes, valid_times, marker='o', label=algo_name,
                        color=color, linewidth=2, markersize=6)
                plotted = True

        ax.set_xlabel('Array Size', fontsize=10)
        ax.set_ylabel('Time (seconds)', fontsize=10)
        ax.set_title(f'{dist} Distribution', fontsize=12, fontweight='bold')
        if plotted:
            # legend() on an axes without labelled artists only emits a warning.
            ax.legend()
        ax.grid(True, alpha=0.3)
        ax.set_xscale('log')
        ax.set_yscale('log')

    # Hide the panels of the 2x3 grid that have no distribution assigned.
    for unused_ax in axes.ravel()[len(distributions):]:
        unused_ax.axis('off')

    _save_figure(output_dir, 'quicksort_comparison.png')


def _plot_bar_comparison(results, size, output_dir):
    """Plot 2: deterministic vs randomized bars at a single array size."""
    fig, ax = plt.subplots(figsize=(14, 8))

    distributions_to_plot = ['Random', 'Sorted', 'Reverse Sorted']
    x = np.arange(len(distributions_to_plot))
    width = 0.35

    det_times = [_finite_mean(results, _DET, dist, size) for dist in distributions_to_plot]
    rand_times = [_finite_mean(results, _RAND, dist, size) for dist in distributions_to_plot]

    det_bars = ax.bar(x - width/2, det_times, width, label=_DET,
                      color='#1f77b4', alpha=0.8)
    rand_bars = ax.bar(x + width/2, rand_times, width, label=_RAND,
                       color='#ff7f0e', alpha=0.8)

    ax.set_xlabel('Input Distribution', fontsize=12, fontweight='bold')
    ax.set_ylabel('Time (seconds)', fontsize=12, fontweight='bold')
    ax.set_title(f'Quicksort Performance Comparison (Array Size: {size})',
                 fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(distributions_to_plot)
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3, axis='y')

    # Value labels, only on bars that actually have a height.
    for bar in (*det_bars, *rand_bars):
        height = bar.get_height()
        if height > 0 and np.isfinite(height):
            ax.text(bar.get_x() + bar.get_width()/2., height,
                    f'{height:.4f}s',
                    ha='center', va='bottom', fontsize=9)

    # det_times holds only finite values or NaN, so isnan identifies failures.
    missing_dists = [dist for dist, val in zip(distributions_to_plot, det_times)
                     if np.isnan(val)]
    if missing_dists:
        note_text = f"Note: Deterministic Quicksort failed on {', '.join(missing_dists)}\n"
        note_text += "due to worst-case O(n²) performance (see README for details)"
        _add_note(ax, note_text)

    _save_figure(output_dir, 'quicksort_comparison_bar.png')


def _plot_scalability(results, sizes, output_dir):
    """Plot 3: log-log scalability on random input with an n log n reference."""
    fig, ax = plt.subplots(figsize=(12, 8))

    dist = 'Random'
    ordered_sizes = sorted(set(sizes))

    def finite_points(algo_name):
        pairs = [(s, _finite_mean(results, algo_name, dist, s)) for s in ordered_sizes]
        return [(s, t) for s, t in pairs if np.isfinite(t)]

    det_points = finite_points(_DET)
    rand_points = finite_points(_RAND)

    if det_points:
        det_sizes, det_times = zip(*det_points)
        ax.plot(det_sizes, det_times, marker='o', label=_DET,
                color='#1f77b4', linewidth=2.5, markersize=8)
    if rand_points:
        rand_sizes, rand_times = zip(*rand_points)
        ax.plot(rand_sizes, rand_times, marker='s', label=_RAND,
                color='#ff7f0e', linewidth=2.5, markersize=8)

    if det_points:
        # Scale n*log2(n) so the reference passes through the first data point.
        n_log_n = [s * np.log2(s) for s in det_sizes]
        scale_factor = det_times[0] / n_log_n[0] if n_log_n[0] > 0 else 1
        n_log_n_scaled = [value * scale_factor for value in n_log_n]
        ax.plot(det_sizes, n_log_n_scaled, '--', label='O(n log n) reference',
                color='gray', linewidth=2, alpha=0.7)

    ax.set_xlabel('Array Size', fontsize=12, fontweight='bold')
    ax.set_ylabel('Time (seconds)', fontsize=12, fontweight='bold')
    ax.set_title('Quicksort Scalability Analysis (Random Distribution)',
                 fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)
    ax.set_xscale('log')
    ax.set_yscale('log')

    _save_figure(output_dir, 'quicksort_scalability.png')


def _plot_worst_case(results, sizes, output_dir):
    """Plot 4: grouped bars for sorted and reverse-sorted input at every size."""
    fig, ax = plt.subplots(figsize=(12, 8))

    worst_case_dists = ['Sorted', 'Reverse Sorted']
    all_sizes = sorted(sizes)
    x = np.arange(len(all_sizes))
    width = 0.35
    det_colors = ['#1f77b4', '#2ca02c']
    rand_colors = ['#ff7f0e', '#d62728']

    det_by_dist = {}
    for dist_idx, dist in enumerate(worst_case_dists):
        det_times = [_finite_mean(results, _DET, dist, size) for size in all_sizes]
        rand_times = [_finite_mean(results, _RAND, dist, size) for size in all_sizes]
        det_by_dist[dist] = det_times

        offset = (dist_idx - 0.5) * width
        ax.bar(x + offset, det_times, width/2, label=f'Deterministic ({dist})',
               alpha=0.7, color=det_colors[dist_idx])
        ax.bar(x + offset + width/2, rand_times, width/2,
               label=f'Randomized ({dist})', alpha=0.7,
               color=rand_colors[dist_idx])

    ax.set_xlabel('Array Size', fontsize=12, fontweight='bold')
    ax.set_ylabel('Time (seconds)', fontsize=12, fontweight='bold')
    ax.set_title('Worst-Case Performance: Sorted vs Reverse Sorted',
                 fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels([str(s) for s in all_sizes], rotation=45)
    ax.legend(fontsize=10, ncol=2)
    ax.grid(True, alpha=0.3, axis='y')
    ax.set_yscale('log')

    # A size counts as missing only when the deterministic variant has no
    # finite result for it in any of the worst-case distributions.
    missing_sizes = [
        size for size_idx, size in enumerate(all_sizes)
        if all(np.isnan(det_by_dist[dist][size_idx]) for dist in worst_case_dists)
    ]
    if missing_sizes:
        note_text = f"Note: Missing bars for Deterministic Quicksort at sizes ≥{min(missing_sizes)}\n"
        note_text += "indicate execution failures due to recursion limits and O(n²) complexity"
        _add_note(ax, note_text)

    _save_figure(output_dir, 'quicksort_worst_case.png')


def generate_performance_plots(output_dir='docs'):
    """Run the Quicksort benchmarks and write the four comparison figures.

    Benchmarks deterministic and randomized Quicksort (seed 42) on five input
    distributions at six array sizes, three iterations each, then saves
    ``quicksort_comparison.png``, ``quicksort_comparison_bar.png``,
    ``quicksort_scalability.png`` and ``quicksort_worst_case.png``.

    Args:
        output_dir: Directory the figures are written to; created if absent.
            Relative paths are resolved against the current working directory.
    """
    print("Generating performance plots...")

    os.makedirs(output_dir, exist_ok=True)

    algorithms = {
        _DET: lambda arr: quicksort(arr),
        _RAND: lambda arr: randomized_quicksort(arr, seed=42),
    }

    array_generators = {
        'Random': generate_random_array,
        'Sorted': generate_sorted_array,
        'Reverse Sorted': generate_reverse_sorted_array,
        'Nearly Sorted': lambda size: generate_nearly_sorted_array(size, swap_count=10),
        'Many Duplicates': lambda size: generate_array_with_duplicates(size, unique_count=10),
    }

    sizes = [100, 500, 1000, 2000, 5000, 10000]

    print("Running benchmarks (this may take a few minutes)...")
    results = compare_algorithms(
        algorithms=algorithms,
        array_generators=array_generators,
        sizes=sizes,
        iterations=3
    )

    print("Generating line plot...")
    _plot_distribution_lines(results, list(algorithms), list(array_generators), output_dir)

    print("Generating bar chart...")
    _plot_bar_comparison(results, 5000, output_dir)

    print("Generating scalability plot...")
    _plot_scalability(results, sizes, output_dir)

    print("Generating worst-case comparison plot...")
    _plot_worst_case(results, sizes, output_dir)

    print("\nAll plots generated successfully!")
    print(f"Plots saved in the '{output_dir}' directory.")
def demo_randomized_quicksort():
    """Show randomized Quicksort on sample inputs and its seeded reproducibility."""
    banner = "=" * 60
    print("\n" + banner)
    print("RANDOMIZED QUICKSORT DEMONSTRATION")
    print(banner)

    # Examples 1 and 2 share the same print / sort / print shape.
    single_runs = (
        ("\n1. Sorting a random array:", [64, 34, 25, 12, 22, 11, 90]),
        ("\n2. Sorting a sorted array (worst case for deterministic):", list(range(1, 11))),
    )
    for heading, values in single_runs:
        print(heading)
        print(f" Original: {values}")
        randomized_quicksort(values, seed=42)
        print(f" Sorted: {values}")

    # Example 3: two copies sorted with the same seed are compared.
    print("\n3. Reproducibility with same seed:")
    first = [5, 2, 8, 1, 9, 3, 7, 4, 6]
    second = list(first)
    for data in (first, second):
        randomized_quicksort(data, seed=42)
    print(f" Array 1: {first}")
    print(f" Array 2: {second}")
    print(f" Results match: {first == second}")
def demo_custom_key():
    """Show how the ``key`` argument sorts records by a derived value."""
    rule = "=" * 60
    print("\n" + rule)
    print("CUSTOM KEY FUNCTION DEMONSTRATION")
    print(rule)

    # Example 1: dictionaries ordered by their 'age' entry.
    people = [
        {'name': 'Alice', 'age': 30},
        {'name': 'Bob', 'age': 25},
        {'name': 'Charlie', 'age': 35},
    ]
    print("\n1. Sorting list of dictionaries by value:")
    print(f" Original: {people}")
    quicksort(people, key=lambda person: person['age'])
    print(f" Sorted by age: {people}")

    # Example 2: tuples ordered by their second field.
    fruit_counts = [('apple', 3), ('banana', 1), ('cherry', 2)]
    print("\n2. Sorting list of tuples by second element:")
    print(f" Original: {fruit_counts}")
    quicksort(fruit_counts, key=lambda pair: pair[1])
    print(f" Sorted by count: {fruit_counts}")
+ + Args: + func: The function to time + + Returns: + Wrapped function that returns (result, execution_time) + """ + @wraps(func) + def wrapper(*args, **kwargs): + start_time = time.perf_counter() + result = func(*args, **kwargs) + end_time = time.perf_counter() + execution_time = end_time - start_time + return result, execution_time + return wrapper + + +def generate_random_array(size: int, min_val: int = 0, max_val: int = 1000) -> List[int]: + """Generate a random array of integers.""" + return [random.randint(min_val, max_val) for _ in range(size)] + + +def generate_sorted_array(size: int, start: int = 0, step: int = 1) -> List[int]: + """Generate a sorted array of integers.""" + return list(range(start, start + size * step, step)) + + +def generate_reverse_sorted_array(size: int, start: int = 0, step: int = 1) -> List[int]: + """Generate a reverse-sorted array of integers.""" + return list(range(start + (size - 1) * step, start - step, -step)) + + +def generate_nearly_sorted_array(size: int, swap_count: int = 10) -> List[int]: + """Generate a nearly sorted array with a few swaps.""" + arr = list(range(size)) + for _ in range(swap_count): + i = random.randint(0, size - 1) + j = random.randint(0, size - 1) + arr[i], arr[j] = arr[j], arr[i] + return arr + + +def generate_array_with_duplicates(size: int, unique_count: int = 10) -> List[int]: + """Generate an array with many duplicate values.""" + unique_values = list(range(unique_count)) + return [random.choice(unique_values) for _ in range(size)] + + +def benchmark_sorting_algorithm( + sort_func: Callable[[List[Any]], Any], + array: List[Any], + iterations: int = 1 +) -> Dict[str, float]: + """ + Benchmark a sorting algorithm on a given array. 
def benchmark_sorting_algorithm(
    sort_func: Callable[[List[Any]], Any],
    array: List[Any],
    iterations: int = 1
) -> Dict[str, float]:
    """
    Benchmark a sorting algorithm on a given array.

    Each iteration sorts a fresh copy of `array`, so the caller's list is
    never modified. Only the call to `sort_func` is timed; the correctness
    check happens outside the timed region.

    Args:
        sort_func: The sorting function to benchmark. It may sort in place
            (returning None) or return the sorted list.
        array: The array to sort
        iterations: Number of iterations to run (for averaging), at least 1

    Returns:
        Dictionary with timing statistics: 'mean', 'median', 'min', 'max'
        and 'stdev' (0.0 when only one iteration was run)

    Raises:
        ValueError: If iterations < 1, or if sort_func does not produce
            the same ordering as sorted(array)
    """
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    # The reference ordering is the same for every iteration; compute it once.
    expected = sorted(array)
    times = []

    for _ in range(iterations):
        # Create a fresh copy for each iteration
        arr_copy = array.copy()

        start_time = time.perf_counter()
        result = sort_func(arr_copy)
        end_time = time.perf_counter()

        # In-place sorters return None; fall back to the mutated copy.
        sorted_arr = result if result is not None else arr_copy
        if sorted_arr != expected:
            # partial objects and callable instances have no __name__
            func_name = getattr(sort_func, '__name__', repr(sort_func))
            raise ValueError(f"Sorting function {func_name} produced incorrect results")

        times.append(end_time - start_time)

    return {
        'mean': statistics.mean(times),
        'median': statistics.median(times),
        'min': min(times),
        'max': max(times),
        'stdev': statistics.stdev(times) if len(times) > 1 else 0.0
    }


def compare_algorithms(
    algorithms: Dict[str, Callable[[List[Any]], Any]],
    array_generators: Dict[str, Callable[[int], List[Any]]],
    sizes: List[int],
    iterations: int = 3
) -> Dict[str, Dict[str, Dict[int, Dict[str, float]]]]:
    """
    Compare multiple sorting algorithms on different input distributions and sizes.

    Every algorithm is benchmarked on the same generated array for a given
    (distribution, size) pair, so timings are directly comparable even when
    the generator is random.

    Args:
        algorithms: Dictionary mapping algorithm names to sorting functions
        array_generators: Dictionary mapping distribution names to generator functions
        sizes: List of array sizes to test
        iterations: Number of iterations per test (for averaging)

    Returns:
        Nested dictionary: results[algorithm][distribution][size] = timing_stats.
        A combination whose benchmark raised is recorded with infinite
        timings and a stdev of 0.0.
    """
    results: Dict[str, Dict[str, Dict[int, Dict[str, float]]]] = {}
    # One input per (distribution, size), shared by all algorithms.
    test_arrays: Dict[Tuple[str, int], List[Any]] = {}

    for algo_name, algo_func in algorithms.items():
        results[algo_name] = {}

        for dist_name, gen_func in array_generators.items():
            results[algo_name][dist_name] = {}

            for size in sizes:
                print(f"Testing {algo_name} on {dist_name} array of size {size}...")

                cache_key = (dist_name, size)
                if cache_key not in test_arrays:
                    test_arrays[cache_key] = gen_func(size)
                test_array = test_arrays[cache_key]

                # Best-effort: one failing combination (e.g. RecursionError)
                # must not abort the whole comparison run.
                try:
                    stats = benchmark_sorting_algorithm(algo_func, test_array, iterations)
                    results[algo_name][dist_name][size] = stats
                except Exception as e:
                    print(f"Error testing {algo_name} on {dist_name} size {size}: {e}")
                    results[algo_name][dist_name][size] = {
                        'mean': float('inf'),
                        'median': float('inf'),
                        'min': float('inf'),
                        'max': float('inf'),
                        'stdev': 0.0
                    }

    return results
def format_results_table(results: Dict[str, Dict[str, Dict[int, Dict[str, float]]]]) -> str:
    """
    Format benchmark results as a readable table.

    Algorithms and distributions appear in dictionary order; within each
    distribution the rows are ordered by ascending size.

    Args:
        results: Results dictionary from compare_algorithms, indexed as
            results[algorithm][distribution][size] -> timing statistics
            with at least 'mean', 'median', 'min' and 'max' entries

    Returns:
        Formatted string table
    """
    rule = "=" * 80
    # NOTE(review): leading spaces inside these literals are reproduced as
    # they appear in the reviewed text - confirm against the original file.
    lines = [rule, "SORTING ALGORITHM PERFORMANCE COMPARISON", rule, ""]

    for algo_name, by_distribution in results.items():
        lines.append(f"\n{algo_name.upper()}")
        lines.append("-" * 80)

        for dist_name, by_size in by_distribution.items():
            lines.append(f"\n {dist_name}:")
            lines.append(f" {'Size':<10} {'Mean (s)':<15} {'Median (s)':<15} {'Min (s)':<15} {'Max (s)':<15}")
            lines.append(" " + "-" * 70)

            for size in sorted(by_size):
                stats = by_size[size]
                lines.append(
                    f" {size:<10} {stats['mean']:<15.6f} {stats['median']:<15.6f} "
                    f"{stats['min']:<15.6f} {stats['max']:<15.6f}"
                )

    lines.append("\n" + rule)
    return "\n".join(lines)
def partition(
    arr: List[Any],
    low: int,
    high: int,
    pivot_index: int,
    key: Optional[Callable[[Any], Any]] = None
) -> int:
    """
    Partition the array around a pivot element (Lomuto scheme).

    After partitioning, all elements less than the pivot are on the left,
    and all elements greater than or equal to the pivot are on the right.

    Args:
        arr: The array to partition (modified in place)
        low: Starting index of the subarray
        high: Ending index of the subarray (inclusive)
        pivot_index: Index of the pivot element
        key: Optional function to extract comparison key from elements

    Returns:
        The final position of the pivot element after partitioning

    Time Complexity: O(n) where n = high - low + 1
    Space Complexity: O(1)
    """
    # Park the pivot at the end so the scan can ignore it.
    arr[pivot_index], arr[high] = arr[high], arr[pivot_index]
    pivot_value = key(arr[high]) if key else arr[high]

    # arr[low..boundary] holds the elements known to be smaller than the pivot.
    boundary = low - 1
    for j in range(low, high):
        current_value = key(arr[j]) if key else arr[j]
        if current_value < pivot_value:
            boundary += 1
            arr[boundary], arr[j] = arr[j], arr[boundary]

    # Drop the pivot right after the "smaller" region.
    pivot_pos = boundary + 1
    arr[pivot_pos], arr[high] = arr[high], arr[pivot_pos]
    return pivot_pos


def _quicksort_recursive(
    arr: List[Any],
    low: int,
    high: int,
    pivot_selector: Callable[[int, int], int],
    key: Optional[Callable[[Any], Any]] = None
) -> None:
    """
    Recursive helper function for Quicksort.

    Only the smaller side of each partition is handled by a recursive call;
    the larger side is handled by continuing the loop. Each recursive call
    therefore covers at most half of the current range, which keeps the
    call stack at O(log n) frames even when the partitions are maximally
    unbalanced (sorted input, all-equal input).

    Args:
        arr: The array to sort (modified in place)
        low: Starting index
        high: Ending index (inclusive)
        pivot_selector: Function that takes (low, high) and returns pivot index
        key: Optional function to extract comparison key from elements
    """
    while low < high:
        pivot_index = pivot_selector(low, high)
        pivot_pos = partition(arr, low, high, pivot_index, key)

        if pivot_pos - low < high - pivot_pos:
            # Left side is smaller: recurse there, keep looping on the right.
            _quicksort_recursive(arr, low, pivot_pos - 1, pivot_selector, key)
            low = pivot_pos + 1
        else:
            _quicksort_recursive(arr, pivot_pos + 1, high, pivot_selector, key)
            high = pivot_pos - 1


def quicksort(
    arr: List[Any],
    in_place: bool = True,
    key: Optional[Callable[[Any], Any]] = None
) -> Optional[List[Any]]:
    """
    Deterministic Quicksort algorithm.

    Uses the last element as the pivot (Lomuto partition scheme).

    Args:
        arr: The array to sort
        in_place: If True, sorts the array in place and returns None.
                  If False, returns a new sorted array without modifying the original.
        key: Optional function to extract comparison key from elements.
             If provided, elements are compared using key(element).

    Returns:
        None if in_place=True, otherwise a new sorted list

    Time Complexity:
        - Best case: O(n log n) - balanced partitions
        - Average case: O(n log n) - expected balanced partitions
        - Worst case: O(n²) - highly unbalanced partitions (e.g., sorted array)

    Space Complexity:
        O(log n) call-stack frames in every case, because only the smaller
        partition is sorted recursively.

    Example:
        >>> arr = [3, 6, 8, 10, 1, 2, 1]
        >>> quicksort(arr)
        >>> arr
        [1, 1, 2, 3, 6, 8, 10]

        >>> arr = [3, 6, 8, 10, 1, 2, 1]
        >>> sorted_arr = quicksort(arr, in_place=False)
        >>> sorted_arr
        [1, 1, 2, 3, 6, 8, 10]
        >>> arr  # Original unchanged
        [3, 6, 8, 10, 1, 2, 1]
    """
    if not arr:
        return None if in_place else []

    # Sort the caller's list directly, or a copy when it must stay untouched.
    target = arr if in_place else arr.copy()
    _quicksort_recursive(target, 0, len(target) - 1, lambda low, high: high, key)
    return None if in_place else target


def randomized_quicksort(
    arr: List[Any],
    in_place: bool = True,
    key: Optional[Callable[[Any], Any]] = None,
    seed: Optional[int] = None
) -> Optional[List[Any]]:
    """
    Randomized Quicksort algorithm.

    Uses a randomly selected element as the pivot, which helps avoid worst-case
    performance on sorted or nearly sorted inputs.

    Args:
        arr: The array to sort
        in_place: If True, sorts the array in place and returns None.
                  If False, returns a new sorted array without modifying the original.
        key: Optional function to extract comparison key from elements.
             If provided, elements are compared using key(element).
        seed: Optional random seed for reproducibility. When given, pivots
              come from a private random.Random(seed) generator, so the
              module-level `random` state is left untouched. When omitted,
              the module-level generator is used.

    Returns:
        None if in_place=True, otherwise a new sorted list

    Time Complexity:
        - Best case: O(n log n) - balanced partitions
        - Average case: O(n log n) - expected balanced partitions with high probability
        - Worst case: O(n²) - still possible but extremely unlikely with randomization

    Space Complexity:
        O(log n) call-stack frames in every case, because only the smaller
        partition is sorted recursively.

    Example:
        >>> arr = [3, 6, 8, 10, 1, 2, 1]
        >>> randomized_quicksort(arr, seed=42)
        >>> arr
        [1, 1, 2, 3, 6, 8, 10]

        >>> arr = [3, 6, 8, 10, 1, 2, 1]
        >>> sorted_arr = randomized_quicksort(arr, in_place=False, seed=42)
        >>> sorted_arr
        [1, 1, 2, 3, 6, 8, 10]
    """
    if not arr:
        return None if in_place else []

    # A dedicated generator keeps seeding local: reseeding the global one
    # would make every later random.* call in the program deterministic.
    rng = random.Random(seed) if seed is not None else random

    target = arr if in_place else arr.copy()
    _quicksort_recursive(target, 0, len(target) - 1, rng.randint, key)
    return None if in_place else target
def quicksort_3way(
    arr: List[Any],
    in_place: bool = True,
    key: Optional[Callable[[Any], Any]] = None
) -> Optional[List[Any]]:
    """
    Three-way Quicksort (Dutch National Flag algorithm variant).

    Efficiently handles arrays with many duplicate elements by partitioning
    into three parts: elements less than, equal to, and greater than the pivot.
    The last element of each range is used as the pivot.

    Args:
        arr: The array to sort
        in_place: If True, sorts the array in place and returns None.
                  If False, returns a new sorted array without modifying the original.
        key: Optional function to extract comparison key from elements.

    Returns:
        None if in_place=True, otherwise a new sorted list

    Time Complexity:
        - Best case: O(n) - when all elements are equal
        - Average case: O(n log n)
        - Worst case: O(n²) - e.g. already sorted input with the last-element pivot

    Space Complexity:
        O(log n) call-stack frames, because only the smaller of the two
        outer partitions is sorted recursively.

    Example:
        >>> arr = [3, 2, 3, 1, 3, 2, 1]
        >>> quicksort_3way(arr)
        >>> arr
        [1, 1, 2, 2, 3, 3, 3]
    """
    if not arr:
        return None if in_place else []

    # Single working list for the helpers: the caller's list, or a copy
    # when the original must stay untouched.
    target = arr if in_place else arr.copy()

    def _partition(low, high):
        """Three-way partition of target[low..high]; returns (lt, gt).

        On return target[low..lt-1] < pivot, target[lt..gt] == pivot and
        target[gt+1..high] > pivot.
        """
        pivot_value = key(target[high]) if key else target[high]
        lt = low    # target[low..lt-1] < pivot
        i = low     # target[lt..i-1] == pivot
        gt = high   # target[gt+1..high] > pivot

        while i <= gt:
            current_value = key(target[i]) if key else target[i]
            if current_value < pivot_value:
                target[lt], target[i] = target[i], target[lt]
                lt += 1
                i += 1
            elif current_value > pivot_value:
                # The element swapped in from the right is still unexamined,
                # so i is deliberately not advanced here.
                target[i], target[gt] = target[gt], target[i]
                gt -= 1
            else:
                i += 1

        return lt, gt

    def _sort(low, high):
        """Sort target[low..high], recursing only into the smaller side."""
        while low < high:
            lt, gt = _partition(low, high)
            if lt - low < high - gt:
                _sort(low, lt - 1)
                low = gt + 1
            else:
                _sort(gt + 1, high)
                high = lt - 1

    _sort(0, len(target) - 1)
    return None if in_place else target
class TestArrayGenerators(unittest.TestCase):
    """Test array generator functions."""

    def test_generate_random_array(self):
        """Test random array generation."""
        arr = generate_random_array(100, 0, 100)
        self.assertEqual(len(arr), 100)
        self.assertTrue(all(0 <= x <= 100 for x in arr))

    def test_generate_sorted_array(self):
        """Test sorted array generation."""
        arr = generate_sorted_array(10, 0, 1)
        self.assertEqual(arr, list(range(10)))

        arr = generate_sorted_array(5, 10, 2)
        self.assertEqual(arr, [10, 12, 14, 16, 18])

    def test_generate_reverse_sorted_array(self):
        """Test reverse-sorted array generation."""
        arr = generate_reverse_sorted_array(10, 0, 1)
        self.assertEqual(arr, list(range(9, -1, -1)))

    def test_generate_nearly_sorted_array(self):
        """Test nearly sorted array generation."""
        arr = generate_nearly_sorted_array(100, 5)
        self.assertEqual(len(arr), 100)
        # Swapping positions must only reorder the values 0..99.
        self.assertEqual(sorted(arr), list(range(100)))
        # Count adjacent descents (a cheap proxy for "mostly sorted"):
        # each of the 5 swaps can introduce at most a couple of them.
        descents = sum(1 for left, right in zip(arr, arr[1:]) if left > right)
        self.assertLess(descents, 20)

    def test_generate_array_with_duplicates(self):
        """Test array with duplicates generation."""
        arr = generate_array_with_duplicates(100, 5)
        self.assertEqual(len(arr), 100)
        unique_values = set(arr)
        self.assertLessEqual(len(unique_values), 5)
class TestBenchmarking(unittest.TestCase):
    """Test benchmarking utilities."""

    def test_benchmark_quicksort(self):
        """Test benchmarking quicksort."""
        arr = generate_random_array(100)
        stats = benchmark_sorting_algorithm(quicksort, arr, iterations=3)

        for stat_name in ('mean', 'median', 'min', 'max', 'stdev'):
            self.assertIn(stat_name, stats)

        self.assertGreater(stats['mean'], 0)
        self.assertGreaterEqual(stats['min'], 0)
        self.assertGreaterEqual(stats['max'], stats['min'])

    def test_benchmark_randomized_quicksort(self):
        """Test benchmarking randomized quicksort."""
        arr = generate_random_array(100)
        stats = benchmark_sorting_algorithm(
            lambda a: randomized_quicksort(a, seed=42),
            arr,
            iterations=3
        )

        self.assertIn('mean', stats)
        self.assertGreater(stats['mean'], 0)

    def test_benchmark_verifies_sorting(self):
        """Test that benchmarking verifies correct sorting."""
        def bad_sort(arr):
            # Intentionally bad sort that doesn't actually sort
            return arr

        # Fixed, definitely-unsorted input: a randomly generated array could
        # happen to be sorted already, in which case no error would be raised.
        arr = [3, 1, 2]

        with self.assertRaises(ValueError):
            benchmark_sorting_algorithm(bad_sort, arr)
class TestQuicksort(unittest.TestCase):
    """Test cases for deterministic Quicksort."""

    def _check_in_place(self, data, expected):
        """Sort *data* in place with quicksort and compare it to *expected*."""
        quicksort(data)
        self.assertEqual(data, expected)

    def test_empty_array(self):
        """An empty list is handled in both in-place and copy mode."""
        self._check_in_place([], [])
        self.assertEqual(quicksort([], in_place=False), [])

    def test_single_element(self):
        """A one-element list is returned unchanged in both modes."""
        self._check_in_place([42], [42])
        self.assertEqual(quicksort([42], in_place=False), [42])

    def test_two_elements(self):
        """Both orderings of a two-element list end up ascending."""
        self._check_in_place([2, 1], [1, 2])
        self._check_in_place([1, 2], [1, 2])

    def test_already_sorted(self):
        """Sorted input stays sorted."""
        self._check_in_place([1, 2, 3, 4, 5], [1, 2, 3, 4, 5])

    def test_reverse_sorted(self):
        """Descending input becomes ascending."""
        self._check_in_place([5, 4, 3, 2, 1], [1, 2, 3, 4, 5])

    def test_random_array(self):
        """An arbitrary unsorted list is sorted."""
        self._check_in_place([3, 6, 8, 10, 1, 2, 1], [1, 1, 2, 3, 6, 8, 10])

    def test_duplicate_elements(self):
        """Repeated values are all kept."""
        self._check_in_place([5, 2, 8, 2, 9, 1, 5, 5], [1, 2, 2, 5, 5, 5, 8, 9])

    def test_negative_numbers(self):
        """Negative values are ordered correctly."""
        self._check_in_place([-3, 5, -1, 0, -5, 2], [-5, -3, -1, 0, 2, 5])

    def test_large_array(self):
        """A shuffled list of 500 distinct values is sorted."""
        values = list(range(1, 501))
        # Shuffled so the last-element pivot does not hit its worst case.
        random.shuffle(values)
        self._check_in_place(values, list(range(1, 501)))

    def test_in_place_sorting(self):
        """In-place mode mutates the given list object and returns None."""
        values = [5, 2, 8, 1, 9]
        identity_before = id(values)
        outcome = quicksort(values, in_place=True)

        self.assertIsNone(outcome)
        self.assertEqual(id(values), identity_before)
        self.assertEqual(values, [1, 2, 5, 8, 9])

    def test_non_in_place_sorting(self):
        """Copy mode leaves the input untouched and returns the sorted list."""
        values = [5, 2, 8, 1, 9]
        snapshot = values.copy()
        outcome = quicksort(values, in_place=False)

        self.assertEqual(values, snapshot)
        self.assertEqual(outcome, [1, 2, 5, 8, 9])
        self.assertIsNotNone(outcome)

    def test_custom_key_function(self):
        """Elements are ordered by the value the key function extracts."""
        records = [{'value': 3}, {'value': 1}, {'value': 2}]
        quicksort(records, key=lambda x: x['value'])
        self.assertEqual([x['value'] for x in records], [1, 2, 3])

        # Same check with tuples keyed on their first field
        pairs = [(2, 'b'), (1, 'a'), (3, 'c')]
        quicksort(pairs, key=lambda x: x[0])
        self.assertEqual([x[0] for x in pairs], [1, 2, 3])
class TestRandomizedQuicksort(unittest.TestCase):
    """Test cases for Randomized Quicksort."""

    def _check_seeded(self, data, expected):
        """Sort *data* in place with seed 42 and compare it to *expected*."""
        randomized_quicksort(data, seed=42)
        self.assertEqual(data, expected)

    def test_empty_array(self):
        """An empty list stays empty."""
        self._check_seeded([], [])

    def test_single_element(self):
        """A one-element list is left unchanged."""
        self._check_seeded([42], [42])

    def test_random_array(self):
        """An arbitrary unsorted list is sorted."""
        self._check_seeded([3, 6, 8, 10, 1, 2, 1], [1, 1, 2, 3, 6, 8, 10])

    def test_sorted_array(self):
        """Sorted input stays sorted."""
        self._check_seeded([1, 2, 3, 4, 5], [1, 2, 3, 4, 5])

    def test_reverse_sorted_array(self):
        """Descending input becomes ascending."""
        self._check_seeded([5, 4, 3, 2, 1], [1, 2, 3, 4, 5])

    def test_reproducibility_with_seed(self):
        """Two runs with the same seed give the same (sorted) result."""
        first = [5, 2, 8, 1, 9, 3, 7, 4, 6]
        second = first.copy()

        randomized_quicksort(first, seed=42)
        randomized_quicksort(second, seed=42)

        self.assertEqual(first, second)
        self.assertEqual(first, [1, 2, 3, 4, 5, 6, 7, 8, 9])

    def test_large_array(self):
        """A shuffled list of 500 distinct values is sorted."""
        values = list(range(1, 501))
        random.shuffle(values)
        self._check_seeded(values, list(range(1, 501)))

    def test_in_place_sorting(self):
        """In-place mode mutates the given list object and returns None."""
        values = [5, 2, 8, 1, 9]
        identity_before = id(values)
        outcome = randomized_quicksort(values, in_place=True, seed=42)

        self.assertIsNone(outcome)
        self.assertEqual(id(values), identity_before)
        self.assertEqual(values, [1, 2, 5, 8, 9])

    def test_non_in_place_sorting(self):
        """Copy mode leaves the input untouched and returns the sorted list."""
        values = [5, 2, 8, 1, 9]
        snapshot = values.copy()
        outcome = randomized_quicksort(values, in_place=False, seed=42)

        self.assertEqual(values, snapshot)
        self.assertEqual(outcome, [1, 2, 5, 8, 9])
class TestQuicksort3Way(unittest.TestCase):
    """Test cases for Three-way Quicksort."""

    def _check_in_place(self, data, expected):
        """Sort *data* in place with quicksort_3way and compare to *expected*."""
        quicksort_3way(data)
        self.assertEqual(data, expected)

    def test_empty_array(self):
        """An empty list stays empty."""
        self._check_in_place([], [])

    def test_single_element(self):
        """A one-element list is left unchanged."""
        self._check_in_place([42], [42])

    def test_all_duplicates(self):
        """A list of identical values is left unchanged."""
        self._check_in_place([5, 5, 5, 5, 5], [5, 5, 5, 5, 5])

    def test_many_duplicates(self):
        """A list with only three distinct values is sorted."""
        self._check_in_place(
            [3, 2, 3, 1, 3, 2, 1, 3, 2],
            [1, 1, 2, 2, 2, 3, 3, 3, 3],
        )

    def test_random_array(self):
        """An arbitrary unsorted list is sorted."""
        self._check_in_place([3, 6, 8, 10, 1, 2, 1], [1, 1, 2, 3, 6, 8, 10])

    def test_sorted_array(self):
        """Sorted input stays sorted."""
        self._check_in_place([1, 2, 3, 4, 5], [1, 2, 3, 4, 5])

    def test_large_array_with_duplicates(self):
        """1000 random values from 0..10 match the built-in sorted() result."""
        values = [random.randint(0, 10) for _ in range(1000)]
        self._check_in_place(values, sorted(values))
class TestQuicksortEdgeCases(unittest.TestCase):
    """Test edge cases and special scenarios."""

    def test_all_same_elements(self):
        """Identical elements survive both quicksort variants unchanged."""
        values = [7, 7, 7, 7, 7]
        expected = [7, 7, 7, 7, 7]

        quicksort(values)
        self.assertEqual(values, expected)

        # The same list object is sorted again by the randomized variant.
        randomized_quicksort(values, seed=42)
        self.assertEqual(values, expected)

    def test_alternating_pattern(self):
        """A low/high alternating sequence is sorted."""
        values = [1, 5, 2, 4, 3]
        quicksort(values)
        self.assertEqual(values, [1, 2, 3, 4, 5])

    def test_floating_point_numbers(self):
        """Floats are ordered numerically."""
        values = [3.5, 1.2, 4.8, 2.1, 5.9]
        quicksort(values)
        self.assertEqual(values, [1.2, 2.1, 3.5, 4.8, 5.9])

    def test_strings(self):
        """Strings are ordered by their default comparison."""
        words = ['banana', 'apple', 'cherry', 'date']
        quicksort(words)
        self.assertEqual(words, ['apple', 'banana', 'cherry', 'date'])

    def test_mixed_types_with_key(self):
        """Tuples are ordered by the integer field selected by the key."""
        pairs = [('b', 2), ('a', 1), ('c', 3)]
        quicksort(pairs, key=lambda x: x[1])
        self.assertEqual([x[1] for x in pairs], [1, 2, 3])