commit d1d3423e99d7101ad3b8d1a6bd685328267f2fd2 Author: Carlos Gutierrez Date: Sun Nov 23 15:59:23 2025 -0500 Adding algorithms and datastructures diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d7649f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +ENV/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Project specific +*.pyc +.pytest_cache/ +.coverage +htmlcov/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c1b1f34 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Carlos Gutierrez + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..9e02f88 --- /dev/null +++ b/README.md @@ -0,0 +1,254 @@ +# MSCS532 Assignment 6: Medians and Order Statistics & Elementary Data Structures + +**Author:** Carlos Gutierrez +**Email:** cgutierrez44833@ucumberlands.edu +**Course:** MSCS532 – Data Structures and Algorithms +**Assignment:** Medians and Order Statistics & Elementary Data Structures + +## What This Repository Contains + +This repository contains all code, analysis, benchmarks, visualizations, and documentation required for Assignment 6. It includes both deterministic and randomized selection algorithms, complete data structure implementations, and a full empirical and theoretical analysis. All implementations were developed from scratch with comprehensive testing and validation. + +**Full detailed analysis is available in [REPORT.md](REPORT.md).** + +## Overview + +This assignment delivers a comprehensive study of selection algorithms (finding the k-th smallest element) and elementary data structures. It includes deterministic and randomized selection implementations, complete data structure implementations (arrays, stacks, queues, linked lists, and trees), theoretical complexity analysis, empirical benchmarking, test coverage, and reproducible visualization assets. 
+ +## Repository Structure + +``` +MSCS532_Assignment6/ +├── docs/ +│ ├── selection_comparison.png # Selection algorithm performance comparison +│ ├── selection_bar_and_scalability.png # Bar chart and scalability analysis +│ ├── stack_vs_list.png # Stack vs List performance +│ ├── queue_vs_list.png # Queue vs List performance +│ └── linked_list_vs_list.png # Linked List vs List performance +├── examples/ +│ ├── selection_demo.py # Selection algorithm demonstrations +│ ├── data_structures_demo.py # Data structure demonstrations +│ └── generate_plots.py # Script to reproduce all plots +├── src/ +│ ├── deterministic_algorithm.py # Deterministic selection (Median of Medians) +│ ├── randomized_algorithm.py # Randomized selection (Quickselect) +│ ├── data_structures.py # Arrays, Stacks, Queues, Linked Lists, Trees +│ └── benchmark.py # Benchmarking utilities +├── tests/ +│ ├── test_deterministic_algorithm.py # Tests for deterministic selection +│ ├── test_randomized_algorithm.py # Tests for randomized selection +│ └── test_data_structures.py # Tests for data structures +├── requirements.txt # Python dependencies +├── README.md # Project documentation (this file) +└── REPORT.md # Detailed analysis report +``` + +## Part 1: Selection Algorithms + +### Implementation + +#### Deterministic Selection (Median of Medians) +- **File:** [`src/deterministic_algorithm.py`](src/deterministic_algorithm.py) +- **Algorithm:** Median of Medians algorithm for worst-case O(n) selection +- **Key Features:** + - Groups elements into groups of 5 + - Recursively finds median of medians as pivot + - Guarantees worst-case linear time complexity + - Handles edge cases (empty arrays, invalid k values) + +#### Randomized Selection (Quickselect) +- **File:** 
[`src/randomized_algorithm.py`](src/randomized_algorithm.py) +- **Algorithm:** Randomized Quickselect for expected O(n) selection +- **Key Features:** + - Random pivot selection + - Expected linear time complexity + - Optional seed for reproducibility + - Efficient average-case performance + +### API Highlights + +**Deterministic Selection:** +```python +deterministic_select(arr, k, key=None) +find_median(arr, key=None) +``` + +**Randomized Selection:** +```python +randomized_select(arr, k, key=None, seed=None) +find_median(arr, key=None, seed=None) +``` + +### Theoretical Performance Analysis + +| Algorithm | Best Case | Average Case | Worst Case | Space Complexity | +|-----------|-----------|--------------|------------|------------------| +| Deterministic | O(n) | O(n) | O(n) | O(log n) | +| Randomized | O(n) | O(n) | O(n²) | O(log n) expected, O(n) worst case | + +**Key Insights:** +- **Deterministic:** Uses Median of Medians to guarantee a good pivot, ensuring worst-case O(n) time. The algorithm partitions the elements into groups of 5, finds the median of each group, then recursively finds the median of those medians and uses it as the pivot. This guarantees that at least roughly 30% of the elements lie on each side of the pivot. +- **Randomized:** Random pivot selection provides expected O(n) performance. While worst-case is O(n²), the probability of encountering worst-case behavior is exponentially small. +- **Space Complexity:** Both algorithms use O(log n) space for the recursion stack in the average case; the randomized algorithm can degrade to O(n) stack depth in the worst case. 
+ +## Part 2: Elementary Data Structures + +### Implementation + +#### Dynamic Array +- **File:** `src/data_structures.py` +- **Operations:** append, insert, delete, search, access +- **Time Complexity:** + - Access: O(1) + - Append: O(1) amortized + - Insert: O(n) + - Delete: O(n) + - Search: O(n) + +#### Matrix +- **File:** `src/data_structures.py` +- **Operations:** get, set +- **Time Complexity:** O(1) for all operations + +#### Stack +- **File:** `src/data_structures.py` +- **Implementation:** Using Python list (dynamic array) +- **Operations:** push, pop, peek, is_empty, size +- **Time Complexity:** + - Push: O(1) amortized + - Pop: O(1) + - Peek: O(1) + +#### Queue +- **File:** `src/data_structures.py` +- **Implementation:** Using Python list (dynamic array) +- **Operations:** enqueue, dequeue, peek, is_empty, size +- **Time Complexity:** + - Enqueue: O(1) amortized + - Dequeue: O(n) (can be optimized with circular buffer) + - Peek: O(1) + +#### Linked List +- **File:** `src/data_structures.py` +- **Type:** Singly linked list with head and tail pointers +- **Operations:** append, prepend, insert, delete, search, get +- **Time Complexity:** + - Append: O(1) + - Prepend: O(1) + - Insert: O(n) + - Delete: O(n) + - Access: O(n) + - Search: O(n) + +#### Rooted Tree +- **File:** `src/data_structures.py` +- **Implementation:** Using linked nodes with parent-child relationships +- **Operations:** insert, delete, search, traverse (preorder, postorder) +- **Time Complexity:** + - Insert: O(n) for search + O(1) for insertion + - Delete: O(n) + - Search: O(n) + - Traversal: O(n) + +### Trade-offs Analysis + +**Arrays vs Linked Lists:** +- **Arrays:** Fast random access (O(1)), cache-friendly, but fixed size or expensive resizing +- **Linked Lists:** Dynamic size, efficient insertion/deletion at ends, but O(n) access and extra memory for pointers + +**Stack/Queue Implementation:** +- **Using Arrays:** Simple, cache-friendly, but queue dequeue is O(n) +- **Using Linked 
Lists:** O(1) for all operations, but more memory overhead + +## Getting Started + +### Prerequisites + +- Python 3.10 or later +- Recommended to use a virtual environment + +### Installation + +```bash +python -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate +pip install -r requirements.txt +``` + +## Running the Examples + +```bash +python examples/selection_demo.py # Selection algorithm demonstrations +python examples/data_structures_demo.py # Data structure demonstrations +python examples/generate_plots.py # Regenerate all figures in docs/ +``` + +## Running Tests + +```bash +python -m pytest +``` + +The test suite verifies correctness for: +- Deterministic selection algorithm +- Randomized selection algorithm +- All data structure implementations + +## Reproducing the Empirical Study + +1. Activate your environment and install dependencies. +2. Run `python examples/generate_plots.py`. + - Benchmarks may take several minutes depending on hardware. +3. Generated figures will be written to the `docs/` directory. + +## Learning Outcomes + +Through this assignment, I have achieved the following learning outcomes: + +- **Algorithm Implementation**: Successfully implemented both deterministic (Median of Medians) and randomized (Quickselect) selection algorithms from scratch, understanding their theoretical foundations and practical trade-offs. + +- **Complexity Analysis**: Gained deep understanding of amortized analysis, probabilistic analysis, and the difference between worst-case, average-case, and expected time complexities. + +- **Data Structure Mastery**: Implemented fundamental data structures (dynamic arrays, stacks, queues, linked lists, and trees) from scratch, understanding their operations, trade-offs, and appropriate use cases. + +- **Empirical Analysis**: Conducted comprehensive benchmarking across multiple input distributions, learning to interpret performance data and relate empirical results to theoretical predictions. 
+ +- **Software Engineering**: Applied best practices including comprehensive testing, error handling, code documentation, and modular design. + +## Code Screenshots + +Below are screenshots of the actual implementations and testing setup: + +![Deterministic Selection Implementation](docs/screenshot_deterministic.png) +*Figure 1: Deterministic selection algorithm implementation showing the Median of Medians function* + +![Randomized Selection Implementation](docs/screenshot_randomized.png) +*Figure 2: Randomized selection algorithm implementation with random pivot selection* + +![Data Structures Implementation](docs/screenshot_datastructures.png) +*Figure 3: Data structures implementation showing linked list and tree classes* + +![Test Execution](docs/screenshot_tests.png) +*Figure 4: Running comprehensive test suite to validate all implementations* + +![Benchmark Execution](docs/screenshot_benchmarks.png) +*Figure 5: Benchmark execution showing performance comparison between algorithms* + +## Practical Applications + +### Selection Algorithms +- **Statistics:** Finding medians, percentiles, and order statistics +- **Database Systems:** Top-k queries, ranking +- **Machine Learning:** Feature selection, outlier detection +- **Operating Systems:** Process scheduling, priority queues + +### Data Structures +- **Arrays:** General-purpose storage, matrices for scientific computing +- **Stacks:** Expression evaluation, undo/redo functionality, function call management +- **Queues:** Task scheduling, breadth-first search, message queues +- **Linked Lists:** Dynamic memory allocation, implementing other data structures +- **Trees:** File systems, hierarchical data representation, decision trees + +## Academic Integrity Statement + +This project is submitted for academic evaluation in MSCS532 – Data Structures and Algorithms. All code, analysis, and documentation were authored by Carlos Gutierrez for the specific purpose of this assignment. 
\ No newline at end of file diff --git a/docs/linked_list_vs_list.png b/docs/linked_list_vs_list.png new file mode 100644 index 0000000..284458a Binary files /dev/null and b/docs/linked_list_vs_list.png differ diff --git a/docs/queue_vs_list.png b/docs/queue_vs_list.png new file mode 100644 index 0000000..451f70e Binary files /dev/null and b/docs/queue_vs_list.png differ diff --git a/docs/selection_bar_and_scalability.png b/docs/selection_bar_and_scalability.png new file mode 100644 index 0000000..e174cf2 Binary files /dev/null and b/docs/selection_bar_and_scalability.png differ diff --git a/docs/selection_comparison.png b/docs/selection_comparison.png new file mode 100644 index 0000000..06c3f3b Binary files /dev/null and b/docs/selection_comparison.png differ diff --git a/docs/stack_vs_list.png b/docs/stack_vs_list.png new file mode 100644 index 0000000..179aa01 Binary files /dev/null and b/docs/stack_vs_list.png differ diff --git a/examples/data_structures_demo.py b/examples/data_structures_demo.py new file mode 100644 index 0000000..1812eba --- /dev/null +++ b/examples/data_structures_demo.py @@ -0,0 +1,241 @@ +""" +Demonstration of elementary data structures. + +This script demonstrates the usage of arrays, stacks, queues, linked lists, +and trees. 
+ +Author: Carlos Gutierrez +Course: MSCS532 - Data Structures and Algorithms +""" + +import sys +import os + +# Add parent directory to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from src.data_structures import ( + DynamicArray, Matrix, Stack, Queue, LinkedList, Tree +) + + +def demo_dynamic_array(): + """Demonstrate DynamicArray operations.""" + print("=" * 60) + print("Dynamic Array Demo") + print("=" * 60) + + arr = DynamicArray() + + # Append elements + print("Appending elements: 1, 2, 3, 4, 5") + for i in range(1, 6): + arr.append(i) + print(f"Array: {arr}") + print(f"Length: {len(arr)}") + print() + + # Insert element + print("Inserting 10 at index 2") + arr.insert(2, 10) + print(f"Array: {arr}") + print() + + # Delete element + print("Deleting element at index 3") + value = arr.delete(3) + print(f"Deleted value: {value}") + print(f"Array: {arr}") + print() + + # Search + print(f"Searching for 10: index {arr.search(10)}") + print(f"Searching for 99: index {arr.search(99)}") + print() + + +def demo_matrix(): + """Demonstrate Matrix operations.""" + print("=" * 60) + print("Matrix Demo") + print("=" * 60) + + matrix = Matrix(3, 4) + + # Fill matrix + value = 1 + for i in range(3): + for j in range(4): + matrix[i, j] = value + value += 1 + + print("3x4 Matrix:") + print(matrix) + print() + + print(f"Element at (1, 2): {matrix[1, 2]}") + print() + + +def demo_stack(): + """Demonstrate Stack operations.""" + print("=" * 60) + print("Stack Demo") + print("=" * 60) + + stack = Stack() + + # Push elements + print("Pushing elements: 1, 2, 3, 4, 5") + for i in range(1, 6): + stack.push(i) + print(f"Stack: {stack}") + print(f"Size: {stack.size()}") + print() + + # Peek + print(f"Top element (peek): {stack.peek()}") + print() + + # Pop elements + print("Popping elements:") + while not stack.is_empty(): + print(f" Popped: {stack.pop()}") + print() + + +def demo_queue(): + """Demonstrate Queue operations.""" + print("=" * 60) 
+ print("Queue Demo") + print("=" * 60) + + queue = Queue() + + # Enqueue elements + print("Enqueuing elements: 1, 2, 3, 4, 5") + for i in range(1, 6): + queue.enqueue(i) + print(f"Queue: {queue}") + print(f"Size: {queue.size()}") + print() + + # Peek + print(f"Front element (peek): {queue.peek()}") + print() + + # Dequeue elements + print("Dequeuing elements:") + while not queue.is_empty(): + print(f" Dequeued: {queue.dequeue()}") + print() + + +def demo_linked_list(): + """Demonstrate LinkedList operations.""" + print("=" * 60) + print("Linked List Demo") + print("=" * 60) + + ll = LinkedList() + + # Append elements + print("Appending elements: 1, 2, 3, 4, 5") + for i in range(1, 6): + ll.append(i) + print(f"Linked List: {ll}") + print(f"Length: {len(ll)}") + print() + + # Prepend element + print("Prepending 0") + ll.prepend(0) + print(f"Linked List: {ll}") + print() + + # Insert element + print("Inserting 10 at index 3") + ll.insert(3, 10) + print(f"Linked List: {ll}") + print() + + # Get element + print(f"Element at index 2: {ll.get(2)}") + print() + + # Search + print(f"Searching for 10: index {ll.search(10)}") + print(f"Searching for 99: index {ll.search(99)}") + print() + + # Delete element + print("Deleting element at index 3") + value = ll.delete(3) + print(f"Deleted value: {value}") + print(f"Linked List: {ll}") + print() + + +def demo_tree(): + """Demonstrate Tree operations.""" + print("=" * 60) + print("Tree Demo") + print("=" * 60) + + tree = Tree(1) + + # Build tree + print("Building tree:") + print(" 1") + print(" ├── 2") + print(" │ ├── 4") + print(" │ └── 5") + print(" └── 3") + print(" └── 6") + + tree.insert(1, 2) + tree.insert(1, 3) + tree.insert(2, 4) + tree.insert(2, 5) + tree.insert(3, 6) + print() + + # Search + print("Searching for values:") + for value in [1, 2, 3, 4, 5, 6, 7]: + found = tree.search(value) + print(f" {value}: {'Found' if found else 'Not found'}") + print() + + # Traversal + print("Preorder traversal:", 
tree.traverse_preorder()) + print("Postorder traversal:", tree.traverse_postorder()) + print() + + # Height + print(f"Tree height: {tree.height()}") + print() + + # Delete + print("Deleting node 2") + tree.delete(2) + print("Preorder traversal after deletion:", tree.traverse_preorder()) + print() + + +if __name__ == "__main__": + print("\n" + "=" * 60) + print("Elementary Data Structures Demonstration") + print("=" * 60 + "\n") + + demo_dynamic_array() + demo_matrix() + demo_stack() + demo_queue() + demo_linked_list() + demo_tree() + + print("=" * 60) + print("Demo Complete!") + print("=" * 60) + diff --git a/examples/generate_plots.py b/examples/generate_plots.py new file mode 100644 index 0000000..1cb0b61 --- /dev/null +++ b/examples/generate_plots.py @@ -0,0 +1,242 @@ +""" +Generate performance visualization plots for selection algorithms and data structures. + +This script runs benchmarks and generates visualization plots comparing the +performance of deterministic and randomized selection algorithms, as well as +data structure operations. 
+ +Author: Carlos Gutierrez +Course: MSCS532 - Data Structures and Algorithms +""" + +import sys +import os +import matplotlib.pyplot as plt +import numpy as np + +# Add parent directory to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from src.benchmark import ( + generate_random_array, + generate_sorted_array, + generate_reverse_sorted_array, + generate_nearly_sorted_array, + generate_duplicate_heavy_array, + benchmark_selection_algorithms, + compare_stack_vs_list_push, + compare_queue_vs_list, + compare_linked_list_vs_list +) +from src.deterministic_algorithm import deterministic_select +from src.randomized_algorithm import randomized_select + + +def plot_selection_comparison(): + """Generate comparison plots for selection algorithms.""" + print("Generating selection algorithm comparison plots...") + + sizes = [100, 500, 1000, 2000, 5000] + distributions = { + 'Random': generate_random_array, + 'Sorted': generate_sorted_array, + 'Reverse Sorted': generate_reverse_sorted_array, + 'Nearly Sorted': lambda n: generate_nearly_sorted_array(n, swaps=10, seed=42), + 'Many Duplicates': lambda n: generate_duplicate_heavy_array(n, unique_values=10, seed=42) + } + + results = benchmark_selection_algorithms(sizes, distributions, iterations=3) + + # Plot 1: Line plot comparison + plt.figure(figsize=(12, 8)) + for dist_name in distributions: + det_times = results['deterministic'][dist_name] + rand_times = results['randomized'][dist_name] + + # Filter out infinite times + valid_sizes = [s for s, t in zip(sizes, det_times) if t != float('inf')] + valid_det = [t for t in det_times if t != float('inf')] + valid_rand = [t for s, t in zip(sizes, rand_times) if s in valid_sizes] + + if valid_sizes: + plt.plot(valid_sizes, valid_det, marker='o', label=f'Deterministic ({dist_name})', linestyle='--') + plt.plot(valid_sizes, valid_rand, marker='s', label=f'Randomized ({dist_name})', linestyle='-') + + plt.xlabel('Input Size (n)', fontsize=12) + 
plt.ylabel('Execution Time (seconds)', fontsize=12) + plt.title('Selection Algorithm Performance Comparison', fontsize=14, fontweight='bold') + plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left') + plt.grid(True, alpha=0.3) + plt.yscale('log') + plt.tight_layout() + plt.savefig('docs/selection_comparison.png', dpi=300, bbox_inches='tight') + plt.close() + print(" Saved: docs/selection_comparison.png") + + # Plot 2: Bar chart for specific size + fig, axes = plt.subplots(1, 2, figsize=(14, 6)) + + test_size = 2000 + size_idx = sizes.index(test_size) if test_size in sizes else len(sizes) - 1 + + dists = list(distributions.keys()) + det_heights = [results['deterministic'][d][size_idx] if results['deterministic'][d][size_idx] != float('inf') else 0 + for d in dists] + rand_heights = [results['randomized'][d][size_idx] for d in dists] + + x = np.arange(len(dists)) + width = 0.35 + + axes[0].bar(x - width/2, det_heights, width, label='Deterministic', alpha=0.8) + axes[0].bar(x + width/2, rand_heights, width, label='Randomized', alpha=0.8) + axes[0].set_xlabel('Distribution', fontsize=11) + axes[0].set_ylabel('Execution Time (seconds)', fontsize=11) + axes[0].set_title(f'Selection Performance at n={sizes[size_idx]}', fontsize=12, fontweight='bold') + axes[0].set_xticks(x) + axes[0].set_xticklabels(dists, rotation=45, ha='right') + axes[0].legend() + axes[0].grid(True, alpha=0.3, axis='y') + axes[0].set_yscale('log') + + # Scalability plot (random inputs only) + random_sizes = sizes + det_random = results['deterministic']['Random'] + rand_random = results['randomized']['Random'] + + valid_sizes = [s for s, t in zip(random_sizes, det_random) if t != float('inf')] + valid_det = [t for t in det_random if t != float('inf')] + valid_rand = [t for s, t in zip(random_sizes, rand_random) if s in valid_sizes] + + axes[1].plot(valid_sizes, valid_det, marker='o', label='Deterministic', linewidth=2) + axes[1].plot(valid_sizes, valid_rand, marker='s', label='Randomized', linewidth=2) 
+ + # Reference line for O(n) + if valid_sizes: + ref_n = np.array(valid_sizes) + ref_time = valid_det[0] * (ref_n / valid_sizes[0]) + axes[1].plot(ref_n, ref_time, '--', label='O(n) reference', alpha=0.5, color='gray') + + axes[1].set_xlabel('Input Size (n)', fontsize=11) + axes[1].set_ylabel('Execution Time (seconds)', fontsize=11) + axes[1].set_title('Scalability on Random Inputs', fontsize=12, fontweight='bold') + axes[1].legend() + axes[1].grid(True, alpha=0.3) + axes[1].set_xscale('log') + axes[1].set_yscale('log') + + plt.tight_layout() + plt.savefig('docs/selection_bar_and_scalability.png', dpi=300, bbox_inches='tight') + plt.close() + print(" Saved: docs/selection_bar_and_scalability.png") + + +def plot_data_structure_comparison(): + """Generate comparison plots for data structures.""" + print("Generating data structure comparison plots...") + + sizes = [100, 500, 1000, 2000, 5000] + + # Stack vs List + stack_times = [] + list_times = [] + for size in sizes: + result = compare_stack_vs_list_push(size, iterations=10) + stack_times.append(result['stack']) + list_times.append(result['list']) + + # Queue vs List + queue_times = [] + list_enqueue_times = [] + for size in sizes: + result = compare_queue_vs_list(size, iterations=10) + queue_times.append(result['queue_enqueue']) + list_enqueue_times.append(result['list_append']) + + # Linked List vs List + ll_append_times = [] + ll_access_times = [] + list_append_times = [] + list_access_times = [] + for size in sizes: + result = compare_linked_list_vs_list(size, iterations=10) + ll_append_times.append(result['linked_list_append']) + ll_access_times.append(result['linked_list_access']) + list_append_times.append(result['list_append']) + list_access_times.append(result['list_access']) + + # Plot 1: Stack vs List + plt.figure(figsize=(10, 6)) + plt.plot(sizes, stack_times, marker='o', label='Stack.push()', linewidth=2) + plt.plot(sizes, list_times, marker='s', label='List.append()', linewidth=2) + plt.xlabel('Number 
of Operations', fontsize=12) + plt.ylabel('Total Time (seconds)', fontsize=12) + plt.title('Stack vs List: Push/Append Performance', fontsize=14, fontweight='bold') + plt.legend() + plt.grid(True, alpha=0.3) + plt.tight_layout() + plt.savefig('docs/stack_vs_list.png', dpi=300, bbox_inches='tight') + plt.close() + print(" Saved: docs/stack_vs_list.png") + + # Plot 2: Queue vs List + plt.figure(figsize=(10, 6)) + plt.plot(sizes, queue_times, marker='o', label='Queue.enqueue()', linewidth=2) + plt.plot(sizes, list_enqueue_times, marker='s', label='List.append()', linewidth=2) + plt.xlabel('Number of Operations', fontsize=12) + plt.ylabel('Total Time (seconds)', fontsize=12) + plt.title('Queue vs List: Enqueue/Append Performance', fontsize=14, fontweight='bold') + plt.legend() + plt.grid(True, alpha=0.3) + plt.tight_layout() + plt.savefig('docs/queue_vs_list.png', dpi=300, bbox_inches='tight') + plt.close() + print(" Saved: docs/queue_vs_list.png") + + # Plot 3: Linked List vs List + fig, axes = plt.subplots(1, 2, figsize=(14, 6)) + + axes[0].plot(sizes, ll_append_times, marker='o', label='LinkedList.append()', linewidth=2) + axes[0].plot(sizes, list_append_times, marker='s', label='List.append()', linewidth=2) + axes[0].set_xlabel('Number of Operations', fontsize=11) + axes[0].set_ylabel('Total Time (seconds)', fontsize=11) + axes[0].set_title('Append Operation', fontsize=12, fontweight='bold') + axes[0].legend() + axes[0].grid(True, alpha=0.3) + + axes[1].plot(sizes, ll_access_times, marker='o', label='LinkedList.get()', linewidth=2) + axes[1].plot(sizes, list_access_times, marker='s', label='List[index]', linewidth=2) + axes[1].set_xlabel('List Size', fontsize=11) + axes[1].set_ylabel('Time per Access (seconds)', fontsize=11) + axes[1].set_title('Access Operation', fontsize=12, fontweight='bold') + axes[1].legend() + axes[1].grid(True, alpha=0.3) + axes[1].set_yscale('log') + + plt.suptitle('Linked List vs List Performance Comparison', fontsize=14, 
fontweight='bold', y=1.02) + plt.tight_layout() + plt.savefig('docs/linked_list_vs_list.png', dpi=300, bbox_inches='tight') + plt.close() + print(" Saved: docs/linked_list_vs_list.png") + + +if __name__ == "__main__": + # Create docs directory if it doesn't exist + os.makedirs('docs', exist_ok=True) + + print("\n" + "=" * 60) + print("Generating Performance Visualization Plots") + print("=" * 60 + "\n") + + try: + plot_selection_comparison() + print() + plot_data_structure_comparison() + print() + print("=" * 60) + print("All plots generated successfully!") + print("=" * 60) + except Exception as e: + print(f"Error generating plots: {e}") + import traceback + traceback.print_exc() + diff --git a/examples/selection_demo.py b/examples/selection_demo.py new file mode 100644 index 0000000..51ff44d --- /dev/null +++ b/examples/selection_demo.py @@ -0,0 +1,138 @@ +""" +Demonstration of selection algorithms. + +This script demonstrates the usage of deterministic and randomized selection +algorithms for finding the k-th smallest element in an array. 
+ +Author: Carlos Gutierrez +Course: MSCS532 - Data Structures and Algorithms +""" + +import sys +import os + +# Add parent directory to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from src.deterministic_algorithm import deterministic_select, find_median +from src.randomized_algorithm import randomized_select, find_median as rand_find_median + + +def demo_basic_selection(): + """Demonstrate basic selection operations.""" + print("=" * 60) + print("Basic Selection Demo") + print("=" * 60) + + arr = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5] + print(f"Array: {arr}") + print() + + # Find various order statistics + for k in [1, 3, 5, len(arr)]: + det_result = deterministic_select(arr, k) + rand_result = randomized_select(arr, k, seed=42) + print(f" {k}-th smallest element:") + print(f" Deterministic: {det_result}") + print(f" Randomized: {rand_result}") + print() + + # Find median + print("Median:") + print(f" Deterministic: {find_median(arr)}") + print(f" Randomized: {rand_find_median(arr, seed=42)}") + print() + + +def demo_different_distributions(): + """Demonstrate selection on different input distributions.""" + print("=" * 60) + print("Selection on Different Distributions") + print("=" * 60) + + distributions = { + "Sorted": list(range(1, 21)), + "Reverse Sorted": list(range(20, 0, -1)), + "Random": [3, 15, 7, 1, 19, 12, 8, 5, 14, 2, 10, 18, 6, 11, 4, 9, 16, 13, 17, 20], + "With Duplicates": [5, 3, 5, 1, 3, 2, 5, 4, 3, 1, 2, 5, 4, 3, 2, 1, 5, 4, 3, 2] + } + + k = 10 # Find 10th smallest + + for name, arr in distributions.items(): + print(f"\n{name} Array: {arr[:10]}..." 
if len(arr) > 10 else f"{name} Array: {arr}") + det_result = deterministic_select(arr, k) + rand_result = randomized_select(arr, k, seed=42) + print(f" {k}-th smallest:") + print(f" Deterministic: {det_result}") + print(f" Randomized: {rand_result}") + print() + + +def demo_median_finding(): + """Demonstrate median finding.""" + print("=" * 60) + print("Median Finding Demo") + print("=" * 60) + + test_arrays = [ + ([1, 2, 3, 4, 5], "Odd length"), + ([1, 2, 3, 4, 5, 6], "Even length"), + ([5, 2, 8, 1, 9, 3, 7, 4, 6], "Random order"), + ([1, 1, 2, 2, 3, 3, 4, 4], "With duplicates") + ] + + for arr, description in test_arrays: + print(f"\n{description}: {arr}") + det_median = find_median(arr) + rand_median = rand_find_median(arr, seed=42) + print(f" Deterministic median: {det_median}") + print(f" Randomized median: {rand_median}") + print() + + +def demo_custom_key(): + """Demonstrate selection with custom key function.""" + print("=" * 60) + print("Selection with Custom Key Function") + print("=" * 60) + + # Array of dictionaries + students = [ + {'name': 'Alice', 'score': 85}, + {'name': 'Bob', 'score': 92}, + {'name': 'Charlie', 'score': 78}, + {'name': 'Diana', 'score': 95}, + {'name': 'Eve', 'score': 88} + ] + + print("Students:") + for student in students: + print(f" {student['name']}: {student['score']}") + print() + + # Find student with median score + median_student = randomized_select( + students, + (len(students) + 1) // 2, + key=lambda x: x['score'], + seed=42 + ) + print(f"Student with median score: {median_student['name']} ({median_student['score']})") + print() + + +if __name__ == "__main__": + print("\n" + "=" * 60) + print("Selection Algorithms Demonstration") + print("=" * 60 + "\n") + + demo_basic_selection() + demo_different_distributions() + demo_median_finding() + demo_custom_key() + + print("=" * 60) + print("Demo Complete!") + print("=" * 60) + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..00f79da --- 
/dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +numpy>=1.24.0 +matplotlib>=3.7.0 +pytest>=7.4.0 +Pillow>=10.0.0 + diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..6a3e244 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,29 @@ +""" +MSCS532 Assignment 6: Selection Algorithms and Data Structures + +This package contains implementations of: +- Deterministic selection algorithm (Median of Medians) +- Randomized selection algorithm (Quickselect) +- Elementary data structures (Arrays, Stacks, Queues, Linked Lists, Trees) +""" + +from .deterministic_algorithm import deterministic_select, find_median +from .randomized_algorithm import randomized_select, find_median as randomized_find_median +from .data_structures import ( + DynamicArray, Matrix, Stack, Queue, LinkedList, Tree, TreeNode +) + +__all__ = [ + 'deterministic_select', + 'find_median', + 'randomized_select', + 'randomized_find_median', + 'DynamicArray', + 'Matrix', + 'Stack', + 'Queue', + 'LinkedList', + 'Tree', + 'TreeNode', +] + diff --git a/src/benchmark.py b/src/benchmark.py new file mode 100644 index 0000000..91cc256 --- /dev/null +++ b/src/benchmark.py @@ -0,0 +1,300 @@ +""" +Benchmarking utilities for selection algorithms and data structures. + +This module provides functions to generate test data and benchmark the performance +of selection algorithms and data structure operations. 
+ +Author: Carlos Gutierrez +Course: MSCS532 - Data Structures and Algorithms +""" + +import time +import numpy as np +from typing import List, Dict, Tuple, Callable, Any + +# Use try/except to support both relative and absolute imports +try: + from .deterministic_algorithm import deterministic_select + from .randomized_algorithm import randomized_select +except ImportError: + from src.deterministic_algorithm import deterministic_select + from src.randomized_algorithm import randomized_select + + +def generate_random_array(n: int, seed: int = None) -> List[int]: + """Generate a random array of n integers.""" + if seed is not None: + np.random.seed(seed) + return np.random.randint(1, 1000, size=n).tolist() + + +def generate_sorted_array(n: int) -> List[int]: + """Generate a sorted array of n integers.""" + return list(range(1, n + 1)) + + +def generate_reverse_sorted_array(n: int) -> List[int]: + """Generate a reverse-sorted array of n integers.""" + return list(range(n, 0, -1)) + + +def generate_nearly_sorted_array(n: int, swaps: int = 10, seed: int = None) -> List[int]: + """Generate a nearly sorted array with a few swaps.""" + arr = generate_sorted_array(n) + if seed is not None: + np.random.seed(seed) + for _ in range(swaps): + i, j = np.random.randint(0, n, size=2) + arr[i], arr[j] = arr[j], arr[i] + return arr + + +def generate_duplicate_heavy_array(n: int, unique_values: int = 10, seed: int = None) -> List[int]: + """Generate an array with many duplicate values.""" + if seed is not None: + np.random.seed(seed) + values = np.random.randint(1, 1000, size=unique_values).tolist() + return np.random.choice(values, size=n).tolist() + + +def benchmark_selection( + algorithm: Callable, + arr: List[int], + k: int, + iterations: int = 3, + seed: int = None +) -> Tuple[float, Any]: + """ + Benchmark a selection algorithm. 
+ + Args: + algorithm: The selection function to benchmark + arr: Input array + k: The k-th smallest element to find + iterations: Number of iterations to average + seed: Random seed for reproducibility + + Returns: + Tuple of (average_time_seconds, result_value) + """ + times = [] + result = None + + for i in range(iterations): + arr_copy = list(arr) # Fresh copy for each iteration + + start = time.perf_counter() + if seed is not None: + result = algorithm(arr_copy, k, seed=seed + i) + else: + result = algorithm(arr_copy, k) + end = time.perf_counter() + + times.append(end - start) + + avg_time = sum(times) / len(times) + return avg_time, result + + +def benchmark_selection_algorithms( + sizes: List[int], + distributions: Dict[str, Callable], + k_ratios: List[float] = [0.25, 0.5, 0.75], + iterations: int = 3 +) -> Dict[str, Dict[str, List[float]]]: + """ + Benchmark both deterministic and randomized selection algorithms. + + Args: + sizes: List of input sizes to test + distributions: Dictionary mapping distribution names to generator functions + k_ratios: List of ratios to determine k (k = ratio * n) + iterations: Number of iterations per benchmark + + Returns: + Dictionary with benchmark results + """ + results = { + 'deterministic': {dist: [] for dist in distributions}, + 'randomized': {dist: [] for dist in distributions} + } + + for size in sizes: + print(f"Benchmarking size {size}...") + + for dist_name, dist_func in distributions.items(): + # Generate test array + arr = dist_func(size) + + # Test different k values + k_values = [max(1, int(ratio * size)) for ratio in k_ratios] + k = k_values[len(k_values) // 2] # Use middle k value + + # Benchmark deterministic + try: + time_det, _ = benchmark_selection( + deterministic_select, arr, k, iterations + ) + results['deterministic'][dist_name].append(time_det) + except (RecursionError, Exception) as e: + print(f" Deterministic failed for {dist_name} at size {size}: {e}") + 
results['deterministic'][dist_name].append(float('inf')) + + # Benchmark randomized + try: + time_rand, _ = benchmark_selection( + randomized_select, arr, k, iterations, seed=42 + ) + results['randomized'][dist_name].append(time_rand) + except (RecursionError, Exception) as e: + print(f" Randomized failed for {dist_name} at size {size}: {e}") + results['randomized'][dist_name].append(float('inf')) + + return results + + +def benchmark_data_structure_operation( + operation: Callable, + iterations: int = 1000 +) -> float: + """ + Benchmark a data structure operation. + + Args: + operation: Function that performs the operation + iterations: Number of iterations + + Returns: + Average time per operation in seconds + """ + times = [] + for _ in range(iterations): + start = time.perf_counter() + operation() + end = time.perf_counter() + times.append(end - start) + + return sum(times) / len(times) + + +def compare_stack_vs_list_push(n: int, iterations: int = 10) -> Dict[str, float]: + """Compare stack push operation vs list append.""" + try: + from .data_structures import Stack + except ImportError: + from src.data_structures import Stack + + # Benchmark Stack + stack_times = [] + for _ in range(iterations): + stack = Stack() + start = time.perf_counter() + for i in range(n): + stack.push(i) + end = time.perf_counter() + stack_times.append(end - start) + + # Benchmark List + list_times = [] + for _ in range(iterations): + lst = [] + start = time.perf_counter() + for i in range(n): + lst.append(i) + end = time.perf_counter() + list_times.append(end - start) + + return { + 'stack': sum(stack_times) / len(stack_times), + 'list': sum(list_times) / len(list_times) + } + + +def compare_queue_vs_list(n: int, iterations: int = 10) -> Dict[str, float]: + """Compare queue operations vs list operations.""" + try: + from .data_structures import Queue + except ImportError: + from src.data_structures import Queue + + # Benchmark Queue enqueue + queue_times = [] + for _ in 
range(iterations): + queue = Queue() + start = time.perf_counter() + for i in range(n): + queue.enqueue(i) + end = time.perf_counter() + queue_times.append(end - start) + + # Benchmark List append + list_times = [] + for _ in range(iterations): + lst = [] + start = time.perf_counter() + for i in range(n): + lst.append(i) + end = time.perf_counter() + list_times.append(end - start) + + return { + 'queue_enqueue': sum(queue_times) / len(queue_times), + 'list_append': sum(list_times) / len(list_times) + } + + +def compare_linked_list_vs_list(n: int, iterations: int = 10) -> Dict[str, float]: + """Compare linked list operations vs list operations.""" + try: + from .data_structures import LinkedList + except ImportError: + from src.data_structures import LinkedList + + # Benchmark LinkedList append + ll_times = [] + for _ in range(iterations): + ll = LinkedList() + start = time.perf_counter() + for i in range(n): + ll.append(i) + end = time.perf_counter() + ll_times.append(end - start) + + # Benchmark List append + list_times = [] + for _ in range(iterations): + lst = [] + start = time.perf_counter() + for i in range(n): + lst.append(i) + end = time.perf_counter() + list_times.append(end - start) + + # Benchmark LinkedList access + ll = LinkedList() + for i in range(n): + ll.append(i) + + ll_access_times = [] + for _ in range(iterations): + start = time.perf_counter() + _ = ll.get(n // 2) + end = time.perf_counter() + ll_access_times.append(end - start) + + # Benchmark List access + lst = list(range(n)) + list_access_times = [] + for _ in range(iterations): + start = time.perf_counter() + _ = lst[n // 2] + end = time.perf_counter() + list_access_times.append(end - start) + + return { + 'linked_list_append': sum(ll_times) / len(ll_times), + 'list_append': sum(list_times) / len(list_times), + 'linked_list_access': sum(ll_access_times) / len(ll_access_times), + 'list_access': sum(list_access_times) / len(list_access_times) + } + diff --git a/src/data_structures.py 
b/src/data_structures.py new file mode 100644 index 0000000..21e4e57 --- /dev/null +++ b/src/data_structures.py @@ -0,0 +1,533 @@ +""" +Elementary Data Structures Implementation + +This module implements basic data structures including arrays, stacks, queues, +linked lists, and rooted trees. + +Author: Carlos Gutierrez +Course: MSCS532 - Data Structures and Algorithms +""" + +from typing import Optional, Any, List + + +# ============================================================================ +# Arrays and Matrices +# ============================================================================ + +class DynamicArray: + """ + A dynamic array implementation with basic operations. + + Time Complexity: + - Access: O(1) + - Insertion at end: O(1) amortized + - Insertion at index: O(n) + - Deletion: O(n) + - Search: O(n) + """ + + def __init__(self, initial_capacity: int = 10): + """Initialize an empty dynamic array.""" + self._capacity = initial_capacity + self._size = 0 + self._data = [None] * initial_capacity + + def __len__(self) -> int: + """Return the number of elements in the array.""" + return self._size + + def __getitem__(self, index: int) -> Any: + """Get element at index.""" + if index < 0 or index >= self._size: + raise IndexError(f"Index {index} out of range") + return self._data[index] + + def __setitem__(self, index: int, value: Any) -> None: + """Set element at index.""" + if index < 0 or index >= self._size: + raise IndexError(f"Index {index} out of range") + self._data[index] = value + + def append(self, value: Any) -> None: + """Append element to the end of the array. O(1) amortized.""" + if self._size >= self._capacity: + self._resize() + self._data[self._size] = value + self._size += 1 + + def insert(self, index: int, value: Any) -> None: + """Insert element at index. 
O(n).""" + if index < 0 or index > self._size: + raise IndexError(f"Index {index} out of range") + + if self._size >= self._capacity: + self._resize() + + # Shift elements to the right + for i in range(self._size, index, -1): + self._data[i] = self._data[i - 1] + + self._data[index] = value + self._size += 1 + + def delete(self, index: int) -> Any: + """Delete element at index and return it. O(n).""" + if index < 0 or index >= self._size: + raise IndexError(f"Index {index} out of range") + + value = self._data[index] + + # Shift elements to the left + for i in range(index, self._size - 1): + self._data[i] = self._data[i + 1] + + self._size -= 1 + return value + + def search(self, value: Any) -> int: + """Search for value and return its index, or -1 if not found. O(n).""" + for i in range(self._size): + if self._data[i] == value: + return i + return -1 + + def _resize(self) -> None: + """Double the capacity of the array.""" + self._capacity *= 2 + new_data = [None] * self._capacity + for i in range(self._size): + new_data[i] = self._data[i] + self._data = new_data + + def __str__(self) -> str: + """String representation of the array.""" + return str([self._data[i] for i in range(self._size)]) + + +class Matrix: + """ + A 2D matrix implementation with basic operations. 
+ + Time Complexity: + - Access: O(1) + - Insertion: O(1) + - Deletion: O(1) + - Matrix operations: O(n*m) where n, m are dimensions + """ + + def __init__(self, rows: int, cols: int, initial_value: Any = 0): + """Initialize a matrix with given dimensions.""" + self.rows = rows + self.cols = cols + self._data = [[initial_value for _ in range(cols)] for _ in range(rows)] + + def __getitem__(self, key: tuple) -> Any: + """Get element at (row, col).""" + row, col = key + if row < 0 or row >= self.rows or col < 0 or col >= self.cols: + raise IndexError(f"Index ({row}, {col}) out of range") + return self._data[row][col] + + def __setitem__(self, key: tuple, value: Any) -> None: + """Set element at (row, col).""" + row, col = key + if row < 0 or row >= self.rows or col < 0 or col >= self.cols: + raise IndexError(f"Index ({row}, {col}) out of range") + self._data[row][col] = value + + def __str__(self) -> str: + """String representation of the matrix.""" + return '\n'.join([' '.join(map(str, row)) for row in self._data]) + + +# ============================================================================ +# Stacks and Queues +# ============================================================================ + +class Stack: + """ + Stack implementation using a dynamic array. + + Time Complexity: + - Push: O(1) amortized + - Pop: O(1) + - Peek: O(1) + - Search: O(n) + """ + + def __init__(self): + """Initialize an empty stack.""" + self._data = [] + + def push(self, value: Any) -> None: + """Push element onto the stack. O(1) amortized.""" + self._data.append(value) + + def pop(self) -> Any: + """Pop and return the top element. O(1).""" + if self.is_empty(): + raise IndexError("Stack is empty") + return self._data.pop() + + def peek(self) -> Any: + """Return the top element without removing it. O(1).""" + if self.is_empty(): + raise IndexError("Stack is empty") + return self._data[-1] + + def is_empty(self) -> bool: + """Check if the stack is empty. 
O(1).""" + return len(self._data) == 0 + + def size(self) -> int: + """Return the number of elements in the stack. O(1).""" + return len(self._data) + + def __str__(self) -> str: + """String representation of the stack.""" + return str(self._data) + + +class Queue: + """ + Queue implementation using a dynamic array. + + Time Complexity: + - Enqueue: O(1) amortized + - Dequeue: O(n) (can be optimized to O(1) with circular buffer) + - Peek: O(1) + - Search: O(n) + """ + + def __init__(self): + """Initialize an empty queue.""" + self._data = [] + + def enqueue(self, value: Any) -> None: + """Add element to the rear of the queue. O(1) amortized.""" + self._data.append(value) + + def dequeue(self) -> Any: + """Remove and return the front element. O(n).""" + if self.is_empty(): + raise IndexError("Queue is empty") + return self._data.pop(0) + + def peek(self) -> Any: + """Return the front element without removing it. O(1).""" + if self.is_empty(): + raise IndexError("Queue is empty") + return self._data[0] + + def is_empty(self) -> bool: + """Check if the queue is empty. O(1).""" + return len(self._data) == 0 + + def size(self) -> int: + """Return the number of elements in the queue. O(1).""" + return len(self._data) + + def __str__(self) -> str: + """String representation of the queue.""" + return str(self._data) + + +# ============================================================================ +# Linked Lists +# ============================================================================ + +class ListNode: + """Node for linked list.""" + + def __init__(self, value: Any): + self.value = value + self.next: Optional['ListNode'] = None + + +class LinkedList: + """ + Singly linked list implementation. 
+ + Time Complexity: + - Access: O(n) + - Insertion at head: O(1) + - Insertion at tail: O(1) with tail pointer + - Insertion at index: O(n) + - Deletion: O(n) + - Search: O(n) + """ + + def __init__(self): + """Initialize an empty linked list.""" + self.head: Optional[ListNode] = None + self.tail: Optional[ListNode] = None + self._size = 0 + + def __len__(self) -> int: + """Return the number of elements in the list.""" + return self._size + + def append(self, value: Any) -> None: + """Append element to the end of the list. O(1).""" + new_node = ListNode(value) + if self.head is None: + self.head = new_node + self.tail = new_node + else: + self.tail.next = new_node + self.tail = new_node + self._size += 1 + + def prepend(self, value: Any) -> None: + """Prepend element to the beginning of the list. O(1).""" + new_node = ListNode(value) + if self.head is None: + self.head = new_node + self.tail = new_node + else: + new_node.next = self.head + self.head = new_node + self._size += 1 + + def insert(self, index: int, value: Any) -> None: + """Insert element at index. O(n).""" + if index < 0 or index > self._size: + raise IndexError(f"Index {index} out of range") + + if index == 0: + self.prepend(value) + return + + if index == self._size: + self.append(value) + return + + new_node = ListNode(value) + current = self.head + for _ in range(index - 1): + current = current.next + + new_node.next = current.next + current.next = new_node + self._size += 1 + + def delete(self, index: int) -> Any: + """Delete element at index and return it. 
O(n).""" + if index < 0 or index >= self._size: + raise IndexError(f"Index {index} out of range") + + if index == 0: + value = self.head.value + self.head = self.head.next + if self.head is None: + self.tail = None + self._size -= 1 + return value + + current = self.head + for _ in range(index - 1): + current = current.next + + value = current.next.value + current.next = current.next.next + + if current.next is None: + self.tail = current + + self._size -= 1 + return value + + def search(self, value: Any) -> int: + """Search for value and return its index, or -1 if not found. O(n).""" + current = self.head + index = 0 + while current: + if current.value == value: + return index + current = current.next + index += 1 + return -1 + + def get(self, index: int) -> Any: + """Get element at index. O(n).""" + if index < 0 or index >= self._size: + raise IndexError(f"Index {index} out of range") + + current = self.head + for _ in range(index): + current = current.next + return current.value + + def __str__(self) -> str: + """String representation of the linked list.""" + values = [] + current = self.head + while current: + values.append(str(current.value)) + current = current.next + return ' -> '.join(values) if values else 'Empty' + + +# ============================================================================ +# Rooted Trees +# ============================================================================ + +class TreeNode: + """Node for a rooted tree.""" + + def __init__(self, value: Any): + self.value = value + self.children: List['TreeNode'] = [] + self.parent: Optional['TreeNode'] = None + + def add_child(self, child: 'TreeNode') -> None: + """Add a child node.""" + child.parent = self + self.children.append(child) + + def remove_child(self, child: 'TreeNode') -> None: + """Remove a child node.""" + if child in self.children: + child.parent = None + self.children.remove(child) + + +class Tree: + """ + Rooted tree implementation using linked nodes. 
+ + Time Complexity: + - Insertion: O(1) + - Deletion: O(n) worst case + - Search: O(n) + - Traversal: O(n) + """ + + def __init__(self, root_value: Any = None): + """Initialize a tree with an optional root.""" + self.root: Optional[TreeNode] = TreeNode(root_value) if root_value is not None else None + + def insert(self, parent_value: Any, value: Any) -> bool: + """ + Insert a new node as a child of the node with parent_value. + Returns True if successful, False if parent not found. + O(n) for search, O(1) for insertion. + """ + if self.root is None: + self.root = TreeNode(value) + return True + + parent = self._find_node(self.root, parent_value) + if parent is None: + return False + + new_node = TreeNode(value) + parent.add_child(new_node) + return True + + def delete(self, value: Any) -> bool: + """ + Delete a node with the given value. + Returns True if successful, False if node not found. + O(n). + """ + if self.root is None: + return False + + if self.root.value == value: + # If root has children, make first child the new root + if self.root.children: + new_root = self.root.children[0] + new_root.parent = None + # Add remaining children to new root + for child in self.root.children[1:]: + new_root.add_child(child) + self.root = new_root + else: + self.root = None + return True + + node = self._find_node(self.root, value) + if node is None: + return False + + # Save parent and children before removing from parent + parent = node.parent + children_to_move = list(node.children) + + if parent: + parent.remove_child(node) + # Add children to parent + for child in children_to_move: + parent.add_child(child) + else: + # Node has no parent (shouldn't happen if not root, but handle it) + if children_to_move: + # Make first child the new root + self.root = children_to_move[0] + self.root.parent = None + # Add remaining children + for child in children_to_move[1:]: + self.root.add_child(child) + else: + self.root = None + + return True + + def search(self, value: Any) -> 
bool: + """Search for a value in the tree. O(n).""" + if self.root is None: + return False + return self._find_node(self.root, value) is not None + + def _find_node(self, node: TreeNode, value: Any) -> Optional[TreeNode]: + """Helper method to find a node with given value. O(n).""" + if node.value == value: + return node + + for child in node.children: + result = self._find_node(child, value) + if result: + return result + + return None + + def traverse_preorder(self) -> List[Any]: + """Traverse tree in preorder (root, children). O(n).""" + result = [] + if self.root: + self._preorder_helper(self.root, result) + return result + + def _preorder_helper(self, node: TreeNode, result: List[Any]) -> None: + """Helper for preorder traversal.""" + result.append(node.value) + for child in node.children: + self._preorder_helper(child, result) + + def traverse_postorder(self) -> List[Any]: + """Traverse tree in postorder (children, root). O(n).""" + result = [] + if self.root: + self._postorder_helper(self.root, result) + return result + + def _postorder_helper(self, node: TreeNode, result: List[Any]) -> None: + """Helper for postorder traversal.""" + for child in node.children: + self._postorder_helper(child, result) + result.append(node.value) + + def height(self) -> int: + """Calculate the height of the tree. 
O(n).""" + if self.root is None: + return -1 + return self._height_helper(self.root) + + def _height_helper(self, node: TreeNode) -> int: + """Helper to calculate height.""" + if not node.children: + return 0 + return 1 + max(self._height_helper(child) for child in node.children) + diff --git a/src/deterministic_algorithm.py b/src/deterministic_algorithm.py new file mode 100644 index 0000000..a65a987 --- /dev/null +++ b/src/deterministic_algorithm.py @@ -0,0 +1,207 @@ +""" +Deterministic Selection Algorithm (Median of Medians) + +This module implements the deterministic selection algorithm that finds the k-th +smallest element in an array in worst-case O(n) time using the Median of Medians +approach. + +Author: Carlos Gutierrez +Course: MSCS532 - Data Structures and Algorithms +""" + + +def deterministic_select(arr: list, k: int, key=None) -> any: + """ + Find the k-th smallest element in an array using deterministic selection + (Median of Medians algorithm) in worst-case O(n) time. + + Args: + arr: List of comparable elements + k: The k-th smallest element to find (1-indexed, so k=1 is the minimum) + key: Optional function to extract comparison key from elements + + Returns: + The k-th smallest element in the array + + Raises: + ValueError: If k is out of range [1, len(arr)] + IndexError: If array is empty + + Examples: + >>> arr = [3, 1, 4, 1, 5, 9, 2, 6] + >>> deterministic_select(arr, 4) + 3 + >>> deterministic_select(arr, 1) + 1 + >>> deterministic_select(arr, len(arr)) + 9 + """ + if not arr: + raise IndexError("Cannot select from empty array") + + n = len(arr) + if k < 1 or k > n: + raise ValueError(f"k must be between 1 and {n}, got {k}") + + # Use key function if provided + if key is None: + key = lambda x: x + + # Create a copy to avoid modifying the original array + arr_copy = list(arr) + + return _deterministic_select_recursive(arr_copy, 0, n - 1, k, key) + + +def _deterministic_select_recursive(arr: list, left: int, right: int, k: int, key) -> any: + 
""" + Recursive helper function for deterministic selection. + + Args: + arr: The array (will be modified during partitioning) + left: Left index of the subarray + right: Right index of the subarray + k: The k-th smallest element to find (relative to left) + key: Function to extract comparison key + + Returns: + The k-th smallest element in arr[left:right+1] + """ + if left == right: + return arr[left] + + # Find a good pivot using median of medians + pivot_index = _median_of_medians(arr, left, right, key) + + # Partition around the pivot + pivot_index = _partition(arr, left, right, pivot_index, key) + + # Calculate the rank of the pivot in the current subarray + rank = pivot_index - left + 1 + + if k == rank: + return arr[pivot_index] + elif k < rank: + return _deterministic_select_recursive(arr, left, pivot_index - 1, k, key) + else: + return _deterministic_select_recursive(arr, pivot_index + 1, right, k - rank, key) + + +def _median_of_medians(arr: list, left: int, right: int, key) -> int: + """ + Find the median of medians to use as a good pivot. + + This function groups elements into groups of 5, finds the median of each + group, then recursively finds the median of those medians. 
+ + Args: + arr: The array + left: Left index of the subarray + right: Right index of the subarray + key: Function to extract comparison key + + Returns: + Index of the median of medians element + """ + n = right - left + 1 + + # Base case: if n <= 5, just sort and return median + if n <= 5: + # Create indices list and sort by value + indices = list(range(left, right + 1)) + indices.sort(key=lambda i: key(arr[i])) + median_idx = indices[(n - 1) // 2] + return median_idx + + # Divide into groups of 5 and find median of each + medians = [] + for i in range(left, right + 1, 5): + group_end = min(i + 4, right) + group_indices = list(range(i, group_end + 1)) + group_indices.sort(key=lambda idx: key(arr[idx])) + + # Find median of this group + median_index = group_indices[(len(group_indices) - 1) // 2] + medians.append(median_index) + + # Recursively find the median of medians + num_medians = len(medians) + median_of_medians_rank = (num_medians + 1) // 2 + + # Create a temporary array of median values + median_values = [arr[idx] for idx in medians] + median_values_copy = median_values.copy() + + # Find the median value + median_value = _deterministic_select_recursive( + median_values_copy, 0, len(median_values_copy) - 1, median_of_medians_rank, key + ) + + # Find the index of this median value in the original array + for idx in medians: + if arr[idx] == median_value: + return idx + + # Fallback (should not reach here) + return medians[len(medians) // 2] + + +def _partition(arr: list, left: int, right: int, pivot_index: int, key) -> int: + """ + Partition the array around a pivot element. + + Elements less than the pivot go to the left, elements greater go to the right. + The pivot is placed in its final sorted position. 
+ + Args: + arr: The array to partition + left: Left index of the subarray + right: Right index of the subarray + pivot_index: Index of the pivot element + key: Function to extract comparison key + + Returns: + Final index of the pivot after partitioning + """ + pivot_value = key(arr[pivot_index]) + + # Move pivot to the end + arr[pivot_index], arr[right] = arr[right], arr[pivot_index] + + # Partition + store_index = left + for i in range(left, right): + if key(arr[i]) < pivot_value: + arr[store_index], arr[i] = arr[i], arr[store_index] + store_index += 1 + + # Move pivot to its final position + arr[right], arr[store_index] = arr[store_index], arr[right] + + return store_index + + +def find_median(arr: list, key=None) -> any: + """ + Find the median of an array using deterministic selection. + + Args: + arr: List of comparable elements + key: Optional function to extract comparison key + + Returns: + The median element (or lower median if even number of elements) + + Examples: + >>> find_median([3, 1, 4, 1, 5]) + 3 + >>> find_median([3, 1, 4, 1, 5, 9]) + 3 + """ + if not arr: + raise ValueError("Cannot find median of empty array") + + n = len(arr) + k = (n + 1) // 2 # Lower median for even-length arrays + return deterministic_select(arr, k, key) + diff --git a/src/randomized_algorithm.py b/src/randomized_algorithm.py new file mode 100644 index 0000000..bf68263 --- /dev/null +++ b/src/randomized_algorithm.py @@ -0,0 +1,156 @@ +""" +Randomized Selection Algorithm (Quickselect) + +This module implements the randomized selection algorithm that finds the k-th +smallest element in an array in expected O(n) time using a randomized pivot +selection strategy. + +Author: Carlos Gutierrez +Course: MSCS532 - Data Structures and Algorithms +""" + +import random + + +def randomized_select(arr: list, k: int, key=None, seed=None) -> any: + """ + Find the k-th smallest element in an array using randomized selection + (Quickselect algorithm) in expected O(n) time. 
+ + Args: + arr: List of comparable elements + k: The k-th smallest element to find (1-indexed, so k=1 is the minimum) + key: Optional function to extract comparison key from elements + seed: Optional random seed for reproducible results + + Returns: + The k-th smallest element in the array + + Raises: + ValueError: If k is out of range [1, len(arr)] + IndexError: If array is empty + + Examples: + >>> arr = [3, 1, 4, 1, 5, 9, 2, 6] + >>> randomized_select(arr, 4, seed=42) + 3 + >>> randomized_select(arr, 1, seed=42) + 1 + >>> randomized_select(arr, len(arr), seed=42) + 9 + """ + if not arr: + raise IndexError("Cannot select from empty array") + + n = len(arr) + if k < 1 or k > n: + raise ValueError(f"k must be between 1 and {n}, got {k}") + + # Set random seed if provided + if seed is not None: + random.seed(seed) + + # Use key function if provided + if key is None: + key = lambda x: x + + # Create a copy to avoid modifying the original array + arr_copy = list(arr) + + return _randomized_select_recursive(arr_copy, 0, n - 1, k, key) + + +def _randomized_select_recursive(arr: list, left: int, right: int, k: int, key) -> any: + """ + Recursive helper function for randomized selection. 
+ + Args: + arr: The array (will be modified during partitioning) + left: Left index of the subarray + right: Right index of the subarray + k: The k-th smallest element to find (relative to left) + key: Function to extract comparison key + + Returns: + The k-th smallest element in arr[left:right+1] + """ + if left == right: + return arr[left] + + # Randomly select a pivot + pivot_index = random.randint(left, right) + + # Partition around the pivot + pivot_index = _partition(arr, left, right, pivot_index, key) + + # Calculate the rank of the pivot in the current subarray + rank = pivot_index - left + 1 + + if k == rank: + return arr[pivot_index] + elif k < rank: + return _randomized_select_recursive(arr, left, pivot_index - 1, k, key) + else: + return _randomized_select_recursive(arr, pivot_index + 1, right, k - rank, key) + + +def _partition(arr: list, left: int, right: int, pivot_index: int, key) -> int: + """ + Partition the array around a pivot element using Lomuto partition scheme. + + Elements less than the pivot go to the left, elements greater go to the right. + The pivot is placed in its final sorted position. + + Args: + arr: The array to partition + left: Left index of the subarray + right: Right index of the subarray + pivot_index: Index of the pivot element + key: Function to extract comparison key + + Returns: + Final index of the pivot after partitioning + """ + pivot_value = key(arr[pivot_index]) + + # Move pivot to the end + arr[pivot_index], arr[right] = arr[right], arr[pivot_index] + + # Partition + store_index = left + for i in range(left, right): + if key(arr[i]) < pivot_value: + arr[store_index], arr[i] = arr[i], arr[store_index] + store_index += 1 + + # Move pivot to its final position + arr[right], arr[store_index] = arr[store_index], arr[right] + + return store_index + + +def find_median(arr: list, key=None, seed=None) -> any: + """ + Find the median of an array using randomized selection. 
+ + Args: + arr: List of comparable elements + key: Optional function to extract comparison key + seed: Optional random seed for reproducible results + + Returns: + The median element (or lower median if even number of elements) + + Examples: + >>> find_median([3, 1, 4, 1, 5], seed=42) + 3 + >>> find_median([3, 1, 4, 1, 5, 9], seed=42) + 3 + """ + if not arr: + raise ValueError("Cannot find median of empty array") + + n = len(arr) + k = (n + 1) // 2 # Lower median for even-length arrays + return randomized_select(arr, k, key, seed) + diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_data_structures.py b/tests/test_data_structures.py new file mode 100644 index 0000000..b7b8f3e --- /dev/null +++ b/tests/test_data_structures.py @@ -0,0 +1,277 @@ +""" +Unit tests for elementary data structures. + +Author: Carlos Gutierrez +Course: MSCS532 - Data Structures and Algorithms +""" + +import pytest +from src.data_structures import ( + DynamicArray, Matrix, Stack, Queue, LinkedList, Tree, TreeNode +) + + +class TestDynamicArray: + """Test cases for DynamicArray.""" + + def test_initialization(self): + """Test array initialization.""" + arr = DynamicArray() + assert len(arr) == 0 + + def test_append(self): + """Test append operation.""" + arr = DynamicArray() + arr.append(1) + arr.append(2) + assert len(arr) == 2 + assert arr[0] == 1 + assert arr[1] == 2 + + def test_insert(self): + """Test insert operation.""" + arr = DynamicArray() + arr.append(1) + arr.append(3) + arr.insert(1, 2) + assert arr[1] == 2 + assert len(arr) == 3 + + def test_delete(self): + """Test delete operation.""" + arr = DynamicArray() + arr.append(1) + arr.append(2) + arr.append(3) + value = arr.delete(1) + assert value == 2 + assert len(arr) == 2 + assert arr[1] == 3 + + def test_search(self): + """Test search operation.""" + arr = DynamicArray() + arr.append(1) + arr.append(2) + arr.append(3) + assert arr.search(2) == 1 + assert 
class TestDynamicArray:
    """Behavioural checks for the DynamicArray container."""

    def test_initialization(self):
        """A freshly built array reports length zero."""
        fresh = DynamicArray()
        assert len(fresh) == 0

    def test_append(self):
        """Appended values are readable by index in insertion order."""
        dyn = DynamicArray()
        for item in (1, 2):
            dyn.append(item)
        assert len(dyn) == 2
        assert dyn[0] == 1
        assert dyn[1] == 2

    def test_insert(self):
        """insert(1, 2) puts 2 at index 1 and grows the array."""
        dyn = DynamicArray()
        for item in (1, 3):
            dyn.append(item)
        dyn.insert(1, 2)
        assert dyn[1] == 2
        assert len(dyn) == 3

    def test_delete(self):
        """delete(1) returns the removed value and closes the gap."""
        dyn = DynamicArray()
        for item in (1, 2, 3):
            dyn.append(item)
        removed = dyn.delete(1)
        assert removed == 2
        assert len(dyn) == 2
        assert dyn[1] == 3

    def test_search(self):
        """search gives the index of a stored value, or -1 when absent."""
        dyn = DynamicArray()
        for item in (1, 2, 3):
            dyn.append(item)
        assert dyn.search(2) == 1
        assert dyn.search(4) == -1

    def test_index_error(self):
        """Reading one past the last element raises IndexError."""
        dyn = DynamicArray()
        with pytest.raises(IndexError):
            _ = dyn[0]
        dyn.append(1)
        with pytest.raises(IndexError):
            _ = dyn[1]


class TestMatrix:
    """Behavioural checks for the Matrix container."""

    def test_initialization(self):
        """The constructor records the requested row and column counts."""
        grid = Matrix(3, 4)
        assert grid.rows == 3
        assert grid.cols == 4

    def test_get_set(self):
        """Values written to a cell can be read back from that cell."""
        grid = Matrix(2, 2)
        cells = ((0, 0), (0, 1), (1, 0), (1, 1))
        # Fill the cells in row-major order with 1, 2, 3, 4.
        for value, cell in enumerate(cells, start=1):
            grid[cell] = value
        assert grid[0, 0] == 1
        assert grid[1, 1] == 4

    def test_index_error(self):
        """A row index beyond the matrix raises IndexError."""
        grid = Matrix(2, 2)
        with pytest.raises(IndexError):
            _ = grid[3, 0]


class TestStack:
    """Behavioural checks for the Stack container."""

    def test_push_pop(self):
        """Items come off the stack in reverse push order."""
        pile = Stack()
        for item in (1, 2):
            pile.push(item)
        for expected in (2, 1):
            assert pile.pop() == expected

    def test_peek(self):
        """peek shows the top item without removing it."""
        pile = Stack()
        for item in (1, 2):
            pile.push(item)
        assert pile.peek() == 2
        assert pile.size() == 2

    def test_is_empty(self):
        """is_empty is true for a new stack and false after a push."""
        pile = Stack()
        assert pile.is_empty()
        pile.push(1)
        assert not pile.is_empty()

    def test_empty_pop(self):
        """Popping an empty stack raises IndexError."""
        pile = Stack()
        with pytest.raises(IndexError):
            pile.pop()
class TestQueue:
    """Behavioural checks for the Queue container."""

    def test_enqueue_dequeue(self):
        """Items leave the queue in the order they entered."""
        line = Queue()
        for item in (1, 2):
            line.enqueue(item)
        for expected in (1, 2):
            assert line.dequeue() == expected

    def test_peek(self):
        """peek shows the front item without removing it."""
        line = Queue()
        for item in (1, 2):
            line.enqueue(item)
        assert line.peek() == 1
        assert line.size() == 2

    def test_is_empty(self):
        """is_empty is true for a new queue and false after an enqueue."""
        line = Queue()
        assert line.is_empty()
        line.enqueue(1)
        assert not line.is_empty()

    def test_empty_dequeue(self):
        """Dequeuing from an empty queue raises IndexError."""
        line = Queue()
        with pytest.raises(IndexError):
            line.dequeue()


class TestLinkedList:
    """Behavioural checks for the LinkedList container."""

    def test_append(self):
        """Appended values are retrievable by position in insertion order."""
        chain = LinkedList()
        for item in (1, 2):
            chain.append(item)
        assert len(chain) == 2
        assert chain.get(0) == 1
        assert chain.get(1) == 2

    def test_prepend(self):
        """The most recently prepended value sits at position 0."""
        chain = LinkedList()
        for item in (1, 2):
            chain.prepend(item)
        assert chain.get(0) == 2
        assert chain.get(1) == 1

    def test_insert(self):
        """insert(1, 2) puts 2 at position 1 and grows the list."""
        chain = LinkedList()
        for item in (1, 3):
            chain.append(item)
        chain.insert(1, 2)
        assert chain.get(1) == 2
        assert len(chain) == 3

    def test_delete(self):
        """delete(1) returns the removed value and relinks the rest."""
        chain = LinkedList()
        for item in (1, 2, 3):
            chain.append(item)
        removed = chain.delete(1)
        assert removed == 2
        assert len(chain) == 2
        assert chain.get(1) == 3

    def test_search(self):
        """search gives the position of a stored value, or -1 when absent."""
        chain = LinkedList()
        for item in (1, 2, 3):
            chain.append(item)
        assert chain.search(2) == 1
        assert chain.search(4) == -1

    def test_index_error(self):
        """Reading one past the last node raises IndexError."""
        chain = LinkedList()
        with pytest.raises(IndexError):
            chain.get(0)
        chain.append(1)
        with pytest.raises(IndexError):
            chain.get(1)
class TestTree:
    """Behavioural checks for the general Tree structure."""

    def test_initialization(self):
        """Tree(1) creates a root node holding the value 1."""
        sapling = Tree(1)
        assert sapling.root is not None
        assert sapling.root.value == 1

    def test_insert(self):
        """Each successful insert under the root adds one child to it."""
        sapling = Tree(1)
        for child in (2, 3):
            assert sapling.insert(1, child)
        assert len(sapling.root.children) == 2

    def test_search(self):
        """search finds every inserted value and rejects a missing one."""
        sapling = Tree(1)
        for parent, child in ((1, 2), (2, 3)):
            sapling.insert(parent, child)
        for present in (1, 2, 3):
            assert sapling.search(present)
        assert not sapling.search(4)

    def test_delete(self):
        """Deleting a node removes it and leaves its sibling in place."""
        sapling = Tree(1)
        for child in (2, 3):
            sapling.insert(1, child)
        assert sapling.delete(2)
        assert not sapling.search(2)
        assert sapling.search(3)

    def test_traversal(self):
        """Pre-order and post-order walks visit nodes in the expected order."""
        sapling = Tree(1)
        # Shape: 1 -> (2 -> 4), 3
        for parent, child in ((1, 2), (1, 3), (2, 4)):
            sapling.insert(parent, child)

        assert sapling.traverse_preorder() == [1, 2, 4, 3]
        assert sapling.traverse_postorder() == [4, 2, 3, 1]

    def test_height(self):
        """Height is 0 for a lone root and grows by one per added level."""
        sapling = Tree(1)
        assert sapling.height() == 0
        sapling.insert(1, 2)
        assert sapling.height() == 1
        sapling.insert(2, 3)
        assert sapling.height() == 2
class TestDeterministicSelect:
    """Behavioural checks for deterministic_select."""

    def test_basic_selection(self):
        """Selected ranks on a small unsorted sample match sorted order."""
        sample = [3, 1, 4, 1, 5, 9, 2, 6]
        for rank, expected in ((1, 1), (2, 1), (3, 2), (4, 3)):
            assert deterministic_select(sample, rank) == expected
        assert deterministic_select(sample, len(sample)) == 9

    def test_sorted_array(self):
        """On 1..10 in ascending order, the k-th smallest is k."""
        ascending = list(range(1, 11))
        for rank in range(1, 11):
            assert deterministic_select(ascending, rank) == rank

    def test_reverse_sorted_array(self):
        """On 10..1 in descending order, the k-th smallest is still k."""
        descending = list(range(10, 0, -1))
        for rank in range(1, 11):
            assert deterministic_select(descending, rank) == rank

    def test_duplicate_elements(self):
        """Repeated values occupy consecutive ranks."""
        sample = [5, 5, 5, 3, 3, 1, 1, 1]
        expectations = ((1, 1), (2, 1), (3, 1), (4, 3), (5, 3), (6, 5))
        for rank, expected in expectations:
            assert deterministic_select(sample, rank) == expected

    def test_single_element(self):
        """The only element of a one-item list has rank 1."""
        lonely = [42]
        assert deterministic_select(lonely, 1) == 42

    def test_empty_array(self):
        """Selecting from an empty list raises IndexError."""
        with pytest.raises(IndexError):
            deterministic_select([], 1)

    def test_invalid_k(self):
        """Ranks below 1 or above the length raise ValueError."""
        sample = [1, 2, 3]
        for bad_rank in (0, 4):
            with pytest.raises(ValueError):
                deterministic_select(sample, bad_rank)

    def test_key_function(self):
        """A key function decides the ordering of non-comparable records."""
        records = [{'value': 3}, {'value': 1}, {'value': 2}]
        picked = deterministic_select(
            records, 2, key=lambda record: record['value']
        )
        assert picked['value'] == 2

    def test_large_array(self):
        """Middle, minimum and maximum ranks on a 100-element input."""
        descending = list(range(100, 0, -1))
        for rank in (50, 1, 100):
            assert deterministic_select(descending, rank) == rank


class TestFindMedian:
    """Behavioural checks for find_median."""

    def test_odd_length(self):
        """An odd-length input yields its middle element."""
        sample = [3, 1, 4, 1, 5]
        assert find_median(sample) == 3

    def test_even_length(self):
        """An even-length input yields the lower of the two middle elements."""
        sample = [3, 1, 4, 1, 5, 9]
        assert find_median(sample) == 3

    def test_sorted_array(self):
        """The lower median of 1..10 is 5."""
        ascending = list(range(1, 11))
        assert find_median(ascending) == 5

    def test_empty_array(self):
        """An empty input raises ValueError."""
        with pytest.raises(ValueError):
            find_median([])
class TestRandomizedSelect:
    """Behavioural checks for randomized_select."""

    def test_basic_selection(self):
        """Selected ranks on a small unsorted sample match sorted order."""
        sample = [3, 1, 4, 1, 5, 9, 2, 6]
        for rank, expected in ((1, 1), (2, 1), (3, 2), (4, 3)):
            assert randomized_select(sample, rank, seed=42) == expected
        assert randomized_select(sample, len(sample), seed=42) == 9

    def test_sorted_array(self):
        """On 1..10 in ascending order, the k-th smallest is k."""
        ascending = list(range(1, 11))
        for rank in range(1, 11):
            assert randomized_select(ascending, rank, seed=42) == rank

    def test_reverse_sorted_array(self):
        """On 10..1 in descending order, the k-th smallest is still k."""
        descending = list(range(10, 0, -1))
        for rank in range(1, 11):
            assert randomized_select(descending, rank, seed=42) == rank

    def test_duplicate_elements(self):
        """Repeated values occupy consecutive ranks."""
        sample = [5, 5, 5, 3, 3, 1, 1, 1]
        expectations = ((1, 1), (2, 1), (3, 1), (4, 3), (5, 3), (6, 5))
        for rank, expected in expectations:
            assert randomized_select(sample, rank, seed=42) == expected

    def test_single_element(self):
        """The only element of a one-item list has rank 1."""
        lonely = [42]
        assert randomized_select(lonely, 1, seed=42) == 42

    def test_empty_array(self):
        """Selecting from an empty list raises IndexError."""
        with pytest.raises(IndexError):
            randomized_select([], 1)

    def test_invalid_k(self):
        """Ranks below 1 or above the length raise ValueError."""
        sample = [1, 2, 3]
        for bad_rank in (0, 4):
            with pytest.raises(ValueError):
                randomized_select(sample, bad_rank)

    def test_key_function(self):
        """A key function decides the ordering of non-comparable records."""
        records = [{'value': 3}, {'value': 1}, {'value': 2}]
        picked = randomized_select(
            records, 2, key=lambda record: record['value'], seed=42
        )
        assert picked['value'] == 2

    def test_large_array(self):
        """Middle, minimum and maximum ranks on a 100-element input."""
        descending = list(range(100, 0, -1))
        for rank in (50, 1, 100):
            assert randomized_select(descending, rank, seed=42) == rank

    def test_reproducibility(self):
        """Two calls with the same seed give the same answer."""
        sample = [3, 1, 4, 1, 5, 9, 2, 6]
        first = randomized_select(sample, 4, seed=42)
        second = randomized_select(sample, 4, seed=42)
        assert first == second


class TestFindMedian:
    """Behavioural checks for find_median."""

    def test_odd_length(self):
        """An odd-length input yields its middle element."""
        sample = [3, 1, 4, 1, 5]
        assert find_median(sample, seed=42) == 3

    def test_even_length(self):
        """An even-length input yields the lower of the two middle elements."""
        sample = [3, 1, 4, 1, 5, 9]
        assert find_median(sample, seed=42) == 3

    def test_sorted_array(self):
        """The lower median of 1..10 is 5."""
        ascending = list(range(1, 11))
        assert find_median(ascending, seed=42) == 5

    def test_empty_array(self):
        """An empty input raises ValueError."""
        with pytest.raises(ValueError):
            find_median([])