Adding algorithms and data structures

This commit is contained in:
Carlos Gutierrez
2025-11-23 15:59:23 -05:00
commit d1d3423e99
21 changed files with 2653 additions and 0 deletions

View File

@@ -0,0 +1,241 @@
"""
Demonstration of elementary data structures.
This script demonstrates the usage of arrays, stacks, queues, linked lists,
and trees.
Author: Carlos Gutierrez
Course: MSCS532 - Data Structures and Algorithms
"""
import sys
import os
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.data_structures import (
DynamicArray, Matrix, Stack, Queue, LinkedList, Tree
)
def demo_dynamic_array():
    """Walk through append, insert, delete and search on a DynamicArray.

    Each step prints what it is about to do and the resulting state to
    stdout. Nothing is returned.
    """
    rule = "=" * 60
    print(rule)
    print("Dynamic Array Demo")
    print(rule)

    numbers = DynamicArray()

    # Fill the array with the values 1 through 5.
    print("Appending elements: 1, 2, 3, 4, 5")
    for item in (1, 2, 3, 4, 5):
        numbers.append(item)
    print(f"Array: {numbers}")
    print(f"Length: {len(numbers)}")
    print()

    # Insert a new value at index 2.
    print("Inserting 10 at index 2")
    numbers.insert(2, 10)
    print(f"Array: {numbers}")
    print()

    # Remove the element at index 3 and show what delete() handed back.
    print("Deleting element at index 3")
    removed = numbers.delete(3)
    print(f"Deleted value: {removed}")
    print(f"Array: {numbers}")
    print()

    # Look up two values and print whatever search() reports for each.
    for target in (10, 99):
        print(f"Searching for {target}: index {numbers.search(target)}")
    print()
def demo_matrix():
    """Fill a 3x4 Matrix with the values 1..12 and read one cell back.

    Output goes to stdout. Nothing is returned.
    """
    rule = "=" * 60
    print(rule)
    print("Matrix Demo")
    print(rule)

    first_dim, second_dim = 3, 4
    grid = Matrix(first_dim, second_dim)

    # Row-major fill: cell (r, c) receives r * 4 + c + 1, i.e. 1, 2, ..., 12.
    for r in range(first_dim):
        for c in range(second_dim):
            grid[r, c] = r * second_dim + c + 1

    print("3x4 Matrix:")
    print(grid)
    print()
    print(f"Element at (1, 2): {grid[1, 2]}")
    print()
def demo_stack():
    """Push five values onto a Stack, peek at the top, then pop until empty.

    Output goes to stdout. Nothing is returned.
    """
    rule = "=" * 60
    print(rule)
    print("Stack Demo")
    print(rule)

    pile = Stack()

    # Load the stack with 1 through 5.
    print("Pushing elements: 1, 2, 3, 4, 5")
    for item in (1, 2, 3, 4, 5):
        pile.push(item)
    print(f"Stack: {pile}")
    print(f"Size: {pile.size()}")
    print()

    # Inspect the top element via peek().
    print(f"Top element (peek): {pile.peek()}")
    print()

    # Drain the stack, printing every popped value.
    print("Popping elements:")
    while True:
        if pile.is_empty():
            break
        popped = pile.pop()
        print(f" Popped: {popped}")
    print()
def demo_queue():
    """Enqueue five values on a Queue, peek at the front, then dequeue all.

    Output goes to stdout. Nothing is returned.
    """
    rule = "=" * 60
    print(rule)
    print("Queue Demo")
    print(rule)

    line = Queue()

    # Load the queue with 1 through 5.
    print("Enqueuing elements: 1, 2, 3, 4, 5")
    for item in (1, 2, 3, 4, 5):
        line.enqueue(item)
    print(f"Queue: {line}")
    print(f"Size: {line.size()}")
    print()

    # Inspect the front element via peek().
    print(f"Front element (peek): {line.peek()}")
    print()

    # Drain the queue, printing every dequeued value.
    print("Dequeuing elements:")
    while True:
        if line.is_empty():
            break
        served = line.dequeue()
        print(f" Dequeued: {served}")
    print()
def demo_linked_list():
    """Exercise append, prepend, insert, get, search and delete on a LinkedList.

    Each step prints what it is about to do and the resulting state to
    stdout. Nothing is returned.
    """
    rule = "=" * 60
    print(rule)
    print("Linked List Demo")
    print(rule)

    chain = LinkedList()

    # Build the list 1 through 5 by appending.
    print("Appending elements: 1, 2, 3, 4, 5")
    for item in (1, 2, 3, 4, 5):
        chain.append(item)
    print(f"Linked List: {chain}")
    print(f"Length: {len(chain)}")
    print()

    # Add a value via prepend().
    print("Prepending 0")
    chain.prepend(0)
    print(f"Linked List: {chain}")
    print()

    # Insert a value at index 3.
    print("Inserting 10 at index 3")
    chain.insert(3, 10)
    print(f"Linked List: {chain}")
    print()

    # Read one element by position.
    print(f"Element at index 2: {chain.get(2)}")
    print()

    # Look up two values and print whatever search() reports for each.
    for target in (10, 99):
        print(f"Searching for {target}: index {chain.search(target)}")
    print()

    # Remove the element at index 3 and show what delete() handed back.
    print("Deleting element at index 3")
    removed = chain.delete(3)
    print(f"Deleted value: {removed}")
    print(f"Linked List: {chain}")
    print()
def demo_tree():
    """Build a small six-node Tree and show search, traversal, height and delete.

    The printed diagram mirrors the insert calls below: 2 and 3 are attached
    under 1, 4 and 5 under 2, and 6 under 3. Output goes to stdout. Nothing
    is returned.
    """
    print("=" * 60)
    print("Tree Demo")
    print("=" * 60)

    tree = Tree(1)

    # Draw the shape that is about to be built. Nested children are indented
    # under their parent so that 6 visibly hangs off 3, not off the root.
    print("Building tree:")
    print("  1")
    print("  ├── 2")
    print("  │   ├── 4")
    print("  │   └── 5")
    print("  └── 3")
    print("      └── 6")

    # Same calls, in the same order, as the original five insert statements.
    # NOTE(review): the pairs are read as (parent value, child value), which
    # is what the diagram implies - confirm against Tree.insert.
    edges = [(1, 2), (1, 3), (2, 4), (2, 5), (3, 6)]
    for parent_value, child_value in edges:
        tree.insert(parent_value, child_value)
    print()

    # Probe every inserted value plus 7, which was never inserted.
    print("Searching for values:")
    for value in [1, 2, 3, 4, 5, 6, 7]:
        found = tree.search(value)
        print(f" {value}: {'Found' if found else 'Not found'}")
    print()

    # Traversals
    print("Preorder traversal:", tree.traverse_preorder())
    print("Postorder traversal:", tree.traverse_postorder())
    print()

    # Height
    print(f"Tree height: {tree.height()}")
    print()

    # Delete node 2 and show the preorder traversal that remains.
    print("Deleting node 2")
    tree.delete(2)
    print("Preorder traversal after deletion:", tree.traverse_preorder())
    print()
if __name__ == "__main__":
    # Opening banner, every demo in a fixed order, then a closing banner.
    banner_rule = "=" * 60
    print("\n" + banner_rule)
    print("Elementary Data Structures Demonstration")
    print(banner_rule + "\n")

    for run_demo in (
        demo_dynamic_array,
        demo_matrix,
        demo_stack,
        demo_queue,
        demo_linked_list,
        demo_tree,
    ):
        run_demo()

    print(banner_rule)
    print("Demo Complete!")
    print(banner_rule)

242
examples/generate_plots.py Normal file
View File

@@ -0,0 +1,242 @@
"""
Generate performance visualization plots for selection algorithms and data structures.
This script runs benchmarks and generates visualization plots comparing the
performance of deterministic and randomized selection algorithms, as well as
data structure operations.
Author: Carlos Gutierrez
Course: MSCS532 - Data Structures and Algorithms
"""
import sys
import os
import matplotlib.pyplot as plt
import numpy as np
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.benchmark import (
generate_random_array,
generate_sorted_array,
generate_reverse_sorted_array,
generate_nearly_sorted_array,
generate_duplicate_heavy_array,
benchmark_selection_algorithms,
compare_stack_vs_list_push,
compare_queue_vs_list,
compare_linked_list_vs_list
)
from src.deterministic_algorithm import deterministic_select
from src.randomized_algorithm import randomized_select
def _finite_series(sizes, det_times, rand_times):
    """Keep only the sizes at which both timings are finite numbers.

    Returns three parallel lists: sizes, deterministic times, randomized times.
    All three are empty when no size qualifies.
    """
    kept = [
        (n, det, rand)
        for n, det, rand in zip(sizes, det_times, rand_times)
        if np.isfinite(det) and np.isfinite(rand)
    ]
    if not kept:
        return [], [], []
    kept_sizes, kept_det, kept_rand = (list(column) for column in zip(*kept))
    return kept_sizes, kept_det, kept_rand


def _bar_height(seconds):
    """Return the timing itself, or 0 when it is inf/NaN and cannot be drawn."""
    return seconds if np.isfinite(seconds) else 0


def plot_selection_comparison():
    """Benchmark both selection algorithms and save two comparison figures.

    Runs benchmark_selection_algorithms over five input sizes and five input
    distributions, then writes:

    * docs/selection_comparison.png - one line per algorithm and
      distribution, log-scaled time axis.
    * docs/selection_bar_and_scalability.png - a bar chart at n=2000 next to
      a log-log scalability plot on random inputs with a linear reference.

    Paths are relative to the current working directory. Non-finite timings
    are dropped from the line plots and drawn as 0 in the bar chart, for
    both algorithms alike.
    """
    print("Generating selection algorithm comparison plots...")

    # The __main__ guard also creates this folder, but doing it here keeps the
    # function usable when it is imported and called directly.
    os.makedirs('docs', exist_ok=True)

    sizes = [100, 500, 1000, 2000, 5000]
    distributions = {
        'Random': generate_random_array,
        'Sorted': generate_sorted_array,
        'Reverse Sorted': generate_reverse_sorted_array,
        'Nearly Sorted': lambda n: generate_nearly_sorted_array(n, swaps=10, seed=42),
        'Many Duplicates': lambda n: generate_duplicate_heavy_array(n, unique_values=10, seed=42)
    }
    results = benchmark_selection_algorithms(sizes, distributions, iterations=3)
    deterministic = results['deterministic']
    randomized = results['randomized']

    # Plot 1: Line plot comparison
    plt.figure(figsize=(12, 8))
    for dist_name in distributions:
        valid_sizes, valid_det, valid_rand = _finite_series(
            sizes, deterministic[dist_name], randomized[dist_name]
        )
        if valid_sizes:
            plt.plot(valid_sizes, valid_det, marker='o', label=f'Deterministic ({dist_name})', linestyle='--')
            plt.plot(valid_sizes, valid_rand, marker='s', label=f'Randomized ({dist_name})', linestyle='-')
    plt.xlabel('Input Size (n)', fontsize=12)
    plt.ylabel('Execution Time (seconds)', fontsize=12)
    plt.title('Selection Algorithm Performance Comparison', fontsize=14, fontweight='bold')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True, alpha=0.3)
    plt.yscale('log')
    plt.tight_layout()
    plt.savefig('docs/selection_comparison.png', dpi=300, bbox_inches='tight')
    plt.close()
    print(" Saved: docs/selection_comparison.png")

    # Plot 2: Bar chart for specific size
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    test_size = 2000
    # Fall back to the largest benchmarked size if 2000 is not in the list.
    size_idx = sizes.index(test_size) if test_size in sizes else len(sizes) - 1
    dists = list(distributions.keys())
    # Both series get the same sanitising so neither can hand inf/NaN to bar().
    det_heights = [_bar_height(deterministic[d][size_idx]) for d in dists]
    rand_heights = [_bar_height(randomized[d][size_idx]) for d in dists]
    x = np.arange(len(dists))
    width = 0.35
    axes[0].bar(x - width/2, det_heights, width, label='Deterministic', alpha=0.8)
    axes[0].bar(x + width/2, rand_heights, width, label='Randomized', alpha=0.8)
    axes[0].set_xlabel('Distribution', fontsize=11)
    axes[0].set_ylabel('Execution Time (seconds)', fontsize=11)
    axes[0].set_title(f'Selection Performance at n={sizes[size_idx]}', fontsize=12, fontweight='bold')
    axes[0].set_xticks(x)
    axes[0].set_xticklabels(dists, rotation=45, ha='right')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3, axis='y')
    axes[0].set_yscale('log')

    # Scalability plot (random inputs only)
    valid_sizes, valid_det, valid_rand = _finite_series(
        sizes, deterministic['Random'], randomized['Random']
    )
    axes[1].plot(valid_sizes, valid_det, marker='o', label='Deterministic', linewidth=2)
    axes[1].plot(valid_sizes, valid_rand, marker='s', label='Randomized', linewidth=2)
    # Reference line for O(n): scale the first deterministic timing linearly
    # with n so the measured curves can be compared against linear growth.
    if valid_sizes:
        ref_n = np.array(valid_sizes)
        ref_time = valid_det[0] * (ref_n / valid_sizes[0])
        axes[1].plot(ref_n, ref_time, '--', label='O(n) reference', alpha=0.5, color='gray')
    axes[1].set_xlabel('Input Size (n)', fontsize=11)
    axes[1].set_ylabel('Execution Time (seconds)', fontsize=11)
    axes[1].set_title('Scalability on Random Inputs', fontsize=12, fontweight='bold')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)
    axes[1].set_xscale('log')
    axes[1].set_yscale('log')
    plt.tight_layout()
    plt.savefig('docs/selection_bar_and_scalability.png', dpi=300, bbox_inches='tight')
    plt.close(fig)
    print(" Saved: docs/selection_bar_and_scalability.png")
def plot_data_structure_comparison():
    """Benchmark the custom containers against list and save three figures.

    For each of five sizes the stack, queue and linked-list comparison
    helpers are run with iterations=10, and the timings are plotted to:

    * docs/stack_vs_list.png
    * docs/queue_vs_list.png
    * docs/linked_list_vs_list.png

    Paths are relative to the current working directory.
    """
    print("Generating data structure comparison plots...")

    # The __main__ guard also creates this folder, but doing it here keeps the
    # function usable when it is imported and called directly.
    os.makedirs('docs', exist_ok=True)

    sizes = [100, 500, 1000, 2000, 5000]

    # Each benchmark family runs over all sizes before the next one starts,
    # the same call order as the original three loops.

    # Stack vs List
    stack_results = [compare_stack_vs_list_push(size, iterations=10) for size in sizes]
    stack_times = [result['stack'] for result in stack_results]
    list_times = [result['list'] for result in stack_results]

    # Queue vs List
    queue_results = [compare_queue_vs_list(size, iterations=10) for size in sizes]
    queue_times = [result['queue_enqueue'] for result in queue_results]
    list_enqueue_times = [result['list_append'] for result in queue_results]

    # Linked List vs List
    ll_results = [compare_linked_list_vs_list(size, iterations=10) for size in sizes]
    ll_append_times = [result['linked_list_append'] for result in ll_results]
    ll_access_times = [result['linked_list_access'] for result in ll_results]
    list_append_times = [result['list_append'] for result in ll_results]
    list_access_times = [result['list_access'] for result in ll_results]

    # Plot 1: Stack vs List
    plt.figure(figsize=(10, 6))
    plt.plot(sizes, stack_times, marker='o', label='Stack.push()', linewidth=2)
    plt.plot(sizes, list_times, marker='s', label='List.append()', linewidth=2)
    plt.xlabel('Number of Operations', fontsize=12)
    plt.ylabel('Total Time (seconds)', fontsize=12)
    plt.title('Stack vs List: Push/Append Performance', fontsize=14, fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('docs/stack_vs_list.png', dpi=300, bbox_inches='tight')
    plt.close()
    print(" Saved: docs/stack_vs_list.png")

    # Plot 2: Queue vs List
    plt.figure(figsize=(10, 6))
    plt.plot(sizes, queue_times, marker='o', label='Queue.enqueue()', linewidth=2)
    plt.plot(sizes, list_enqueue_times, marker='s', label='List.append()', linewidth=2)
    plt.xlabel('Number of Operations', fontsize=12)
    plt.ylabel('Total Time (seconds)', fontsize=12)
    plt.title('Queue vs List: Enqueue/Append Performance', fontsize=14, fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('docs/queue_vs_list.png', dpi=300, bbox_inches='tight')
    plt.close()
    print(" Saved: docs/queue_vs_list.png")

    # Plot 3: Linked List vs List (append on the left, indexed access on the right)
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    axes[0].plot(sizes, ll_append_times, marker='o', label='LinkedList.append()', linewidth=2)
    axes[0].plot(sizes, list_append_times, marker='s', label='List.append()', linewidth=2)
    axes[0].set_xlabel('Number of Operations', fontsize=11)
    axes[0].set_ylabel('Total Time (seconds)', fontsize=11)
    axes[0].set_title('Append Operation', fontsize=12, fontweight='bold')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)
    axes[1].plot(sizes, ll_access_times, marker='o', label='LinkedList.get()', linewidth=2)
    axes[1].plot(sizes, list_access_times, marker='s', label='List[index]', linewidth=2)
    axes[1].set_xlabel('List Size', fontsize=11)
    axes[1].set_ylabel('Time per Access (seconds)', fontsize=11)
    axes[1].set_title('Access Operation', fontsize=12, fontweight='bold')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)
    axes[1].set_yscale('log')
    plt.suptitle('Linked List vs List Performance Comparison', fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig('docs/linked_list_vs_list.png', dpi=300, bbox_inches='tight')
    plt.close(fig)
    print(" Saved: docs/linked_list_vs_list.png")
if __name__ == "__main__":
    # Create docs directory if it doesn't exist
    os.makedirs('docs', exist_ok=True)
    print("\n" + "=" * 60)
    print("Generating Performance Visualization Plots")
    print("=" * 60 + "\n")
    try:
        plot_selection_comparison()
        print()
        plot_data_structure_comparison()
        print()
        print("=" * 60)
        print("All plots generated successfully!")
        print("=" * 60)
    except Exception as e:
        # Top-level boundary: report the failure with its traceback.
        print(f"Error generating plots: {e}")
        import traceback
        traceback.print_exc()
        # Exit non-zero so shells, Makefiles and CI see that plot generation
        # failed instead of a silent status 0.
        sys.exit(1)

138
examples/selection_demo.py Normal file
View File

@@ -0,0 +1,138 @@
"""
Demonstration of selection algorithms.
This script demonstrates the usage of deterministic and randomized selection
algorithms for finding the k-th smallest element in an array.
Author: Carlos Gutierrez
Course: MSCS532 - Data Structures and Algorithms
"""
import sys
import os
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.deterministic_algorithm import deterministic_select, find_median
from src.randomized_algorithm import randomized_select, find_median as rand_find_median
def demo_basic_selection():
    """Print several order statistics and the median of one sample array.

    Both the deterministic and the randomized routine (seed=42) are called
    for every requested rank. Output goes to stdout. Nothing is returned.
    """
    rule = "=" * 60
    print(rule)
    print("Basic Selection Demo")
    print(rule)

    sample = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
    print(f"Array: {sample}")
    print()

    # Ranks to query: 1, 3, 5 and the array length itself.
    # NOTE(review): SOURCE lost its indentation; the blank-line print() is
    # assumed to follow each rank inside the loop - confirm against the file.
    ranks = (1, 3, 5, len(sample))
    for rank in ranks:
        from_deterministic = deterministic_select(sample, rank)
        from_randomized = randomized_select(sample, rank, seed=42)
        print(f" {rank}-th smallest element:")
        print(f" Deterministic: {from_deterministic}")
        print(f" Randomized: {from_randomized}")
        print()

    # Median via each module's own find_median helper.
    print("Median:")
    deterministic_median = find_median(sample)
    print(f" Deterministic: {deterministic_median}")
    randomized_median = rand_find_median(sample, seed=42)
    print(f" Randomized: {randomized_median}")
    print()
def demo_different_distributions():
    """Run both selection routines for k=10 on four fixed 20-element arrays.

    The arrays are sorted, reverse sorted, shuffled and duplicate-heavy.
    Output goes to stdout. Nothing is returned.
    """
    print("=" * 60)
    print("Selection on Different Distributions")
    print("=" * 60)
    distributions = {
        "Sorted": list(range(1, 21)),
        "Reverse Sorted": list(range(20, 0, -1)),
        "Random": [3, 15, 7, 1, 19, 12, 8, 5, 14, 2, 10, 18, 6, 11, 4, 9, 16, 13, 17, 20],
        "With Duplicates": [5, 3, 5, 1, 3, 2, 5, 4, 3, 1, 2, 5, 4, 3, 2, 1, 5, 4, 3, 2]
    }
    k = 10  # Find 10th smallest
    for name, arr in distributions.items():
        # Show at most the first ten elements. The leading blank line is
        # emitted for short arrays too; the original conditional expression
        # attached the "\n" to the truncated branch only.
        preview = f"{arr[:10]}..." if len(arr) > 10 else f"{arr}"
        print(f"\n{name} Array: {preview}")
        det_result = deterministic_select(arr, k)
        rand_result = randomized_select(arr, k, seed=42)
        print(f" {k}-th smallest:")
        print(f" Deterministic: {det_result}")
        print(f" Randomized: {rand_result}")
    print()
def demo_median_finding():
    """Print the median reported by both implementations for four arrays.

    The arrays are an odd-length one, an even-length one, an unordered one
    and one with repeated values. Output goes to stdout. Nothing is returned.
    """
    rule = "=" * 60
    print(rule)
    print("Median Finding Demo")
    print(rule)

    # (values, label) pairs, printed in this order.
    cases = (
        ([1, 2, 3, 4, 5], "Odd length"),
        ([1, 2, 3, 4, 5, 6], "Even length"),
        ([5, 2, 8, 1, 9, 3, 7, 4, 6], "Random order"),
        ([1, 1, 2, 2, 3, 3, 4, 4], "With duplicates"),
    )
    for values, label in cases:
        print(f"\n{label}: {values}")
        deterministic_median = find_median(values)
        randomized_median = rand_find_median(values, seed=42)
        print(f" Deterministic median: {deterministic_median}")
        print(f" Randomized median: {randomized_median}")
    print()
def demo_custom_key():
    """Select the student with the median score using a key function.

    randomized_select is called on a list of dicts with a key that extracts
    each record's 'score' and with seed=42. Output goes to stdout. Nothing
    is returned.
    """
    rule = "=" * 60
    print(rule)
    print("Selection with Custom Key Function")
    print(rule)

    # Records to select from, each a name with a numeric score.
    students = [
        {'name': 'Alice', 'score': 85},
        {'name': 'Bob', 'score': 92},
        {'name': 'Charlie', 'score': 78},
        {'name': 'Diana', 'score': 95},
        {'name': 'Eve', 'score': 88},
    ]
    print("Students:")
    for record in students:
        print(f" {record['name']}: {record['score']}")
    print()

    # Rank passed as k: (n + 1) // 2, which is 3 for these five students.
    middle_rank = (len(students) + 1) // 2
    median_student = randomized_select(
        students,
        middle_rank,
        key=lambda record: record['score'],
        seed=42,
    )
    print(f"Student with median score: {median_student['name']} ({median_student['score']})")
    print()
if __name__ == "__main__":
    # Opening banner, every demo in a fixed order, then a closing banner.
    banner_rule = "=" * 60
    print("\n" + banner_rule)
    print("Selection Algorithms Demonstration")
    print(banner_rule + "\n")

    for run_demo in (
        demo_basic_selection,
        demo_different_distributions,
        demo_median_finding,
        demo_custom_key,
    ):
        run_demo()

    print(banner_rule)
    print("Demo Complete!")
    print(banner_rule)