Files
MSCS532_Assignment6/examples/generate_plots.py
2025-11-23 15:59:23 -05:00

243 lines
9.3 KiB
Python

"""
Generate performance visualization plots for selection algorithms and data structures.
This script runs benchmarks and generates visualization plots comparing the
performance of deterministic and randomized selection algorithms, as well as
data structure operations.
Author: Carlos Gutierrez
Course: MSCS532 - Data Structures and Algorithms
"""
import sys
import os
import matplotlib.pyplot as plt
import numpy as np
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.benchmark import (
generate_random_array,
generate_sorted_array,
generate_reverse_sorted_array,
generate_nearly_sorted_array,
generate_duplicate_heavy_array,
benchmark_selection_algorithms,
compare_stack_vs_list_push,
compare_queue_vs_list,
compare_linked_list_vs_list
)
from src.deterministic_algorithm import deterministic_select
from src.randomized_algorithm import randomized_select
def plot_selection_comparison():
"""Generate comparison plots for selection algorithms."""
print("Generating selection algorithm comparison plots...")
sizes = [100, 500, 1000, 2000, 5000]
distributions = {
'Random': generate_random_array,
'Sorted': generate_sorted_array,
'Reverse Sorted': generate_reverse_sorted_array,
'Nearly Sorted': lambda n: generate_nearly_sorted_array(n, swaps=10, seed=42),
'Many Duplicates': lambda n: generate_duplicate_heavy_array(n, unique_values=10, seed=42)
}
results = benchmark_selection_algorithms(sizes, distributions, iterations=3)
# Plot 1: Line plot comparison
plt.figure(figsize=(12, 8))
for dist_name in distributions:
det_times = results['deterministic'][dist_name]
rand_times = results['randomized'][dist_name]
# Filter out infinite times
valid_sizes = [s for s, t in zip(sizes, det_times) if t != float('inf')]
valid_det = [t for t in det_times if t != float('inf')]
valid_rand = [t for s, t in zip(sizes, rand_times) if s in valid_sizes]
if valid_sizes:
plt.plot(valid_sizes, valid_det, marker='o', label=f'Deterministic ({dist_name})', linestyle='--')
plt.plot(valid_sizes, valid_rand, marker='s', label=f'Randomized ({dist_name})', linestyle='-')
plt.xlabel('Input Size (n)', fontsize=12)
plt.ylabel('Execution Time (seconds)', fontsize=12)
plt.title('Selection Algorithm Performance Comparison', fontsize=14, fontweight='bold')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True, alpha=0.3)
plt.yscale('log')
plt.tight_layout()
plt.savefig('docs/selection_comparison.png', dpi=300, bbox_inches='tight')
plt.close()
print(" Saved: docs/selection_comparison.png")
# Plot 2: Bar chart for specific size
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
test_size = 2000
size_idx = sizes.index(test_size) if test_size in sizes else len(sizes) - 1
dists = list(distributions.keys())
det_heights = [results['deterministic'][d][size_idx] if results['deterministic'][d][size_idx] != float('inf') else 0
for d in dists]
rand_heights = [results['randomized'][d][size_idx] for d in dists]
x = np.arange(len(dists))
width = 0.35
axes[0].bar(x - width/2, det_heights, width, label='Deterministic', alpha=0.8)
axes[0].bar(x + width/2, rand_heights, width, label='Randomized', alpha=0.8)
axes[0].set_xlabel('Distribution', fontsize=11)
axes[0].set_ylabel('Execution Time (seconds)', fontsize=11)
axes[0].set_title(f'Selection Performance at n={sizes[size_idx]}', fontsize=12, fontweight='bold')
axes[0].set_xticks(x)
axes[0].set_xticklabels(dists, rotation=45, ha='right')
axes[0].legend()
axes[0].grid(True, alpha=0.3, axis='y')
axes[0].set_yscale('log')
# Scalability plot (random inputs only)
random_sizes = sizes
det_random = results['deterministic']['Random']
rand_random = results['randomized']['Random']
valid_sizes = [s for s, t in zip(random_sizes, det_random) if t != float('inf')]
valid_det = [t for t in det_random if t != float('inf')]
valid_rand = [t for s, t in zip(random_sizes, rand_random) if s in valid_sizes]
axes[1].plot(valid_sizes, valid_det, marker='o', label='Deterministic', linewidth=2)
axes[1].plot(valid_sizes, valid_rand, marker='s', label='Randomized', linewidth=2)
# Reference line for O(n)
if valid_sizes:
ref_n = np.array(valid_sizes)
ref_time = valid_det[0] * (ref_n / valid_sizes[0])
axes[1].plot(ref_n, ref_time, '--', label='O(n) reference', alpha=0.5, color='gray')
axes[1].set_xlabel('Input Size (n)', fontsize=11)
axes[1].set_ylabel('Execution Time (seconds)', fontsize=11)
axes[1].set_title('Scalability on Random Inputs', fontsize=12, fontweight='bold')
axes[1].legend()
axes[1].grid(True, alpha=0.3)
axes[1].set_xscale('log')
axes[1].set_yscale('log')
plt.tight_layout()
plt.savefig('docs/selection_bar_and_scalability.png', dpi=300, bbox_inches='tight')
plt.close()
print(" Saved: docs/selection_bar_and_scalability.png")
def plot_data_structure_comparison():
"""Generate comparison plots for data structures."""
print("Generating data structure comparison plots...")
sizes = [100, 500, 1000, 2000, 5000]
# Stack vs List
stack_times = []
list_times = []
for size in sizes:
result = compare_stack_vs_list_push(size, iterations=10)
stack_times.append(result['stack'])
list_times.append(result['list'])
# Queue vs List
queue_times = []
list_enqueue_times = []
for size in sizes:
result = compare_queue_vs_list(size, iterations=10)
queue_times.append(result['queue_enqueue'])
list_enqueue_times.append(result['list_append'])
# Linked List vs List
ll_append_times = []
ll_access_times = []
list_append_times = []
list_access_times = []
for size in sizes:
result = compare_linked_list_vs_list(size, iterations=10)
ll_append_times.append(result['linked_list_append'])
ll_access_times.append(result['linked_list_access'])
list_append_times.append(result['list_append'])
list_access_times.append(result['list_access'])
# Plot 1: Stack vs List
plt.figure(figsize=(10, 6))
plt.plot(sizes, stack_times, marker='o', label='Stack.push()', linewidth=2)
plt.plot(sizes, list_times, marker='s', label='List.append()', linewidth=2)
plt.xlabel('Number of Operations', fontsize=12)
plt.ylabel('Total Time (seconds)', fontsize=12)
plt.title('Stack vs List: Push/Append Performance', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('docs/stack_vs_list.png', dpi=300, bbox_inches='tight')
plt.close()
print(" Saved: docs/stack_vs_list.png")
# Plot 2: Queue vs List
plt.figure(figsize=(10, 6))
plt.plot(sizes, queue_times, marker='o', label='Queue.enqueue()', linewidth=2)
plt.plot(sizes, list_enqueue_times, marker='s', label='List.append()', linewidth=2)
plt.xlabel('Number of Operations', fontsize=12)
plt.ylabel('Total Time (seconds)', fontsize=12)
plt.title('Queue vs List: Enqueue/Append Performance', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('docs/queue_vs_list.png', dpi=300, bbox_inches='tight')
plt.close()
print(" Saved: docs/queue_vs_list.png")
# Plot 3: Linked List vs List
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
axes[0].plot(sizes, ll_append_times, marker='o', label='LinkedList.append()', linewidth=2)
axes[0].plot(sizes, list_append_times, marker='s', label='List.append()', linewidth=2)
axes[0].set_xlabel('Number of Operations', fontsize=11)
axes[0].set_ylabel('Total Time (seconds)', fontsize=11)
axes[0].set_title('Append Operation', fontsize=12, fontweight='bold')
axes[0].legend()
axes[0].grid(True, alpha=0.3)
axes[1].plot(sizes, ll_access_times, marker='o', label='LinkedList.get()', linewidth=2)
axes[1].plot(sizes, list_access_times, marker='s', label='List[index]', linewidth=2)
axes[1].set_xlabel('List Size', fontsize=11)
axes[1].set_ylabel('Time per Access (seconds)', fontsize=11)
axes[1].set_title('Access Operation', fontsize=12, fontweight='bold')
axes[1].legend()
axes[1].grid(True, alpha=0.3)
axes[1].set_yscale('log')
plt.suptitle('Linked List vs List Performance Comparison', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('docs/linked_list_vs_list.png', dpi=300, bbox_inches='tight')
plt.close()
print(" Saved: docs/linked_list_vs_list.png")
if __name__ == "__main__":
# Create docs directory if it doesn't exist
os.makedirs('docs', exist_ok=True)
print("\n" + "=" * 60)
print("Generating Performance Visualization Plots")
print("=" * 60 + "\n")
try:
plot_selection_comparison()
print()
plot_data_structure_comparison()
print()
print("=" * 60)
print("All plots generated successfully!")
print("=" * 60)
except Exception as e:
print(f"Error generating plots: {e}")
import traceback
traceback.print_exc()