468 lines
19 KiB
Python
468 lines
19 KiB
Python
"""
|
|
Generate performance comparison plots for Quicksort algorithms.
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
|
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
from src.comparison import (
|
|
generate_random_array,
|
|
generate_sorted_array,
|
|
generate_reverse_sorted_array,
|
|
generate_nearly_sorted_array,
|
|
generate_array_with_duplicates,
|
|
compare_algorithms
|
|
)
|
|
from src.quicksort import quicksort, randomized_quicksort, quicksort_3way
|
|
import os
|
|
|
|
|
|
def generate_performance_plots():
|
|
"""Generate comprehensive performance comparison plots."""
|
|
print("Generating performance plots...")
|
|
|
|
# Ensure docs directory exists
|
|
os.makedirs('docs', exist_ok=True)
|
|
|
|
# Define algorithms
|
|
algorithms = {
|
|
'Deterministic Quicksort': lambda arr: quicksort(arr),
|
|
'Randomized Quicksort': lambda arr: randomized_quicksort(arr, seed=42)
|
|
}
|
|
|
|
# Define array generators
|
|
array_generators = {
|
|
'Random': generate_random_array,
|
|
'Sorted': generate_sorted_array,
|
|
'Reverse Sorted': generate_reverse_sorted_array,
|
|
'Nearly Sorted': lambda size: generate_nearly_sorted_array(size, swap_count=10),
|
|
'Many Duplicates': lambda size: generate_array_with_duplicates(size, unique_count=10)
|
|
}
|
|
|
|
# Test sizes
|
|
sizes = [100, 500, 1000, 2000, 5000, 10000]
|
|
|
|
print("Running benchmarks (this may take a few minutes)...")
|
|
results = compare_algorithms(
|
|
algorithms=algorithms,
|
|
array_generators=array_generators,
|
|
sizes=sizes,
|
|
iterations=3
|
|
)
|
|
|
|
# Plot 1: Line plot comparing algorithms across distributions
|
|
print("Generating line plot...")
|
|
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
|
|
fig.suptitle('Quicksort Performance Comparison', fontsize=16, fontweight='bold')
|
|
|
|
distributions = ['Random', 'Sorted', 'Reverse Sorted', 'Nearly Sorted', 'Many Duplicates']
|
|
algo_names = list(algorithms.keys())
|
|
colors = ['#1f77b4', '#ff7f0e']
|
|
|
|
for idx, dist in enumerate(distributions):
|
|
ax = axes[idx // 3, idx % 3]
|
|
|
|
for algo_idx, algo_name in enumerate(algo_names):
|
|
if dist in results[algo_name]:
|
|
sizes_list = sorted(results[algo_name][dist].keys())
|
|
# Filter out infinite values
|
|
valid_data = [(s, results[algo_name][dist][s]['mean'])
|
|
for s in sizes_list
|
|
if np.isfinite(results[algo_name][dist][s]['mean'])]
|
|
if valid_data:
|
|
valid_sizes, valid_times = zip(*valid_data)
|
|
ax.plot(valid_sizes, valid_times, marker='o', label=algo_name,
|
|
color=colors[algo_idx], linewidth=2, markersize=6)
|
|
|
|
ax.set_xlabel('Array Size', fontsize=10)
|
|
ax.set_ylabel('Time (seconds)', fontsize=10)
|
|
ax.set_title(f'{dist} Distribution', fontsize=12, fontweight='bold')
|
|
ax.legend()
|
|
ax.grid(True, alpha=0.3)
|
|
ax.set_xscale('log')
|
|
ax.set_yscale('log')
|
|
|
|
# Hide the last subplot
|
|
axes[1, 2].axis('off')
|
|
|
|
plt.tight_layout()
|
|
plt.savefig('docs/quicksort_comparison.png', dpi=300, bbox_inches='tight')
|
|
print("Saved: docs/quicksort_comparison.png")
|
|
plt.close()
|
|
|
|
# Plot 2: Bar chart comparing algorithms on sorted vs random
|
|
print("Generating bar chart...")
|
|
fig, ax = plt.subplots(figsize=(14, 8))
|
|
|
|
distributions_to_plot = ['Random', 'Sorted', 'Reverse Sorted']
|
|
x = np.arange(len(distributions_to_plot))
|
|
width = 0.35
|
|
|
|
# Use size 5000 for comparison
|
|
size = 5000
|
|
|
|
det_times = []
|
|
rand_times = []
|
|
|
|
for dist in distributions_to_plot:
|
|
# Check if data exists and is finite (not inf or nan)
|
|
det_val = None
|
|
if (dist in results['Deterministic Quicksort'] and
|
|
size in results['Deterministic Quicksort'][dist]):
|
|
mean_val = results['Deterministic Quicksort'][dist][size]['mean']
|
|
if np.isfinite(mean_val):
|
|
det_val = mean_val
|
|
|
|
det_times.append(det_val if det_val is not None else np.nan)
|
|
|
|
rand_val = None
|
|
if (dist in results['Randomized Quicksort'] and
|
|
size in results['Randomized Quicksort'][dist]):
|
|
mean_val = results['Randomized Quicksort'][dist][size]['mean']
|
|
if np.isfinite(mean_val):
|
|
rand_val = mean_val
|
|
|
|
rand_times.append(rand_val if rand_val is not None else np.nan)
|
|
|
|
bars1 = ax.bar(x - width/2, det_times, width, label='Deterministic Quicksort',
|
|
color='#1f77b4', alpha=0.8)
|
|
bars2 = ax.bar(x + width/2, rand_times, width, label='Randomized Quicksort',
|
|
color='#ff7f0e', alpha=0.8)
|
|
|
|
ax.set_xlabel('Input Distribution', fontsize=12, fontweight='bold')
|
|
ax.set_ylabel('Time (seconds)', fontsize=12, fontweight='bold')
|
|
ax.set_title(f'Quicksort Performance Comparison (Array Size: {size})',
|
|
fontsize=14, fontweight='bold')
|
|
ax.set_xticks(x)
|
|
ax.set_xticklabels(distributions_to_plot)
|
|
ax.legend(fontsize=11)
|
|
ax.grid(True, alpha=0.3, axis='y')
|
|
|
|
# Add value labels on bars
|
|
for bars in [bars1, bars2]:
|
|
for bar in bars:
|
|
height = bar.get_height()
|
|
if height > 0 and np.isfinite(height):
|
|
ax.text(bar.get_x() + bar.get_width()/2., height,
|
|
f'{height:.4f}s',
|
|
ha='center', va='bottom', fontsize=9)
|
|
|
|
# Add annotation for missing data
|
|
missing_det = [i for i, (dist, val) in enumerate(zip(distributions_to_plot, det_times))
|
|
if not np.isfinite(val) or np.isnan(val)]
|
|
if missing_det:
|
|
missing_dists = [distributions_to_plot[i] for i in missing_det]
|
|
note_text = f"Note: Deterministic Quicksort failed on {', '.join(missing_dists)}\n"
|
|
note_text += "due to worst-case O(n²) performance (see README for details)"
|
|
ax.text(0.5, 0.02, note_text, transform=ax.transAxes,
|
|
fontsize=9, ha='center', va='bottom',
|
|
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
|
|
|
|
plt.tight_layout()
|
|
plt.savefig('docs/quicksort_comparison_bar.png', dpi=300, bbox_inches='tight')
|
|
print("Saved: docs/quicksort_comparison_bar.png")
|
|
plt.close()
|
|
|
|
# Plot 3: Scalability analysis
|
|
print("Generating scalability plot...")
|
|
fig, ax = plt.subplots(figsize=(12, 8))
|
|
|
|
# Focus on random distribution for scalability
|
|
dist = 'Random'
|
|
# Filter out infinite values
|
|
valid_sizes_det = [s for s in sizes
|
|
if (s in results['Deterministic Quicksort'][dist] and
|
|
np.isfinite(results['Deterministic Quicksort'][dist][s]['mean']))]
|
|
valid_sizes_rand = [s for s in sizes
|
|
if (s in results['Randomized Quicksort'][dist] and
|
|
np.isfinite(results['Randomized Quicksort'][dist][s]['mean']))]
|
|
|
|
sizes_list = sorted(set(valid_sizes_det + valid_sizes_rand))
|
|
|
|
det_times = [results['Deterministic Quicksort'][dist][s]['mean']
|
|
for s in sizes_list if s in valid_sizes_det]
|
|
det_sizes = [s for s in sizes_list if s in valid_sizes_det]
|
|
rand_times = [results['Randomized Quicksort'][dist][s]['mean']
|
|
for s in sizes_list if s in valid_sizes_rand]
|
|
rand_sizes = [s for s in sizes_list if s in valid_sizes_rand]
|
|
|
|
if det_sizes:
|
|
ax.plot(det_sizes, det_times, marker='o', label='Deterministic Quicksort',
|
|
color='#1f77b4', linewidth=2.5, markersize=8)
|
|
if rand_sizes:
|
|
ax.plot(rand_sizes, rand_times, marker='s', label='Randomized Quicksort',
|
|
color='#ff7f0e', linewidth=2.5, markersize=8)
|
|
|
|
# Add theoretical O(n log n) reference line
|
|
if det_sizes and det_times:
|
|
# Normalize to match first data point
|
|
n_log_n = [s * np.log2(s) for s in det_sizes]
|
|
scale_factor = det_times[0] / n_log_n[0] if n_log_n[0] > 0 else 1
|
|
n_log_n_scaled = [x * scale_factor for x in n_log_n]
|
|
ax.plot(det_sizes, n_log_n_scaled, '--', label='O(n log n) reference',
|
|
color='gray', linewidth=2, alpha=0.7)
|
|
|
|
ax.set_xlabel('Array Size', fontsize=12, fontweight='bold')
|
|
ax.set_ylabel('Time (seconds)', fontsize=12, fontweight='bold')
|
|
ax.set_title('Quicksort Scalability Analysis (Random Distribution)',
|
|
fontsize=14, fontweight='bold')
|
|
ax.legend(fontsize=11)
|
|
ax.grid(True, alpha=0.3)
|
|
ax.set_xscale('log')
|
|
ax.set_yscale('log')
|
|
|
|
plt.tight_layout()
|
|
plt.savefig('docs/quicksort_scalability.png', dpi=300, bbox_inches='tight')
|
|
print("Saved: docs/quicksort_scalability.png")
|
|
plt.close()
|
|
|
|
# Plot 4: Worst-case comparison (sorted vs reverse sorted)
|
|
print("Generating worst-case comparison plot...")
|
|
fig, ax = plt.subplots(figsize=(12, 8))
|
|
|
|
worst_case_dists = ['Sorted', 'Reverse Sorted']
|
|
# Use all sizes, not just those from Random distribution
|
|
all_sizes = sorted(sizes)
|
|
x = np.arange(len(all_sizes))
|
|
width = 0.35
|
|
|
|
for dist_idx, dist in enumerate(worst_case_dists):
|
|
det_times = []
|
|
rand_times = []
|
|
|
|
for size in all_sizes:
|
|
# Check if data exists and is finite (not inf or nan)
|
|
det_val = None
|
|
if (dist in results['Deterministic Quicksort'] and
|
|
size in results['Deterministic Quicksort'][dist]):
|
|
mean_val = results['Deterministic Quicksort'][dist][size]['mean']
|
|
if np.isfinite(mean_val):
|
|
det_val = mean_val
|
|
|
|
det_times.append(det_val if det_val is not None else np.nan)
|
|
|
|
rand_val = None
|
|
if (dist in results['Randomized Quicksort'] and
|
|
size in results['Randomized Quicksort'][dist]):
|
|
mean_val = results['Randomized Quicksort'][dist][size]['mean']
|
|
if np.isfinite(mean_val):
|
|
rand_val = mean_val
|
|
|
|
rand_times.append(rand_val if rand_val is not None else np.nan)
|
|
|
|
offset = (dist_idx - 0.5) * width
|
|
ax.bar(x + offset, det_times, width/2, label=f'Deterministic ({dist})',
|
|
alpha=0.7, color=['#1f77b4', '#2ca02c'][dist_idx])
|
|
ax.bar(x + offset + width/2, rand_times, width/2,
|
|
label=f'Randomized ({dist})', alpha=0.7,
|
|
color=['#ff7f0e', '#d62728'][dist_idx])
|
|
|
|
ax.set_xlabel('Array Size', fontsize=12, fontweight='bold')
|
|
ax.set_ylabel('Time (seconds)', fontsize=12, fontweight='bold')
|
|
ax.set_title('Worst-Case Performance: Sorted vs Reverse Sorted',
|
|
fontsize=14, fontweight='bold')
|
|
ax.set_xticks(x)
|
|
ax.set_xticklabels([str(s) for s in all_sizes], rotation=45)
|
|
ax.legend(fontsize=10, ncol=2)
|
|
ax.grid(True, alpha=0.3, axis='y')
|
|
ax.set_yscale('log')
|
|
|
|
# Add annotation for missing data
|
|
missing_sizes = []
|
|
for size_idx, size in enumerate(all_sizes):
|
|
has_det_data = False
|
|
for dist in worst_case_dists:
|
|
if (dist in results['Deterministic Quicksort'] and
|
|
size in results['Deterministic Quicksort'][dist]):
|
|
mean_val = results['Deterministic Quicksort'][dist][size]['mean']
|
|
if np.isfinite(mean_val):
|
|
has_det_data = True
|
|
break
|
|
if not has_det_data:
|
|
missing_sizes.append(size)
|
|
|
|
if missing_sizes:
|
|
note_text = f"Note: Missing bars for Deterministic Quicksort at sizes ≥{min(missing_sizes)}\n"
|
|
note_text += "indicate execution failures due to recursion limits and O(n²) complexity"
|
|
ax.text(0.5, 0.02, note_text, transform=ax.transAxes,
|
|
fontsize=9, ha='center', va='bottom',
|
|
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
|
|
|
|
plt.tight_layout()
|
|
plt.savefig('docs/quicksort_worst_case.png', dpi=300, bbox_inches='tight')
|
|
print("Saved: docs/quicksort_worst_case.png")
|
|
plt.close()
|
|
|
|
print("\nAll plots generated successfully!")
|
|
print("Plots saved in the 'docs' directory.")
|
|
|
|
|
|
def generate_3way_quicksort_plot():
|
|
"""Generate visualization comparing standard Quicksort vs Three-Way Quicksort on duplicate-heavy data."""
|
|
print("\nGenerating Three-Way Quicksort comparison plot...")
|
|
|
|
# Ensure docs directory exists
|
|
os.makedirs('docs', exist_ok=True)
|
|
|
|
# Define algorithms for comparison
|
|
algorithms = {
|
|
'Standard Quicksort (Randomized)': lambda arr: randomized_quicksort(arr, seed=42),
|
|
'Three-Way Quicksort': lambda arr: quicksort_3way(arr)
|
|
}
|
|
|
|
# Test with different duplicate levels
|
|
sizes = [100, 500, 1000, 2000, 5000, 10000]
|
|
duplicate_configs = [
|
|
('10 Unique Values', lambda size: generate_array_with_duplicates(size, unique_count=10)),
|
|
('5 Unique Values', lambda size: generate_array_with_duplicates(size, unique_count=5)),
|
|
('All Equal', lambda size: [5] * size) # All elements are the same
|
|
]
|
|
|
|
print("Running benchmarks for Three-Way Quicksort comparison...")
|
|
|
|
# Run benchmarks once for all configurations
|
|
all_results = {}
|
|
for config_name, gen_func in duplicate_configs:
|
|
array_generators = {'Test': gen_func}
|
|
results = compare_algorithms(
|
|
algorithms=algorithms,
|
|
array_generators=array_generators,
|
|
sizes=sizes,
|
|
iterations=3
|
|
)
|
|
all_results[config_name] = results
|
|
|
|
# Create figure with subplots
|
|
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
|
|
fig.suptitle('Three-Way Quicksort vs Standard Quicksort on Duplicate-Heavy Data',
|
|
fontsize=16, fontweight='bold')
|
|
|
|
colors = ['#1f77b4', '#ff7f0e']
|
|
markers = ['o', 's']
|
|
|
|
for config_idx, (config_name, _) in enumerate(duplicate_configs):
|
|
ax = axes[config_idx]
|
|
results = all_results[config_name]
|
|
|
|
for algo_idx, algo_name in enumerate(algorithms.keys()):
|
|
if 'Test' in results[algo_name]:
|
|
sizes_list = sorted(results[algo_name]['Test'].keys())
|
|
# Filter out infinite values
|
|
valid_data = [(s, results[algo_name]['Test'][s]['mean'])
|
|
for s in sizes_list
|
|
if np.isfinite(results[algo_name]['Test'][s]['mean'])]
|
|
if valid_data:
|
|
valid_sizes, valid_times = zip(*valid_data)
|
|
ax.plot(valid_sizes, valid_times, marker=markers[algo_idx],
|
|
label=algo_name, color=colors[algo_idx],
|
|
linewidth=2.5, markersize=8)
|
|
|
|
ax.set_xlabel('Array Size', fontsize=11, fontweight='bold')
|
|
ax.set_ylabel('Time (seconds)', fontsize=11, fontweight='bold')
|
|
ax.set_title(f'{config_name}', fontsize=12, fontweight='bold')
|
|
ax.legend(fontsize=10)
|
|
ax.grid(True, alpha=0.3)
|
|
ax.set_xscale('log')
|
|
ax.set_yscale('log')
|
|
|
|
plt.tight_layout()
|
|
plt.savefig('docs/quicksort_3way_comparison.png', dpi=300, bbox_inches='tight')
|
|
print("Saved: docs/quicksort_3way_comparison.png")
|
|
plt.close()
|
|
|
|
# Also create a bar chart for specific sizes
|
|
print("Generating Three-Way Quicksort bar chart...")
|
|
fig, ax = plt.subplots(figsize=(14, 8))
|
|
|
|
# Test specific sizes with different duplicate levels
|
|
test_sizes = [1000, 5000, 10000]
|
|
x = np.arange(len(test_sizes))
|
|
width = 0.25
|
|
|
|
configs_for_bar = [
|
|
('10 Unique', lambda size: generate_array_with_duplicates(size, unique_count=10)),
|
|
('5 Unique', lambda size: generate_array_with_duplicates(size, unique_count=5)),
|
|
('All Equal', lambda size: [5] * size)
|
|
]
|
|
|
|
bar_colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
|
|
|
|
# Run benchmarks for bar chart
|
|
bar_results = {}
|
|
for config_name, gen_func in configs_for_bar:
|
|
array_generators = {'Test': gen_func}
|
|
results = compare_algorithms(
|
|
algorithms=algorithms,
|
|
array_generators=array_generators,
|
|
sizes=test_sizes,
|
|
iterations=3
|
|
)
|
|
bar_results[config_name] = results
|
|
|
|
for config_idx, (config_name, _) in enumerate(configs_for_bar):
|
|
results = bar_results[config_name]
|
|
standard_times = []
|
|
threeway_times = []
|
|
|
|
for size in test_sizes:
|
|
std_time = None
|
|
way3_time = None
|
|
|
|
if 'Test' in results['Standard Quicksort (Randomized)']:
|
|
if size in results['Standard Quicksort (Randomized)']['Test']:
|
|
mean_val = results['Standard Quicksort (Randomized)']['Test'][size]['mean']
|
|
if np.isfinite(mean_val):
|
|
std_time = mean_val
|
|
|
|
if 'Test' in results['Three-Way Quicksort']:
|
|
if size in results['Three-Way Quicksort']['Test']:
|
|
mean_val = results['Three-Way Quicksort']['Test'][size]['mean']
|
|
if np.isfinite(mean_val):
|
|
way3_time = mean_val
|
|
|
|
standard_times.append(std_time if std_time is not None else np.nan)
|
|
threeway_times.append(way3_time if way3_time is not None else np.nan)
|
|
|
|
offset = (config_idx - 1) * width
|
|
bars1 = ax.bar(x + offset, standard_times, width/2,
|
|
label=f'Standard ({config_name})',
|
|
color=bar_colors[config_idx], alpha=0.7)
|
|
bars2 = ax.bar(x + offset + width/2, threeway_times, width/2,
|
|
label=f'Three-Way ({config_name})',
|
|
color=bar_colors[config_idx], alpha=0.9)
|
|
|
|
# Add value labels
|
|
for bars in [bars1, bars2]:
|
|
for bar in bars:
|
|
height = bar.get_height()
|
|
if height > 0 and np.isfinite(height):
|
|
ax.text(bar.get_x() + bar.get_width()/2., height,
|
|
f'{height:.4f}s',
|
|
ha='center', va='bottom', fontsize=8, rotation=90)
|
|
|
|
ax.set_xlabel('Array Size', fontsize=12, fontweight='bold')
|
|
ax.set_ylabel('Time (seconds)', fontsize=12, fontweight='bold')
|
|
ax.set_title('Three-Way Quicksort Performance on Duplicate-Heavy Data',
|
|
fontsize=14, fontweight='bold')
|
|
ax.set_xticks(x)
|
|
ax.set_xticklabels([str(s) for s in test_sizes])
|
|
ax.legend(fontsize=10, ncol=3, loc='upper left')
|
|
ax.grid(True, alpha=0.3, axis='y')
|
|
ax.set_yscale('log')
|
|
|
|
plt.tight_layout()
|
|
plt.savefig('docs/quicksort_3way_bar.png', dpi=300, bbox_inches='tight')
|
|
print("Saved: docs/quicksort_3way_bar.png")
|
|
plt.close()
|
|
|
|
print("Three-Way Quicksort plots generated successfully!")
|
|
|
|
|
|
if __name__ == '__main__':
|
|
generate_performance_plots()
|
|
generate_3way_quicksort_plot()
|
|
|