# LEANN/research/paper_plot/gpu_under.py

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Motto: Were It to Benefit My Country, I Would Lay Down My Life!
# \file: /gpu_utilization_plot.py
# \brief: Plots GPU throughput vs. batch size to show utilization with equally spaced x-axis.
# Author: AI Assistant

from pathlib import Path

import numpy as np
import pandas as pd # Using pandas for data structuring, similar to example
from matplotlib import pyplot as plt

# Figure-wide matplotlib styling, applied in one shot: bold Helvetica text,
# inward-pointing tick marks on both axes, and LaTeX-rendered text.
plt.rcParams.update(
    {
        "font.family": "Helvetica",
        "font.weight": "bold",
        "axes.labelweight": "bold",
        "xtick.direction": "in",
        "ytick.direction": "in",
        # All text goes through LaTeX (needs a LaTeX toolchain at render time).
        "text.usetex": True,
    }
)

# Benchmark measurements (4th set): one entry per batch size, column-aligned.
#   batch_size        - number of sequences per batch
#   avg_time_s        - average time per batch, in seconds
#   throughput_seq_s  - measured throughput, in sequences per second
data = dict(
    batch_size=[1, 4, 8, 10, 16, 20, 32, 40, 64, 128, 256],
    avg_time_s=[
        0.0031, 0.0057, 0.0100, 0.0114, 0.0186, 0.0234,
        0.0359, 0.0422, 0.0626, 0.1259, 0.2454,
    ],
    throughput_seq_s=[
        318.10, 696.77, 798.95, 874.70, 859.58, 855.19,
        890.80, 946.93, 1022.75, 1017.03, 1043.17,
    ],
)
# Tabular view of the measurements; each dict key becomes a column.
benchmark_df = pd.DataFrame.from_dict(data)

# Build an 8x5 inch canvas; the extra width leaves room for every
# batch-size label along the x-axis.
fig, ax = plt.subplots(figsize=(8, 5))

# One integer slot per measurement, so the points are equally spaced
# regardless of the actual (non-uniform) batch-size values.
x_indices = np.arange(benchmark_df.shape[0])

# Appearance of the throughput curve: solid teal line with round markers.
# No legend label is attached because the figure shows a single series.
curve_style = {
    "marker": 'o',
    "linestyle": '-',
    "color": "#63B8B6",
    "linewidth": 2,
    "markersize": 6,
}

# Throughput against the equally spaced slots (not the raw batch sizes).
ax.plot(x_indices, benchmark_df['throughput_seq_s'], **curve_style)

# Axis titles
ax.set_xlabel("Batch Size", fontsize=14)
ax.set_ylabel("Throughput (sequences/second)", fontsize=14)

# Y-axis range: the lower bound is fixed at 200 and the upper bound is the
# largest throughput rounded up to the next multiple of 100.
y_min_val = 200
peak_throughput = benchmark_df['throughput_seq_s'].max()
y_max_val = 100 * np.ceil(peak_throughput / 100)
ax.set_ylim(y_min_val, y_max_val)
# Ticks every 100 units; the "+ 1" keeps the top bound inside the half-open
# arange interval so the final tick is drawn.
y_tick_positions = np.arange(y_min_val, y_max_val + 1, 100)
ax.set_yticks(y_tick_positions)

# X-axis: ticks sit on the equally spaced slots but are labelled with the
# real batch sizes they stand for.
ax.set_xticks(x_indices)
ax.set_xticklabels(benchmark_df['batch_size'])
ax.tick_params(axis='y', labelsize=12)
ax.tick_params(axis='x', rotation=45, labelsize=10)  # slanted, smaller labels

# Faint dotted grid to make values easier to read off the curve.
grid_style = dict(linestyle=':', linewidth=0.5, color='grey', alpha=0.7, zorder=0)
ax.grid(True, **grid_style)

# Remove title (as requested)
# ax.set_title("GPU Throughput vs. Batch Size", fontsize=16) # Title would go here

# Optional: Add a legend if you have multiple lines or want to label the single line
# ax.legend(
#     loc="center right", # Location might need adjustment due to data shape
#     edgecolor="black",
#     facecolor="white",
#     framealpha=1.0,
#     shadow=False,
#     fancybox=False,
#     prop={"weight": "bold", "size": 10}
# ).set_zorder(100)

# Adjust layout to prevent labels from being cut off
plt.tight_layout()

# Save the figure.
# The path is relative, so it resolves against the current working directory.
output_filename = "./paper_plot/figures/gpu_throughput_vs_batch_size_equispaced.pdf"
# savefig does not create missing directories and would fail with
# FileNotFoundError on a fresh checkout; make sure the target folder exists.
Path(output_filename).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_filename, bbox_inches="tight", dpi=300)
print(f"Plot saved to {output_filename}")

# Display the plot (optional, depending on environment)
plt.show()

# %%
# This is just to mimic the '%%' cell structure from the example.
# No actual code needed here for this script.