# File listing metadata: 122 lines, 4.1 KiB, Python.
import time
|
|
|
|
import matplotlib.pyplot as plt
|
|
import mlx.core as mx
|
|
import numpy as np
|
|
import torch
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
# --- Configuration ---
# Model identifier handed to SentenceTransformer for the PyTorch side.
MODEL_NAME_TORCH = "Qwen/Qwen3-Embedding-0.6B"
# Batch sizes swept by the benchmark; the largest one also sizes DUMMY_SENTENCES.
BATCH_SIZES = [1, 8, 16, 32, 64, 128, 256]
# Number of timed repetitions averaged for each batch size.
NUM_RUNS = 10
# Number of untimed runs executed before measuring.
WARMUP_RUNS = 2
# Sequence length of the random token-id matrix used by the simulated MLX path.
SEQ_LENGTH = 256
# Width of the simulated MLX embedding table.
# NOTE(review): the original comment attributed 768 to all-mpnet-base-v2, which
# is not MODEL_NAME_TORCH -- confirm whether this should track that model.
EMBED_DIM = 768

# --- Generate Dummy Data ---
_BASE_SENTENCE = "This is a test sentence for benchmarking."
# Join the repeated sentence with spaces so the copies are not glued together
# ("benchmarking.This"), then replicate it once per slot of the largest batch.
DUMMY_SENTENCES = [" ".join([_BASE_SENTENCE] * 5)] * max(BATCH_SIZES)
|
|
|
|
|
|
# --- PyTorch Benchmark Function ---
|
|
def benchmark_torch(model, sentences):
    """Time a single ``model.encode`` call over ``sentences``.

    Args:
        model: Object exposing ``encode(sentences, convert_to_numpy=True)``;
            ``main()`` passes a ``SentenceTransformer``.
        sentences: Batch of input strings, forwarded to ``encode`` unchanged.

    Returns:
        Elapsed wall-clock time in milliseconds (float).
    """
    # Decide outside the timed region whether an MPS queue exists to wait on.
    # The caller falls back to CPU when MPS is unavailable, and calling
    # torch.mps.synchronize() unconditionally may raise in that case.
    use_mps = torch.backends.mps.is_available()

    # perf_counter is the monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() can jump with clock adjustments.
    start_time = time.perf_counter()
    model.encode(sentences, convert_to_numpy=True)
    if use_mps:
        torch.mps.synchronize()  # Ensure computation is finished on MPS
    end_time = time.perf_counter()

    return (end_time - start_time) * 1000  # Return time in ms
|
|
|
|
|
|
# --- Simulated MLX Benchmark Function ---
|
|
def benchmark_mlx_simulated(dummy_embedding_table, sentences):
    """Time a simulated MLX embedding lookup followed by masked mean pooling.

    No real tokenizer or model is involved: random token ids stand in for
    tokenized input, and only the table lookup plus pooling is timed.

    Args:
        dummy_embedding_table: MLX array indexed by token id along axis 0;
            ``main()`` passes a ``(vocab_size, EMBED_DIM)`` table.
        sentences: Batch of inputs; only ``len(sentences)`` is used.

    Returns:
        Elapsed wall-clock time in milliseconds (float).
    """
    # 1. Simulate tokenization (result is just shape)
    batch_size = len(sentences)
    # Draw ids from the table's own row count so every id is a valid index
    # whatever size table the caller supplies.
    vocab_size = dummy_embedding_table.shape[0]
    input_ids = mx.random.randint(0, vocab_size, (batch_size, SEQ_LENGTH))
    attention_mask = mx.ones((batch_size, SEQ_LENGTH))
    # MLX builds arrays lazily; materialise the inputs now so generating them
    # is not charged to the timed region below.
    mx.eval(input_ids, attention_mask)

    start_time = time.perf_counter()

    # 2. Simulate embedding lookup
    embeddings = dummy_embedding_table[input_ids]

    # 3. Simulate mean pooling
    mask = mx.expand_dims(attention_mask, -1)
    sum_embeddings = (embeddings * mask).sum(axis=1)
    sum_mask = mask.sum(axis=1)
    pooled = sum_embeddings / sum_mask

    # mx.eval() with no arguments evaluates nothing, which would leave the
    # graph above unexecuted; pass the result so the work actually runs
    # before the clock is stopped.
    mx.eval(pooled)
    end_time = time.perf_counter()

    return (end_time - start_time) * 1000  # Return time in ms
|
|
|
|
|
|
# --- Main Execution ---
|
|
def main():
    """Run the benchmark sweep, print the averages, and save a comparison plot.

    Loads MODEL_NAME_TORCH through SentenceTransformer (on MPS when available,
    otherwise CPU), builds a random MLX embedding table, performs WARMUP_RUNS
    untimed passes, then averages NUM_RUNS timings per entry of BATCH_SIZES
    for both paths. Results are printed and plotted to
    ``embedding_benchmark_simulated.png`` in the current working directory.
    """
    print("--- Initializing Models ---")
    # Load real PyTorch model
    print(f"Loading PyTorch model: {MODEL_NAME_TORCH}")
    device = "mps" if torch.backends.mps.is_available() else "cpu"
    if device == "cpu":
        print("Warning: MPS not available for PyTorch. Benchmark will run on CPU.")
    model_torch = SentenceTransformer(MODEL_NAME_TORCH, device=device)
    print(f"PyTorch model loaded on: {device}")

    # Create dummy MLX embedding table
    print("Creating simulated MLX model...")
    dummy_vocab_size = 30522  # Typical BERT vocab size
    dummy_embedding_table_mlx = mx.random.normal((dummy_vocab_size, EMBED_DIM))
    # MLX is lazy: the table must be passed to eval to be materialised here
    # rather than inside the first timed lookup.
    mx.eval(dummy_embedding_table_mlx)
    print("Simulated MLX model created.")

    # --- Warm-up ---
    print("\n--- Performing Warm-up Runs ---")
    for _ in range(WARMUP_RUNS):
        benchmark_torch(model_torch, DUMMY_SENTENCES[:1])
        benchmark_mlx_simulated(dummy_embedding_table_mlx, DUMMY_SENTENCES[:1])
    print("Warm-up complete.")

    # --- Benchmarking ---
    print("\n--- Starting Benchmark ---")
    results_torch = []
    results_mlx = []

    for batch_size in BATCH_SIZES:
        print(f"Benchmarking batch size: {batch_size}")
        sentences_batch = DUMMY_SENTENCES[:batch_size]

        # Benchmark PyTorch
        torch_times = [
            benchmark_torch(model_torch, sentences_batch) for _ in range(NUM_RUNS)
        ]
        results_torch.append(np.mean(torch_times))

        # Benchmark MLX
        mlx_times = [
            benchmark_mlx_simulated(dummy_embedding_table_mlx, sentences_batch)
            for _ in range(NUM_RUNS)
        ]
        results_mlx.append(np.mean(mlx_times))

    print("\n--- Benchmark Results (Average time per batch in ms) ---")
    print(f"Batch Sizes: {BATCH_SIZES}")
    # Report the device actually used; the run may have fallen back to CPU.
    print(f"PyTorch ({device}): {[f'{t:.2f}' for t in results_torch]}")
    print(f"MLX (simulated): {[f'{t:.2f}' for t in results_mlx]}")

    # --- Plotting ---
    print("\n--- Generating Plot ---")
    plt.figure(figsize=(10, 6))
    plt.plot(BATCH_SIZES, results_torch, marker="o", linestyle="-", label=f"PyTorch ({device})")
    plt.plot(BATCH_SIZES, results_mlx, marker="s", linestyle="-", label="MLX (Simulated)")

    plt.title("Simulated Embedding Performance: MLX vs PyTorch")
    plt.xlabel("Batch Size")
    plt.ylabel("Average Time per Batch (ms)")
    plt.xticks(BATCH_SIZES)
    plt.grid(True)
    plt.legend()

    output_filename = "embedding_benchmark_simulated.png"
    plt.savefig(output_filename)
    print(f"Plot saved to {output_filename}")
|
|
|
|
|
|
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|