fix ruff errors and formatting

yichuan520030910320
2025-07-27 02:22:54 -07:00
parent 383c6d8d7e
commit af1790395a
35 changed files with 166 additions and 107 deletions

View File

@@ -8,4 +8,4 @@ on:
jobs:
build:
-uses: ./.github/workflows/build-reusable.yml
+uses: ./.github/workflows/build-reusable.yml

View File

@@ -17,23 +17,23 @@ jobs:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.ref }}
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install uv
uses: astral-sh/setup-uv@v4
- name: Install ruff
run: |
uv tool install ruff
- name: Run ruff check
run: |
ruff check .
- name: Run ruff format check
run: |
ruff format --check .
@@ -65,40 +65,40 @@ jobs:
- os: macos-latest
python: '3.13'
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.ref }}
submodules: recursive
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
- name: Install uv
uses: astral-sh/setup-uv@v4
- name: Install system dependencies (Ubuntu)
if: runner.os == 'Linux'
run: |
sudo apt-get update
sudo apt-get install -y libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \
pkg-config libopenblas-dev patchelf libabsl-dev libaio-dev libprotobuf-dev
# Install Intel MKL for DiskANN
wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
source /opt/intel/oneapi/setvars.sh
echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> $GITHUB_ENV
echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
- name: Install system dependencies (macOS)
if: runner.os == 'macOS'
run: |
brew install llvm libomp boost protobuf zeromq
- name: Install build dependencies
run: |
uv pip install --system scikit-build-core numpy swig Cython pybind11
@@ -107,7 +107,7 @@ jobs:
else
uv pip install --system delocate
fi
- name: Build packages
run: |
# Build core (platform independent)
@@ -116,7 +116,7 @@ jobs:
uv build
cd ../..
fi
# Build HNSW backend
cd packages/leann-backend-hnsw
if [ "${{ matrix.os }}" == "macos-latest" ]; then
@@ -125,7 +125,7 @@ jobs:
uv build --wheel --python python
fi
cd ../..
# Build DiskANN backend
cd packages/leann-backend-diskann
if [ "${{ matrix.os }}" == "macos-latest" ]; then
@@ -134,14 +134,14 @@ jobs:
uv build --wheel --python python
fi
cd ../..
# Build meta package (platform independent)
if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
cd packages/leann
uv build
cd ../..
fi
- name: Repair wheels (Linux)
if: runner.os == 'Linux'
run: |
@@ -153,7 +153,7 @@ jobs:
mv dist_repaired dist
fi
cd ../..
# Repair DiskANN wheel
cd packages/leann-backend-diskann
if [ -d dist ]; then
@@ -162,7 +162,7 @@ jobs:
mv dist_repaired dist
fi
cd ../..
- name: Repair wheels (macOS)
if: runner.os == 'macOS'
run: |
@@ -174,7 +174,7 @@ jobs:
mv dist_repaired dist
fi
cd ../..
# Repair DiskANN wheel
cd packages/leann-backend-diskann
if [ -d dist ]; then
@@ -183,14 +183,14 @@ jobs:
mv dist_repaired dist
fi
cd ../..
- name: List built packages
run: |
echo "📦 Built packages:"
find packages/*/dist -name "*.whl" -o -name "*.tar.gz" | sort
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: packages-${{ matrix.os }}-py${{ matrix.python }}
-path: packages/*/dist/
+path: packages/*/dist/

View File

@@ -16,10 +16,10 @@ jobs:
contents: write
outputs:
commit-sha: ${{ steps.push.outputs.commit-sha }}
steps:
- uses: actions/checkout@v4
- name: Validate version
run: |
# Remove 'v' prefix if present for validation
@@ -30,7 +30,7 @@ jobs:
exit 1
fi
echo "✅ Version format valid: ${{ inputs.version }}"
- name: Update versions and push
id: push
run: |
@@ -38,7 +38,7 @@ jobs:
CURRENT_VERSION=$(grep "^version" packages/leann-core/pyproject.toml | cut -d'"' -f2)
echo "Current version: $CURRENT_VERSION"
echo "Target version: ${{ inputs.version }}"
if [ "$CURRENT_VERSION" = "${{ inputs.version }}" ]; then
echo "⚠️ Version is already ${{ inputs.version }}, skipping update"
COMMIT_SHA=$(git rev-parse HEAD)
@@ -52,7 +52,7 @@ jobs:
COMMIT_SHA=$(git rev-parse HEAD)
echo "✅ Pushed version update: $COMMIT_SHA"
fi
echo "commit-sha=$COMMIT_SHA" >> $GITHUB_OUTPUT
build-packages:
@@ -60,7 +60,7 @@ jobs:
needs: update-version
uses: ./.github/workflows/build-reusable.yml
with:
-ref: 'main'
+ref: 'main'
publish:
name: Publish and Release
@@ -69,26 +69,26 @@ jobs:
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v4
with:
-ref: 'main'
+ref: 'main'
- name: Download all artifacts
uses: actions/download-artifact@v4
with:
path: dist-artifacts
- name: Collect packages
run: |
mkdir -p dist
find dist-artifacts -name "*.whl" -exec cp {} dist/ \;
find dist-artifacts -name "*.tar.gz" -exec cp {} dist/ \;
echo "📦 Packages to publish:"
ls -la dist/
- name: Publish to PyPI
env:
TWINE_USERNAME: __token__
@@ -98,12 +98,12 @@ jobs:
echo "❌ PYPI_API_TOKEN not configured!"
exit 1
fi
pip install twine
twine upload dist/* --skip-existing --verbose
echo "✅ Published to PyPI!"
- name: Create release
run: |
# Check if tag already exists
@@ -114,7 +114,7 @@ jobs:
git push origin "v${{ inputs.version }}"
echo "✅ Created and pushed tag v${{ inputs.version }}"
fi
# Check if release already exists
if gh release view "v${{ inputs.version }}" >/dev/null 2>&1; then
echo "⚠️ Release v${{ inputs.version }} already exists, skipping release creation"
@@ -126,4 +126,4 @@ jobs:
echo "✅ Created GitHub release v${{ inputs.version }}"
fi
env:
-GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.gitignore
View File

@@ -9,7 +9,7 @@ demo/indices/
outputs/
*.pkl
*.pdf
-*.idx
+*.idx
*.map
.history/
lm_eval.egg-info/
@@ -85,4 +85,4 @@ packages/leann-backend-diskann/third_party/DiskANN/_deps/
*.meta.json
*.passages.json
-batchtest.py
+batchtest.py

View File

@@ -19,4 +19,4 @@ That's it! The workflow will automatically:
- ✅ Publish to PyPI
- ✅ Create GitHub tag and release
-Check progress: https://github.com/yichuan-w/LEANN/actions
+Check progress: https://github.com/yichuan-w/LEANN/actions

View File

@@ -8,4 +8,4 @@ We welcome contributions! Leann is built by the community, for the community.
- 💡 **Feature Requests**: Have an idea? We'd love to hear it!
- 🔧 **Code Contributions**: PRs welcome for all skill levels
- 📖 **Documentation**: Help make Leann more accessible
-- 🧪 **Benchmarks**: Share your performance results
+- 🧪 **Benchmarks**: Share your performance results

View File

@@ -7,4 +7,4 @@ You can speed up the process by using a lightweight embedding model. Add this to
```bash
--embedding-model sentence-transformers/all-MiniLM-L6-v2
```
-**Model sizes:** `all-MiniLM-L6-v2` (30M parameters), `facebook/contriever` (~100M parameters), `Qwen3-0.6B` (600M parameters)
+**Model sizes:** `all-MiniLM-L6-v2` (30M parameters), `facebook/contriever` (~100M parameters), `Qwen3-0.6B` (600M parameters)
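For a feel of what the lightweight option returns, here is a minimal sketch, assuming the `sentence-transformers` package is installed; the sample texts are illustrative:

```python
from sentence_transformers import SentenceTransformer

# ~30M-parameter model: quick to download and fast on CPU
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

chunks = [
    "LEANN builds a compact vector index.",
    "Smaller embedding models trade some accuracy for speed.",
]
embeddings = model.encode(chunks)
print(embeddings.shape)  # (2, 384): all-MiniLM-L6-v2 emits 384-dim vectors
```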

View File

@@ -19,4 +19,4 @@
- **Simple Python API** - Get started in minutes
- **Extensible backend system** - Easy to add new algorithms
-- **Comprehensive examples** - From basic usage to production deployment
+- **Comprehensive examples** - From basic usage to production deployment

View File

@@ -18,4 +18,4 @@
- [ ] Integration with LangChain/LlamaIndex
- [ ] Visual similarity search
-- [ ] Query rewriting, reranking, and expansion
+- [ ] Query rewriting, reranking, and expansion

View File

@@ -1,5 +1,5 @@
The Project Gutenberg eBook of Pride and Prejudice
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms
@@ -14557,7 +14557,7 @@ her into Derbyshire, had been the means of uniting them.
*** END OF THE PROJECT GUTENBERG EBOOK PRIDE AND PREJUDICE ***
Updated editions will replace the previous one—the old editions will
be renamed.
@@ -14662,7 +14662,7 @@ performed, viewed, copied or distributed:
at www.gutenberg.org. If you
are not located in the United States, you will have to check the laws
of the country where you are located before using this eBook.
1.E.2. If an individual Project Gutenberg™ electronic work is
derived from texts not protected by U.S. copyright law (does not
contain a notice indicating that it is posted with permission of the
@@ -14724,7 +14724,7 @@ provided that:
Gutenberg Literary Archive Foundation at the address specified in
Section 4, “Information about donations to the Project Gutenberg
Literary Archive Foundation.”
• You provide a full refund of any money paid by a user who notifies
you in writing (or by e-mail) within 30 days of receipt that s/he
does not agree to the terms of the full Project Gutenberg™
@@ -14732,15 +14732,15 @@ provided that:
copies of the works possessed in a physical medium and discontinue
all use of and all access to other copies of Project Gutenberg™
works.
• You provide, in accordance with paragraph 1.F.3, a full refund of
any money paid for a work or a replacement copy, if a defect in the
electronic work is discovered and reported to you within 90 days of
receipt of the work.
• You comply with all other terms of this agreement for free
distribution of Project Gutenberg™ works.
1.E.9. If you wish to charge a fee or distribute a Project
Gutenberg™ electronic work or group of works on different terms than
@@ -14903,5 +14903,3 @@ This website includes information about Project Gutenberg™,
including how to make donations to the Project Gutenberg Literary
Archive Foundation, how to help produce our new eBooks, and how to
subscribe to our email newsletter to hear about new eBooks.

View File

@@ -27,7 +27,10 @@ def load_sample_documents():
"title": "Intro to Python",
"content": "Python is a high-level, interpreted language known for simplicity.",
},
{"title": "ML Basics", "content": "Machine learning builds systems that learn from data."},
{
"title": "ML Basics",
"content": "Machine learning builds systems that learn from data.",
},
{
"title": "Data Structures",
"content": "Data structures like arrays, lists, and graphs organize data.",

View File

@@ -21,7 +21,9 @@ DEFAULT_CHROME_PROFILE = os.path.expanduser("~/Library/Application Support/Googl
def create_leann_index_from_multiple_chrome_profiles(
-profile_dirs: list[Path], index_path: str = "chrome_history_index.leann", max_count: int = -1
+profile_dirs: list[Path],
+index_path: str = "chrome_history_index.leann",
+max_count: int = -1,
):
"""
Create LEANN index from multiple Chrome profile data sources.

View File

@@ -474,7 +474,8 @@ Messages ({len(messages)} messages, {message_group["total_length"]} chars):
message_group, contact_name
)
doc = Document(
-text=doc_content, metadata={"contact_name": contact_name}
+text=doc_content,
+metadata={"contact_name": contact_name},
)
docs.append(doc)
count += 1

View File

@@ -315,7 +315,11 @@ async def main():
# Create or load the LEANN index from all sources
index_path = create_leann_index_from_multiple_sources(
-messages_dirs, INDEX_PATH, args.max_emails, args.include_html, args.embedding_model
+messages_dirs,
+INDEX_PATH,
+args.max_emails,
+args.include_html,
+args.embedding_model,
)
if index_path:

View File

@@ -92,7 +92,10 @@ def main():
help="Directory to store the index (default: mail_index_embedded)",
)
parser.add_argument(
"--max-emails", type=int, default=10000, help="Maximum number of emails to process"
"--max-emails",
type=int,
default=10000,
help="Maximum number of emails to process",
)
parser.add_argument(
"--include-html",
@@ -112,7 +115,10 @@ def main():
else:
print("Creating new index...")
index = create_and_save_index(
-mail_path, save_dir, max_count=args.max_emails, include_html=args.include_html
+mail_path,
+save_dir,
+max_count=args.max_emails,
+include_html=args.include_html,
)
if index:
queries = [

View File

@@ -347,7 +347,9 @@ def demo_aggregation():
print(f"\n{'=' * 20} {method.upper()} AGGREGATION {'=' * 20}")
aggregator = MultiVectorAggregator(
-aggregation_method=method, spatial_clustering=True, cluster_distance_threshold=100.0
+aggregation_method=method,
+spatial_clustering=True,
+cluster_distance_threshold=100.0,
)
aggregated = aggregator.aggregate_results(mock_results, top_k=5)

View File

@@ -1 +0,0 @@

View File

@@ -16,4 +16,4 @@ wheel.packages = ["leann_backend_diskann"]
editable.mode = "redirect"
cmake.build-type = "Release"
build.verbose = true
-build.tool-args = ["-j8"]
+build.tool-args = ["-j8"]

View File

@@ -2,12 +2,12 @@ syntax = "proto3";
package protoembedding;
-message NodeEmbeddingRequest {
-repeated uint32 node_ids = 1;
+message NodeEmbeddingRequest {
+repeated uint32 node_ids = 1;
}
message NodeEmbeddingResponse {
bytes embeddings_data = 1; // All embedded binary data
repeated int32 dimensions = 2; // Shape [batch_size, embedding_dim]
repeated uint32 missing_ids = 3; // Missing node ids
-}
+}
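As a usage illustration, a hypothetical round trip with the protoc-generated Python bindings; the module name `embedding_pb2` and the sample values are assumptions, only the message and field names come from the schema above:

```python
import numpy as np

import embedding_pb2  # assumed name of the protoc-generated module

# Client asks the embedding server for three node embeddings
req = embedding_pb2.NodeEmbeddingRequest(node_ids=[0, 7, 42])
wire = req.SerializeToString()  # bytes to put on the transport (e.g. ZeroMQ)

# Server-side sketch: answer with two 4-dim float32 vectors, one id missing
vecs = np.arange(8, dtype=np.float32).reshape(2, 4)
resp = embedding_pb2.NodeEmbeddingResponse(
    embeddings_data=vecs.tobytes(),  # all embeddings as one flat byte buffer
    dimensions=[2, 4],               # shape [batch_size, embedding_dim]
    missing_ids=[42],                # node 42 had no stored embedding
)

# Client decodes the flat buffer back into a matrix
batch, dim = resp.dimensions
out = np.frombuffer(resp.embeddings_data, dtype=np.float32).reshape(batch, dim)
```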

View File

@@ -52,4 +52,4 @@ set(FAISS_BUILD_AVX512 OFF CACHE BOOL "" FORCE)
# IMPORTANT: Disable building AVX versions to speed up compilation
set(FAISS_BUILD_AVX_VERSIONS OFF CACHE BOOL "" FORCE)
-add_subdirectory(third_party/faiss)
+add_subdirectory(third_party/faiss)

View File

@@ -72,7 +72,11 @@ def read_vector_raw(f, element_fmt_char):
def read_numpy_vector(f, np_dtype, struct_fmt_char):
"""Reads a vector into a NumPy array."""
count = -1 # Initialize count for robust error handling
print(f" Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ", end="", flush=True)
print(
f" Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ",
end="",
flush=True,
)
try:
count, data_bytes = read_vector_raw(f, struct_fmt_char)
print(f"Count={count}, Bytes={len(data_bytes)}")
@@ -647,7 +651,10 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
print(f"Error: Input file not found: {input_filename}", file=sys.stderr)
return False
except MemoryError as e:
print(f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.", file=sys.stderr)
print(
f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.",
file=sys.stderr,
)
# Clean up potentially partially written output file?
try:
os.remove(output_filename)

View File

@@ -9,7 +9,7 @@ name = "leann-backend-hnsw"
version = "0.1.14"
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
dependencies = [
"leann-core==0.1.14",
"leann-core==0.1.14",
"numpy",
"pyzmq>=23.0.0",
"msgpack>=1.0.0",
@@ -24,4 +24,4 @@ build.tool-args = ["-j8"]
# CMake definitions to optimize compilation
[tool.scikit-build.cmake.define]
-CMAKE_BUILD_PARALLEL_LEVEL = "8"
+CMAKE_BUILD_PARALLEL_LEVEL = "8"

View File

@@ -46,4 +46,4 @@ colab = [
leann = "leann.cli:main"
[tool.setuptools.packages.find]
where = ["src"]
where = ["src"]

View File

@@ -245,7 +245,11 @@ def search_hf_models_fuzzy(query: str, limit: int = 10) -> list[str]:
# HF Hub's search is already fuzzy! It handles typos and partial matches
models = list_models(
-search=query, filter="text-generation", sort="downloads", direction=-1, limit=limit
+search=query,
+filter="text-generation",
+sort="downloads",
+direction=-1,
+limit=limit,
)
model_names = [model.id if hasattr(model, "id") else str(model) for model in models]
@@ -582,7 +586,11 @@ class HFChat(LLMInterface):
# Tokenize input
inputs = self.tokenizer(
-formatted_prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048
+formatted_prompt,
+return_tensors="pt",
+padding=True,
+truncation=True,
+max_length=2048,
)
# Move inputs to device

View File

@@ -37,4 +37,4 @@ For full documentation, visit [https://leann.readthedocs.io](https://leann.readt
## License
-MIT License
+MIT License

View File

@@ -39,4 +39,4 @@ diskann = [
Homepage = "https://github.com/yourusername/leann"
Documentation = "https://leann.readthedocs.io"
Repository = "https://github.com/yourusername/leann"
-Issues = "https://github.com/yourusername/leann/issues"
+Issues = "https://github.com/yourusername/leann/issues"

View File

@@ -1,6 +1,6 @@
import json
import sqlite3
-import xml.etree.ElementTree as ET
+import xml.etree.ElementTree as ElementTree
from pathlib import Path
from typing import Annotated
@@ -26,7 +26,7 @@ def get_safe_path(s: str) -> str:
def process_history(history: str):
if history.startswith("<?xml") or history.startswith("<msg>"):
try:
-root = ET.fromstring(history)
+root = ElementTree.fromstring(history)
title = root.find(".//title").text if root.find(".//title") is not None else None
quoted = (
root.find(".//refermsg/content").text
@@ -52,7 +52,8 @@ def get_message(history: dict | str):
def export_chathistory(user_id: str):
res = requests.get(
"http://localhost:48065/wechat/chatlog", params={"userId": user_id, "count": 100000}
"http://localhost:48065/wechat/chatlog",
params={"userId": user_id, "count": 100000},
).json()
for i in range(len(res["chatLogs"])):
res["chatLogs"][i]["content"] = process_history(res["chatLogs"][i]["content"])
@@ -116,7 +117,8 @@ def export_sqlite(
all_users = requests.get("http://localhost:48065/wechat/allcontacts").json()
for user in tqdm(all_users):
cursor.execute(
"INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)", (user["arg"], user["title"])
"INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)",
(user["arg"], user["title"]),
)
usr_chatlog = export_chathistory(user["arg"])
for msg in usr_chatlog:

View File

@@ -19,16 +19,16 @@ uv pip install build twine delocate auditwheel scikit-build-core cmake pybind11
build_package() {
local package_dir=$1
local package_name=$(basename $package_dir)
echo "Building $package_name..."
cd $package_dir
# Clean previous builds
rm -rf dist/ build/ _skbuild/
# Build directly with pip wheel (avoids sdist issues)
pip wheel . --no-deps -w dist
# Repair wheel for binary packages
if [[ "$package_name" != "leann-core" ]] && [[ "$package_name" != "leann" ]]; then
if [[ "$OSTYPE" == "darwin"* ]]; then
@@ -57,7 +57,7 @@ build_package() {
fi
fi
fi
echo "Built wheels in $package_dir/dist/"
ls -la dist/
cd - > /dev/null
@@ -84,4 +84,4 @@ else
fi
echo -e "\nBuild complete! Test with:"
echo "uv pip install packages/*/dist/*.whl"
echo "uv pip install packages/*/dist/*.whl"

View File

@@ -28,4 +28,4 @@ else
fi
echo "✅ Version updated to $NEW_VERSION"
echo "✅ Dependencies updated to use leann-core==$NEW_VERSION"
echo "✅ Dependencies updated to use leann-core==$NEW_VERSION"

View File

@@ -15,4 +15,4 @@ VERSION=$1
git add . && git commit -m "chore: bump version to $VERSION" && git push
# Create release (triggers CI)
-gh release create v$VERSION --generate-notes
+gh release create v$VERSION --generate-notes

View File

@@ -27,4 +27,4 @@ else
else
echo "Cancelled"
fi
-fi
+fi

View File

@@ -58,7 +58,8 @@ class GraphWrapper:
self.graph = torch.cuda.CUDAGraph()
with torch.cuda.graph(self.graph):
self.static_output = self.model(
-input_ids=self.static_input, attention_mask=self.static_attention_mask
+input_ids=self.static_input,
+attention_mask=self.static_attention_mask,
)
self.use_cuda_graph = True
else:
@@ -82,7 +83,10 @@ class GraphWrapper:
def _warmup(self, num_warmup: int = 3):
with torch.no_grad():
for _ in range(num_warmup):
-self.model(input_ids=self.static_input, attention_mask=self.static_attention_mask)
+self.model(
+input_ids=self.static_input,
+attention_mask=self.static_attention_mask,
+)
def __call__(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
if self.use_cuda_graph:
@@ -261,7 +265,10 @@ class Benchmark:
# print size
print(f"in_features: {in_features}, out_features: {out_features}")
new_module = bnb.nn.Linear8bitLt(
-in_features, out_features, bias=bias, has_fp16_weights=False
+in_features,
+out_features,
+bias=bias,
+has_fp16_weights=False,
)
# Copy weights and bias
@@ -350,8 +357,6 @@ class Benchmark:
# Try xformers if available (only on CUDA)
if torch.cuda.is_available():
try:
from xformers.ops import memory_efficient_attention # noqa: F401
if hasattr(model, "enable_xformers_memory_efficient_attention"):
model.enable_xformers_memory_efficient_attention()
print("- Enabled xformers memory efficient attention")
@@ -427,7 +432,11 @@ class Benchmark:
else "cpu"
)
return torch.randint(
-0, 1000, (batch_size, self.config.seq_length), device=device, dtype=torch.long
+0,
+1000,
+(batch_size, self.config.seq_length),
+device=device,
+dtype=torch.long,
)
def _run_inference(

View File

@@ -7,7 +7,7 @@ This directory contains comprehensive sanity checks for the Leann system, ensuri
### `test_distance_functions.py`
Tests all supported distance functions across DiskANN backend:
- **MIPS** (Maximum Inner Product Search)
-- **L2** (Euclidean Distance)
+- **L2** (Euclidean Distance)
- **Cosine** (Cosine Similarity)
```bash
@@ -27,7 +27,7 @@ uv run python tests/sanity_checks/test_l2_verification.py
### `test_sanity_check.py`
Comprehensive end-to-end verification including:
- Distance function testing
-- Embedding model compatibility
+- Embedding model compatibility
- Search result correctness validation
- Backend integration testing
@@ -64,7 +64,7 @@ When all tests pass, you should see:
```
📊 Test results summary:
mips : ✅ passed
-l2 : ✅ passed
+l2 : ✅ passed
cosine : ✅ passed
🎉 Tests complete!
@@ -98,7 +98,7 @@ pkill -f "embedding_server"
### Typical Timing (3 documents, consumer hardware):
- **Index Building**: 2-5 seconds per distance function
-- **Search Query**: 50-200ms
+- **Search Query**: 50-200ms
- **Recompute Mode**: 5-15 seconds (higher accuracy)
### Memory Usage:
@@ -117,4 +117,4 @@ These tests are designed to be run in automated environments:
uv run python tests/sanity_checks/test_l2_verification.py
```
-The tests are deterministic and should produce consistent results across different platforms.
+The tests are deterministic and should produce consistent results across different platforms.

View File

@@ -115,7 +115,13 @@ def main():
# --- Plotting ---
print("\n--- Generating Plot ---")
plt.figure(figsize=(10, 6))
-plt.plot(BATCH_SIZES, results_torch, marker="o", linestyle="-", label=f"PyTorch ({device})")
+plt.plot(
+BATCH_SIZES,
+results_torch,
+marker="o",
+linestyle="-",
+label=f"PyTorch ({device})",
+)
plt.plot(BATCH_SIZES, results_mlx, marker="s", linestyle="-", label="MLX")
plt.title(f"Embedding Performance: MLX vs PyTorch\nModel: {MODEL_NAME_TORCH}")

View File

@@ -170,7 +170,11 @@ class Benchmark:
def _create_random_batch(self, batch_size: int) -> torch.Tensor:
return torch.randint(
-0, 1000, (batch_size, self.config.seq_length), device=self.device, dtype=torch.long
+0,
+1000,
+(batch_size, self.config.seq_length),
+device=self.device,
+dtype=torch.long,
)
def _run_inference(self, input_ids: torch.Tensor) -> float:
@@ -256,7 +260,11 @@ def run_mlx_benchmark():
"""Run MLX-specific benchmark"""
if not MLX_AVAILABLE:
print("MLX not available, skipping MLX benchmark")
return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": "MLX not available"}
return {
"max_throughput": 0.0,
"avg_throughput": 0.0,
"error": "MLX not available",
}
config = BenchmarkConfig(model_path="mlx-community/all-MiniLM-L6-v2-4bit", use_mlx=True)
@@ -265,7 +273,11 @@ def run_mlx_benchmark():
results = benchmark.run()
if not results:
return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": "No valid results"}
return {
"max_throughput": 0.0,
"avg_throughput": 0.0,
"error": "No valid results",
}
max_throughput = max(results[batch_size]["throughput"] for batch_size in results)
avg_throughput = np.mean([results[batch_size]["throughput"] for batch_size in results])