fix ruff errors and formatting

This commit is contained in:
yichuan520030910320
2025-07-27 02:22:54 -07:00
parent 383c6d8d7e
commit af1790395a
35 changed files with 166 additions and 107 deletions

View File

@@ -58,7 +58,8 @@ class GraphWrapper:
self.graph = torch.cuda.CUDAGraph()
with torch.cuda.graph(self.graph):
self.static_output = self.model(
input_ids=self.static_input, attention_mask=self.static_attention_mask
input_ids=self.static_input,
attention_mask=self.static_attention_mask,
)
self.use_cuda_graph = True
else:
@@ -82,7 +83,10 @@ class GraphWrapper:
def _warmup(self, num_warmup: int = 3):
with torch.no_grad():
for _ in range(num_warmup):
self.model(input_ids=self.static_input, attention_mask=self.static_attention_mask)
self.model(
input_ids=self.static_input,
attention_mask=self.static_attention_mask,
)
def __call__(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
if self.use_cuda_graph:
@@ -261,7 +265,10 @@ class Benchmark:
# print size
print(f"in_features: {in_features}, out_features: {out_features}")
new_module = bnb.nn.Linear8bitLt(
in_features, out_features, bias=bias, has_fp16_weights=False
in_features,
out_features,
bias=bias,
has_fp16_weights=False,
)
# Copy weights and bias
@@ -350,8 +357,6 @@ class Benchmark:
# Try xformers if available (only on CUDA)
if torch.cuda.is_available():
try:
from xformers.ops import memory_efficient_attention # noqa: F401
if hasattr(model, "enable_xformers_memory_efficient_attention"):
model.enable_xformers_memory_efficient_attention()
print("- Enabled xformers memory efficient attention")
@@ -427,7 +432,11 @@ class Benchmark:
else "cpu"
)
return torch.randint(
0, 1000, (batch_size, self.config.seq_length), device=device, dtype=torch.long
0,
1000,
(batch_size, self.config.seq_length),
device=device,
dtype=torch.long,
)
def _run_inference(

View File

@@ -7,7 +7,7 @@ This directory contains comprehensive sanity checks for the Leann system, ensuri
### `test_distance_functions.py`
Tests all supported distance functions across DiskANN backend:
-**MIPS** (Maximum Inner Product Search)
-**L2** (Euclidean Distance)
-**L2** (Euclidean Distance)
-**Cosine** (Cosine Similarity)
```bash
@@ -27,7 +27,7 @@ uv run python tests/sanity_checks/test_l2_verification.py
### `test_sanity_check.py`
Comprehensive end-to-end verification including:
- Distance function testing
- Embedding model compatibility
- Embedding model compatibility
- Search result correctness validation
- Backend integration testing
@@ -64,7 +64,7 @@ When all tests pass, you should see:
```
📊 测试结果总结:
mips : ✅ 通过
l2 : ✅ 通过
l2 : ✅ 通过
cosine : ✅ 通过
🎉 测试完成!
@@ -98,7 +98,7 @@ pkill -f "embedding_server"
### Typical Timing (3 documents, consumer hardware):
- **Index Building**: 2-5 seconds per distance function
- **Search Query**: 50-200ms
- **Search Query**: 50-200ms
- **Recompute Mode**: 5-15 seconds (higher accuracy)
### Memory Usage:
@@ -117,4 +117,4 @@ These tests are designed to be run in automated environments:
uv run python tests/sanity_checks/test_l2_verification.py
```
The tests are deterministic and should produce consistent results across different platforms.
The tests are deterministic and should produce consistent results across different platforms.

View File

@@ -115,7 +115,13 @@ def main():
# --- Plotting ---
print("\n--- Generating Plot ---")
plt.figure(figsize=(10, 6))
plt.plot(BATCH_SIZES, results_torch, marker="o", linestyle="-", label=f"PyTorch ({device})")
plt.plot(
BATCH_SIZES,
results_torch,
marker="o",
linestyle="-",
label=f"PyTorch ({device})",
)
plt.plot(BATCH_SIZES, results_mlx, marker="s", linestyle="-", label="MLX")
plt.title(f"Embedding Performance: MLX vs PyTorch\nModel: {MODEL_NAME_TORCH}")

View File

@@ -170,7 +170,11 @@ class Benchmark:
def _create_random_batch(self, batch_size: int) -> torch.Tensor:
return torch.randint(
0, 1000, (batch_size, self.config.seq_length), device=self.device, dtype=torch.long
0,
1000,
(batch_size, self.config.seq_length),
device=self.device,
dtype=torch.long,
)
def _run_inference(self, input_ids: torch.Tensor) -> float:
@@ -256,7 +260,11 @@ def run_mlx_benchmark():
"""Run MLX-specific benchmark"""
if not MLX_AVAILABLE:
print("MLX not available, skipping MLX benchmark")
return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": "MLX not available"}
return {
"max_throughput": 0.0,
"avg_throughput": 0.0,
"error": "MLX not available",
}
config = BenchmarkConfig(model_path="mlx-community/all-MiniLM-L6-v2-4bit", use_mlx=True)
@@ -265,7 +273,11 @@ def run_mlx_benchmark():
results = benchmark.run()
if not results:
return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": "No valid results"}
return {
"max_throughput": 0.0,
"avg_throughput": 0.0,
"error": "No valid results",
}
max_throughput = max(results[batch_size]["throughput"] for batch_size in results)
avg_throughput = np.mean([results[batch_size]["throughput"] for batch_size in results])