fix: resolve all ruff linting errors and add lint CI check

- Fix ambiguous fullwidth characters (commas, parentheses) in strings and comments
- Replace Chinese comments with English equivalents
- Fix unused imports with proper noqa annotations for intentional imports
- Fix bare except clauses with specific exception types
- Fix redefined variables and undefined names
- Add ruff noqa annotations for generated protobuf files
- Add lint and format check to GitHub Actions CI pipeline
2025-07-26 22:35:12 -07:00
parent 8537a6b17e
commit b3e9ee96fa
53 changed files with 5655 additions and 5220 deletions
@@ -1,43 +1,46 @@
 import time
-import numpy as np
+
 import matplotlib.pyplot as plt
-import torch
-from sentence_transformers import SentenceTransformer
 import mlx.core as mx
+import numpy as np
+import torch
 from mlx_lm import load
+from sentence_transformers import SentenceTransformer

 # --- Configuration ---
 MODEL_NAME_TORCH = "Qwen/Qwen3-Embedding-0.6B"
 MODEL_NAME_MLX = "mlx-community/Qwen3-Embedding-0.6B-4bit-DWQ"
 BATCH_SIZES = [1, 8, 16, 32, 64, 128]
 NUM_RUNS = 10  # Number of runs to average for each batch size
-WARMUP_RUNS = 2 # Number of warm-up runs
+WARMUP_RUNS = 2  # Number of warm-up runs

 # --- Generate Dummy Data ---
 DUMMY_SENTENCES = ["This is a test sentence for benchmarking." * 5] * max(BATCH_SIZES)

 # --- Benchmark Functions ---

+
 def benchmark_torch(model, sentences):
    start_time = time.time()
    model.encode(sentences, convert_to_numpy=True)
    end_time = time.time()
    return (end_time - start_time) * 1000  # Return time in ms

+
 def benchmark_mlx(model, tokenizer, sentences):
    start_time = time.time()
-    
+
    # Tokenize sentences using MLX tokenizer
    tokens = []
    for sentence in sentences:
        token_ids = tokenizer.encode(sentence)
        tokens.append(token_ids)
-    
+
    # Pad sequences to the same length
    max_len = max(len(t) for t in tokens)
    input_ids = []
    attention_mask = []
-    
+
    for token_seq in tokens:
        # Pad sequence
        padded = token_seq + [tokenizer.eos_token_id] * (max_len - len(token_seq))
@@ -45,24 +48,25 @@ def benchmark_mlx(model, tokenizer, sentences):
        # Create attention mask (1 for real tokens, 0 for padding)
        mask = [1] * len(token_seq) + [0] * (max_len - len(token_seq))
        attention_mask.append(mask)
-    
+
    # Convert to MLX arrays
    input_ids = mx.array(input_ids)
    attention_mask = mx.array(attention_mask)
-    
+
    # Get embeddings
    embeddings = model(input_ids)
-    
+
    # Mean pooling
    mask = mx.expand_dims(attention_mask, -1)
    sum_embeddings = (embeddings * mask).sum(axis=1)
    sum_mask = mask.sum(axis=1)
    _ = sum_embeddings / sum_mask
-    
+
    mx.eval()  # Ensure computation is finished
    end_time = time.time()
    return (end_time - start_time) * 1000  # Return time in ms

+
 # --- Main Execution ---
 def main():
    print("--- Initializing Models ---")
@@ -92,13 +96,15 @@ def main():
    for batch_size in BATCH_SIZES:
        print(f"Benchmarking batch size: {batch_size}")
        sentences_batch = DUMMY_SENTENCES[:batch_size]
-        
+
        # Benchmark PyTorch
        torch_times = [benchmark_torch(model_torch, sentences_batch) for _ in range(NUM_RUNS)]
        results_torch.append(np.mean(torch_times))
-        
+
        # Benchmark MLX
-        mlx_times = [benchmark_mlx(model_mlx, tokenizer_mlx, sentences_batch) for _ in range(NUM_RUNS)]
+        mlx_times = [
+            benchmark_mlx(model_mlx, tokenizer_mlx, sentences_batch) for _ in range(NUM_RUNS)
+        ]
        results_mlx.append(np.mean(mlx_times))

    print("\n--- Benchmark Results (Average time per batch in ms) ---")
@@ -109,20 +115,21 @@ def main():
    # --- Plotting ---
    print("\n--- Generating Plot ---")
    plt.figure(figsize=(10, 6))
-    plt.plot(BATCH_SIZES, results_torch, marker='o', linestyle='-', label=f'PyTorch ({device})')
-    plt.plot(BATCH_SIZES, results_mlx, marker='s', linestyle='-', label='MLX')
+    plt.plot(BATCH_SIZES, results_torch, marker="o", linestyle="-", label=f"PyTorch ({device})")
+    plt.plot(BATCH_SIZES, results_mlx, marker="s", linestyle="-", label="MLX")

-    plt.title(f'Embedding Performance: MLX vs PyTorch\nModel: {MODEL_NAME_TORCH}')
+    plt.title(f"Embedding Performance: MLX vs PyTorch\nModel: {MODEL_NAME_TORCH}")
    plt.xlabel("Batch Size")
    plt.ylabel("Average Time per Batch (ms)")
    plt.xticks(BATCH_SIZES)
    plt.grid(True)
    plt.legend()
-    
+
    # Save the plot
    output_filename = "embedding_benchmark.png"
    plt.savefig(output_filename)
    print(f"Plot saved to {output_filename}")

+
 if __name__ == "__main__":
    main()
@@ -3,49 +3,52 @@
 Debug script to test ZMQ communication with the exact same setup as main_cli_example.py
 """

-import zmq
-import time
-import threading
 import sys
-sys.path.append('packages/leann-backend-diskann')
+import time
+
+import zmq
+
+sys.path.append("packages/leann-backend-diskann")
 from leann_backend_diskann import embedding_pb2

+
 def test_zmq_with_same_model():
    print("=== Testing ZMQ with same model as main_cli_example.py ===")
-    
+
    # Test the exact same model that main_cli_example.py uses
    model_name = "sentence-transformers/all-mpnet-base-v2"
-    
+
    # Start server with the same model
    import subprocess
+
    server_cmd = [
-        sys.executable, "-m", 
+        sys.executable,
+        "-m",
        "packages.leann-backend-diskann.leann_backend_diskann.embedding_server",
-        "--zmq-port", "5556",  # Use different port to avoid conflicts
-        "--model-name", model_name
+        "--zmq-port",
+        "5556",  # Use different port to avoid conflicts
+        "--model-name",
+        model_name,
    ]
-    
+
    print(f"Starting server with command: {' '.join(server_cmd)}")
    server_process = subprocess.Popen(
-        server_cmd,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True
+        server_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
-    
+
    # Wait for server to start
    print("Waiting for server to start...")
    time.sleep(10)
-    
+
    # Check if server is running
    if server_process.poll() is not None:
        stdout, stderr = server_process.communicate()
        print(f"Server failed to start. stdout: {stdout}")
        print(f"Server failed to start. stderr: {stderr}")
        return False
-    
+
    print(f"Server started with PID: {server_process.pid}")
-    
+
    try:
        # Test client
        context = zmq.Context()
@@ -53,39 +56,39 @@ def test_zmq_with_same_model():
        socket.connect("tcp://127.0.0.1:5556")
        socket.setsockopt(zmq.RCVTIMEO, 30000)  # 30 second timeout like C++
        socket.setsockopt(zmq.SNDTIMEO, 30000)
-        
+
        # Create request with same format as C++
        request = embedding_pb2.NodeEmbeddingRequest()
        request.node_ids.extend([0, 1, 2, 3, 4])  # Test with some node IDs
-        
+
        print(f"Sending request with {len(request.node_ids)} node IDs...")
        start_time = time.time()
-        
+
        # Send request
        socket.send(request.SerializeToString())
-        
+
        # Receive response
        response_data = socket.recv()
        end_time = time.time()
-        
+
        print(f"Received response in {end_time - start_time:.3f} seconds")
        print(f"Response size: {len(response_data)} bytes")
-        
+
        # Parse response
        response = embedding_pb2.NodeEmbeddingResponse()
        response.ParseFromString(response_data)
-        
+
        print(f"Response dimensions: {list(response.dimensions)}")
        print(f"Embeddings data size: {len(response.embeddings_data)} bytes")
        print(f"Missing IDs: {list(response.missing_ids)}")
-        
+
        # Calculate expected size
        if len(response.dimensions) == 2:
            batch_size = response.dimensions[0]
            embedding_dim = response.dimensions[1]
            expected_bytes = batch_size * embedding_dim * 4  # 4 bytes per float
            print(f"Expected bytes: {expected_bytes}, Actual: {len(response.embeddings_data)}")
-            
+
            if len(response.embeddings_data) == expected_bytes:
                print("✅ Response format is correct!")
                return True
@@ -95,7 +98,7 @@ def test_zmq_with_same_model():
        else:
            print("❌ Invalid response dimensions!")
            return False
-            
+
    except Exception as e:
        print(f"❌ Error during ZMQ test: {e}")
        return False
@@ -105,9 +108,10 @@ def test_zmq_with_same_model():
        server_process.wait()
        print("Server terminated")

+
 if __name__ == "__main__":
    success = test_zmq_with_same_model()
    if success:
        print("\n✅ ZMQ communication test passed!")
    else:
-        print("\n❌ ZMQ communication test failed!") 
+        print("\n❌ ZMQ communication test failed!")