Merge remote-tracking branch 'origin/main' into feature/claude-code-research

feat: Add Claude Code integration with MCP server
feat: Claude Code integration ready - LEANN CLI works out of the box
2025-08-05 23:02:00 -07:00 · 2025-08-05 14:03:36 -07:00 · 2025-08-05 12:27:58 -07:00 · 2025-08-04 20:10:14 -07:00 · 2025-08-04 20:01:23 -07:00 · 2025-08-04 19:29:17 -07:00
37 changed files with 433 additions and 2117 deletions
--- a/.github/workflows/build-reusable.yml
+++ b/.github/workflows/build-reusable.yml
@@ -28,7 +28,7 @@ jobs:

      - name: Install ruff
        run: |
-          uv tool install ruff==0.12.7
+          uv tool install ruff

      - name: Run ruff check
        run: |
@@ -111,10 +111,12 @@ jobs:

      - name: Build packages
        run: |
-          # Build core (platform independent) on all platforms for consistency
-          cd packages/leann-core
-          uv build
-          cd ../..
+          # Build core (platform independent)
+          if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
+            cd packages/leann-core
+            uv build
+            cd ../..
+          fi

          # Build HNSW backend
          cd packages/leann-backend-hnsw
@@ -135,7 +137,7 @@ jobs:
            # Use system clang instead of homebrew LLVM for better compatibility
            export CC=clang
            export CXX=clang++
-            # sgesdd_ is only available on macOS 13.3+
+            # DiskANN requires macOS 13.3+ for sgesdd_ LAPACK function
            export MACOSX_DEPLOYMENT_TARGET=13.3
            uv build --wheel --python python
          else
@@ -143,10 +145,12 @@ jobs:
          fi
          cd ../..

-          # Build meta package (platform independent) on all platforms
-          cd packages/leann
-          uv build
-          cd ../..
+          # Build meta package (platform independent)
+          if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
+            cd packages/leann
+            uv build
+            cd ../..
+          fi

      - name: Repair wheels (Linux)
        if: runner.os == 'Linux'
@@ -160,15 +164,10 @@ jobs:
          fi
          cd ../..

-          # Repair DiskANN wheel - use show first to debug
+          # Repair DiskANN wheel
          cd packages/leann-backend-diskann
          if [ -d dist ]; then
-            echo "Checking DiskANN wheel contents before repair:"
-            unzip -l dist/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found"
-            auditwheel show dist/*.whl || echo "auditwheel show failed"
            auditwheel repair dist/*.whl -w dist_repaired
-            echo "Checking DiskANN wheel contents after repair:"
-            unzip -l dist_repaired/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found after repair"
            rm -rf dist
            mv dist_repaired dist
          fi
@@ -202,27 +201,22 @@ jobs:

      - name: Install built packages for testing
        run: |
-          # Create a virtual environment with the correct Python version
-          uv venv --python python${{ matrix.python }}
+          # Create a virtual environment
+          uv venv
          source .venv/bin/activate || source .venv/Scripts/activate

-          # Install the built wheels directly to ensure we use locally built packages
-          # Use only locally built wheels on all platforms for full consistency
-          FIND_LINKS="--find-links packages/leann-core/dist --find-links packages/leann/dist"
-          FIND_LINKS="$FIND_LINKS --find-links packages/leann-backend-hnsw/dist --find-links packages/leann-backend-diskann/dist"
-
-          uv pip install leann-core leann leann-backend-hnsw leann-backend-diskann \
-            $FIND_LINKS --force-reinstall
+          # Install the built wheels
+          # Use --find-links to let uv choose the correct wheel for the platform
+          if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
+            uv pip install leann-core --find-links packages/leann-core/dist
+            uv pip install leann --find-links packages/leann/dist
+          fi
+          uv pip install leann-backend-hnsw --find-links packages/leann-backend-hnsw/dist
+          uv pip install leann-backend-diskann --find-links packages/leann-backend-diskann/dist

          # Install test dependencies using extras
          uv pip install -e ".[test]"

-          # Debug: Check if _diskannpy module is installed correctly
-          echo "Checking installed DiskANN module structure:"
-          python -c "import leann_backend_diskann; print('leann_backend_diskann location:', leann_backend_diskann.__file__)" || echo "Failed to import leann_backend_diskann"
-          python -c "from leann_backend_diskann import _diskannpy; print('_diskannpy imported successfully')" || echo "Failed to import _diskannpy"
-          ls -la $(python -c "import leann_backend_diskann; import os; print(os.path.dirname(leann_backend_diskann.__file__))" 2>/dev/null) 2>/dev/null || echo "Failed to list module directory"
-
      - name: Run tests with pytest
        env:
          CI: true  # Mark as CI environment to skip memory-intensive tests
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
+    rev: v4.5.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
@@ -10,7 +10,7 @@ repos:
      - id: debug-statements

  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.12.7  # Fixed version to match pyproject.toml
+    rev: v0.2.1
    hooks:
      - id: ruff
      - id: ruff-format
--- a/README.md
+++ b/README.md
@@ -6,7 +6,6 @@
  <img src="https://img.shields.io/badge/Python-3.9%2B-blue.svg" alt="Python 3.9+">
  <img src="https://img.shields.io/badge/License-MIT-green.svg" alt="MIT License">
  <img src="https://img.shields.io/badge/Platform-Linux%20%7C%20macOS-lightgrey" alt="Platform">
-  <img src="https://img.shields.io/badge/MCP-Native%20Integration-blue?style=flat-square" alt="MCP Integration">
 </p>

 <h2 align="center" tabindex="-1" class="heading-element" dir="auto">
@@ -17,10 +16,9 @@ LEANN is an innovative vector database that democratizes personal AI. Transform

 LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. [Illustration Fig →](#️-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276)

-**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can semantic search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, **[codebase](#-claude-code-integration-transform-your-development-workflow)**\* , or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.
+**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (e.g., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.

-
-\* Claude Code only supports basic `grep`-style keyword search. **LEANN** is a drop-in **semantic search MCP service fully compatible with Claude Code**, unlocking intelligent retrieval without changing your workflow. 🔥 Check out [the easy setup →](packages/leann-mcp/README.md)
+> **🚀 NEW: Claude Code Integration!** LEANN now provides native MCP integration for Claude Code users. Index your codebase and get intelligent code assistance directly in Claude Code. [Setup Guide →](packages/leann-mcp/README.md)



@@ -30,7 +28,7 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg
  <img src="assets/effects.png" alt="LEANN vs Traditional Vector DB Storage Comparison" width="70%">
 </p>

-> **The numbers speak for themselves:** Index 60 million text chunks in just 6GB instead of 201GB. From emails to browser history, everything fits on your laptop. [See detailed benchmarks for different applications below ↓](#storage-comparison)
+> **The numbers speak for themselves:** Index 60 million Wikipedia chunks in just 6GB instead of 201GB. From emails to browser history, everything fits on your laptop. [See detailed benchmarks for different applications below ↓](#storage-comparison)


 🔒 **Privacy:** Your data never leaves your laptop. No OpenAI, no cloud, no "terms of service".
@@ -97,6 +95,7 @@ uv sync

 </details>

+
 ## Quick Start

 Our declarative API makes RAG as easy as writing a config file.
@@ -188,8 +187,8 @@ All RAG examples share these common parameters. **Interactive mode** is availabl
 --force-rebuild         # Force rebuild index even if it exists

 # Embedding Parameters
--embedding-model MODEL  # e.g., facebook/contriever, text-embedding-3-small, nomic-embed-text, or mlx-community/multilingual-e5-base-mlx
--embedding-mode MODE    # sentence-transformers, openai, mlx, or ollama
+--embedding-model MODEL  # e.g., facebook/contriever, text-embedding-3-small or mlx-community/multilingual-e5-base-mlx
+--embedding-mode MODE    # sentence-transformers, openai, or mlx

 # LLM Parameters (Text generation models)
 --llm TYPE              # LLM backend: openai, ollama, or hf (default: openai)
@@ -222,7 +221,7 @@ Ask questions directly about your personal PDFs, documents, and any directory co
  <img src="videos/paper_clear.gif" alt="LEANN Document Search Demo" width="600">
 </p>

-The example below asks a question about summarizing our paper (uses default data in `data/`, which is a directory with diverse data sources: two papers, Pride and Prejudice, and a Technical report about LLM in Huawei in Chinese), and this is the **easiest example** to run here:
+The example below asks a question about summarizing our paper (uses default data in `data/`, which is a directory with diverse data sources: two papers, Pride and Prejudice, and a README in Chinese); this is the **easiest example** to run here:

 ```bash
 source .venv/bin/activate # Don't forget to activate the virtual environment
@@ -417,26 +416,7 @@ Once the index is built, you can ask questions like:

 </details>

-### 🚀 Claude Code Integration: Transform Your Development Workflow!

-**The future of code assistance is here.** Transform your development workflow with LEANN's native MCP integration for Claude Code. Index your entire codebase and get intelligent code assistance directly in your IDE.
-
-**Key features:**
- 🔍 **Semantic code search** across your entire project
- 📚 **Context-aware assistance** for debugging and development
- 🚀 **Zero-config setup** with automatic language detection
-
-```bash
-# Install LEANN globally for MCP integration
-uv tool install leann-core
-
-# Setup is automatic - just start using Claude Code!
-```
-Try our fully agentic pipeline with auto query rewriting, semantic search planning, and more:
-
-![LEANN MCP Integration](assets/mcp_leann.png)
-
-**Ready to supercharge your coding?** [Complete Setup Guide →](packages/leann-mcp/README.md)

 ## 🖥️ Command Line Interface

@@ -453,7 +433,7 @@ leann --help
 **To make it globally available:**
 ```bash
 # Install the LEANN CLI globally using uv tool
-uv tool install leann-core
+uv tool install leann

 # Now you can use leann from anywhere without activating venv
 leann --help
@@ -466,8 +446,11 @@ leann --help
 ### Usage Examples

 ```bash
-# build from a specific directory, and my_docs is the index name
-leann build my-docs --docs ./your_documents
+# Build an index from current directory (default)
+leann build my-docs
+
+# Or from specific directory
+leann build my-docs --docs ./documents

 # Search your documents
 leann search my-docs "machine learning concepts"
@@ -541,16 +524,12 @@ Options:
 - **Dynamic batching:** Efficiently batch embedding computations for GPU utilization
 - **Two-level search:** Smart graph traversal that prioritizes promising nodes

-**Backends:**
- **HNSW** (default): Ideal for most datasets with maximum storage savings through full recomputation
- **DiskANN**: Advanced option with superior search performance, using PQ-based graph traversal with real-time reranking for the best speed-accuracy trade-off
+**Backends:** HNSW (default) for most use cases, with optional DiskANN support for billion-scale datasets.

 ## Benchmarks

-**[DiskANN vs HNSW Performance Comparison →](benchmarks/diskann_vs_hnsw_speed_comparison.py)** - Compare search performance between both backends
-
-**[Simple Example: Compare LEANN vs FAISS →](benchmarks/compare_faiss_vs_leann.py)** - See storage savings in action

+**[Simple Example: Compare LEANN vs FAISS →](benchmarks/compare_faiss_vs_leann.py)**
 ### 📊 Storage Comparison

 | System | DPR (2.1M) | Wiki (60M) | Chat (400K) | Email (780K) | Browser (38K) |
--- a/apps/base_rag_example.py
+++ b/apps/base_rag_example.py
@@ -75,7 +75,7 @@ class BaseRAGExample(ABC):
            "--embedding-mode",
            type=str,
            default="sentence-transformers",
-            choices=["sentence-transformers", "openai", "mlx", "ollama"],
+            choices=["sentence-transformers", "openai", "mlx"],
            help="Embedding backend mode (default: sentence-transformers)",
        )

@@ -85,7 +85,7 @@ class BaseRAGExample(ABC):
            "--llm",
            type=str,
            default="openai",
-            choices=["openai", "ollama", "hf", "simulated"],
+            choices=["openai", "ollama", "hf"],
            help="LLM backend to use (default: openai)",
        )
        llm_group.add_argument(
--- a/assets/mcp_leann.png
+++ b/assets/mcp_leann.png
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -1,24 +1,9 @@
-# 🧪 LEANN Benchmarks & Testing
+# 🧪 LEANN Sanity Checks

-This directory contains performance benchmarks and comprehensive tests for the LEANN system, including backend comparisons and sanity checks across different configurations.
+This directory contains comprehensive sanity checks for the LEANN system, ensuring all components work correctly across different configurations.

 ## 📁 Test Files

-### `diskann_vs_hnsw_speed_comparison.py`
-Performance comparison between DiskANN and HNSW backends:
- ✅ **Search latency** comparison with both backends using recompute
- ✅ **Index size** and **build time** measurements
- ✅ **Score validity** testing (ensures no -inf scores)
- ✅ **Configurable dataset sizes** for different scales
-
-```bash
-# Quick comparison with 500 docs, 10 queries
-python benchmarks/diskann_vs_hnsw_speed_comparison.py
-
-# Large-scale comparison with 2000 docs, 20 queries
-python benchmarks/diskann_vs_hnsw_speed_comparison.py 2000 20
-```
-
 ### `test_distance_functions.py`
 Tests all supported distance functions across DiskANN backend:
 - ✅ **MIPS** (Maximum Inner Product Search)
--- a/benchmarks/diskann_vs_hnsw_speed_comparison.py
+++ b/benchmarks/diskann_vs_hnsw_speed_comparison.py
@@ -1,268 +0,0 @@
-#!/usr/bin/env python3
-"""
-DiskANN vs HNSW Search Performance Comparison
-
-This benchmark compares search performance between DiskANN and HNSW backends:
- DiskANN: With graph partitioning enabled (is_recompute=True)
- HNSW: With recompute enabled (is_recompute=True)
- Tests performance across different dataset sizes
- Measures search latency, recall, and index size
-"""
-
-import gc
-import tempfile
-import time
-from pathlib import Path
-from typing import Any
-
-import numpy as np
-
-
-def create_test_texts(n_docs: int) -> list[str]:
-    """Create synthetic test documents for benchmarking."""
-    np.random.seed(42)
-    topics = [
-        "machine learning and artificial intelligence",
-        "natural language processing and text analysis",
-        "computer vision and image recognition",
-        "data science and statistical analysis",
-        "deep learning and neural networks",
-        "information retrieval and search engines",
-        "database systems and data management",
-        "software engineering and programming",
-        "cybersecurity and network protection",
-        "cloud computing and distributed systems",
-    ]
-
-    texts = []
-    for i in range(n_docs):
-        topic = topics[i % len(topics)]
-        variation = np.random.randint(1, 100)
-        text = (
-            f"This is document {i} about {topic}. Content variation {variation}. "
-            f"Additional information about {topic} with details and examples. "
-            f"Technical discussion of {topic} including implementation aspects."
-        )
-        texts.append(text)
-
-    return texts
-
-
-def benchmark_backend(
-    backend_name: str, texts: list[str], test_queries: list[str], backend_kwargs: dict[str, Any]
-) -> dict[str, float]:
-    """Benchmark a specific backend with the given configuration."""
-    from leann.api import LeannBuilder, LeannSearcher
-
-    print(f"\n🔧 Testing {backend_name.upper()} backend...")
-
-    with tempfile.TemporaryDirectory() as temp_dir:
-        index_path = str(Path(temp_dir) / f"benchmark_{backend_name}.leann")
-
-        # Build index
-        print(f"📦 Building {backend_name} index with {len(texts)} documents...")
-        start_time = time.time()
-
-        builder = LeannBuilder(
-            backend_name=backend_name,
-            embedding_model="facebook/contriever",
-            embedding_mode="sentence-transformers",
-            **backend_kwargs,
-        )
-
-        for text in texts:
-            builder.add_text(text)
-
-        builder.build_index(index_path)
-        build_time = time.time() - start_time
-
-        # Measure index size
-        index_dir = Path(index_path).parent
-        index_files = list(index_dir.glob(f"{Path(index_path).stem}.*"))
-        total_size = sum(f.stat().st_size for f in index_files if f.is_file())
-        size_mb = total_size / (1024 * 1024)
-
-        print(f"   ✅ Build completed in {build_time:.2f}s, index size: {size_mb:.1f}MB")
-
-        # Search benchmark
-        print("🔍 Running search benchmark...")
-        searcher = LeannSearcher(index_path)
-
-        search_times = []
-        all_results = []
-
-        for query in test_queries:
-            start_time = time.time()
-            results = searcher.search(query, top_k=5)
-            search_time = time.time() - start_time
-            search_times.append(search_time)
-            all_results.append(results)
-
-        avg_search_time = np.mean(search_times) * 1000  # Convert to ms
-        print(f"   ✅ Average search time: {avg_search_time:.1f}ms")
-
-        # Check for valid scores (detect -inf issues)
-        all_scores = [
-            result.score
-            for results in all_results
-            for result in results
-            if result.score is not None
-        ]
-        valid_scores = [
-            score for score in all_scores if score != float("-inf") and score != float("inf")
-        ]
-        score_validity_rate = len(valid_scores) / len(all_scores) if all_scores else 0
-
-        # Clean up
-        try:
-            if hasattr(searcher, "__del__"):
-                searcher.__del__()
-            del searcher
-            del builder
-            gc.collect()
-        except Exception as e:
-            print(f"⚠️  Warning: Resource cleanup error: {e}")
-
-        return {
-            "build_time": build_time,
-            "avg_search_time_ms": avg_search_time,
-            "index_size_mb": size_mb,
-            "score_validity_rate": score_validity_rate,
-        }
-
-
-def run_comparison(n_docs: int = 500, n_queries: int = 10):
-    """Run performance comparison between DiskANN and HNSW."""
-    print("🚀 Starting DiskANN vs HNSW Performance Comparison")
-    print(f"📊 Dataset: {n_docs} documents, {n_queries} test queries")
-
-    # Create test data
-    texts = create_test_texts(n_docs)
-    test_queries = [
-        "machine learning algorithms",
-        "natural language processing",
-        "computer vision techniques",
-        "data analysis methods",
-        "neural network architectures",
-        "database query optimization",
-        "software development practices",
-        "security vulnerabilities",
-        "cloud infrastructure",
-        "distributed computing",
-    ][:n_queries]
-
-    # HNSW benchmark
-    hnsw_results = benchmark_backend(
-        backend_name="hnsw",
-        texts=texts,
-        test_queries=test_queries,
-        backend_kwargs={
-            "is_recompute": True,  # Enable recompute for fair comparison
-            "M": 16,
-            "efConstruction": 200,
-        },
-    )
-
-    # DiskANN benchmark
-    diskann_results = benchmark_backend(
-        backend_name="diskann",
-        texts=texts,
-        test_queries=test_queries,
-        backend_kwargs={
-            "is_recompute": True,  # Enable graph partitioning
-            "num_neighbors": 32,
-            "search_list_size": 50,
-        },
-    )
-
-    # Performance comparison
-    print("\n📈 Performance Comparison Results")
-    print(f"{'=' * 60}")
-    print(f"{'Metric':<25} {'HNSW':<15} {'DiskANN':<15} {'Speedup':<10}")
-    print(f"{'-' * 60}")
-
-    # Build time comparison
-    build_speedup = hnsw_results["build_time"] / diskann_results["build_time"]
-    print(
-        f"{'Build Time (s)':<25} {hnsw_results['build_time']:<15.2f} {diskann_results['build_time']:<15.2f} {build_speedup:<10.2f}x"
-    )
-
-    # Search time comparison
-    search_speedup = hnsw_results["avg_search_time_ms"] / diskann_results["avg_search_time_ms"]
-    print(
-        f"{'Search Time (ms)':<25} {hnsw_results['avg_search_time_ms']:<15.1f} {diskann_results['avg_search_time_ms']:<15.1f} {search_speedup:<10.2f}x"
-    )
-
-    # Index size comparison
-    size_ratio = diskann_results["index_size_mb"] / hnsw_results["index_size_mb"]
-    print(
-        f"{'Index Size (MB)':<25} {hnsw_results['index_size_mb']:<15.1f} {diskann_results['index_size_mb']:<15.1f} {size_ratio:<10.2f}x"
-    )
-
-    # Score validity
-    print(
-        f"{'Score Validity (%)':<25} {hnsw_results['score_validity_rate'] * 100:<15.1f} {diskann_results['score_validity_rate'] * 100:<15.1f}"
-    )
-
-    print(f"{'=' * 60}")
-    print("\n🎯 Summary:")
-    if search_speedup > 1:
-        print(f"   DiskANN is {search_speedup:.2f}x faster than HNSW for search")
-    else:
-        print(f"   HNSW is {1 / search_speedup:.2f}x faster than DiskANN for search")
-
-    if size_ratio > 1:
-        print(f"   DiskANN uses {size_ratio:.2f}x more storage than HNSW")
-    else:
-        print(f"   DiskANN uses {1 / size_ratio:.2f}x less storage than HNSW")
-
-    print(
-        f"   Both backends achieved {min(hnsw_results['score_validity_rate'], diskann_results['score_validity_rate']) * 100:.1f}% score validity"
-    )
-
-
-if __name__ == "__main__":
-    import sys
-
-    try:
-        # Handle help request
-        if len(sys.argv) > 1 and sys.argv[1] in ["-h", "--help", "help"]:
-            print("DiskANN vs HNSW Performance Comparison")
-            print("=" * 50)
-            print(f"Usage: python {sys.argv[0]} [n_docs] [n_queries]")
-            print()
-            print("Arguments:")
-            print("  n_docs      Number of documents to index (default: 500)")
-            print("  n_queries   Number of test queries to run (default: 10)")
-            print()
-            print("Examples:")
-            print("  python benchmarks/diskann_vs_hnsw_speed_comparison.py")
-            print("  python benchmarks/diskann_vs_hnsw_speed_comparison.py 1000")
-            print("  python benchmarks/diskann_vs_hnsw_speed_comparison.py 2000 20")
-            sys.exit(0)
-
-        # Parse command line arguments
-        n_docs = int(sys.argv[1]) if len(sys.argv) > 1 else 500
-        n_queries = int(sys.argv[2]) if len(sys.argv) > 2 else 10
-
-        print("DiskANN vs HNSW Performance Comparison")
-        print("=" * 50)
-        print(f"Dataset: {n_docs} documents, {n_queries} queries")
-        print()
-
-        run_comparison(n_docs=n_docs, n_queries=n_queries)
-
-    except KeyboardInterrupt:
-        print("\n⚠️  Benchmark interrupted by user")
-        sys.exit(130)
-    except Exception as e:
-        print(f"\n❌ Benchmark failed: {e}")
-        sys.exit(1)
-    finally:
-        # Ensure clean exit
-        try:
-            gc.collect()
-            print("\n🧹 Cleanup completed")
-        except Exception:
-            pass
-        sys.exit(0)
--- a/data/huawei_pangu.md
+++ b/data/huawei_pangu.md
@@ -1,82 +0,0 @@
-# 盘古之殇：华为诺亚盘古大模型研发历程的心酸与黑暗
-
-各位好，
-
-我是一名盘古大模型团队，华为诺亚方舟实验室的员工。
-
-首先为自证身份，列举一些细节：
-
-1. 现诺亚主任，前算法应用部部长，后改名为小模型实验室的主任王云鹤。前诺亚主任：姚骏（大家称姚老师）。几个实验室主任：唐睿明（明哥，明队，已离职），尚利峰，张维（维哥），郝建业（郝老师），刘武龙（称呼为武龙所）等。其他骨干成员和专家陆续有很多人离职。
-2. 我们隶属于“四野”这个组织。四野下属有许多纵队，基础语言大模型是四纵。王云鹤的小模型是十六纵队。我们参加过苏州的集结，有各种月份的时间节点。在苏州攻关会颁发任务令，需要在节点前达成目标。苏州集结会把各地的人员都集中在苏州研究所，平常住宾馆，比如在甪直的酒店，与家人孩子天各一方。
-3. 在苏州集结的时候周六默认上班，非常辛苦，不过周六有下午茶，有一次还有小龙虾。在苏州研究所的工位搬迁过一次，从一栋楼换到了另一栋。苏州研究所楼栋都是欧式装修，门口有大坡，里面景色很不错。去苏州集结一般至少要去一周，甚至更久，多的人甚至一两个月都回不了家。
-4. 诺亚曾经传说是研究型的，但是来了之后因为在四野做大模型项目，项目成员完全变成了交付型的，且充满了例会，评审，汇报。很多时候做实验都要申请。团队需要对接终端小艺，华为云，ICT等诸多业务线，交付压力不小。
-5. 诺亚研发的盘古模型早期内部代号叫做“盘古智子”，一开始只有内部需要申请试用的网页版，到后续迫于压力在welink上接入和公测开放。
-
-这些天发生关于质疑盘古大模型抄袭千问的事情闹的沸沸扬扬。作为一个盘古团队的成员，我最近夜夜辗转反侧，难以入眠。盘古的品牌受到如此大的影响，一方面，我自私的为我的职业发展担忧，也为自己过去的努力工作感到不值。另一方面，由于有人开始揭露这些事情我内心又感到大快人心。在多少个日日夜夜，我们对内部某些人一次次靠着造假而又获得了无数利益的行为咬牙切齿而又无能为力。这种压抑和羞辱也逐渐消磨了我对华为的感情，让我在这里的时日逐渐浑浑噩噩，迷茫无措，时常怀疑自己的人生和自我价值。
-
-我承认我是一个懦弱的人，作为一个小小的打工人，我不仅不敢和王云鹤等内部手眼通天的人做对，更不敢和华为这样的庞然大物做对。我很怕失去我的工作，毕竟我也有家人和孩子，所以我打心眼里很佩服揭露者。但是，看到内部还在试图洗地掩盖事实，蒙蔽公众的时候，我实在不能容忍了。我也希望勇敢一次，顺从自己本心。就算自损八百，我也希望能伤敌一千。我决定把我在这里的所见所闻（部分来自于同事口述）公布出来，关于盘古大模型的“传奇故事”：
-
-华为确实主要在昇腾卡上训练大模型（小模型实验室有不少英伟达的卡，他们之前也会用来训练，后面转移到昇腾）。曾经我被华为“打造世界第二选择”的决心而折服，我本身也曾经对华为有深厚的感情。我们陪着昇腾一步步摸爬滚打，从充满bug到现在能训出模型，付出了巨大的心血和代价。
-
-最初我们的算力非常有限，在910A上训练模型。那会只支持fp16，训练的稳定性远不如bf16。盘古的moe开始很早，23年就主要是训练38Bmoe模型和后续的71B dense模型。71B的dense模型通过扩增变成了第一代的135Bdense模型，后面主力模型也逐渐在910B上训练。
-
-71B和135B模型都有一个巨大的硬伤就是tokenizer。当时使用的tokenizer编码效率极低，每个单个的符号，数字，空格，乃至汉字都会占用一个token。可想而知这会非常浪费算力，且使得模型的效果很差。这时候小模型实验室正好有个自己训的词表。姚老师当时怀疑是不是模型的tokenizer不好（虽然事后来看，他的怀疑是无疑正确的），于是就决定，让71B和135B换tokenizer，因为小模型实验室曾经尝试过。团队缝合了两个tokenizer，开始了tokenizer的更换。71B模型的更换失败了，而135B因为采用了更精细的embedding初始化策略，续训了至少1T的数据后词表总算更换成功，但可想而知，效果并不会变好。
-
-于此同期，阿里和智谱等国内其他公司在GPU上训练，且已经摸索出了正确的方法，盘古和竞品的差距越来越大。内部一个230B从头训练的dense模型又因为各种原因训练失败，导致项目的状况几乎陷入绝境。面临几个节点的压力以及内部对盘古的强烈质疑时，团队的士气低迷到了极点。团队在算力极其有限的时候，做出了很多努力和挣扎。比如，团队偶然发现当时的38B moe并没有预期moe的效果。于是去掉了moe参数，还原为了13B的dense模型。由于38B的moe源自很早的pangu alpha 13B，架构相对落后，团队进行了一系列的操作，比如切换绝对位置编码到rope，去掉bias，切换为rmsnorm。同时鉴于tokenizer的一些失败和换词表的经验，这个模型的词表也更换为了王云鹤的小模型实验室7B模型所使用的词表。后面这个13B模型进行了扩增续训，变成了第二代38B dense模型（在几个月内这个模型都是主要的盘古中档位模型），曾经具有一定的竞争力。但是，由于更大的135B模型架构落后，且更换词表模型损伤巨大（后续分析发现当时更换的缝合词表有更严重的bug），续训后也与千问等当时国内领先模型存在很大差距。这时由于内部的质疑声和领导的压力也越来越大。团队的状态几乎陷入了绝境。
-
-在这种情况下，王云鹤和他的小模型实验室出手了。他们声称是从旧的135B参数继承改造而来，通过训练短短的几百B数据，各项指标平均提升了十个点左右。实际上，这就是他们套壳应用到大模型的第一次杰作。华为的外行领导内行，使得领导完全对于这种扯淡的事情没有概念，他们只会觉得肯定是有什么算法创新。经过内部的分析，他们实际上是使用Qwen 1.5 110B续训而来，通过加层，扩增ffn维度，添加盘古pi论文的一些机制得来，凑够了大概135B的参数。实际上，旧的135B有107层，而这个模型只有82层，各种配置也都不一样。新的来路不明的135B训练完很多参数的分布也和Qwen 110B几乎一模一样。连模型代码的类名当时都是Qwen，甚至懒得改名。后续这个模型就是所谓的135B V2。而这个模型当时也提供给了很多下游，甚至包括外部客户。
-
-这件事对于我们这些认真诚实做事的同事们带来了巨大的冲击，内部很多人其实都知道这件事，甚至包括终端和华为云。我们都戏称以后别叫盘古模型了，叫千古吧。当时团队成员就想向bcg举报了，毕竟这已经是重大的业务造假了。但是后面据说被领导拦了下来，因为更高级别的领导（比如姚老师，以及可能熊总和查老）其实后面也知道了，但是并不管，因为通过套壳拿出好的结果，对他们也是有利的。这件事使得当时团队几位最强的同事开始心灰意冷，离职跑路也逐渐成为挂在嘴边的事。
-
-此时，盘古似乎迎来了转机。由于前面所述的这些盘古模型基本都是续训和改造而来，当时诺亚完全没有掌握从头训练的技术，何况还是在昇腾的NPU上进行训练。在当时团队的核心成员的极力争取下，盘古开始了第三代模型的训练，付出了巨大的努力后，在数据架构和训练算法方面都与业界逐渐接轨，而这其中的艰辛和小模型实验室的人一点关系都没有。
-
-一开始团队成员毫无信心，只从一个13B的模型开始训练，但是后面发现效果还不错，于是这个模型后续再次进行了一次参数扩增，变成了第三代的38B，代号38B V3。想必很多产品线的兄弟都对这个模型很熟悉。当时这个模型的tokenizer是基于llama的词表进行扩展的（也是业界常见的做法）。而当时王云鹤的实验室做出来了另一个词表（也就是后续pangu系列的词表）。当时两个词表还被迫进行了一次赛马，最终没有明显的好坏结论。于是，领导当即决定，应该统一词表，使用王云鹤他们的。于是，在后续从头训练的135B V3（也就是对外的Pangu Ultra），便是采用了这个tokenizer。这也解释了很多使用我们模型的兄弟的疑惑，为什么当时同为V3代的两个不同档位的模型，会使用不同的tokenizer。
-
-
-我们打心眼里觉得，135B V3是我们四纵团队当时的骄傲。这是第一个真正意义上的，华为全栈自研，正经从头训练的千亿级别的模型，且效果与24年同期竞品可比的。写到这里我已经热泪盈眶，太不容易了。当时为了稳定训练，团队做了大量实验对比，并且多次在模型梯度出现异常的时候进行及时回退重启。这个模型真正做到了后面技术报告所说的训练全程没有一个loss spike。我们克服了不知道多少困难，我们做到了，我们愿用生命和荣誉保证这个模型训练的真实性。多少个凌晨，我们为了它的训练而不眠。在被内部心声骂的一文不值的时候，我们有多么不甘，有多少的委屈，我们挺住了。
-
-我们这帮人是真的在为打磨国产算力底座燃烧自己的青春啊……客居他乡，我们放弃了家庭，放弃了假期，放弃了健康，放弃了娱乐，抛头颅洒热血，其中的艰辛与困苦，寥寥数笔不足以概括其万一。在各种动员大会上，当时口号中喊出的盘古必胜，华为必胜，我们心里是真的深深被感动。
-
-然而，我们的所有辛苦的成果，经常被小模型实验室轻飘飘的拿走了。数据，直接要走。代码，直接要走，还要求我们配合适配到能一键运行。我们当时戏称小模型实验室为点鼠标实验室。我们付出辛苦，他们取得荣耀。果然应了那句话，你在负重前行是因为有人替你岁月静好。在这种情况下，越来越多的战友再也坚持不下去了，选择了离开。看到身边那些优秀的同事一个个离职，我的内心又感叹又难过。在这种作战一样的环境下，我们比起同事来说更像是战友。他们在技术上也有无数值得我学习的地方，堪称良师。看到他们去了诸如字节Seed，Deepseek，月之暗面，腾讯和快手等等很多出色的团队，我打心眼里为他们高兴和祝福，脱离了这个辛苦却肮脏的地方。我至今还对一位离职同事的话记忆犹新，ta说：“来这里是我技术生涯中的耻辱，在这里再呆每一天都是浪费生命”。话虽难听却让我无言以对。我担心我自己技术方面的积累不足，以及没法适应互联网公司高淘汰的环境，让我多次想离职的心始终没有迈出这一步。
-
-盘古除了dense模型，后续也启动了moe的探索。一开始训练的是一个224B的moe模型。而与之平行的，小模型实验室也开启了第二次主要的套壳行动（次要的插曲可能还包括一些别的模型，比如math模型），即这次流传甚广的pangu pro moe 72B。这个模型内部自称是从小模型实验室的7B扩增上来的（就算如此，这也与技术报告不符，何况是套壳qwen 2.5的14b续训）。还记得他们训了没几天，内部的评测就立刻追上了当时的38B V3。AI系统实验室很多兄弟因为需要适配模型，都知道他们的套壳行动，只是迫于各种原因，无法伸张正义。实际上，对于后续训了很久很久的这个模型，Honestagi能够分析出这个量级的相似性我已经很诧异了，因为这个模型为了续训洗参数，所付出的算力甚至早就足够从头训一个同档位的模型了。听同事说他们为了洗掉千问的水印，采取了不少办法，甚至包括故意训了脏数据。这也为学术界研究模型血缘提供了一个前所未有的特殊模范吧。以后新的血缘方法提出可以拿出来溜溜。
-
-24年底和25年初，在Deepseek v3和r1发布之后，由于其惊艳的技术水平，团队受到了巨大的冲击，也受到了更大的质疑。于是为了紧跟潮流，盘古模仿Deepseek的模型尺寸，开启了718B moe的训练。这个时候，小模型实验室再次出手了。他们选择了套壳Deepseekv3续训。他们通过冻住Deepseek加载的参数，进行训练。连任务加载ckpt的目录都是deepseekv3，改都不改，何其嚣张？与之相反，一些有真正技术信仰的同事，在从头训练另一个718B的moe。但其中出现了各种各样的问题。但是很显然，这个模型怎么可能比直接套壳的好呢？如果不是团队leader坚持，早就被叫停了。
-
-华为的流程管理之繁重，严重拖累了大模型的研发节奏，例如版本管理，模型血缘，各种流程化，各种可追溯。讽刺的是，小模型实验室的模型似乎从来不受这些流程的约束，想套壳就套壳，想续训就续训，算力源源不断的伸手拿走。这种强烈到近乎魔幻的对比，说明了当前流程管理的情况：只许州官放火，不许百姓点灯。何其可笑？何其可悲？何其可恶？何其可耻！
-
-HonestAGI的事情出来后，内部让大家不停的研讨分析，如何公关和“回应”。诚然，这个原文的分析也许不够有力，给了王云鹤与小模型实验室他们狡辩和颠倒黑白的机会。为此，这两天我内心感到作呕，时时怀疑自己的人生意义以及苍天无眼。我不奉陪了，我要离职了，同时我也在申请从盘古部分技术报告的作者名单中移除。曾经在这些技术报告上署名是我一生都无法抹除的污点。当时我没想到，他们竟然猖狂到敢开源。我没想到，他们敢如此愚弄世人，大肆宣发。当时，我也许是存了侥幸心理，没有拒绝署名。我相信很多扎实做事的战友，也只是被迫上了贼船，或者不知情。但这件事已经无法挽回，我希望我的余生能够坚持扎实做真正有意义的事，为我当时的软弱和不坚定赎罪。
-
-深夜写到这里，我已经泪流满面，泣不成声。还记得一些出色的同事离职时，我苦笑问他们要不要发个长长的心声惯例帖，揭露一下现状。对方说：不了，浪费时间，而且我也怕揭露出来你们过的更糟。我当时一下黯然神伤，因为曾经共同为了理想奋斗过的战友已经彻底对华为彻底灰心了。当时大家调侃，我们用着当年共产党的小米加步枪，组织却有着堪比当年国民党的作风。
-
-曾几何时，我为我们用着小米加步枪打败洋枪洋炮而自豪。
-
-现在，我累了，我想投降。
-
-其实时至今日，我还是真心希望华为能认真吸取教训，能做好盘古，把盘古做到世界一流，把昇腾变成英伟达的水平。内部的劣币驱逐良币，使得诺亚乃至华为在短时间内急剧流失了大量出色的大模型人才。相信他们也正在如Deepseek等各个团队闪耀着，施展着他们的抱负才华，为中美在AI的激烈竞赛中奉献力量。我时常感叹，华为不是没有人才，而是根本不知道怎么留住人才。如果给这些人合适的环境，合适的资源，更少的枷锁，更少的政治斗争，盘古何愁不成？
-
-最后：我以生命，人格和荣誉发誓，我写的以上所有内容均为真实（至少在我有限的认知范围内）。我没有那么高的技术水平以及机会去做详尽扎实的分析，也不敢直接用内部记录举证，怕因为信息安全抓到。但是我相信我很多曾经的战友，会为我作证。在华为内部的兄弟，包括我们曾经服务过的产品线兄弟们，相信本文的无数细节能和你们的印象对照，印证我的说法。你们可能也曾经被蒙骗，但这些残酷的真相不会被尘封。我们奋战过的痕迹，也不应该被扭曲和埋葬。
-
-写了这么多，某些人肯定想把我找出来，抹杀掉。公司搞不好也想让我噤声乃至追责。如果真的这样，我，乃至我的家人的人身乃至生命安全可能都会受到威胁。为了自我保护，我近期每天会跟大家报平安。
-
-如果我消失了，就当是我为了真理和理想，为了华为乃至中国能够更好地发展算力和AI而牺牲了吧，我愿埋葬于那片曾经奋斗过的地方。
-
-诺亚，再见
-
-2025年7月6日凌晨      写于深圳
-
---
-
-各位好，
-
-感谢大家的关心与祝福。我目前暂时安全，但公司应该在进行排查与某些名单收集，后续情况未知。
-
-我补充一些细节，以免某些人继续颠倒黑白。
-
-关于135B V2，小模型实验室在迅速地完成套壳并拿完所有套壳带来的好处后（比如任务令表彰和及时激励），因为不想继续支撑下游应用和模型迭代，又把这个烫手山芋甩给了四纵。确实技高一筹，直接把四纵的兄弟们拉下水。同事提供过去一个老旧的模型，最终拿回了一个当时一个魔改的先进的千问。做大模型的人，自己做的模型就像自己孩子一样熟悉，不要把别人都当傻子。就像自家儿子出门一趟，回来个别人家孩子。
-
-盘古report的署名是不符合学术规范的。例如，135B V3有不少有技术贡献的人，因为作者名额数量限制，劳动成果没有得到应有的回报，团队内曾经有不小的意见。这个模型当时是大家智慧和汗水的结晶，甚至是团队当时的精神支柱，支撑着不少兄弟们继续留在诺亚。所谓的名额限制，以及挂名了一些毫无技术贡献的人（如一些小模型实验室的人），让兄弟们何其心寒。
-
---
-
-暂时平安。另外，支持我勇于说出真相的战友们 https://github.com/HW-whistleblower/True-Story-of-Pangu/issues/317
--- a/docs/claude-code-integration.md
+++ b/docs/claude-code-integration.md
@@ -0,0 +1,150 @@
+# Claude Code x LEANN 集成指南
+
+## ✅ 现状：已经可以工作！
+
+好消息：LEANN CLI已经完全可以在Claude Code中使用，无需任何修改！
+
+## 🚀 立即开始
+
+### 1. 激活环境
+```bash
+# 在LEANN项目目录下
+source .venv/bin/activate.fish  # fish shell
+# 或
+source .venv/bin/activate       # bash shell
+```
+
+### 2. 基本命令
+
+#### 查看现有索引
+```bash
+leann list
+```
+
+#### 搜索文档
+```bash
+leann search my-docs "machine learning" --recompute-embeddings
+```
+
+#### 问答对话
+```bash
+echo "What is machine learning?" | leann ask my-docs --llm ollama --model qwen3:8b --recompute-embeddings
+```
+
+#### 构建新索引
+```bash
+leann build project-docs --docs ./src --recompute-embeddings
+```
+
+## 💡 Claude Code 使用技巧
+
+### 在Claude Code中直接使用
+
+1. **激活环境**：
+   ```bash
+   cd /path/to/LEANN  # 替换为你本地的LEANN项目目录
+   source .venv/bin/activate.fish
+   ```
+
+2. **搜索代码库**：
+   ```bash
+   leann search my-docs "authentication patterns" --recompute-embeddings --top-k 10
+   ```
+
+3. **智能问答**：
+   ```bash
+   echo "How does the authentication system work?" | leann ask my-docs --llm ollama --model qwen3:8b --recompute-embeddings
+   ```
+
+### 批量操作示例
+
+```bash
+# 构建项目文档索引
+leann build project-docs --docs ./docs --force
+
+# 搜索多个关键词
+leann search project-docs "API authentication" --recompute-embeddings
+leann search project-docs "database schema" --recompute-embeddings
+leann search project-docs "deployment guide" --recompute-embeddings
+
+# 问答模式
+echo "What are the API endpoints?" | leann ask project-docs --recompute-embeddings
+```
+
+## 🎯 Claude 可以立即执行的工作流
+
+### 代码分析工作流
+```bash
+# 1. 构建代码库索引
+leann build codebase --docs ./src --backend hnsw --recompute-embeddings
+
+# 2. 分析架构
+echo "What is the overall architecture?" | leann ask codebase --recompute-embeddings
+
+# 3. 查找特定功能
+leann search codebase "user authentication" --recompute-embeddings --top-k 5
+
+# 4. 理解实现细节
+echo "How is user authentication implemented?" | leann ask codebase --recompute-embeddings
+```
+
+### 文档理解工作流
+```bash
+# 1. 索引项目文档
+leann build docs --docs ./docs --recompute-embeddings
+
+# 2. 快速查找信息
+leann search docs "installation requirements" --recompute-embeddings
+
+# 3. 获取详细说明
+echo "What are the system requirements?" | leann ask docs --recompute-embeddings
+```
+
+## ⚠️ 重要提示
+
+1. **必须使用 `--recompute-embeddings`** - 这是关键参数，不加会报错
+2. **需要先激活虚拟环境** - 确保有LEANN的Python环境
+3. **Ollama需要预先安装** - ask功能需要本地LLM
+
+## 🔥 立即可用的Claude提示词
+
+```
+Help me analyze this codebase using LEANN:
+
+1. First, activate the environment:
+   cd /path/to/LEANN && source .venv/bin/activate.fish
+
+2. Build an index of the source code:
+   leann build codebase --docs ./src --recompute-embeddings
+
+3. Search for authentication patterns:
+   leann search codebase "authentication middleware" --recompute-embeddings --top-k 10
+
+4. Ask about the authentication system:
+   echo "How does user authentication work in this codebase?" | leann ask codebase --recompute-embeddings
+
+Please execute these commands and help me understand the code structure.
+```
+
+## 📈 下一步改进计划
+
+虽然现在已经可以用，但还可以进一步优化：
+
+1. **简化命令** - 默认启用recompute-embeddings
+2. **配置文件** - 避免重复输入参数
+3. **状态管理** - 自动检测环境和索引
+4. **输出格式** - 更适合Claude解析的格式
+
+但这些都是锦上添花，现在就能用起来！
+
+## 🎉 总结
+
+**LEANN现在就可以在Claude Code中完美工作！**
+
+- ✅ 搜索功能正常
+- ✅ RAG问答功能正常
+- ✅ 索引构建功能正常
+- ✅ 支持多种数据源
+- ✅ 支持本地LLM
+
+只需要记住加上 `--recompute-embeddings` 参数就行！
--- a/docs/configuration-guide.md
+++ b/docs/configuration-guide.md
@@ -49,25 +49,14 @@ Based on our experience developing LEANN, embedding models fall into three categ
 - **Cons**: Slower inference, longer index build times
 - **Use when**: Quality is paramount and you have sufficient compute resources. **Highly recommended** for production use

-### Quick Start: Cloud and Local Embedding Options
+### Quick Start: OpenAI Embeddings (Fastest Setup)

-**OpenAI Embeddings (Fastest Setup)**
 For immediate testing without local model downloads:
 ```bash
 # Set OpenAI embeddings (requires OPENAI_API_KEY)
 --embedding-mode openai --embedding-model text-embedding-3-small
 ```

-**Ollama Embeddings (Privacy-Focused)**
-For local embeddings with complete privacy:
-```bash
-# First, pull an embedding model
-ollama pull nomic-embed-text
-
-# Use Ollama embeddings
--embedding-mode ollama --embedding-model nomic-embed-text
-```
-
 <details>
 <summary><strong>Cloud vs Local Trade-offs</strong></summary>

@@ -97,30 +86,16 @@ ollama pull nomic-embed-text
 ```

 ### DiskANN
-**Best for**: Performance-critical applications and large datasets - **Production-ready with automatic graph partitioning**
-
-**How it works:**
- **Product Quantization (PQ) + Real-time Reranking**: Uses compressed PQ codes for fast graph traversal, then recomputes exact embeddings for final candidates
- **Automatic Graph Partitioning**: When `is_recompute=True`, automatically partitions large indices and safely removes redundant files to save storage
- **Superior Speed-Accuracy Trade-off**: Faster search than HNSW while maintaining high accuracy
-
-**Trade-offs compared to HNSW:**
- ✅ **Faster search latency** (typically 2-8x speedup)
- ✅ **Better scaling** for large datasets
- ✅ **Smart storage management** with automatic partitioning
- ✅ **Better graph locality** with `--ldg-times` parameter for SSD optimization
- ⚠️ **Slightly larger index size** due to PQ tables and graph metadata
+**Best for**: Large datasets (> 10M vectors, 10GB+ index size) - **⚠️ Beta version, still in active development**
+- Uses Product Quantization (PQ) for coarse filtering during graph traversal
+- Novel approach: stores only PQ codes, performs rerank with exact computation in final step
+- Implements a corner case of double-queue: prunes all neighbors and recomputes at the end

 ```bash
-# Recommended for most use cases
--backend-name diskann --graph-degree 32 --build-complexity 64
-
-# For large-scale deployments
+# For billion-scale deployments
 --backend-name diskann --graph-degree 64 --build-complexity 128
 ```

-**Performance Benchmark**: Run `python benchmarks/diskann_vs_hnsw_speed_comparison.py` to compare DiskANN and HNSW on your system.
-
 ## LLM Selection: Engine and Model Comparison

 ### LLM Engines
@@ -291,4 +266,3 @@ LEANN's recomputation feature provides exact distance calculations but can be di
 - [Lessons Learned Developing LEANN](https://yichuan-w.github.io/blog/lessons_learned_in_dev_leann/)
 - [LEANN Technical Paper](https://arxiv.org/abs/2506.08276)
 - [DiskANN Original Paper](https://papers.nips.cc/paper/2019/file/09853c7fb1d3f8ee67a61b6bf4a7f8e6-Paper.pdf)
- [SSD-based Graph Partitioning](https://github.com/SonglinLife/SSD_BASED_PLAN)
--- a/packages/leann-backend-diskann/leann_backend_diskann/__init__.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/__init__.py
@@ -1,7 +1 @@
 from . import diskann_backend as diskann_backend
-from . import graph_partition
-
-# Export main classes and functions
-from .graph_partition import GraphPartitioner, partition_graph
-
-__all__ = ["GraphPartitioner", "diskann_backend", "graph_partition", "partition_graph"]
--- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py
@@ -4,7 +4,7 @@ import os
 import struct
 import sys
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np
 import psutil
@@ -137,71 +137,6 @@ class DiskannBuilder(LeannBackendBuilderInterface):
    def __init__(self, **kwargs):
        self.build_params = kwargs

-    def _safe_cleanup_after_partition(self, index_dir: Path, index_prefix: str):
-        """
-        Safely cleanup files after partition.
-        In partition mode, C++ doesn't read _disk.index content,
-        so we can delete it if all derived files exist.
-        """
-        disk_index_file = index_dir / f"{index_prefix}_disk.index"
-        beam_search_file = index_dir / f"{index_prefix}_disk_beam_search.index"
-
-        # Required files that C++ partition mode needs
-        # Note: C++ generates these with _disk.index suffix
-        disk_suffix = "_disk.index"
-        required_files = [
-            f"{index_prefix}{disk_suffix}_medoids.bin",  # Critical: assert fails if missing
-            # Note: _centroids.bin is not created in single-shot build - C++ handles this automatically
-            f"{index_prefix}_pq_pivots.bin",  # PQ table
-            f"{index_prefix}_pq_compressed.bin",  # PQ compressed vectors
-        ]
-
-        # Check if all required files exist
-        missing_files = []
-        for filename in required_files:
-            file_path = index_dir / filename
-            if not file_path.exists():
-                missing_files.append(filename)
-
-        if missing_files:
-            logger.warning(
-                f"Cannot safely delete _disk.index - missing required files: {missing_files}"
-            )
-            logger.info("Keeping all original files for safety")
-            return
-
-        # Calculate space savings
-        space_saved = 0
-        files_to_delete = []
-
-        if disk_index_file.exists():
-            space_saved += disk_index_file.stat().st_size
-            files_to_delete.append(disk_index_file)
-
-        if beam_search_file.exists():
-            space_saved += beam_search_file.stat().st_size
-            files_to_delete.append(beam_search_file)
-
-        # Safe to delete!
-        for file_to_delete in files_to_delete:
-            try:
-                os.remove(file_to_delete)
-                logger.info(f"✅ Safely deleted: {file_to_delete.name}")
-            except Exception as e:
-                logger.warning(f"Failed to delete {file_to_delete.name}: {e}")
-
-        if space_saved > 0:
-            space_saved_mb = space_saved / (1024 * 1024)
-            logger.info(f"💾 Space saved: {space_saved_mb:.1f} MB")
-
-            # Show what files are kept
-            logger.info("📁 Kept essential files for partition mode:")
-            for filename in required_files:
-                file_path = index_dir / filename
-                if file_path.exists():
-                    size_mb = file_path.stat().st_size / (1024 * 1024)
-                    logger.info(f"  - {filename} ({size_mb:.1f} MB)")
-
    def build(self, data: np.ndarray, ids: list[str], index_path: str, **kwargs):
        path = Path(index_path)
        index_dir = path.parent
@@ -216,17 +151,6 @@ class DiskannBuilder(LeannBackendBuilderInterface):
        _write_vectors_to_bin(data, index_dir / data_filename)

        build_kwargs = {**self.build_params, **kwargs}
-
-        # Extract is_recompute from nested backend_kwargs if needed
-        is_recompute = build_kwargs.get("is_recompute", False)
-        if not is_recompute and "backend_kwargs" in build_kwargs:
-            is_recompute = build_kwargs["backend_kwargs"].get("is_recompute", False)
-
-        # Flatten all backend_kwargs parameters to top level for compatibility
-        if "backend_kwargs" in build_kwargs:
-            nested_params = build_kwargs.pop("backend_kwargs")
-            build_kwargs.update(nested_params)
-
        metric_enum = _get_diskann_metrics().get(
            build_kwargs.get("distance_metric", "mips").lower()
        )
@@ -261,30 +185,6 @@ class DiskannBuilder(LeannBackendBuilderInterface):
                    build_kwargs.get("pq_disk_bytes", 0),
                    "",
                )
-
-            # Auto-partition if is_recompute is enabled
-            if build_kwargs.get("is_recompute", False):
-                logger.info("is_recompute=True, starting automatic graph partitioning...")
-                from .graph_partition import partition_graph
-
-                # Partition the index using absolute paths
-                # Convert to absolute paths to avoid issues with working directory changes
-                absolute_index_dir = Path(index_dir).resolve()
-                absolute_index_prefix_path = str(absolute_index_dir / index_prefix)
-                disk_graph_path, partition_bin_path = partition_graph(
-                    index_prefix_path=absolute_index_prefix_path,
-                    output_dir=str(absolute_index_dir),
-                    partition_prefix=index_prefix,
-                )
-
-                # Safe cleanup: In partition mode, C++ doesn't read _disk.index content
-                # but still needs the derived files (_medoids.bin, _centroids.bin, etc.)
-                self._safe_cleanup_after_partition(index_dir, index_prefix)
-
-                logger.info("✅ Graph partitioning completed successfully!")
-                logger.info(f"  - Disk graph: {disk_graph_path}")
-                logger.info(f"  - Partition file: {partition_bin_path}")
-
        finally:
            temp_data_file = index_dir / data_filename
            if temp_data_file.exists():
@@ -313,26 +213,7 @@ class DiskannSearcher(BaseSearcher):

            # For DiskANN, we need to reinitialize the index when zmq_port changes
            # Store the initialization parameters for later use
-            # Note: C++ load method expects the BASE path (without _disk.index suffix)
-            # C++ internally constructs: index_prefix + "_disk.index"
-            index_name = self.index_path.stem  # "simple_test.leann" -> "simple_test"
-            diskann_index_prefix = str(self.index_dir / index_name)  # /path/to/simple_test
-            full_index_prefix = diskann_index_prefix  # /path/to/simple_test (base path)
-
-            # Auto-detect partition files and set partition_prefix
-            partition_graph_file = self.index_dir / f"{index_name}_disk_graph.index"
-            partition_bin_file = self.index_dir / f"{index_name}_partition.bin"
-
-            partition_prefix = ""
-            if partition_graph_file.exists() and partition_bin_file.exists():
-                # C++ expects full path prefix, not just filename
-                partition_prefix = str(self.index_dir / index_name)  # /path/to/simple_test
-                logger.info(
-                    f"✅ Detected partition files, using partition_prefix='{partition_prefix}'"
-                )
-            else:
-                logger.debug("No partition files detected, using standard index files")
-
+            full_index_prefix = str(self.index_dir / self.index_path.stem)
            self._init_params = {
                "metric_enum": metric_enum,
                "full_index_prefix": full_index_prefix,
@@ -340,14 +221,8 @@ class DiskannSearcher(BaseSearcher):
                "num_nodes_to_cache": kwargs.get("num_nodes_to_cache", 0),
                "cache_mechanism": 1,
                "pq_prefix": "",
-                "partition_prefix": partition_prefix,
+                "partition_prefix": "",
            }
-
-            # Log partition configuration for debugging
-            if partition_prefix:
-                logger.info(
-                    f"✅ Detected partition files, using partition_prefix='{partition_prefix}'"
-                )
            self._diskannpy = diskannpy
            self._current_zmq_port = None
            self._index = None
@@ -384,7 +259,7 @@ class DiskannSearcher(BaseSearcher):
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = False,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        batch_recompute: bool = False,
        dedup_node_dis: bool = False,
        **kwargs,
@@ -459,25 +334,3 @@ class DiskannSearcher(BaseSearcher):
        string_labels = [[str(int_label) for int_label in batch_labels] for batch_labels in labels]

        return {"labels": string_labels, "distances": distances}
-
-    def cleanup(self):
-        """Cleanup DiskANN-specific resources including C++ index."""
-        # Call parent cleanup first
-        super().cleanup()
-
-        # Delete the C++ index to trigger destructors
-        try:
-            if hasattr(self, "_index") and self._index is not None:
-                del self._index
-                self._index = None
-                self._current_zmq_port = None
-        except Exception:
-            pass
-
-        # Force garbage collection to ensure C++ objects are destroyed
-        try:
-            import gc
-
-            gc.collect()
-        except Exception:
-            pass
--- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py
@@ -10,7 +10,6 @@ import sys
 import threading
 import time
 from pathlib import Path
-from typing import Optional

 import numpy as np
 import zmq
@@ -33,7 +32,7 @@ if not logger.handlers:


 def create_diskann_embedding_server(
-    passages_file: Optional[str] = None,
+    passages_file: str | None = None,
    zmq_port: int = 5555,
    model_name: str = "sentence-transformers/all-mpnet-base-v2",
    embedding_mode: str = "sentence-transformers",
@@ -81,8 +80,7 @@ def create_diskann_embedding_server(
    with open(passages_file) as f:
        meta = json.load(f)

-    logger.info(f"Loading PassageManager with metadata_file_path: {passages_file}")
-    passages = PassageManager(meta["passage_sources"], metadata_file_path=passages_file)
+    passages = PassageManager(meta["passage_sources"])
    logger.info(
        f"Loaded PassageManager with {len(passages.global_offset_map)} passages from metadata"
    )
@@ -263,7 +261,7 @@ if __name__ == "__main__":
        "--embedding-mode",
        type=str,
        default="sentence-transformers",
-        choices=["sentence-transformers", "openai", "mlx", "ollama"],
+        choices=["sentence-transformers", "openai", "mlx"],
        help="Embedding backend mode",
    )
    parser.add_argument(
--- a/packages/leann-backend-diskann/leann_backend_diskann/graph_partition.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/graph_partition.py
@@ -1,299 +0,0 @@
-#!/usr/bin/env python3
-"""
-Graph Partition Module for LEANN DiskANN Backend
-
-This module provides Python bindings for the graph partition functionality
-of DiskANN, allowing users to partition disk-based indices for better
-performance.
-"""
-
-import os
-import shutil
-import subprocess
-import tempfile
-from pathlib import Path
-from typing import Optional
-
-
-class GraphPartitioner:
-    """
-    A Python interface for DiskANN's graph partition functionality.
-
-    This class provides methods to partition disk-based indices for improved
-    search performance and memory efficiency.
-    """
-
-    def __init__(self, build_type: str = "release"):
-        """
-        Initialize the GraphPartitioner.
-
-        Args:
-            build_type: Build type for the executables ("debug" or "release")
-        """
-        self.build_type = build_type
-        self._ensure_executables()
-
-    def _get_executable_path(self, name: str) -> str:
-        """Get the path to a graph partition executable."""
-        # Get the directory where this Python module is located
-        module_dir = Path(__file__).parent
-        # Navigate to the graph_partition directory
-        graph_partition_dir = module_dir.parent / "third_party" / "DiskANN" / "graph_partition"
-        executable_path = graph_partition_dir / "build" / self.build_type / "graph_partition" / name
-
-        if not executable_path.exists():
-            raise FileNotFoundError(f"Executable {name} not found at {executable_path}")
-
-        return str(executable_path)
-
-    def _ensure_executables(self):
-        """Ensure that the required executables are built."""
-        try:
-            self._get_executable_path("partitioner")
-            self._get_executable_path("index_relayout")
-        except FileNotFoundError:
-            # Try to build the executables automatically
-            print("Executables not found, attempting to build them...")
-            self._build_executables()
-
-    def _build_executables(self):
-        """Build the required executables."""
-        graph_partition_dir = (
-            Path(__file__).parent.parent / "third_party" / "DiskANN" / "graph_partition"
-        )
-        original_dir = os.getcwd()
-
-        try:
-            os.chdir(graph_partition_dir)
-
-            # Clean any existing build
-            if (graph_partition_dir / "build").exists():
-                shutil.rmtree(graph_partition_dir / "build")
-
-            # Run the build script
-            cmd = ["./build.sh", self.build_type, "split_graph", "/tmp/dummy"]
-            subprocess.run(cmd, capture_output=True, text=True, cwd=graph_partition_dir)
-
-            # Check if executables were created
-            partitioner_path = self._get_executable_path("partitioner")
-            relayout_path = self._get_executable_path("index_relayout")
-
-            print(f"✅ Built partitioner: {partitioner_path}")
-            print(f"✅ Built index_relayout: {relayout_path}")
-
-        except Exception as e:
-            raise RuntimeError(f"Failed to build executables: {e}")
-        finally:
-            os.chdir(original_dir)
-
-    def partition_graph(
-        self,
-        index_prefix_path: str,
-        output_dir: Optional[str] = None,
-        partition_prefix: Optional[str] = None,
-        **kwargs,
-    ) -> tuple[str, str]:
-        """
-        Partition a disk-based index for improved performance.
-
-        Args:
-            index_prefix_path: Path to the index prefix (e.g., "/path/to/index")
-            output_dir: Output directory for results (defaults to parent of index_prefix_path)
-            partition_prefix: Prefix for output files (defaults to basename of index_prefix_path)
-            **kwargs: Additional parameters for graph partitioning:
-                - gp_times: Number of LDG partition iterations (default: 10)
-                - lock_nums: Number of lock nodes (default: 10)
-                - cut: Cut adjacency list degree (default: 100)
-                - scale_factor: Scale factor (default: 1)
-                - data_type: Data type (default: "float")
-                - thread_nums: Number of threads (default: 10)
-
-        Returns:
-            Tuple of (disk_graph_index_path, partition_bin_path)
-
-        Raises:
-            RuntimeError: If the partitioning process fails
-        """
-        # Set default parameters
-        params = {
-            "gp_times": 10,
-            "lock_nums": 10,
-            "cut": 100,
-            "scale_factor": 1,
-            "data_type": "float",
-            "thread_nums": 10,
-            **kwargs,
-        }
-
-        # Determine output directory
-        if output_dir is None:
-            output_dir = str(Path(index_prefix_path).parent)
-
-        # Create output directory if it doesn't exist
-        Path(output_dir).mkdir(parents=True, exist_ok=True)
-
-        # Determine partition prefix
-        if partition_prefix is None:
-            partition_prefix = Path(index_prefix_path).name
-
-        # Get executable paths
-        partitioner_path = self._get_executable_path("partitioner")
-        relayout_path = self._get_executable_path("index_relayout")
-
-        # Create temporary directory for processing
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Change to the graph_partition directory for temporary files
-            graph_partition_dir = (
-                Path(__file__).parent.parent / "third_party" / "DiskANN" / "graph_partition"
-            )
-            original_dir = os.getcwd()
-
-            try:
-                os.chdir(graph_partition_dir)
-
-                # Create temporary data directory
-                temp_data_dir = Path(temp_dir) / "data"
-                temp_data_dir.mkdir(parents=True, exist_ok=True)
-
-                # Set up paths for temporary files
-                graph_path = temp_data_dir / "starling" / "_M_R_L_B" / "GRAPH"
-                graph_gp_path = (
-                    graph_path
-                    / f"GP_TIMES_{params['gp_times']}_LOCK_{params['lock_nums']}_GP_USE_FREQ0_CUT{params['cut']}_SCALE{params['scale_factor']}"
-                )
-                graph_gp_path.mkdir(parents=True, exist_ok=True)
-
-                # Find input index file
-                old_index_file = f"{index_prefix_path}_disk_beam_search.index"
-                if not os.path.exists(old_index_file):
-                    old_index_file = f"{index_prefix_path}_disk.index"
-
-                if not os.path.exists(old_index_file):
-                    raise RuntimeError(f"Index file not found: {old_index_file}")
-
-                # Run partitioner
-                gp_file_path = graph_gp_path / "_part.bin"
-                partitioner_cmd = [
-                    partitioner_path,
-                    "--index_file",
-                    old_index_file,
-                    "--data_type",
-                    params["data_type"],
-                    "--gp_file",
-                    str(gp_file_path),
-                    "-T",
-                    str(params["thread_nums"]),
-                    "--ldg_times",
-                    str(params["gp_times"]),
-                    "--scale",
-                    str(params["scale_factor"]),
-                    "--mode",
-                    "1",
-                ]
-
-                print(f"Running partitioner: {' '.join(partitioner_cmd)}")
-                result = subprocess.run(
-                    partitioner_cmd, capture_output=True, text=True, cwd=graph_partition_dir
-                )
-
-                if result.returncode != 0:
-                    raise RuntimeError(
-                        f"Partitioner failed with return code {result.returncode}.\n"
-                        f"stdout: {result.stdout}\n"
-                        f"stderr: {result.stderr}"
-                    )
-
-                # Run relayout
-                part_tmp_index = graph_gp_path / "_part_tmp.index"
-                relayout_cmd = [
-                    relayout_path,
-                    old_index_file,
-                    str(gp_file_path),
-                    params["data_type"],
-                    "1",
-                ]
-
-                print(f"Running relayout: {' '.join(relayout_cmd)}")
-                result = subprocess.run(
-                    relayout_cmd, capture_output=True, text=True, cwd=graph_partition_dir
-                )
-
-                if result.returncode != 0:
-                    raise RuntimeError(
-                        f"Relayout failed with return code {result.returncode}.\n"
-                        f"stdout: {result.stdout}\n"
-                        f"stderr: {result.stderr}"
-                    )
-
-                # Copy results to output directory
-                disk_graph_path = Path(output_dir) / f"{partition_prefix}_disk_graph.index"
-                partition_bin_path = Path(output_dir) / f"{partition_prefix}_partition.bin"
-
-                shutil.copy2(part_tmp_index, disk_graph_path)
-                shutil.copy2(gp_file_path, partition_bin_path)
-
-                print(f"Results copied to: {output_dir}")
-                return str(disk_graph_path), str(partition_bin_path)
-
-            finally:
-                os.chdir(original_dir)
-
-    def get_partition_info(self, partition_bin_path: str) -> dict:
-        """
-        Get information about a partition file.
-
-        Args:
-            partition_bin_path: Path to the partition binary file
-
-        Returns:
-            Dictionary containing partition information
-        """
-        if not os.path.exists(partition_bin_path):
-            raise FileNotFoundError(f"Partition file not found: {partition_bin_path}")
-
-        # For now, return basic file information
-        # In the future, this could parse the binary file for detailed info
-        stat = os.stat(partition_bin_path)
-        return {
-            "file_size": stat.st_size,
-            "file_path": partition_bin_path,
-            "modified_time": stat.st_mtime,
-        }
-
-
-def partition_graph(
-    index_prefix_path: str,
-    output_dir: Optional[str] = None,
-    partition_prefix: Optional[str] = None,
-    build_type: str = "release",
-    **kwargs,
-) -> tuple[str, str]:
-    """
-    Convenience function to partition a graph index.
-
-    Args:
-        index_prefix_path: Path to the index prefix
-        output_dir: Output directory (defaults to parent of index_prefix_path)
-        partition_prefix: Prefix for output files (defaults to basename of index_prefix_path)
-        build_type: Build type for executables ("debug" or "release")
-        **kwargs: Additional parameters for graph partitioning
-
-    Returns:
-        Tuple of (disk_graph_index_path, partition_bin_path)
-    """
-    partitioner = GraphPartitioner(build_type=build_type)
-    return partitioner.partition_graph(index_prefix_path, output_dir, partition_prefix, **kwargs)
-
-
-# Example usage:
-if __name__ == "__main__":
-    # Example: partition an index
-    try:
-        disk_graph_path, partition_bin_path = partition_graph(
-            "/path/to/your/index_prefix", gp_times=10, lock_nums=10, cut=100
-        )
-        print("Partitioning completed successfully!")
-        print(f"Disk graph index: {disk_graph_path}")
-        print(f"Partition binary: {partition_bin_path}")
-    except Exception as e:
-        print(f"Partitioning failed: {e}")
--- a/packages/leann-backend-diskann/leann_backend_diskann/graph_partition_simple.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/graph_partition_simple.py
@@ -1,137 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simplified Graph Partition Module for LEANN DiskANN Backend
-
-This module provides a simple Python interface for graph partitioning
-that directly calls the existing executables.
-"""
-
-import os
-import subprocess
-import tempfile
-from pathlib import Path
-from typing import Optional
-
-
-def partition_graph_simple(
-    index_prefix_path: str, output_dir: Optional[str] = None, **kwargs
-) -> tuple[str, str]:
-    """
-    Simple function to partition a graph index.
-
-    Args:
-        index_prefix_path: Path to the index prefix (e.g., "/path/to/index")
-        output_dir: Output directory (defaults to parent of index_prefix_path)
-        **kwargs: Additional parameters for graph partitioning
-
-    Returns:
-        Tuple of (disk_graph_index_path, partition_bin_path)
-    """
-    # Set default parameters
-    params = {
-        "gp_times": 10,
-        "lock_nums": 10,
-        "cut": 100,
-        "scale_factor": 1,
-        "data_type": "float",
-        "thread_nums": 10,
-        **kwargs,
-    }
-
-    # Determine output directory
-    if output_dir is None:
-        output_dir = str(Path(index_prefix_path).parent)
-
-    # Find the graph_partition directory
-    current_file = Path(__file__)
-    graph_partition_dir = current_file.parent.parent / "third_party" / "DiskANN" / "graph_partition"
-
-    if not graph_partition_dir.exists():
-        raise RuntimeError(f"Graph partition directory not found: {graph_partition_dir}")
-
-    # Find input index file
-    old_index_file = f"{index_prefix_path}_disk_beam_search.index"
-    if not os.path.exists(old_index_file):
-        old_index_file = f"{index_prefix_path}_disk.index"
-
-    if not os.path.exists(old_index_file):
-        raise RuntimeError(f"Index file not found: {old_index_file}")
-
-    # Create temporary directory for processing
-    with tempfile.TemporaryDirectory() as temp_dir:
-        temp_data_dir = Path(temp_dir) / "data"
-        temp_data_dir.mkdir(parents=True, exist_ok=True)
-
-        # Set up paths for temporary files
-        graph_path = temp_data_dir / "starling" / "_M_R_L_B" / "GRAPH"
-        graph_gp_path = (
-            graph_path
-            / f"GP_TIMES_{params['gp_times']}_LOCK_{params['lock_nums']}_GP_USE_FREQ0_CUT{params['cut']}_SCALE{params['scale_factor']}"
-        )
-        graph_gp_path.mkdir(parents=True, exist_ok=True)
-
-        # Run the build script with our parameters
-        cmd = [str(graph_partition_dir / "build.sh"), "release", "split_graph", index_prefix_path]
-
-        # Set environment variables for parameters
-        env = os.environ.copy()
-        env.update(
-            {
-                "GP_TIMES": str(params["gp_times"]),
-                "GP_LOCK_NUMS": str(params["lock_nums"]),
-                "GP_CUT": str(params["cut"]),
-                "GP_SCALE_F": str(params["scale_factor"]),
-                "DATA_TYPE": params["data_type"],
-                "GP_T": str(params["thread_nums"]),
-            }
-        )
-
-        print(f"Running graph partition with command: {' '.join(cmd)}")
-        print(f"Working directory: {graph_partition_dir}")
-
-        # Run the command
-        result = subprocess.run(
-            cmd, env=env, capture_output=True, text=True, cwd=graph_partition_dir
-        )
-
-        if result.returncode != 0:
-            print(f"Command failed with return code {result.returncode}")
-            print(f"stdout: {result.stdout}")
-            print(f"stderr: {result.stderr}")
-            raise RuntimeError(
-                f"Graph partitioning failed with return code {result.returncode}.\n"
-                f"stdout: {result.stdout}\n"
-                f"stderr: {result.stderr}"
-            )
-
-        # Check if output files were created
-        disk_graph_path = Path(output_dir) / "_disk_graph.index"
-        partition_bin_path = Path(output_dir) / "_partition.bin"
-
-        if not disk_graph_path.exists():
-            raise RuntimeError(f"Expected output file not found: {disk_graph_path}")
-
-        if not partition_bin_path.exists():
-            raise RuntimeError(f"Expected output file not found: {partition_bin_path}")
-
-        print("✅ Partitioning completed successfully!")
-        print(f"   Disk graph index: {disk_graph_path}")
-        print(f"   Partition binary: {partition_bin_path}")
-
-        return str(disk_graph_path), str(partition_bin_path)
-
-
-# Example usage
-if __name__ == "__main__":
-    try:
-        disk_graph_path, partition_bin_path = partition_graph_simple(
-            "/Users/yichuan/Desktop/release2/leann/diskannbuild/test_doc_files",
-            gp_times=5,
-            lock_nums=5,
-            cut=50,
-        )
-        print("Success! Output files:")
-        print(f"  - {disk_graph_path}")
-        print(f"  - {partition_bin_path}")
-    except Exception as e:
-        print(f"Error: {e}")
--- a/packages/leann-backend-diskann/pyproject.toml
+++ b/packages/leann-backend-diskann/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-diskann"
-version = "0.2.5"
-dependencies = ["leann-core==0.2.5", "numpy", "protobuf>=3.19.0"]
+version = "0.2.1"
+dependencies = ["leann-core==0.2.1", "numpy", "protobuf>=3.19.0"]

 [tool.scikit-build]
 # Key: simplified CMake path
--- a/packages/leann-backend-diskann/third_party/DiskANN
+++ b/packages/leann-backend-diskann/third_party/DiskANN
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
@@ -2,7 +2,7 @@ import logging
 import os
 import shutil
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np
 from leann.interface import (
@@ -152,7 +152,7 @@ class HNSWSearcher(BaseSearcher):
        self,
        query: np.ndarray,
        top_k: int,
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        complexity: int = 64,
        beam_width: int = 1,
        prune_ratio: float = 0.0,
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py
@@ -10,7 +10,6 @@ import sys
 import threading
 import time
 from pathlib import Path
-from typing import Optional

 import msgpack
 import numpy as np
@@ -34,7 +33,7 @@ if not logger.handlers:


 def create_hnsw_embedding_server(
-    passages_file: Optional[str] = None,
+    passages_file: str | None = None,
    zmq_port: int = 5555,
    model_name: str = "sentence-transformers/all-mpnet-base-v2",
    distance_metric: str = "mips",
@@ -82,8 +81,19 @@ def create_hnsw_embedding_server(
    with open(passages_file) as f:
        meta = json.load(f)

-    # Let PassageManager handle path resolution uniformly
-    passages = PassageManager(meta["passage_sources"], metadata_file_path=passages_file)
+    # Convert relative paths to absolute paths based on metadata file location
+    metadata_dir = Path(passages_file).parent.parent  # Go up one level from the metadata file
+    passage_sources = []
+    for source in meta["passage_sources"]:
+        source_copy = source.copy()
+        # Convert relative paths to absolute paths
+        if not Path(source_copy["path"]).is_absolute():
+            source_copy["path"] = str(metadata_dir / source_copy["path"])
+        if not Path(source_copy["index_path"]).is_absolute():
+            source_copy["index_path"] = str(metadata_dir / source_copy["index_path"])
+        passage_sources.append(source_copy)
+
+    passages = PassageManager(passage_sources)
    logger.info(
        f"Loaded PassageManager with {len(passages.global_offset_map)} passages from metadata"
    )
@@ -285,7 +295,7 @@ if __name__ == "__main__":
        "--embedding-mode",
        type=str,
        default="sentence-transformers",
-        choices=["sentence-transformers", "openai", "mlx", "ollama"],
+        choices=["sentence-transformers", "openai", "mlx"],
        help="Embedding backend mode",
    )

--- a/packages/leann-backend-hnsw/pyproject.toml
+++ b/packages/leann-backend-hnsw/pyproject.toml
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-hnsw"
-version = "0.2.5"
+version = "0.2.1"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.2.5",
+    "leann-core==0.2.1",
    "numpy",
    "pyzmq>=23.0.0",
    "msgpack>=1.0.0",
--- a/packages/leann-core/pyproject.toml
+++ b/packages/leann-core/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann-core"
-version = "0.2.5"
+version = "0.2.1"
 description = "Core API and plugin system for LEANN"
 readme = "README.md"
 requires-python = ">=3.9"
--- a/packages/leann-core/src/leann/api.py
+++ b/packages/leann-core/src/leann/api.py
@@ -10,7 +10,7 @@ import time
 import warnings
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np

@@ -33,7 +33,7 @@ def compute_embeddings(
    model_name: str,
    mode: str = "sentence-transformers",
    use_server: bool = True,
-    port: Optional[int] = None,
+    port: int | None = None,
    is_build=False,
 ) -> np.ndarray:
    """
@@ -87,26 +87,21 @@ def compute_embeddings_via_server(chunks: list[str], model_name: str, port: int)
    # Connect to embedding server
    context = zmq.Context()
    socket = context.socket(zmq.REQ)
-    socket.setsockopt(zmq.LINGER, 0)  # Don't block on close
-    socket.setsockopt(zmq.RCVTIMEO, 300000)
-    socket.setsockopt(zmq.SNDTIMEO, 300000)
-    socket.setsockopt(zmq.IMMEDIATE, 1)
    socket.connect(f"tcp://localhost:{port}")

-    try:
-        # Send chunks to server for embedding computation
-        request = chunks
-        socket.send(msgpack.packb(request))
+    # Send chunks to server for embedding computation
+    request = chunks
+    socket.send(msgpack.packb(request))

-        # Receive embeddings from server
-        response = socket.recv()
-        embeddings_list = msgpack.unpackb(response)
+    # Receive embeddings from server
+    response = socket.recv()
+    embeddings_list = msgpack.unpackb(response)

-        # Convert back to numpy array
-        embeddings = np.array(embeddings_list, dtype=np.float32)
-    finally:
-        socket.close()
-        # Don't call context.term() - this was causing hangs
+    # Convert back to numpy array
+    embeddings = np.array(embeddings_list, dtype=np.float32)
+
+    socket.close()
+    context.term()

    return embeddings

@@ -120,9 +115,7 @@ class SearchResult:


 class PassageManager:
-    def __init__(
-        self, passage_sources: list[dict[str, Any]], metadata_file_path: Optional[str] = None
-    ):
+    def __init__(self, passage_sources: list[dict[str, Any]]):
        self.offset_maps = {}
        self.passage_files = {}
        self.global_offset_map = {}  # Combined map for fast lookup
@@ -132,26 +125,10 @@ class PassageManager:
            passage_file = source["path"]
            index_file = source["index_path"]  # .idx file

-            # Fix path resolution - relative paths should be relative to metadata file directory
+            # Fix path resolution for Colab and other environments
            if not Path(index_file).is_absolute():
-                if metadata_file_path:
-                    # Resolve relative to metadata file directory
-                    metadata_dir = Path(metadata_file_path).parent
-                    logger.debug(
-                        f"PassageManager: Resolving relative paths from metadata_dir: {metadata_dir}"
-                    )
-                    index_file = str((metadata_dir / index_file).resolve())
-                    passage_file = str((metadata_dir / passage_file).resolve())
-                    logger.debug(f"PassageManager: Resolved index_file: {index_file}")
-                else:
-                    # Fallback to current directory resolution (legacy behavior)
-                    logger.warning(
-                        "PassageManager: No metadata_file_path provided, using fallback resolution from cwd"
-                    )
-                    logger.debug(f"PassageManager: Current working directory: {Path.cwd()}")
-                    index_file = str(Path(index_file).resolve())
-                    passage_file = str(Path(passage_file).resolve())
-                    logger.debug(f"PassageManager: Fallback resolved index_file: {index_file}")
+                # If relative path, try to resolve it properly
+                index_file = str(Path(index_file).resolve())

            if not Path(index_file).exists():
                raise FileNotFoundError(f"Passage index file not found: {index_file}")
@@ -180,12 +157,12 @@ class LeannBuilder:
        self,
        backend_name: str,
        embedding_model: str = "facebook/contriever",
-        dimensions: Optional[int] = None,
+        dimensions: int | None = None,
        embedding_mode: str = "sentence-transformers",
        **backend_kwargs,
    ):
        self.backend_name = backend_name
-        backend_factory: Optional[LeannBackendFactoryInterface] = BACKEND_REGISTRY.get(backend_name)
+        backend_factory: LeannBackendFactoryInterface | None = BACKEND_REGISTRY.get(backend_name)
        if backend_factory is None:
            raise ValueError(f"Backend '{backend_name}' not found or not registered.")
        self.backend_factory = backend_factory
@@ -265,7 +242,7 @@ class LeannBuilder:
        self.backend_kwargs = backend_kwargs
        self.chunks: list[dict[str, Any]] = []

-    def add_text(self, text: str, metadata: Optional[dict[str, Any]] = None):
+    def add_text(self, text: str, metadata: dict[str, Any] | None = None):
        if metadata is None:
            metadata = {}
        passage_id = metadata.get("id", str(len(self.chunks)))
@@ -337,8 +314,8 @@ class LeannBuilder:
            "passage_sources": [
                {
                    "type": "jsonl",
-                    "path": passages_file.name,  # Use relative path (just filename)
-                    "index_path": offset_file.name,  # Use relative path (just filename)
+                    "path": str(passages_file),
+                    "index_path": str(offset_file),
                }
            ],
        }
@@ -453,8 +430,8 @@ class LeannBuilder:
            "passage_sources": [
                {
                    "type": "jsonl",
-                    "path": passages_file.name,  # Use relative path (just filename)
-                    "index_path": offset_file.name,  # Use relative path (just filename)
+                    "path": str(passages_file),
+                    "index_path": str(offset_file),
                }
            ],
            "built_from_precomputed_embeddings": True,
@@ -496,9 +473,7 @@ class LeannSearcher:
        self.embedding_model = self.meta_data["embedding_model"]
        # Support both old and new format
        self.embedding_mode = self.meta_data.get("embedding_mode", "sentence-transformers")
-        self.passage_manager = PassageManager(
-            self.meta_data.get("passage_sources", []), metadata_file_path=self.meta_path_str
-        )
+        self.passage_manager = PassageManager(self.meta_data.get("passage_sources", []))
        backend_factory = BACKEND_REGISTRY.get(backend_name)
        if backend_factory is None:
            raise ValueError(f"Backend '{backend_name}' not found.")
@@ -571,6 +546,7 @@ class LeannSearcher:
            zmq_port=zmq_port,
            **kwargs,
        )
+        time.time() - start_time
        # logger.info(f"  Search time: {search_time} seconds")
        logger.info(f"  Backend returned: labels={len(results.get('labels', [[]])[0])} results")

@@ -578,7 +554,7 @@ class LeannSearcher:
        if "labels" in results and "distances" in results:
            logger.info(f"  Processing {len(results['labels'][0])} passage IDs:")
            for i, (string_id, dist) in enumerate(
-                zip(results["labels"][0], results["distances"][0])
+                zip(results["labels"][0], results["distances"][0], strict=False)
            ):
                try:
                    passage_data = self.passage_manager.get_passage(string_id)
@@ -611,17 +587,12 @@ class LeannSearcher:
        logger.info(f"  {GREEN}✓ Final enriched results: {len(enriched_results)} passages{RESET}")
        return enriched_results

-    def cleanup(self):
-        """Cleanup embedding server and other resources."""
-        if hasattr(self.backend_impl, "cleanup"):
-            self.backend_impl.cleanup()
-

 class LeannChat:
    def __init__(
        self,
        index_path: str,
-        llm_config: Optional[dict[str, Any]] = None,
+        llm_config: dict[str, Any] | None = None,
        enable_warmup: bool = False,
        **kwargs,
    ):
@@ -637,7 +608,7 @@ class LeannChat:
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = True,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        llm_kwargs: Optional[dict[str, Any]] = None,
+        llm_kwargs: dict[str, Any] | None = None,
        expected_zmq_port: int = 5557,
        **search_kwargs,
    ):
--- a/packages/leann-core/src/leann/chat.py
+++ b/packages/leann-core/src/leann/chat.py
@@ -8,7 +8,7 @@ import difflib
 import logging
 import os
 from abc import ABC, abstractmethod
-from typing import Any, Optional
+from typing import Any

 import torch

@@ -17,12 +17,12 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)


-def check_ollama_models(host: str) -> list[str]:
+def check_ollama_models() -> list[str]:
    """Check available Ollama models and return a list"""
    try:
        import requests

-        response = requests.get(f"{host}/api/tags", timeout=5)
+        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        if response.status_code == 200:
            data = response.json()
            return [model["name"] for model in data.get("models", [])]
@@ -309,12 +309,10 @@ def search_hf_models(query: str, limit: int = 10) -> list[str]:
    return search_hf_models_fuzzy(query, limit)


-def validate_model_and_suggest(
-    model_name: str, llm_type: str, host: str = "http://localhost:11434"
-) -> Optional[str]:
+def validate_model_and_suggest(model_name: str, llm_type: str) -> str | None:
    """Validate model name and provide suggestions if invalid"""
    if llm_type == "ollama":
-        available_models = check_ollama_models(host)
+        available_models = check_ollama_models()
        if available_models and model_name not in available_models:
            error_msg = f"Model '{model_name}' not found in your local Ollama installation."

@@ -471,7 +469,7 @@ class OllamaChat(LLMInterface):
                requests.get(host)

            # Pre-check model availability with helpful suggestions
-            model_error = validate_model_and_suggest(model, "ollama", host)
+            model_error = validate_model_and_suggest(model, "ollama")
            if model_error:
                raise ValueError(model_error)

@@ -685,7 +683,7 @@ class HFChat(LLMInterface):
 class OpenAIChat(LLMInterface):
    """LLM interface for OpenAI models."""

-    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
+    def __init__(self, model: str = "gpt-4o", api_key: str | None = None):
        self.model = model
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")

@@ -761,7 +759,7 @@ class SimulatedChat(LLMInterface):
        return "This is a simulated answer from the LLM based on the retrieved context."


-def get_llm(llm_config: Optional[dict[str, Any]] = None) -> LLMInterface:
+def get_llm(llm_config: dict[str, Any] | None = None) -> LLMInterface:
    """
    Factory function to get an LLM interface based on configuration.

--- a/packages/leann-core/src/leann/cli.py
+++ b/packages/leann-core/src/leann/cli.py
@@ -1,7 +1,6 @@
 import argparse
 import asyncio
 from pathlib import Path
-from typing import Optional

 from llama_index.core import SimpleDirectoryReader
 from llama_index.core.node_parser import SentenceSplitter
@@ -75,11 +74,10 @@ class LeannCLI:
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
 Examples:
-  leann build my-docs --docs ./documents                    # Build index named my-docs
-  leann build my-ppts --docs ./ --file-types .pptx,.pdf    # Index only PowerPoint and PDF files
-  leann search my-docs "query"                             # Search in my-docs index
-  leann ask my-docs "question"                             # Ask my-docs index
-  leann list                                              # List all stored indexes
+  leann build my-docs --docs ./documents    # Build index named my-docs
+  leann search my-docs "query"             # Search in my-docs index
+  leann ask my-docs "question"             # Ask my-docs index
+  leann list                              # List all stored indexes
            """,
        )

@@ -95,24 +93,12 @@ Examples:
            "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
        )
        build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
-        build_parser.add_argument(
-            "--embedding-mode",
-            type=str,
-            default="sentence-transformers",
-            choices=["sentence-transformers", "openai", "mlx", "ollama"],
-            help="Embedding backend mode (default: sentence-transformers)",
-        )
        build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
        build_parser.add_argument("--graph-degree", type=int, default=32)
        build_parser.add_argument("--complexity", type=int, default=64)
        build_parser.add_argument("--num-threads", type=int, default=1)
        build_parser.add_argument("--compact", action="store_true", default=True)
        build_parser.add_argument("--recompute", action="store_true", default=True)
-        build_parser.add_argument(
-            "--file-types",
-            type=str,
-            help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). If not specified, uses default supported types.",
-        )

        # Search command
        search_parser = subparsers.add_parser("search", help="Search documents")
@@ -122,12 +108,7 @@ Examples:
        search_parser.add_argument("--complexity", type=int, default=64)
        search_parser.add_argument("--beam-width", type=int, default=1)
        search_parser.add_argument("--prune-ratio", type=float, default=0.0)
-        search_parser.add_argument(
-            "--recompute-embeddings",
-            action="store_true",
-            default=True,
-            help="Recompute embeddings (default: True)",
-        )
+        search_parser.add_argument("--recompute-embeddings", action="store_true")
        search_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
@@ -150,12 +131,7 @@ Examples:
        ask_parser.add_argument("--complexity", type=int, default=32)
        ask_parser.add_argument("--beam-width", type=int, default=1)
        ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
-        ask_parser.add_argument(
-            "--recompute-embeddings",
-            action="store_true",
-            default=True,
-            help="Recompute embeddings (default: True)",
-        )
+        ask_parser.add_argument("--recompute-embeddings", action="store_true")
        ask_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
@@ -278,10 +254,8 @@ Examples:
                    print(f'  leann search {example_name} "your query"')
                    print(f"  leann ask {example_name} --interactive")

-    def load_documents(self, docs_dir: str, custom_file_types: Optional[str] = None):
+    def load_documents(self, docs_dir: str):
        print(f"Loading documents from {docs_dir}...")
-        if custom_file_types:
-            print(f"Using custom file types: {custom_file_types}")

        # Try to use better PDF parsers first
        documents = []
@@ -313,81 +287,66 @@ Examples:
                documents.extend(default_docs)

        # Load other file types with default reader
-        if custom_file_types:
-            # Parse custom file types from comma-separated string
-            code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
-            # Ensure extensions start with a dot
-            code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions]
-        else:
-            # Use default supported file types
-            code_extensions = [
-                # Original document types
-                ".txt",
-                ".md",
-                ".docx",
-                ".pptx",
-                # Code files for Claude Code integration
-                ".py",
-                ".js",
-                ".ts",
-                ".jsx",
-                ".tsx",
-                ".java",
-                ".cpp",
-                ".c",
-                ".h",
-                ".hpp",
-                ".cs",
-                ".go",
-                ".rs",
-                ".rb",
-                ".php",
-                ".swift",
-                ".kt",
-                ".scala",
-                ".r",
-                ".sql",
-                ".sh",
-                ".bash",
-                ".zsh",
-                ".fish",
-                ".ps1",
-                ".bat",
-                # Config and markup files
-                ".json",
-                ".yaml",
-                ".yml",
-                ".xml",
-                ".toml",
-                ".ini",
-                ".cfg",
-                ".conf",
-                ".html",
-                ".css",
-                ".scss",
-                ".less",
-                ".vue",
-                ".svelte",
-                # Data science
-                ".ipynb",
-                ".R",
-                ".py",
-                ".jl",
-            ]
-        # Try to load other file types, but don't fail if none are found
-        try:
-            other_docs = SimpleDirectoryReader(
-                docs_dir,
-                recursive=True,
-                encoding="utf-8",
-                required_exts=code_extensions,
-            ).load_data(show_progress=True)
-            documents.extend(other_docs)
-        except ValueError as e:
-            if "No files found" in str(e):
-                print("No additional files found for other supported types.")
-            else:
-                raise e
+        code_extensions = [
+            # Original document types
+            ".txt",
+            ".md",
+            ".docx",
+            # Code files for Claude Code integration
+            ".py",
+            ".js",
+            ".ts",
+            ".jsx",
+            ".tsx",
+            ".java",
+            ".cpp",
+            ".c",
+            ".h",
+            ".hpp",
+            ".cs",
+            ".go",
+            ".rs",
+            ".rb",
+            ".php",
+            ".swift",
+            ".kt",
+            ".scala",
+            ".r",
+            ".sql",
+            ".sh",
+            ".bash",
+            ".zsh",
+            ".fish",
+            ".ps1",
+            ".bat",
+            # Config and markup files
+            ".json",
+            ".yaml",
+            ".yml",
+            ".xml",
+            ".toml",
+            ".ini",
+            ".cfg",
+            ".conf",
+            ".html",
+            ".css",
+            ".scss",
+            ".less",
+            ".vue",
+            ".svelte",
+            # Data science
+            ".ipynb",
+            ".R",
+            ".py",
+            ".jl",
+        ]
+        other_docs = SimpleDirectoryReader(
+            docs_dir,
+            recursive=True,
+            encoding="utf-8",
+            required_exts=code_extensions,
+        ).load_data(show_progress=True)
+        documents.extend(other_docs)

        all_texts = []

@@ -465,7 +424,7 @@ Examples:
            print(f"Index '{index_name}' already exists. Use --force to rebuild.")
            return

-        all_texts = self.load_documents(docs_dir, args.file_types)
+        all_texts = self.load_documents(docs_dir)
        if not all_texts:
            print("No documents found")
            return
@@ -477,7 +436,6 @@ Examples:
        builder = LeannBuilder(
            backend_name=args.backend,
            embedding_model=args.embedding_model,
-            embedding_mode=args.embedding_mode,
            graph_degree=args.graph_degree,
            complexity=args.complexity,
            is_compact=args.compact,
--- a/packages/leann-core/src/leann/embedding_compute.py
+++ b/packages/leann-core/src/leann/embedding_compute.py
@@ -6,7 +6,6 @@ Preserves all optimization parameters to ensure performance

 import logging
 import os
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any

 import numpy as np
@@ -36,7 +35,7 @@ def compute_embeddings(
    Args:
        texts: List of texts to compute embeddings for
        model_name: Model name
-        mode: Computation mode ('sentence-transformers', 'openai', 'mlx', 'ollama')
+        mode: Computation mode ('sentence-transformers', 'openai', 'mlx')
        is_build: Whether this is a build operation (shows progress bar)
        batch_size: Batch size for processing
        adaptive_optimization: Whether to use adaptive optimization based on batch size
@@ -56,8 +55,6 @@ def compute_embeddings(
        return compute_embeddings_openai(texts, model_name)
    elif mode == "mlx":
        return compute_embeddings_mlx(texts, model_name)
-    elif mode == "ollama":
-        return compute_embeddings_ollama(texts, model_name, is_build=is_build)
    else:
        raise ValueError(f"Unsupported embedding mode: {mode}")

@@ -368,262 +365,3 @@ def compute_embeddings_mlx(chunks: list[str], model_name: str, batch_size: int =

    # Stack numpy arrays
    return np.stack(all_embeddings)
-
-
-def compute_embeddings_ollama(
-    texts: list[str], model_name: str, is_build: bool = False, host: str = "http://localhost:11434"
-) -> np.ndarray:
-    """
-    Compute embeddings using Ollama API.
-
-    Args:
-        texts: List of texts to compute embeddings for
-        model_name: Ollama model name (e.g., "nomic-embed-text", "mxbai-embed-large")
-        is_build: Whether this is a build operation (shows progress bar)
-        host: Ollama host URL (default: http://localhost:11434)
-
-    Returns:
-        Normalized embeddings array, shape: (len(texts), embedding_dim)
-    """
-    try:
-        import requests
-    except ImportError:
-        raise ImportError(
-            "The 'requests' library is required for Ollama embeddings. Install with: uv pip install requests"
-        )
-
-    if not texts:
-        raise ValueError("Cannot compute embeddings for empty text list")
-
-    logger.info(
-        f"Computing embeddings for {len(texts)} texts using Ollama API, model: '{model_name}'"
-    )
-
-    # Check if Ollama is running
-    try:
-        response = requests.get(f"{host}/api/version", timeout=5)
-        response.raise_for_status()
-    except requests.exceptions.ConnectionError:
-        error_msg = (
-            f"❌ Could not connect to Ollama at {host}.\n\n"
-            "Please ensure Ollama is running:\n"
-            "  • macOS/Linux: ollama serve\n"
-            "  • Windows: Make sure Ollama is running in the system tray\n\n"
-            "Installation: https://ollama.com/download"
-        )
-        raise RuntimeError(error_msg)
-    except Exception as e:
-        raise RuntimeError(f"Unexpected error connecting to Ollama: {e}")
-
-    # Check if model exists and provide helpful suggestions
-    try:
-        response = requests.get(f"{host}/api/tags", timeout=5)
-        response.raise_for_status()
-        models = response.json()
-        model_names = [model["name"] for model in models.get("models", [])]
-
-        # Filter for embedding models (models that support embeddings)
-        embedding_models = []
-        suggested_embedding_models = [
-            "nomic-embed-text",
-            "mxbai-embed-large",
-            "bge-m3",
-            "all-minilm",
-            "snowflake-arctic-embed",
-        ]
-
-        for model in model_names:
-            # Check if it's an embedding model (by name patterns or known models)
-            base_name = model.split(":")[0]
-            if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5"]):
-                embedding_models.append(model)
-
-        # Check if model exists (handle versioned names)
-        model_found = any(
-            model_name == name.split(":")[0] or model_name == name for name in model_names
-        )
-
-        if not model_found:
-            error_msg = f"❌ Model '{model_name}' not found in local Ollama.\n\n"
-
-            # Suggest pulling the model
-            error_msg += "📦 To install this embedding model:\n"
-            error_msg += f"   ollama pull {model_name}\n\n"
-
-            # Show available embedding models
-            if embedding_models:
-                error_msg += "✅ Available embedding models:\n"
-                for model in embedding_models[:5]:
-                    error_msg += f"   • {model}\n"
-                if len(embedding_models) > 5:
-                    error_msg += f"   ... and {len(embedding_models) - 5} more\n"
-            else:
-                error_msg += "💡 Popular embedding models to install:\n"
-                for model in suggested_embedding_models[:3]:
-                    error_msg += f"   • ollama pull {model}\n"
-
-            error_msg += "\n📚 Browse more: https://ollama.com/library"
-            raise ValueError(error_msg)
-
-        # Verify the model supports embeddings by testing it
-        try:
-            test_response = requests.post(
-                f"{host}/api/embeddings", json={"model": model_name, "prompt": "test"}, timeout=10
-            )
-            if test_response.status_code != 200:
-                error_msg = (
-                    f"⚠️ Model '{model_name}' exists but may not support embeddings.\n\n"
-                    f"Please use an embedding model like:\n"
-                )
-                for model in suggested_embedding_models[:3]:
-                    error_msg += f"   • {model}\n"
-                raise ValueError(error_msg)
-        except requests.exceptions.RequestException:
-            # If test fails, continue anyway - model might still work
-            pass
-
-    except requests.exceptions.RequestException as e:
-        logger.warning(f"Could not verify model existence: {e}")
-
-    # Process embeddings with optimized concurrent processing
-    import requests
-
-    def get_single_embedding(text_idx_tuple):
-        """Helper function to get embedding for a single text."""
-        text, idx = text_idx_tuple
-        max_retries = 3
-        retry_count = 0
-
-        # Truncate very long texts to avoid API issues
-        truncated_text = text[:8000] if len(text) > 8000 else text
-
-        while retry_count < max_retries:
-            try:
-                response = requests.post(
-                    f"{host}/api/embeddings",
-                    json={"model": model_name, "prompt": truncated_text},
-                    timeout=30,
-                )
-                response.raise_for_status()
-
-                result = response.json()
-                embedding = result.get("embedding")
-
-                if embedding is None:
-                    raise ValueError(f"No embedding returned for text {idx}")
-
-                return idx, embedding
-
-            except requests.exceptions.Timeout:
-                retry_count += 1
-                if retry_count >= max_retries:
-                    logger.warning(f"Timeout for text {idx} after {max_retries} retries")
-                    return idx, None
-
-            except Exception as e:
-                if retry_count >= max_retries - 1:
-                    logger.error(f"Failed to get embedding for text {idx}: {e}")
-                    return idx, None
-                retry_count += 1
-
-        return idx, None
-
-    # Determine if we should use concurrent processing
-    use_concurrent = (
-        len(texts) > 5 and not is_build
-    )  # Don't use concurrent in build mode to avoid overwhelming
-    max_workers = min(4, len(texts))  # Limit concurrent requests to avoid overwhelming Ollama
-
-    all_embeddings = [None] * len(texts)  # Pre-allocate list to maintain order
-    failed_indices = []
-
-    if use_concurrent:
-        logger.info(
-            f"Using concurrent processing with {max_workers} workers for {len(texts)} texts"
-        )
-
-        with ThreadPoolExecutor(max_workers=max_workers) as executor:
-            # Submit all tasks
-            future_to_idx = {
-                executor.submit(get_single_embedding, (text, idx)): idx
-                for idx, text in enumerate(texts)
-            }
-
-            # Add progress bar for concurrent processing
-            try:
-                if is_build or len(texts) > 10:
-                    from tqdm import tqdm
-
-                    futures_iterator = tqdm(
-                        as_completed(future_to_idx),
-                        total=len(texts),
-                        desc="Computing Ollama embeddings",
-                    )
-                else:
-                    futures_iterator = as_completed(future_to_idx)
-            except ImportError:
-                futures_iterator = as_completed(future_to_idx)
-
-            # Collect results as they complete
-            for future in futures_iterator:
-                try:
-                    idx, embedding = future.result()
-                    if embedding is not None:
-                        all_embeddings[idx] = embedding
-                    else:
-                        failed_indices.append(idx)
-                except Exception as e:
-                    idx = future_to_idx[future]
-                    logger.error(f"Exception for text {idx}: {e}")
-                    failed_indices.append(idx)
-
-    else:
-        # Sequential processing with progress bar
-        show_progress = is_build or len(texts) > 10
-
-        try:
-            if show_progress:
-                from tqdm import tqdm
-
-                iterator = tqdm(
-                    enumerate(texts), total=len(texts), desc="Computing Ollama embeddings"
-                )
-            else:
-                iterator = enumerate(texts)
-        except ImportError:
-            iterator = enumerate(texts)
-
-        for idx, text in iterator:
-            result_idx, embedding = get_single_embedding((text, idx))
-            if embedding is not None:
-                all_embeddings[idx] = embedding
-            else:
-                failed_indices.append(idx)
-
-    # Handle failed embeddings
-    if failed_indices:
-        if len(failed_indices) == len(texts):
-            raise RuntimeError("Failed to compute any embeddings")
-
-        logger.warning(f"Failed to compute embeddings for {len(failed_indices)}/{len(texts)} texts")
-
-        # Use zero embeddings as fallback for failed ones
-        valid_embedding = next((e for e in all_embeddings if e is not None), None)
-        if valid_embedding:
-            embedding_dim = len(valid_embedding)
-            for idx in failed_indices:
-                all_embeddings[idx] = [0.0] * embedding_dim
-
-    # Remove None values and convert to numpy array
-    all_embeddings = [e for e in all_embeddings if e is not None]
-
-    # Convert to numpy array and normalize
-    embeddings = np.array(all_embeddings, dtype=np.float32)
-
-    # Normalize embeddings (L2 normalization)
-    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
-    embeddings = embeddings / (norms + 1e-8)  # Add small epsilon to avoid division by zero
-
-    logger.info(f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}")
-
-    return embeddings
--- a/packages/leann-core/src/leann/embedding_server_manager.py
+++ b/packages/leann-core/src/leann/embedding_server_manager.py
@@ -1,13 +1,11 @@
 import atexit
 import logging
 import os
-import signal
 import socket
 import subprocess
 import sys
 import time
 from pathlib import Path
-from typing import Optional

 import psutil

@@ -184,8 +182,8 @@ class EmbeddingServerManager:
                                       e.g., "leann_backend_diskann.embedding_server"
        """
        self.backend_module_name = backend_module_name
-        self.server_process: Optional[subprocess.Popen] = None
-        self.server_port: Optional[int] = None
+        self.server_process: subprocess.Popen | None = None
+        self.server_port: int | None = None
        self._atexit_registered = False

    def start_server(
@@ -312,7 +310,6 @@ class EmbeddingServerManager:
            cwd=project_root,
            stdout=None,  # Direct to console
            stderr=None,  # Direct to console
-            start_new_session=True,  # Create new process group for better cleanup
        )
        self.server_port = port
        logger.info(f"Server process started with PID: {self.server_process.pid}")
@@ -354,14 +351,7 @@ class EmbeddingServerManager:
        logger.info(
            f"Terminating server process (PID: {self.server_process.pid}) for backend {self.backend_module_name}..."
        )
-
-        # Try terminating the whole process group first
-        try:
-            pgid = os.getpgid(self.server_process.pid)
-            os.killpg(pgid, signal.SIGTERM)
-        except Exception:
-            # Fallback to terminating just the process
-            self.server_process.terminate()
+        self.server_process.terminate()

        try:
            self.server_process.wait(timeout=3)
@@ -370,13 +360,7 @@ class EmbeddingServerManager:
            logger.warning(
                f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
            )
-            # Try killing the whole process group
-            try:
-                pgid = os.getpgid(self.server_process.pid)
-                os.killpg(pgid, signal.SIGKILL)
-            except Exception:
-                # Fallback to killing just the process
-                self.server_process.kill()
+            self.server_process.kill()
            try:
                self.server_process.wait(timeout=2)
                logger.info(f"Server process {self.server_process.pid} killed successfully.")
@@ -388,12 +372,7 @@ class EmbeddingServerManager:

        # Clean up process resources to prevent resource tracker warnings
        try:
-            self.server_process.wait(timeout=1)  # Give it one final chance with timeout
-        except subprocess.TimeoutExpired:
-            logger.warning(
-                f"Process {self.server_process.pid} still hanging after all kill attempts"
-            )
-            # Don't wait indefinitely - just abandon it
+            self.server_process.wait(timeout=1)  # Bounded reap so cleanup cannot hang
        except Exception:
            pass

--- a/packages/leann-core/src/leann/interface.py
+++ b/packages/leann-core/src/leann/interface.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np

@@ -34,9 +34,7 @@ class LeannBackendSearcherInterface(ABC):
        pass

    @abstractmethod
-    def _ensure_server_running(
-        self, passages_source_file: str, port: Optional[int], **kwargs
-    ) -> int:
+    def _ensure_server_running(self, passages_source_file: str, port: int | None, **kwargs) -> int:
        """Ensure server is running"""
        pass

@@ -50,7 +48,7 @@ class LeannBackendSearcherInterface(ABC):
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = False,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """Search for nearest neighbors
@@ -76,7 +74,7 @@ class LeannBackendSearcherInterface(ABC):
        self,
        query: str,
        use_server_if_available: bool = True,
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
    ) -> np.ndarray:
        """Compute embedding for a query string

--- a/packages/leann-core/src/leann/mcp.py
+++ b/packages/leann-core/src/leann/mcp.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3

 import json
+import os
 import subprocess
 import sys

@@ -61,6 +62,10 @@ def handle_request(request):
        tool_name = request["params"]["name"]
        args = request["params"].get("arguments", {})

+        # Run leann in the server's own working directory and environment
+        env = os.environ.copy()
+        cwd = os.getcwd()
+
        try:
            if tool_name == "leann_search":
                cmd = [
@@ -71,14 +76,18 @@ def handle_request(request):
                    "--recompute-embeddings",
                    f"--top-k={args.get('top_k', 5)}",
                ]
-                result = subprocess.run(cmd, capture_output=True, text=True)
+                result = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd, env=env)

            elif tool_name == "leann_ask":
                cmd = f'echo "{args["question"]}" | leann ask {args["index_name"]} --recompute-embeddings --llm ollama --model qwen3:8b'
-                result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+                result = subprocess.run(
+                    cmd, shell=True, capture_output=True, text=True, cwd=cwd, env=env
+                )

            elif tool_name == "leann_list":
-                result = subprocess.run(["leann", "list"], capture_output=True, text=True)
+                result = subprocess.run(
+                    ["leann", "list"], capture_output=True, text=True, cwd=cwd, env=env
+                )

            return {
                "jsonrpc": "2.0",
--- a/packages/leann-core/src/leann/searcher_base.py
+++ b/packages/leann-core/src/leann/searcher_base.py
@@ -1,7 +1,7 @@
 import json
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np

@@ -132,15 +132,10 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
        import msgpack
        import zmq

-        context = None
-        socket = None
        try:
            context = zmq.Context()
            socket = context.socket(zmq.REQ)
-            socket.setsockopt(zmq.LINGER, 0)  # Don't block on close
-            socket.setsockopt(zmq.RCVTIMEO, 300000)
-            socket.setsockopt(zmq.SNDTIMEO, 300000)
-            socket.setsockopt(zmq.IMMEDIATE, 1)
+            socket.setsockopt(zmq.RCVTIMEO, 30000)  # 30 second timeout
            socket.connect(f"tcp://localhost:{zmq_port}")

            # Send embedding request
@@ -152,6 +147,9 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
            response_bytes = socket.recv()
            response = msgpack.unpackb(response_bytes)

+            socket.close()
+            context.term()
+
            # Convert response to numpy array
            if isinstance(response, list) and len(response) > 0:
                return np.array(response, dtype=np.float32)
@@ -160,10 +158,6 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):

        except Exception as e:
            raise RuntimeError(f"Failed to compute embeddings via server: {e}")
-        finally:
-            if socket:
-                socket.close()
-            # Don't call context.term() - this was causing hangs

    @abstractmethod
    def search(
@@ -175,7 +169,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = False,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """
@@ -197,15 +191,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
        """
        pass

-    def cleanup(self):
-        """Cleanup resources including embedding server."""
+    def __del__(self):
+        """Ensures the embedding server is stopped when the searcher is destroyed."""
        if hasattr(self, "embedding_server_manager"):
            self.embedding_server_manager.stop_server()
-
-    def __del__(self):
-        """Ensures resources are cleaned up when the searcher is destroyed."""
-        try:
-            self.cleanup()
-        except Exception:
-            # Ignore errors during destruction
-            pass
--- a/packages/leann-mcp/README.md
+++ b/packages/leann-mcp/README.md
@@ -1,25 +1,18 @@
-# 🔥 LEANN Claude Code Integration
+# LEANN Claude Code Integration

-Transform your development workflow with intelligent code assistance using LEANN's semantic search directly in Claude Code.
+Intelligent code assistance using LEANN's vector search directly in Claude Code.

 ## Prerequisites

-**Step 1:** First, complete the basic LEANN installation following the [📦 Installation guide](../../README.md#installation) in the root README:
+First, install LEANN CLI globally:

 ```bash
-uv venv
-source .venv/bin/activate
-uv pip install leann
-```
-
-**Step 2:** Install LEANN globally for MCP integration:
-```bash
-uv tool install leann-core
+uv tool install leann
 ```

 This makes the `leann` command available system-wide, which `leann_mcp` requires.

-## 🚀 Quick Setup
+## Quick Setup

 Add the LEANN MCP server to Claude Code:

@@ -27,25 +20,23 @@ Add the LEANN MCP server to Claude Code:
 claude mcp add leann-server -- leann_mcp
 ```

-## 🛠️ Available Tools
+## Available Tools

-Once connected, you'll have access to these powerful semantic search tools in Claude Code:
+- **`leann_list`** - List available indexes across all projects
+- **`leann_search`** - Search code and documents with semantic queries
+- **`leann_ask`** - Ask questions and get AI-powered answers from your codebase

- **`leann_list`** - List all available indexes across your projects
- **`leann_search`** - Perform semantic searches across code and documents
- **`leann_ask`** - Ask natural language questions and get AI-powered answers from your codebase
-
-## 🎯 Quick Start Example
+## Quick Start

 ```bash
-# Build an index for your project (change to your actual path)
-leann build my-project --docs ./
+# Build an index for your project
+leann build my-project

 # Start Claude Code
 claude
 ```

-**Try this in Claude Code:**
+Then in Claude Code:
 ```
 Help me understand this codebase. List available indexes and search for authentication patterns.
 ```
@@ -55,37 +46,24 @@ Help me understand this codebase. List available indexes and search for authenti
 </p>


-## 🧠 How It Works
+## How It Works

-The integration consists of three key components working seamlessly together:
-
- **`leann`** - Core CLI tool for indexing and searching (installed globally via `uv tool install`)
+- **`leann`** - Core CLI tool for indexing and searching (installed globally)
 - **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration
- **Claude Code** - Calls `leann_mcp`, which executes `leann` commands and returns intelligent results
+- Claude Code calls `leann_mcp`, which executes `leann` commands and returns results

-## 📁 File Support
+## File Support

-LEANN understands **30+ file types** including:
- **Programming**: Python, JavaScript, TypeScript, Java, Go, Rust, C++, C#
- **Data**: SQL, YAML, JSON, CSV, XML
- **Documentation**: Markdown, TXT, PDF
- **And many more!**
+Python, JavaScript, TypeScript, Java, Go, Rust, SQL, YAML, JSON, and more (30+ file types in total).

-## 💾 Storage & Organization
+## Storage

- **Project indexes**: Stored in `.leann/` directory (just like `.git`)
- **Global registry**: Project tracking at `~/.leann/projects.json`
- **Multi-project support**: Switch between different codebases seamlessly
- **Portable**: Transfer indexes between machines with minimal overhead
+- Project indexes in `.leann/` directory (like `.git`)
+- Global project registry at `~/.leann/projects.json`
+- Multi-project support built-in

-## 🗑️ Uninstalling
-
-To remove the LEANN MCP server from Claude Code:
+## Removing

 ```bash
 claude mcp remove leann-server
 ```
-To remove LEANN
-```
-uv pip uninstall leann leann-backend-hnsw leann-core
-```
--- a/packages/leann/pyproject.toml
+++ b/packages/leann/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann"
-version = "0.2.5"
+version = "0.2.1"
 description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
 readme = "README.md"
 requires-python = ">=3.9"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
    "pypdfium2>=4.30.0",
    # LlamaIndex core and readers - updated versions
    "llama-index>=0.12.44",
-    "llama-index-readers-file>=0.4.0", # Essential for PDF parsing
+    "llama-index-readers-file>=0.4.0",  # Essential for PDF parsing
    # "llama-index-readers-docling",  # Requires Python >= 3.10
    # "llama-index-node-parser-docling",  # Requires Python >= 3.10
    "llama-index-vector-stores-faiss>=0.4.0",
@@ -43,7 +43,6 @@ dependencies = [
    "mlx>=0.26.3; sys_platform == 'darwin'",
    "mlx-lm>=0.26.0; sys_platform == 'darwin'",
    "psutil>=5.8.0",
-    "pybind11>=3.0.0",
 ]

 [project.optional-dependencies]
@@ -52,7 +51,7 @@ dev = [
    "pytest-cov>=4.0",
    "pytest-xdist>=3.0",  # For parallel test execution
    "black>=23.0",
-    "ruff==0.12.7",  # Fixed version to ensure consistent formatting across all environments
+    "ruff>=0.1.0",
    "matplotlib",
    "huggingface-hub>=0.20.0",
    "pre-commit>=3.5.0",
@@ -60,7 +59,7 @@ dev = [

 test = [
    "pytest>=7.0",
-    "pytest-timeout>=2.0",  # Simple timeout protection for CI
+    "pytest-timeout>=2.0",
    "llama-index-core>=0.12.0",
    "llama-index-readers-file>=0.4.0",
    "python-dotenv>=1.0.0",
@@ -89,7 +88,7 @@ leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = tr
 leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }

 [tool.ruff]
-target-version = "py39"
+target-version = "py310"
 line-length = 100
 extend-exclude = [
    "third_party",
@@ -152,7 +151,7 @@ markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "openai: marks tests that require OpenAI API key",
 ]
-timeout = 300  # Simple timeout for CI safety (5 minutes)
+timeout = 600
 addopts = [
    "-v",
    "--tb=short",
--- a/tests/README.md
+++ b/tests/README.md
@@ -6,11 +6,10 @@ This directory contains automated tests for the LEANN project using pytest.

 ### `test_readme_examples.py`
 Tests the examples shown in README.md:
- The basic example code that users see first (parametrized for both HNSW and DiskANN backends)
+- The basic example code that users see first
 - Import statements work correctly
 - Different backend options (HNSW, DiskANN)
- Different LLM configuration options (parametrized for both backends)
- **All main README examples are tested with both HNSW and DiskANN backends using pytest parametrization**
+- Different LLM configuration options

 ### `test_basic.py`
 Basic functionality tests that verify:
@@ -26,16 +25,6 @@ Tests the document RAG example functionality:
 - Tests error handling with invalid parameters
 - Verifies that normalized embeddings are detected and cosine distance is used

-### `test_diskann_partition.py`
-Tests DiskANN graph partitioning functionality:
- Tests DiskANN index building without partitioning (baseline)
- Tests automatic graph partitioning with `is_recompute=True`
- Verifies that partition files are created and large files are cleaned up for storage saving
- Tests search functionality with partitioned indices
- Validates medoid and max_base_norm file generation and usage
- Includes performance comparison between DiskANN (with partition) and HNSW
- **Note**: These tests are skipped in CI due to hardware requirements and computation time
-
 ## Running Tests

 ### Install test dependencies:
@@ -65,23 +54,15 @@ pytest tests/ -m "not openai"

 # Skip slow tests
 pytest tests/ -m "not slow"
-
-# Run DiskANN partition tests (requires local machine, not CI)
-pytest tests/test_diskann_partition.py
 ```

 ### Run with specific backend:
 ```bash
 # Test only HNSW backend
 pytest tests/test_basic.py::test_backend_basic[hnsw]
-pytest tests/test_readme_examples.py::test_readme_basic_example[hnsw]

 # Test only DiskANN backend
 pytest tests/test_basic.py::test_backend_basic[diskann]
-pytest tests/test_readme_examples.py::test_readme_basic_example[diskann]
-
-# All DiskANN tests (parametrized + specialized partition tests)
-pytest tests/ -k diskann
 ```

 ## CI/CD Integration
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,41 +0,0 @@
-"""Pytest configuration and fixtures for LEANN tests."""
-
-import os
-
-import pytest
-
-
-@pytest.fixture(autouse=True)
-def test_environment():
-    """Set up test environment variables."""
-    # Mark as test environment to skip memory-intensive operations
-    os.environ["CI"] = "true"
-    yield
-
-
-@pytest.fixture(scope="session", autouse=True)
-def cleanup_session():
-    """Session-level cleanup to ensure no hanging processes."""
-    yield
-
-    # Basic cleanup after all tests
-    try:
-        import os
-
-        import psutil
-
-        current_process = psutil.Process(os.getpid())
-        children = current_process.children(recursive=True)
-
-        for child in children:
-            try:
-                child.terminate()
-            except psutil.NoSuchProcess:
-                pass
-
-        # Give them time to terminate gracefully
-        psutil.wait_procs(children, timeout=3)
-
-    except Exception:
-        # Don't fail tests due to cleanup errors
-        pass
--- a/tests/test_diskann_partition.py
+++ b/tests/test_diskann_partition.py
@@ -1,369 +0,0 @@
-"""
-Test DiskANN graph partitioning functionality.
-
-Tests the automatic graph partitioning feature that was implemented to save
-storage space by partitioning large DiskANN indices and safely deleting
-redundant files while maintaining search functionality.
-"""
-
-import os
-import tempfile
-from pathlib import Path
-
-import pytest
-
-
-@pytest.mark.skipif(
-    os.environ.get("CI") == "true",
-    reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
-)
-def test_diskann_without_partition():
-    """Test DiskANN index building without partition (baseline)."""
-    from leann.api import LeannBuilder, LeannSearcher
-
-    with tempfile.TemporaryDirectory() as temp_dir:
-        index_path = str(Path(temp_dir) / "test_no_partition.leann")
-
-        # Test data - enough to trigger index building
-        texts = [
-            f"Document {i} discusses topic {i % 10} with detailed analysis of subject {i // 10}."
-            for i in range(500)
-        ]
-
-        # Build without partition (is_recompute=False)
-        builder = LeannBuilder(
-            backend_name="diskann",
-            embedding_model="facebook/contriever",
-            embedding_mode="sentence-transformers",
-            num_neighbors=32,
-            search_list_size=50,
-            is_recompute=False,  # No partition
-        )
-
-        for text in texts:
-            builder.add_text(text)
-
-        builder.build_index(index_path)
-
-        # Verify index was created
-        index_dir = Path(index_path).parent
-        assert index_dir.exists()
-
-        # Check that traditional DiskANN files exist
-        index_prefix = Path(index_path).stem
-        # Core DiskANN files (beam search index may not be created for small datasets)
-        required_files = [
-            f"{index_prefix}_disk.index",
-            f"{index_prefix}_pq_compressed.bin",
-            f"{index_prefix}_pq_pivots.bin",
-        ]
-
-        # Check all generated files first for debugging
-        generated_files = [f.name for f in index_dir.glob(f"{index_prefix}*")]
-        print(f"Generated files: {generated_files}")
-
-        for required_file in required_files:
-            file_path = index_dir / required_file
-            assert file_path.exists(), f"Required file {required_file} not found"
-
-        # Ensure no partition files exist in non-partition mode
-        partition_files = [f"{index_prefix}_disk_graph.index", f"{index_prefix}_partition.bin"]
-
-        for partition_file in partition_files:
-            file_path = index_dir / partition_file
-            assert not file_path.exists(), (
-                f"Partition file {partition_file} should not exist in non-partition mode"
-            )
-
-        # Test search functionality
-        searcher = LeannSearcher(index_path)
-        results = searcher.search("topic 3 analysis", top_k=3)
-
-        assert len(results) > 0
-        assert all(result.score is not None and result.score != float("-inf") for result in results)
-
-
-@pytest.mark.skipif(
-    os.environ.get("CI") == "true",
-    reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
-)
-def test_diskann_with_partition():
-    """Test DiskANN index building with automatic graph partitioning."""
-    from leann.api import LeannBuilder
-
-    with tempfile.TemporaryDirectory() as temp_dir:
-        index_path = str(Path(temp_dir) / "test_with_partition.leann")
-
-        # Test data - enough to trigger partitioning
-        texts = [
-            f"Document {i} explores subject {i % 15} with comprehensive coverage of area {i // 15}."
-            for i in range(500)
-        ]
-
-        # Build with partition (is_recompute=True)
-        builder = LeannBuilder(
-            backend_name="diskann",
-            embedding_model="facebook/contriever",
-            embedding_mode="sentence-transformers",
-            num_neighbors=32,
-            search_list_size=50,
-            is_recompute=True,  # Enable automatic partitioning
-        )
-
-        for text in texts:
-            builder.add_text(text)
-
-        builder.build_index(index_path)
-
-        # Verify index was created
-        index_dir = Path(index_path).parent
-        assert index_dir.exists()
-
-        # Check that partition files exist
-        index_prefix = Path(index_path).stem
-        partition_files = [
-            f"{index_prefix}_disk_graph.index",  # Partitioned graph
-            f"{index_prefix}_partition.bin",  # Partition metadata
-            f"{index_prefix}_pq_compressed.bin",
-            f"{index_prefix}_pq_pivots.bin",
-        ]
-
-        for partition_file in partition_files:
-            file_path = index_dir / partition_file
-            assert file_path.exists(), f"Expected partition file {partition_file} not found"
-
-        # Check that large files were cleaned up (storage saving goal)
-        large_files = [f"{index_prefix}_disk.index", f"{index_prefix}_disk_beam_search.index"]
-
-        for large_file in large_files:
-            file_path = index_dir / large_file
-            assert not file_path.exists(), (
-                f"Large file {large_file} should have been deleted for storage saving"
-            )
-
-        # Verify required auxiliary files for partition mode exist
-        required_files = [
-            f"{index_prefix}_disk.index_medoids.bin",
-            f"{index_prefix}_disk.index_max_base_norm.bin",
-        ]
-
-        for req_file in required_files:
-            file_path = index_dir / req_file
-            assert file_path.exists(), (
-                f"Required auxiliary file {req_file} missing for partition mode"
-            )
-
-
-@pytest.mark.skipif(
-    os.environ.get("CI") == "true",
-    reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
-)
-def test_diskann_partition_search_functionality():
-    """Test that search works correctly with partitioned indices."""
-    from leann.api import LeannBuilder, LeannSearcher
-
-    with tempfile.TemporaryDirectory() as temp_dir:
-        index_path = str(Path(temp_dir) / "test_partition_search.leann")
-
-        # Create diverse test data
-        texts = [
-            "LEANN is a storage-efficient approximate nearest neighbor search system.",
-            "Graph partitioning helps reduce memory usage in large scale vector search.",
-            "DiskANN provides high-performance disk-based approximate nearest neighbor search.",
-            "Vector embeddings enable semantic search over unstructured text data.",
-            "Approximate nearest neighbor algorithms trade accuracy for speed and storage.",
-        ] * 100  # Repeat to get enough data
-
-        # Build with partitioning
-        builder = LeannBuilder(
-            backend_name="diskann",
-            embedding_model="facebook/contriever",
-            embedding_mode="sentence-transformers",
-            is_recompute=True,  # Enable partitioning
-        )
-
-        for text in texts:
-            builder.add_text(text)
-
-        builder.build_index(index_path)
-
-        # Test search with partitioned index
-        searcher = LeannSearcher(index_path)
-
-        # Test various queries
-        test_queries = [
-            ("vector search algorithms", 5),
-            ("LEANN storage efficiency", 3),
-            ("graph partitioning memory", 4),
-            ("approximate nearest neighbor", 7),
-        ]
-
-        for query, top_k in test_queries:
-            results = searcher.search(query, top_k=top_k)
-
-            # Verify search results
-            assert len(results) == top_k, f"Expected {top_k} results for query '{query}'"
-            assert all(result.score is not None for result in results), (
-                "All results should have scores"
-            )
-            assert all(result.score != float("-inf") for result in results), (
-                "No result should have -inf score"
-            )
-            assert all(result.text is not None for result in results), (
-                "All results should have text"
-            )
-
-            # Scores should be in descending order (higher similarity first)
-            scores = [result.score for result in results]
-            assert scores == sorted(scores, reverse=True), (
-                "Results should be sorted by score descending"
-            )
-
-
-@pytest.mark.skipif(
-    os.environ.get("CI") == "true",
-    reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
-)
-def test_diskann_medoid_and_norm_files():
-    """Test that medoid and max_base_norm files are correctly generated and used."""
-    import struct
-
-    from leann.api import LeannBuilder, LeannSearcher
-
-    with tempfile.TemporaryDirectory() as temp_dir:
-        index_path = str(Path(temp_dir) / "test_medoid_norm.leann")
-
-        # Small but sufficient dataset
-        texts = [f"Test document {i} with content about subject {i % 10}." for i in range(200)]
-
-        builder = LeannBuilder(
-            backend_name="diskann",
-            embedding_model="facebook/contriever",
-            embedding_mode="sentence-transformers",
-            is_recompute=True,
-        )
-
-        for text in texts:
-            builder.add_text(text)
-
-        builder.build_index(index_path)
-
-        index_dir = Path(index_path).parent
-        index_prefix = Path(index_path).stem
-
-        # Test medoids file
-        medoids_file = index_dir / f"{index_prefix}_disk.index_medoids.bin"
-        assert medoids_file.exists(), "Medoids file should be generated"
-
-        # Read and validate medoids file format
-        with open(medoids_file, "rb") as f:
-            nshards = struct.unpack("<I", f.read(4))[0]
-            one_val = struct.unpack("<I", f.read(4))[0]
-            medoid_id = struct.unpack("<I", f.read(4))[0]
-
-            assert nshards == 1, "Single-shot build should have 1 shard"
-            assert one_val == 1, "Expected value should be 1"
-            assert medoid_id >= 0, "Medoid ID should be valid (not hardcoded 0)"
-
-        # Test max_base_norm file
-        norm_file = index_dir / f"{index_prefix}_disk.index_max_base_norm.bin"
-        assert norm_file.exists(), "Max base norm file should be generated"
-
-        # Read and validate norm file
-        with open(norm_file, "rb") as f:
-            npts = struct.unpack("<I", f.read(4))[0]
-            ndims = struct.unpack("<I", f.read(4))[0]
-            norm_val = struct.unpack("<f", f.read(4))[0]
-
-            assert npts == 1, "Should have 1 norm point"
-            assert ndims == 1, "Should have 1 dimension"
-            assert norm_val > 0, "Norm value should be positive"
-            assert norm_val != float("inf"), "Norm value should be finite"
-
-        # Test that search works with these files
-        searcher = LeannSearcher(index_path)
-        results = searcher.search("test subject", top_k=3)
-
-        # Verify that scores are not -inf (which indicates norm file was loaded correctly)
-        assert len(results) > 0
-        assert all(result.score != float("-inf") for result in results), (
-            "Scores should not be -inf when norm file is correct"
-        )
-
-
-@pytest.mark.skipif(
-    os.environ.get("CI") == "true",
-    reason="Skip performance comparison in CI - requires significant compute time",
-)
-def test_diskann_vs_hnsw_performance():
-    """Compare DiskANN (with partition) vs HNSW performance."""
-    import time
-
-    from leann.api import LeannBuilder, LeannSearcher
-
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Test data
-        texts = [
-            f"Performance test document {i} covering topic {i % 20} in detail." for i in range(1000)
-        ]
-        query = "performance topic test"
-
-        # Test DiskANN with partitioning
-        diskann_path = str(Path(temp_dir) / "perf_diskann.leann")
-        diskann_builder = LeannBuilder(
-            backend_name="diskann",
-            embedding_model="facebook/contriever",
-            embedding_mode="sentence-transformers",
-            is_recompute=True,
-        )
-
-        for text in texts:
-            diskann_builder.add_text(text)
-
-        start_time = time.time()
-        diskann_builder.build_index(diskann_path)
-
-        # Test HNSW
-        hnsw_path = str(Path(temp_dir) / "perf_hnsw.leann")
-        hnsw_builder = LeannBuilder(
-            backend_name="hnsw",
-            embedding_model="facebook/contriever",
-            embedding_mode="sentence-transformers",
-            is_recompute=True,
-        )
-
-        for text in texts:
-            hnsw_builder.add_text(text)
-
-        start_time = time.time()
-        hnsw_builder.build_index(hnsw_path)
-
-        # Compare search performance
-        diskann_searcher = LeannSearcher(diskann_path)
-        hnsw_searcher = LeannSearcher(hnsw_path)
-
-        # Warm up searches
-        diskann_searcher.search(query, top_k=5)
-        hnsw_searcher.search(query, top_k=5)
-
-        # Timed searches
-        start_time = time.time()
-        diskann_results = diskann_searcher.search(query, top_k=10)
-        diskann_search_time = time.time() - start_time
-
-        start_time = time.time()
-        hnsw_results = hnsw_searcher.search(query, top_k=10)
-        hnsw_search_time = time.time() - start_time
-
-        # Basic assertions
-        assert len(diskann_results) == 10
-        assert len(hnsw_results) == 10
-        assert all(r.score != float("-inf") for r in diskann_results)
-        assert all(r.score != float("-inf") for r in hnsw_results)
-
-        # Performance ratio (informational)
-        if hnsw_search_time > 0:
-            speed_ratio = hnsw_search_time / diskann_search_time
-            print(f"DiskANN search time: {diskann_search_time:.4f}s")
-            print(f"HNSW search time: {hnsw_search_time:.4f}s")
-            print(f"DiskANN is {speed_ratio:.2f}x faster than HNSW")
--- a/tests/test_readme_examples.py
+++ b/tests/test_readme_examples.py
@@ -10,9 +10,8 @@ from pathlib import Path
 import pytest


-@pytest.mark.parametrize("backend_name", ["hnsw", "diskann"])
-def test_readme_basic_example(backend_name):
-    """Test the basic example from README.md with both backends."""
+def test_readme_basic_example():
+    """Test the basic example from README.md."""
    # Skip on macOS CI due to MPS environment issues with all-MiniLM-L6-v2
    if os.environ.get("CI") == "true" and platform.system() == "Darwin":
        pytest.skip("Skipping on macOS CI due to MPS environment issues with all-MiniLM-L6-v2")
@@ -22,18 +21,18 @@ def test_readme_basic_example(backend_name):
    from leann.api import SearchResult

    with tempfile.TemporaryDirectory() as temp_dir:
-        INDEX_PATH = str(Path(temp_dir) / f"demo_{backend_name}.leann")
+        INDEX_PATH = str(Path(temp_dir) / "demo.leann")

        # Build an index
        # In CI, use a smaller model to avoid memory issues
        if os.environ.get("CI") == "true":
            builder = LeannBuilder(
-                backend_name=backend_name,
+                backend_name="hnsw",
                embedding_model="sentence-transformers/all-MiniLM-L6-v2",  # Smaller model
                dimensions=384,  # Smaller dimensions
            )
        else:
-            builder = LeannBuilder(backend_name=backend_name)
+            builder = LeannBuilder(backend_name="hnsw")
        builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
        builder.add_text("Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back")
        builder.build_index(INDEX_PATH)
@@ -53,9 +52,6 @@ def test_readme_basic_example(backend_name):
        # Verify search results
        assert len(results) > 0
        assert isinstance(results[0], SearchResult)
-        assert results[0].score != float("-inf"), (
-            f"should return valid scores, got {results[0].score}"
-        )
        # The second text about banana-crocodile should be more relevant
        assert "banana" in results[0].text or "crocodile" in results[0].text

@@ -114,31 +110,26 @@ def test_backend_options():
        assert len(list(Path(diskann_path).parent.glob(f"{Path(diskann_path).stem}.*"))) > 0


-@pytest.mark.parametrize("backend_name", ["hnsw", "diskann"])
-def test_llm_config_simulated(backend_name):
-    """Test simulated LLM configuration option with both backends."""
+def test_llm_config_simulated():
+    """Test simulated LLM configuration option."""
    # Skip on macOS CI due to MPS environment issues with all-MiniLM-L6-v2
    if os.environ.get("CI") == "true" and platform.system() == "Darwin":
        pytest.skip("Skipping on macOS CI due to MPS environment issues with all-MiniLM-L6-v2")

-    # Skip DiskANN tests in CI due to hardware requirements
-    if os.environ.get("CI") == "true" and backend_name == "diskann":
-        pytest.skip("Skip DiskANN tests in CI - requires specific hardware and large memory")
-
    from leann import LeannBuilder, LeannChat

    with tempfile.TemporaryDirectory() as temp_dir:
        # Build a simple index
-        index_path = str(Path(temp_dir) / f"test_{backend_name}.leann")
+        index_path = str(Path(temp_dir) / "test.leann")
        # Use smaller model in CI to avoid memory issues
        if os.environ.get("CI") == "true":
            builder = LeannBuilder(
-                backend_name=backend_name,
+                backend_name="hnsw",
                embedding_model="sentence-transformers/all-MiniLM-L6-v2",
                dimensions=384,
            )
        else:
-            builder = LeannBuilder(backend_name=backend_name)
+            builder = LeannBuilder(backend_name="hnsw")
        builder.add_text("Test document for LLM testing")
        builder.build_index(index_path)

--- a/uv.lock
+++ b/uv.lock
@@ -2155,7 +2155,7 @@ wheels = [

 [[package]]
 name = "leann-backend-diskann"
-version = "0.2.5"
+version = "0.2.1"
 source = { editable = "packages/leann-backend-diskann" }
 dependencies = [
    { name = "leann-core" },
@@ -2167,14 +2167,14 @@ dependencies = [

 [package.metadata]
 requires-dist = [
-    { name = "leann-core", specifier = "==0.2.5" },
+    { name = "leann-core", specifier = "==0.2.1" },
    { name = "numpy" },
    { name = "protobuf", specifier = ">=3.19.0" },
 ]

 [[package]]
 name = "leann-backend-hnsw"
-version = "0.2.5"
+version = "0.2.1"
 source = { editable = "packages/leann-backend-hnsw" }
 dependencies = [
    { name = "leann-core" },
@@ -2187,7 +2187,7 @@ dependencies = [

 [package.metadata]
 requires-dist = [
-    { name = "leann-core", specifier = "==0.2.5" },
+    { name = "leann-core", specifier = "==0.2.1" },
    { name = "msgpack", specifier = ">=1.0.0" },
    { name = "numpy" },
    { name = "pyzmq", specifier = ">=23.0.0" },
@@ -2195,7 +2195,7 @@ requires-dist = [

 [[package]]
 name = "leann-core"
-version = "0.2.5"
+version = "0.2.1"
 source = { editable = "packages/leann-core" }
 dependencies = [
    { name = "accelerate" },
@@ -2281,7 +2281,6 @@ dependencies = [
    { name = "pdfplumber" },
    { name = "protobuf" },
    { name = "psutil" },
-    { name = "pybind11" },
    { name = "pymupdf" },
    { name = "pypdf2" },
    { name = "pypdfium2" },
@@ -2361,7 +2360,6 @@ requires-dist = [
    { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.5.0" },
    { name = "protobuf", specifier = "==4.25.3" },
    { name = "psutil", specifier = ">=5.8.0" },
-    { name = "pybind11", specifier = ">=3.0.0" },
    { name = "pymupdf", specifier = ">=1.26.0" },
    { name = "pypdf2", specifier = ">=3.0.0" },
    { name = "pypdfium2", specifier = ">=4.30.0" },
@@ -2373,7 +2371,7 @@ requires-dist = [
    { name = "python-docx", marker = "extra == 'documents'", specifier = ">=0.8.11" },
    { name = "python-dotenv", marker = "extra == 'test'", specifier = ">=1.0.0" },
    { name = "requests", specifier = ">=2.25.0" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = "==0.12.7" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" },
    { name = "sentence-transformers", specifier = ">=2.2.0" },
    { name = "sentence-transformers", marker = "extra == 'test'", specifier = ">=2.2.0" },
    { name = "sglang" },
@@ -4205,15 +4203,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/10/15/6b30e77872012bbfe8265d42a01d5b3c17ef0ac0f2fae531ad91b6a6c02e/pyarrow-21.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdc4c17afda4dab2a9c0b79148a43a7f4e1094916b3e18d8975bfd6d6d52241f", size = 26227521, upload-time = "2025-07-18T00:57:29.119Z" },
 ]

-[[package]]
-name = "pybind11"
-version = "3.0.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ef/83/698d120e257a116f2472c710932023ad779409adf2734d2e940f34eea2c5/pybind11-3.0.0.tar.gz", hash = "sha256:c3f07bce3ada51c3e4b76badfa85df11688d12c46111f9d242bc5c9415af7862", size = 544819, upload-time = "2025-07-10T16:52:09.335Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/41/9c/85f50a5476832c3efc67b6d7997808388236ae4754bf53e1749b3bc27577/pybind11-3.0.0-py3-none-any.whl", hash = "sha256:7c5cac504da5a701b5163f0e6a7ba736c713a096a5378383c5b4b064b753f607", size = 292118, upload-time = "2025-07-10T16:52:07.828Z" },
-]
-
 [[package]]
 name = "pycparser"
 version = "2.22"
@@ -4884,27 +4873,27 @@ wheels = [

 [[package]]
 name = "ruff"
-version = "0.12.7"
+version = "0.12.5"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a1/81/0bd3594fa0f690466e41bd033bdcdf86cba8288345ac77ad4afbe5ec743a/ruff-0.12.7.tar.gz", hash = "sha256:1fc3193f238bc2d7968772c82831a4ff69252f673be371fb49663f0068b7ec71", size = 5197814, upload-time = "2025-07-29T22:32:35.877Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/30/cd/01015eb5034605fd98d829c5839ec2c6b4582b479707f7c1c2af861e8258/ruff-0.12.5.tar.gz", hash = "sha256:b209db6102b66f13625940b7f8c7d0f18e20039bb7f6101fbdac935c9612057e", size = 5170722, upload-time = "2025-07-24T13:26:37.456Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e1/d2/6cb35e9c85e7a91e8d22ab32ae07ac39cc34a71f1009a6f9e4a2a019e602/ruff-0.12.7-py3-none-linux_armv6l.whl", hash = "sha256:76e4f31529899b8c434c3c1dede98c4483b89590e15fb49f2d46183801565303", size = 11852189, upload-time = "2025-07-29T22:31:41.281Z" },
-    { url = "https://files.pythonhosted.org/packages/63/5b/a4136b9921aa84638f1a6be7fb086f8cad0fde538ba76bda3682f2599a2f/ruff-0.12.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:789b7a03e72507c54fb3ba6209e4bb36517b90f1a3569ea17084e3fd295500fb", size = 12519389, upload-time = "2025-07-29T22:31:54.265Z" },
-    { url = "https://files.pythonhosted.org/packages/a8/c9/3e24a8472484269b6b1821794141f879c54645a111ded4b6f58f9ab0705f/ruff-0.12.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e1c2a3b8626339bb6369116e7030a4cf194ea48f49b64bb505732a7fce4f4e3", size = 11743384, upload-time = "2025-07-29T22:31:59.575Z" },
-    { url = "https://files.pythonhosted.org/packages/26/7c/458dd25deeb3452c43eaee853c0b17a1e84169f8021a26d500ead77964fd/ruff-0.12.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32dec41817623d388e645612ec70d5757a6d9c035f3744a52c7b195a57e03860", size = 11943759, upload-time = "2025-07-29T22:32:01.95Z" },
-    { url = "https://files.pythonhosted.org/packages/7f/8b/658798472ef260ca050e400ab96ef7e85c366c39cf3dfbef4d0a46a528b6/ruff-0.12.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47ef751f722053a5df5fa48d412dbb54d41ab9b17875c6840a58ec63ff0c247c", size = 11654028, upload-time = "2025-07-29T22:32:04.367Z" },
-    { url = "https://files.pythonhosted.org/packages/a8/86/9c2336f13b2a3326d06d39178fd3448dcc7025f82514d1b15816fe42bfe8/ruff-0.12.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a828a5fc25a3efd3e1ff7b241fd392686c9386f20e5ac90aa9234a5faa12c423", size = 13225209, upload-time = "2025-07-29T22:32:06.952Z" },
-    { url = "https://files.pythonhosted.org/packages/76/69/df73f65f53d6c463b19b6b312fd2391dc36425d926ec237a7ed028a90fc1/ruff-0.12.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5726f59b171111fa6a69d82aef48f00b56598b03a22f0f4170664ff4d8298efb", size = 14182353, upload-time = "2025-07-29T22:32:10.053Z" },
-    { url = "https://files.pythonhosted.org/packages/58/1e/de6cda406d99fea84b66811c189b5ea139814b98125b052424b55d28a41c/ruff-0.12.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74e6f5c04c4dd4aba223f4fe6e7104f79e0eebf7d307e4f9b18c18362124bccd", size = 13631555, upload-time = "2025-07-29T22:32:12.644Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/ae/625d46d5164a6cc9261945a5e89df24457dc8262539ace3ac36c40f0b51e/ruff-0.12.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d0bfe4e77fba61bf2ccadf8cf005d6133e3ce08793bbe870dd1c734f2699a3e", size = 12667556, upload-time = "2025-07-29T22:32:15.312Z" },
-    { url = "https://files.pythonhosted.org/packages/55/bf/9cb1ea5e3066779e42ade8d0cd3d3b0582a5720a814ae1586f85014656b6/ruff-0.12.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06bfb01e1623bf7f59ea749a841da56f8f653d641bfd046edee32ede7ff6c606", size = 12939784, upload-time = "2025-07-29T22:32:17.69Z" },
-    { url = "https://files.pythonhosted.org/packages/55/7f/7ead2663be5627c04be83754c4f3096603bf5e99ed856c7cd29618c691bd/ruff-0.12.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e41df94a957d50083fd09b916d6e89e497246698c3f3d5c681c8b3e7b9bb4ac8", size = 11771356, upload-time = "2025-07-29T22:32:20.134Z" },
-    { url = "https://files.pythonhosted.org/packages/17/40/a95352ea16edf78cd3a938085dccc55df692a4d8ba1b3af7accbe2c806b0/ruff-0.12.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4000623300563c709458d0ce170c3d0d788c23a058912f28bbadc6f905d67afa", size = 11612124, upload-time = "2025-07-29T22:32:22.645Z" },
-    { url = "https://files.pythonhosted.org/packages/4d/74/633b04871c669e23b8917877e812376827c06df866e1677f15abfadc95cb/ruff-0.12.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:69ffe0e5f9b2cf2b8e289a3f8945b402a1b19eff24ec389f45f23c42a3dd6fb5", size = 12479945, upload-time = "2025-07-29T22:32:24.765Z" },
-    { url = "https://files.pythonhosted.org/packages/be/34/c3ef2d7799c9778b835a76189c6f53c179d3bdebc8c65288c29032e03613/ruff-0.12.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a07a5c8ffa2611a52732bdc67bf88e243abd84fe2d7f6daef3826b59abbfeda4", size = 12998677, upload-time = "2025-07-29T22:32:27.022Z" },
-    { url = "https://files.pythonhosted.org/packages/77/ab/aca2e756ad7b09b3d662a41773f3edcbd262872a4fc81f920dc1ffa44541/ruff-0.12.7-py3-none-win32.whl", hash = "sha256:c928f1b2ec59fb77dfdf70e0419408898b63998789cc98197e15f560b9e77f77", size = 11756687, upload-time = "2025-07-29T22:32:29.381Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/71/26d45a5042bc71db22ddd8252ca9d01e9ca454f230e2996bb04f16d72799/ruff-0.12.7-py3-none-win_amd64.whl", hash = "sha256:9c18f3d707ee9edf89da76131956aba1270c6348bfee8f6c647de841eac7194f", size = 12912365, upload-time = "2025-07-29T22:32:31.517Z" },
-    { url = "https://files.pythonhosted.org/packages/4c/9b/0b8aa09817b63e78d94b4977f18b1fcaead3165a5ee49251c5d5c245bb2d/ruff-0.12.7-py3-none-win_arm64.whl", hash = "sha256:dfce05101dbd11833a0776716d5d1578641b7fddb537fe7fa956ab85d1769b69", size = 11982083, upload-time = "2025-07-29T22:32:33.881Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/de/ad2f68f0798ff15dd8c0bcc2889558970d9a685b3249565a937cd820ad34/ruff-0.12.5-py3-none-linux_armv6l.whl", hash = "sha256:1de2c887e9dec6cb31fcb9948299de5b2db38144e66403b9660c9548a67abd92", size = 11819133, upload-time = "2025-07-24T13:25:56.369Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/fc/c6b65cd0e7fbe60f17e7ad619dca796aa49fbca34bb9bea5f8faf1ec2643/ruff-0.12.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d1ab65e7d8152f519e7dea4de892317c9da7a108da1c56b6a3c1d5e7cf4c5e9a", size = 12501114, upload-time = "2025-07-24T13:25:59.471Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/de/c6bec1dce5ead9f9e6a946ea15e8d698c35f19edc508289d70a577921b30/ruff-0.12.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:962775ed5b27c7aa3fdc0d8f4d4433deae7659ef99ea20f783d666e77338b8cf", size = 11716873, upload-time = "2025-07-24T13:26:01.496Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/16/cf372d2ebe91e4eb5b82a2275c3acfa879e0566a7ac94d331ea37b765ac8/ruff-0.12.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73b4cae449597e7195a49eb1cdca89fd9fbb16140c7579899e87f4c85bf82f73", size = 11958829, upload-time = "2025-07-24T13:26:03.721Z" },
+    { url = "https://files.pythonhosted.org/packages/25/bf/cd07e8f6a3a6ec746c62556b4c4b79eeb9b0328b362bb8431b7b8afd3856/ruff-0.12.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b13489c3dc50de5e2d40110c0cce371e00186b880842e245186ca862bf9a1ac", size = 11626619, upload-time = "2025-07-24T13:26:06.118Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/c9/c2ccb3b8cbb5661ffda6925f81a13edbb786e623876141b04919d1128370/ruff-0.12.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1504fea81461cf4841778b3ef0a078757602a3b3ea4b008feb1308cb3f23e08", size = 13221894, upload-time = "2025-07-24T13:26:08.292Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/58/68a5be2c8e5590ecdad922b2bcd5583af19ba648f7648f95c51c3c1eca81/ruff-0.12.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c7da4129016ae26c32dfcbd5b671fe652b5ab7fc40095d80dcff78175e7eddd4", size = 14163909, upload-time = "2025-07-24T13:26:10.474Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/d1/ef6b19622009ba8386fdb792c0743f709cf917b0b2f1400589cbe4739a33/ruff-0.12.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca972c80f7ebcfd8af75a0f18b17c42d9f1ef203d163669150453f50ca98ab7b", size = 13583652, upload-time = "2025-07-24T13:26:13.381Z" },
+    { url = "https://files.pythonhosted.org/packages/62/e3/1c98c566fe6809a0c83751d825a03727f242cdbe0d142c9e292725585521/ruff-0.12.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dbbf9f25dfb501f4237ae7501d6364b76a01341c6f1b2cd6764fe449124bb2a", size = 12700451, upload-time = "2025-07-24T13:26:15.488Z" },
+    { url = "https://files.pythonhosted.org/packages/24/ff/96058f6506aac0fbc0d0fc0d60b0d0bd746240a0594657a2d94ad28033ba/ruff-0.12.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c47dea6ae39421851685141ba9734767f960113d51e83fd7bb9958d5be8763a", size = 12937465, upload-time = "2025-07-24T13:26:17.808Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/d3/68bc5e7ab96c94b3589d1789f2dd6dd4b27b263310019529ac9be1e8f31b/ruff-0.12.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5076aa0e61e30f848846f0265c873c249d4b558105b221be1828f9f79903dc5", size = 11771136, upload-time = "2025-07-24T13:26:20.422Z" },
+    { url = "https://files.pythonhosted.org/packages/52/75/7356af30a14584981cabfefcf6106dea98cec9a7af4acb5daaf4b114845f/ruff-0.12.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a5a4c7830dadd3d8c39b1cc85386e2c1e62344f20766be6f173c22fb5f72f293", size = 11601644, upload-time = "2025-07-24T13:26:22.928Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/67/91c71d27205871737cae11025ee2b098f512104e26ffd8656fd93d0ada0a/ruff-0.12.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:46699f73c2b5b137b9dc0fc1a190b43e35b008b398c6066ea1350cce6326adcb", size = 12478068, upload-time = "2025-07-24T13:26:26.134Z" },
+    { url = "https://files.pythonhosted.org/packages/34/04/b6b00383cf2f48e8e78e14eb258942fdf2a9bf0287fbf5cdd398b749193a/ruff-0.12.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a655a0a0d396f0f072faafc18ebd59adde8ca85fb848dc1b0d9f024b9c4d3bb", size = 12991537, upload-time = "2025-07-24T13:26:28.533Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/b9/053d6445dc7544fb6594785056d8ece61daae7214859ada4a152ad56b6e0/ruff-0.12.5-py3-none-win32.whl", hash = "sha256:dfeb2627c459b0b78ca2bbdc38dd11cc9a0a88bf91db982058b26ce41714ffa9", size = 11751575, upload-time = "2025-07-24T13:26:30.835Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/0f/ab16e8259493137598b9149734fec2e06fdeda9837e6f634f5c4e35916da/ruff-0.12.5-py3-none-win_amd64.whl", hash = "sha256:ae0d90cf5f49466c954991b9d8b953bd093c32c27608e409ae3564c63c5306a5", size = 12882273, upload-time = "2025-07-24T13:26:32.929Z" },
+    { url = "https://files.pythonhosted.org/packages/00/db/c376b0661c24cf770cb8815268190668ec1330eba8374a126ceef8c72d55/ruff-0.12.5-py3-none-win_arm64.whl", hash = "sha256:48cdbfc633de2c5c37d9f090ba3b352d1576b0015bfc3bc98eaf230275b7e805", size = 11951564, upload-time = "2025-07-24T13:26:34.994Z" },
 ]

 [[package]]
Author	SHA1	Message	Date
Andy Lee	b55eeeae5f	Merge remote-tracking branch 'origin/main' into feature/claude-code-research	2025-08-05 23:02:00 -07:00
Andy Lee	e890b2311f	feat: Add Claude Code integration with MCP server	2025-08-05 14:03:36 -07:00
Andy Lee	f3d99fd118	feat: Claude Code integration ready - LEANN CLI works out of the box ✅ Verified LEANN CLI works perfectly with Claude Code ✅ Added integration guide with working examples ✅ Documented simple workflow for immediate use Key findings: - No code changes needed - Just need --recompute-embeddings flag - Search, ask, and build all work - Ready for Claude Code agents and workflows	2025-08-05 12:27:58 -07:00
Andy Lee	8eee90bf80	docs: add a link	2025-08-04 20:10:14 -07:00
Andy Lee	649d4ad03e	docs: Address all configuration guide feedback - Fix grammar: 'If time is not a constraint' instead of 'time expense is not large' - Highlight Qwen3-Embedding-0.6B performance (nearly OpenAI API level) - Add OpenAI quick start section with configuration example - Fold Cloud vs Local trade-offs into collapsible section - Update HNSW as 'default and recommended for extreme low storage' - Add DiskANN beta warning and explain PQ+rerank architecture - Expand Ollama models: add qwen3:0.6b, 4b, 7b variants - Note OpenAI as current default but recommend Ollama switch - Add 'need to install extra software' warning for Ollama - Remove incorrect latency numbers from search-complexity recommendations	2025-08-04 20:01:23 -07:00
Andy Lee	d9b6f195c5	docs: Improve configuration guide based on feedback - List specific files in default data/ directory (2 AI papers, literature, tech report) - Update examples to use English and better RAG-suitable queries - Change full dataset reference to use --max-items -1 - Adjust small model guidance about upgrading to larger models when time allows - Update top-k defaults to reflect actual default of 20 - Ensure consistent use of full model name Qwen/Qwen3-Embedding-0.6B - Reorder optimization steps, move MLX to third position - Remove incorrect chunk size tuning guidance - Change README from 'Having trouble' to 'Need best practices'	2025-08-04 19:29:17 -07:00
Andy Lee	00f506c0bd	docs: Adjust DiskANN positioning in features and roadmap - features.md: Put HNSW/FAISS first as default, DiskANN as optional - roadmap.md: Reorder to show HNSW integration before DiskANN - Consistent with positioning DiskANN as advanced option for large-scale use	2025-08-04 17:53:27 -07:00
Andy Lee	e872dd1d23	docs: Weaken DiskANN emphasis in README - Change backend description to emphasize HNSW as default - DiskANN positioned as optional for billion-scale datasets - Simplify evaluation commands to be more generic	2025-08-04 17:51:21 -07:00
Andy Lee	063c687ff7	chore: move evaluation data .gitattributes to correct location	2025-08-04 17:46:17 -07:00
Andy Lee	bb8ecd54d7	feat: add comprehensive configuration guide and update README - Create docs/configuration-guide.md with detailed guidance on: - Embedding model selection (small/medium/large) - Index selection (HNSW vs DiskANN) - LLM engine and model comparison - Parameter tuning (build/search complexity, top-k) - Performance optimization tips - Deep dive into LEANN's recomputation feature - Update README.md to link to the configuration guide - Include latest 2025 model recommendations (Qwen3, DeepSeek-R1, O3-mini)	2025-08-04 17:41:27 -07:00
Andy Lee	716217ae24	docs: config guidance	2025-08-04 16:21:13 -07:00