Compare commits


1 Commit

Author: Andy Lee
SHA1: fcbcde1ea8
Message: feat: implement smart memory configuration for DiskANN
- Add intelligent memory calculation based on data size and system specs
- search_memory_maximum: 1/10 of embedding size (controls PQ compression)
- build_memory_maximum: 50% of available RAM (controls sharding)
- Provides optimal balance between performance and memory usage
- Automatic calculation is skipped when values are explicitly provided
Date: 2025-08-03 22:54:08 -07:00
91 changed files with 3745 additions and 5889 deletions
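The hunks shown below cover CI, docs, and example cleanup rather than the DiskANN backend change itself, so here is a minimal sketch of the heuristic the commit message describes. The helper name, signature, and use of psutil are assumptions for illustration, not the commit's actual code:

```python
from typing import Optional

import psutil  # assumed here for querying available RAM


def smart_memory_config(
    num_embeddings: int,
    dim: int,
    search_memory_maximum: Optional[float] = None,
    build_memory_maximum: Optional[float] = None,
) -> tuple[float, float]:
    """Hypothetical sketch of the commit's heuristic; returns budgets in GB."""
    # Raw size of the float32 embedding matrix.
    embedding_size_gb = num_embeddings * dim * 4 / 1024**3
    # Explicitly provided values are respected; otherwise compute defaults.
    if search_memory_maximum is None:
        # 1/10 of the embedding size: a tight budget forces stronger
        # PQ compression at search time.
        search_memory_maximum = embedding_size_gb / 10
    if build_memory_maximum is None:
        # 50% of currently available RAM: a smaller budget makes the
        # build shard the dataset more aggressively.
        build_memory_maximum = psutil.virtual_memory().available * 0.5 / 1024**3
    return search_memory_maximum, build_memory_maximum
```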

View File

@@ -5,7 +5,6 @@ on:
branches: [ main ]
pull_request:
branches: [ main ]
workflow_dispatch:
jobs:
build:

View File

@@ -54,36 +54,16 @@ jobs:
python: '3.12'
- os: ubuntu-22.04
python: '3.13'
- os: macos-14
- os: macos-latest
python: '3.9'
- os: macos-14
- os: macos-latest
python: '3.10'
- os: macos-14
- os: macos-latest
python: '3.11'
- os: macos-14
- os: macos-latest
python: '3.12'
- os: macos-14
- os: macos-latest
python: '3.13'
- os: macos-15
python: '3.9'
- os: macos-15
python: '3.10'
- os: macos-15
python: '3.11'
- os: macos-15
python: '3.12'
- os: macos-15
python: '3.13'
- os: macos-13
python: '3.9'
- os: macos-13
python: '3.10'
- os: macos-13
python: '3.11'
- os: macos-13
python: '3.12'
# Note: macos-13 + Python 3.13 excluded due to PyTorch compatibility
# (PyTorch 2.5+ supports Python 3.13 but not Intel Mac x86_64)
runs-on: ${{ matrix.os }}
steps:
@@ -129,73 +109,48 @@ jobs:
uv pip install --system delocate
fi
- name: Set macOS environment variables
if: runner.os == 'macOS'
run: |
# Use brew --prefix to automatically detect Homebrew installation path
HOMEBREW_PREFIX=$(brew --prefix)
echo "HOMEBREW_PREFIX=${HOMEBREW_PREFIX}" >> $GITHUB_ENV
echo "OpenMP_ROOT=${HOMEBREW_PREFIX}/opt/libomp" >> $GITHUB_ENV
# Set CMAKE_PREFIX_PATH to let CMake find all packages automatically
echo "CMAKE_PREFIX_PATH=${HOMEBREW_PREFIX}" >> $GITHUB_ENV
# Set compiler flags for OpenMP (required for both backends)
echo "LDFLAGS=-L${HOMEBREW_PREFIX}/opt/libomp/lib" >> $GITHUB_ENV
echo "CPPFLAGS=-I${HOMEBREW_PREFIX}/opt/libomp/include" >> $GITHUB_ENV
- name: Build packages
run: |
# Build core (platform independent)
cd packages/leann-core
uv build
cd ../..
if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
cd packages/leann-core
uv build
cd ../..
fi
# Build HNSW backend
cd packages/leann-backend-hnsw
if [[ "${{ matrix.os }}" == macos-* ]]; then
# Use system clang for better compatibility
if [ "${{ matrix.os }}" == "macos-latest" ]; then
# Use system clang instead of homebrew LLVM for better compatibility
export CC=clang
export CXX=clang++
# Homebrew libraries on each macOS version require matching minimum version
if [[ "${{ matrix.os }}" == "macos-13" ]]; then
export MACOSX_DEPLOYMENT_TARGET=13.0
elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
export MACOSX_DEPLOYMENT_TARGET=14.0
elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
export MACOSX_DEPLOYMENT_TARGET=15.0
fi
uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
export MACOSX_DEPLOYMENT_TARGET=11.0
uv build --wheel --python python
else
uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
uv build --wheel --python python
fi
cd ../..
# Build DiskANN backend
cd packages/leann-backend-diskann
if [[ "${{ matrix.os }}" == macos-* ]]; then
# Use system clang for better compatibility
if [ "${{ matrix.os }}" == "macos-latest" ]; then
# Use system clang instead of homebrew LLVM for better compatibility
export CC=clang
export CXX=clang++
# DiskANN requires macOS 13.3+ for sgesdd_ LAPACK function
# But Homebrew libraries on each macOS version require matching minimum version
if [[ "${{ matrix.os }}" == "macos-13" ]]; then
export MACOSX_DEPLOYMENT_TARGET=13.3
elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
export MACOSX_DEPLOYMENT_TARGET=14.0
elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
export MACOSX_DEPLOYMENT_TARGET=15.0
fi
uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
export MACOSX_DEPLOYMENT_TARGET=13.3
uv build --wheel --python python
else
uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
uv build --wheel --python python
fi
cd ../..
# Build meta package (platform independent)
cd packages/leann
uv build
cd ../..
if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
cd packages/leann
uv build
cd ../..
fi
- name: Repair wheels (Linux)
if: runner.os == 'Linux'
@@ -221,24 +176,10 @@ jobs:
- name: Repair wheels (macOS)
if: runner.os == 'macOS'
run: |
# Determine deployment target based on runner OS
# Must match the Homebrew libraries for each macOS version
if [[ "${{ matrix.os }}" == "macos-13" ]]; then
HNSW_TARGET="13.0"
DISKANN_TARGET="13.3"
elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
HNSW_TARGET="14.0"
DISKANN_TARGET="14.0"
elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
HNSW_TARGET="15.0"
DISKANN_TARGET="15.0"
fi
# Repair HNSW wheel
cd packages/leann-backend-hnsw
if [ -d dist ]; then
export MACOSX_DEPLOYMENT_TARGET=$HNSW_TARGET
delocate-wheel -w dist_repaired -v --require-target-macos-version $HNSW_TARGET dist/*.whl
delocate-wheel -w dist_repaired -v dist/*.whl
rm -rf dist
mv dist_repaired dist
fi
@@ -247,8 +188,7 @@ jobs:
# Repair DiskANN wheel
cd packages/leann-backend-diskann
if [ -d dist ]; then
export MACOSX_DEPLOYMENT_TARGET=$DISKANN_TARGET
delocate-wheel -w dist_repaired -v --require-target-macos-version $DISKANN_TARGET dist/*.whl
delocate-wheel -w dist_repaired -v dist/*.whl
rm -rf dist
mv dist_repaired dist
fi
@@ -259,34 +199,39 @@ jobs:
echo "📦 Built packages:"
find packages/*/dist -name "*.whl" -o -name "*.tar.gz" | sort
- name: Install built packages for testing
run: |
# Create a virtual environment with the correct Python version
uv venv --python ${{ matrix.python }}
# Create a virtual environment
uv venv
source .venv/bin/activate || source .venv/Scripts/activate
# Install packages using --find-links to prioritize local builds
uv pip install --find-links packages/leann-core/dist --find-links packages/leann-backend-hnsw/dist --find-links packages/leann-backend-diskann/dist packages/leann-core/dist/*.whl || uv pip install --find-links packages/leann-core/dist packages/leann-core/dist/*.tar.gz
uv pip install --find-links packages/leann-core/dist packages/leann-backend-hnsw/dist/*.whl
uv pip install --find-links packages/leann-core/dist packages/leann-backend-diskann/dist/*.whl
uv pip install packages/leann/dist/*.whl || uv pip install packages/leann/dist/*.tar.gz
# Install the built wheels
# Use --find-links to let uv choose the correct wheel for the platform
if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
uv pip install leann-core --find-links packages/leann-core/dist
uv pip install leann --find-links packages/leann/dist
fi
uv pip install leann-backend-hnsw --find-links packages/leann-backend-hnsw/dist
uv pip install leann-backend-diskann --find-links packages/leann-backend-diskann/dist
# Install test dependencies using extras
uv pip install -e ".[test]"
- name: Run tests with pytest
env:
CI: true
CI: true # Mark as CI environment to skip memory-intensive tests
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
HF_HUB_DISABLE_SYMLINKS: 1
TOKENIZERS_PARALLELISM: false
PYTORCH_ENABLE_MPS_FALLBACK: 0
OMP_NUM_THREADS: 1
MKL_NUM_THREADS: 1
PYTORCH_ENABLE_MPS_FALLBACK: 0 # Disable MPS on macOS CI to avoid memory issues
OMP_NUM_THREADS: 1 # Disable OpenMP parallelism to avoid libomp crashes
MKL_NUM_THREADS: 1 # Single thread for MKL operations
run: |
# Activate virtual environment
source .venv/bin/activate || source .venv/Scripts/activate
pytest tests/ -v --tb=short
# Run all tests
pytest tests/
- name: Run sanity checks (optional)
run: |

View File

@@ -1,19 +0,0 @@
name: Link Check
on:
push:
branches: [ main, master ]
pull_request:
schedule:
- cron: "0 3 * * 1"
jobs:
link-check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: lycheeverse/lychee-action@v2
with:
args: --no-progress --insecure README.md docs/ apps/ examples/ benchmarks/
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.gitignore (vendored), 14 changes
View File

@@ -34,15 +34,11 @@ build/
nprobe_logs/
micro/results
micro/contriever-INT8
data/*
!data/2501.14312v1 (1).pdf
!data/2506.08276v1.pdf
!data/PrideandPrejudice.txt
!data/huawei_pangu.md
!data/ground_truth/
!data/indices/
!data/queries/
!data/.gitattributes
examples/data/*
!examples/data/2501.14312v1 (1).pdf
!examples/data/2506.08276v1.pdf
!examples/data/PrideandPrejudice.txt
!examples/data/README.md
*.qdstrm
benchmark_results/
results/

View File

@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
@@ -10,7 +10,7 @@ repos:
- id: debug-statements
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.7 # Fixed version to match pyproject.toml
rev: v0.2.1
hooks:
- id: ruff
- id: ruff-format

README.md, 317 changes
View File

@@ -3,11 +3,9 @@
</p>
<p align="center">
<img src="https://img.shields.io/badge/Python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue.svg" alt="Python Versions">
<img src="https://github.com/yichuan-w/LEANN/actions/workflows/build-and-publish.yml/badge.svg" alt="CI Status">
<img src="https://img.shields.io/badge/Platform-Ubuntu%20%7C%20macOS%20(ARM64%2FIntel)-lightgrey" alt="Platform">
<img src="https://img.shields.io/badge/Python-3.9%2B-blue.svg" alt="Python 3.9+">
<img src="https://img.shields.io/badge/License-MIT-green.svg" alt="MIT License">
<img src="https://img.shields.io/badge/MCP-Native%20Integration-blue" alt="MCP Integration">
<img src="https://img.shields.io/badge/Platform-Linux%20%7C%20macOS-lightgrey" alt="Platform">
</p>
<h2 align="center" tabindex="-1" class="heading-element" dir="auto">
@@ -18,10 +16,7 @@ LEANN is an innovative vector database that democratizes personal AI. Transform
LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. [Illustration Fig →](#-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276)
**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can semantic search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, **[codebase](#-claude-code-integration-transform-your-development-workflow)**\* , or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.
\* Claude Code only supports basic `grep`-style keyword search. **LEANN** is a drop-in **semantic search MCP service fully compatible with Claude Code**, unlocking intelligent retrieval without changing your workflow. 🔥 Check out [the easy setup →](packages/leann-mcp/README.md)
**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.
@@ -31,7 +26,7 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg
<img src="assets/effects.png" alt="LEANN vs Traditional Vector DB Storage Comparison" width="70%">
</p>
> **The numbers speak for themselves:** Index 60 million text chunks in just 6GB instead of 201GB. From emails to browser history, everything fits on your laptop. [See detailed benchmarks for different applications below ↓](#-storage-comparison)
> **The numbers speak for themselves:** Index 60 million Wikipedia chunks in just 6GB instead of 201GB. From emails to browser history, everything fits on your laptop. [See detailed benchmarks for different applications below ↓](#storage-comparison)
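> (Back of the envelope, assuming 768-dimensional float32 embeddings as produced by facebook/contriever: 60M × 768 dims × 4 bytes ≈ 184 GB of raw vectors alone, which lands near the ~201 GB a traditional store needs once index overhead is added.)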
🔒 **Privacy:** Your data never leaves your laptop. No OpenAI, no cloud, no "terms of service".
@@ -46,42 +41,40 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg
## Installation
### 📦 Prerequisites: Install uv
<details>
<summary><strong>📦 Prerequisites: Install uv (if you don't have it)</strong></summary>
[Install uv](https://docs.astral.sh/uv/getting-started/installation/#installation-methods) first if you don't have it. Typically, you can install it with:
Install uv first if you don't have it:
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```
### 🚀 Quick Install
📖 [Detailed uv installation methods →](https://docs.astral.sh/uv/getting-started/installation/#installation-methods)
Clone the repository to access all examples and try amazing applications,
</details>
LEANN provides two installation methods: **pip install** (quick and easy) and **build from source** (recommended for development).
### 🚀 Quick Install (Recommended for most users)
Clone the repository to access all examples and install LEANN from [PyPI](https://pypi.org/project/leann/) to run them immediately:
```bash
git clone https://github.com/yichuan-w/LEANN.git leann
git clone git@github.com:yichuan-w/LEANN.git leann
cd leann
```
and install LEANN from [PyPI](https://pypi.org/project/leann/) to run them immediately:
```bash
uv venv
source .venv/bin/activate
uv pip install leann
```
<!--
> Low-resource? See “Low-resource setups” in the [Configuration Guide](docs/configuration-guide.md#low-resource-setups). -->
<details>
<summary>
<strong>🔧 Build from Source (Recommended for development)</strong>
</summary>
### 🔧 Build from Source (Recommended for development)
```bash
git clone https://github.com/yichuan-w/LEANN.git leann
git clone git@github.com:yichuan-w/LEANN.git leann
cd leann
git submodule update --init --recursive
```
@@ -98,14 +91,14 @@ sudo apt-get install libomp-dev libboost-all-dev protobuf-compiler libabsl-dev l
uv sync
```
</details>
## Quick Start
Our declarative API makes RAG as easy as writing a config file.
Check out [demo.ipynb](demo.ipynb) or [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yichuan-w/LEANN/blob/main/demo.ipynb)
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yichuan-w/LEANN/blob/main/demo.ipynb) [Try in this ipynb file →](demo.ipynb)
```python
from leann import LeannBuilder, LeannSearcher, LeannChat
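The hunk here elides the middle of the quick-start snippet; below is a minimal sketch of the flow it frames, built from API names that appear elsewhere in this compare (`LeannBuilder`, `add_text`, `build_index`, `LeannChat`, `ask`). The `LeannSearcher` constructor and `search` call are assumptions:

```python
from leann import LeannBuilder, LeannSearcher, LeannChat

# A minimal sketch, not the README's elided snippet verbatim.
builder = LeannBuilder(backend_name="hnsw")
builder.add_text("LEANN saves storage by recomputing embeddings on demand.")
builder.build_index("demo.leann")

searcher = LeannSearcher("demo.leann")                 # assumed constructor
results = searcher.search("storage savings", top_k=1)  # assumed method name

chat = LeannChat("demo.leann", llm_config={"type": "openai", "model": "gpt-4o"})
response = chat.ask("How much storage does LEANN save?", top_k=1)
print(response)
```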
@@ -129,11 +122,11 @@ response = chat.ask("How much storage does LEANN save?", top_k=1)
## RAG on Everything!
LEANN supports RAG on various data sources including documents (`.pdf`, `.txt`, `.md`), Apple Mail, Google Search History, WeChat, and more.
LEANN supports RAG on various data sources including documents (.pdf, .txt, .md), Apple Mail, Google Search History, WeChat, and more.
### Generation Model Setup
LEANN supports multiple LLM providers for text generation (OpenAI API, HuggingFace, Ollama).
> **Generation Model Setup**
> LEANN supports multiple LLM providers for text generation (OpenAI API, HuggingFace, Ollama).
<details>
<summary><strong>🔑 OpenAI API Setup (Default)</strong></summary>
@@ -173,52 +166,7 @@ ollama pull llama3.2:1b
</details>
### ⭐ Flexible Configuration
LEANN provides flexible parameters for embedding models, search strategies, and data processing to fit your specific needs.
📚 **Need configuration best practices?** Check our [Configuration Guide](docs/configuration-guide.md) for detailed optimization tips, model selection advice, and solutions to common issues like slow embeddings or poor search quality.
<details>
<summary><strong>📋 Click to expand: Common Parameters (Available in All Examples)</strong></summary>
All RAG examples share these common parameters. **Interactive mode** is available in all examples - simply run without `--query` to start a continuous Q&A session where you can ask multiple questions. Type 'quit' to exit.
```bash
# Core Parameters (General preprocessing for all examples)
--index-dir DIR # Directory to store the index (default: current directory)
--query "YOUR QUESTION" # Single query mode. Omit for interactive chat (type 'quit' to exit), and now you can play with your index interactively
--max-items N # Limit data preprocessing (default: -1, process all data)
--force-rebuild # Force rebuild index even if it exists
# Embedding Parameters
--embedding-model MODEL # e.g., facebook/contriever, text-embedding-3-small, mlx-community/Qwen3-Embedding-0.6B-8bit or nomic-embed-text
--embedding-mode MODE # sentence-transformers, openai, mlx, or ollama
# LLM Parameters (Text generation models)
--llm TYPE # LLM backend: openai, ollama, or hf (default: openai)
--llm-model MODEL # Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct
--thinking-budget LEVEL # Thinking budget for reasoning models: low/medium/high (supported by o3, o3-mini, GPT-Oss:20b, and other reasoning models)
# Search Parameters
--top-k N # Number of results to retrieve (default: 20)
--search-complexity N # Search complexity for graph traversal (default: 32)
# Chunking Parameters
--chunk-size N # Size of text chunks (default varies by source: 256 for most, 192 for WeChat)
--chunk-overlap N # Overlap between chunks (default varies: 25-128 depending on source)
# Index Building Parameters
--backend-name NAME # Backend to use: hnsw or diskann (default: hnsw)
--graph-degree N # Graph degree for index construction (default: 32)
--build-complexity N # Build complexity for index construction (default: 64)
--compact / --no-compact # Use compact storage (default: true). A `no-recompute` build requires `--no-compact`.
--recompute / --no-recompute # Enable/disable embedding recomputation (default: enabled). Don't run a `no-recompute` search against a `recompute` build.
```
</details>
### 📄 Personal Data Manager: Process Any Documents (`.pdf`, `.txt`, `.md`)!
### 📄 Personal Data Manager: Process Any Documents (.pdf, .txt, .md)!
Ask questions directly about your personal PDFs, documents, and any directory containing your files!
@@ -226,29 +174,25 @@ Ask questions directly about your personal PDFs, documents, and any directory co
<img src="videos/paper_clear.gif" alt="LEANN Document Search Demo" width="600">
</p>
The example below asks a question summarizing our paper (it uses the default data in `data/`, a directory with diverse sources: two papers, Pride and Prejudice, and a Chinese-language technical report about Huawei's Pangu LLM), and this is the **easiest example** to run here:
The example below asks a question about summarizing two papers (uses default data in `examples/data`) and this is the easiest example to run here:
```bash
source .venv/bin/activate # Don't forget to activate the virtual environment
python -m apps.document_rag --query "What are the main techniques LEANN explores?"
source .venv/bin/activate
python ./examples/main_cli_example.py
```
<details>
<summary><strong>📋 Click to expand: Document-Specific Arguments</strong></summary>
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
#### Parameters
```bash
--data-dir DIR # Directory containing documents to process (default: data)
--file-types .ext .ext # Filter by specific file types (optional - all LlamaIndex supported types if omitted)
```
# Use custom index directory
python examples/main_cli_example.py --index-dir "./my_custom_index"
#### Example Commands
```bash
# Process all documents with larger chunks for academic papers
python -m apps.document_rag --data-dir "~/Documents/Papers" --chunk-size 1024
# Use custom data directory
python examples/main_cli_example.py --data-dir "./my_documents"
# Filter only markdown and Python files with smaller chunks
python -m apps.document_rag --data-dir "./docs" --chunk-size 256 --file-types .md .py
# Ask a specific question
python examples/main_cli_example.py --query "What are the main findings in these papers?"
```
</details>
@@ -262,29 +206,30 @@ python -m apps.document_rag --data-dir "./docs" --chunk-size 256 --file-types .m
<img src="videos/mail_clear.gif" alt="LEANN Email Search Demo" width="600">
</p>
Before running the example below, you need to grant full disk access to your terminal/VS Code in System Preferences → Privacy & Security → Full Disk Access.
**Note:** You need to grant full disk access to your terminal/VS Code in System Preferences → Privacy & Security → Full Disk Access.
```bash
python -m apps.email_rag --query "What's the food I ordered by DoorDash or Uber Eats mostly?"
python examples/mail_reader_leann.py --query "What's the food I ordered by DoorDash or Uber Eats mostly?"
```
**780K email chunks → 78MB storage.** Finally, search your email like you search Google.
<details>
<summary><strong>📋 Click to expand: Email-Specific Arguments</strong></summary>
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
#### Parameters
```bash
--mail-path PATH # Path to specific mail directory (auto-detects if omitted)
--include-html # Include HTML content in processing (useful for newsletters)
```
# Use default mail path (works for most macOS setups)
python examples/mail_reader_leann.py
#### Example Commands
```bash
# Search work emails from a specific account
python -m apps.email_rag --mail-path "~/Library/Mail/V10/WORK_ACCOUNT"
# Run with custom index directory
python examples/mail_reader_leann.py --index-dir "./my_mail_index"
# Find all receipts and order confirmations (includes HTML)
python -m apps.email_rag --query "receipt order confirmation invoice" --include-html
# Process all emails (may take time but indexes everything)
python examples/mail_reader_leann.py --max-emails -1
# Limit number of emails processed (useful for testing)
python examples/mail_reader_leann.py --max-emails 1000
# Run a single query
python examples/mail_reader_leann.py --query "What did my boss say about deadlines?"
```
</details>
@@ -305,25 +250,25 @@ Once the index is built, you can ask questions like:
</p>
```bash
python -m apps.browser_rag --query "Tell me my browser history about machine learning?"
python examples/google_history_reader_leann.py --query "Tell me my browser history about machine learning?"
```
**38K browser entries → 6MB storage.** Your browser history becomes your personal search engine.
<details>
<summary><strong>📋 Click to expand: Browser-Specific Arguments</strong></summary>
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
#### Parameters
```bash
--chrome-profile PATH # Path to Chrome profile directory (auto-detects if omitted)
```
# Use default Chrome profile (auto-finds all profiles)
python examples/google_history_reader_leann.py
#### Example Commands
```bash
# Search academic research from your browsing history
python -m apps.browser_rag --query "arxiv papers machine learning transformer architecture"
# Run with custom index directory
python examples/google_history_reader_leann.py --index-dir "./my_chrome_index"
# Track competitor analysis across work profile
python -m apps.browser_rag --chrome-profile "~/Library/Application Support/Google/Chrome/Work Profile" --max-items 5000
# Limit number of history entries processed (useful for testing)
python examples/google_history_reader_leann.py --max-entries 500
# Run a single query
python examples/google_history_reader_leann.py --query "What websites did I visit about machine learning?"
```
</details>
@@ -363,7 +308,7 @@ Once the index is built, you can ask questions like:
</p>
```bash
python -m apps.wechat_rag --query "Show me all group chats about weekend plans"
python examples/wechat_history_reader_leann.py --query "Show me all group chats about weekend plans"
```
**400K messages → 64MB storage** Search years of chat history in any language.
@@ -371,13 +316,7 @@ python -m apps.wechat_rag --query "Show me all group chats about weekend plans"
<details>
<summary><strong>🔧 Click to expand: Installation Requirements</strong></summary>
First, you need to install the [WeChat exporter](https://github.com/sunnyyoung/WeChatTweak-CLI),
```bash
brew install sunnyyoung/repo/wechattweak-cli
```
or install it manually (if you have issues with Homebrew):
First, you need to install the WeChat exporter:
```bash
sudo packages/wechat-exporter/wechattweak-cli install
@@ -386,28 +325,30 @@ sudo packages/wechat-exporter/wechattweak-cli install
**Troubleshooting:**
- **Installation issues**: Check the [WeChatTweak-CLI issues page](https://github.com/sunnyyoung/WeChatTweak-CLI/issues/41)
- **Export errors**: If you encounter the error below, try restarting WeChat
```bash
Failed to export WeChat data. Please ensure WeChat is running and WeChatTweak is installed.
Failed to find or export WeChat data. Exiting.
```
```
Failed to export WeChat data. Please ensure WeChat is running and WeChatTweak is installed.
Failed to find or export WeChat data. Exiting.
```
</details>
<details>
<summary><strong>📋 Click to expand: WeChat-Specific Arguments</strong></summary>
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
#### Parameters
```bash
--export-dir DIR # Directory to store exported WeChat data (default: wechat_export_direct)
--force-export # Force re-export even if data exists
```
# Use default settings (recommended for first run)
python examples/wechat_history_reader_leann.py
#### Example Commands
```bash
# Search for travel plans discussed in group chats
python -m apps.wechat_rag --query "travel plans" --max-items 10000
# Run with a custom export directory; on the first run, LEANN will export all chat history automatically for you
python examples/wechat_history_reader_leann.py --export-dir "./my_wechat_exports"
# Re-export and search recent chats (useful after new messages)
python -m apps.wechat_rag --force-export --query "work schedule"
# Run with custom index directory
python examples/wechat_history_reader_leann.py --index-dir "./my_wechat_index"
# Limit number of chat entries processed (useful for testing)
python examples/wechat_history_reader_leann.py --max-entries 1000
# Run a single query
python examples/wechat_history_reader_leann.py --query "Show me conversations about travel plans"
```
</details>
@@ -421,58 +362,15 @@ Once the index is built, you can ask questions like:
</details>
### 🚀 Claude Code Integration: Transform Your Development Workflow!
**The future of code assistance is here.** Transform your development workflow with LEANN's native MCP integration for Claude Code. Index your entire codebase and get intelligent code assistance directly in your IDE.
**Key features:**
- 🔍 **Semantic code search** across your entire project, with a lightweight, fully local index
- 📚 **Context-aware assistance** for debugging and development
- 🚀 **Zero-config setup** with automatic language detection
```bash
# Install LEANN globally for MCP integration
uv tool install leann-core --with leann
claude mcp add --scope user leann-server -- leann_mcp
# Setup is automatic - just start using Claude Code!
```
Try our fully agentic pipeline with auto query rewriting, semantic search planning, and more:
![LEANN MCP Integration](assets/mcp_leann.png)
**🔥 Ready to supercharge your coding?** [Complete Setup Guide →](packages/leann-mcp/README.md)
## 🖥️ Command Line Interface
LEANN includes a powerful CLI for document processing and search. Perfect for quick document indexing and interactive chat.
### Installation
If you followed the Quick Start, `leann` is already installed in your virtual environment:
```bash
source .venv/bin/activate
leann --help
```
**To make it globally available:**
```bash
# Install the LEANN CLI globally using uv tool
uv tool install leann-core --with leann
# Now you can use leann from anywhere without activating venv
leann --help
```
> **Note**: Global installation is required for Claude Code integration. The `leann_mcp` server depends on the globally available `leann` command.
### Usage Examples
```bash
# Build from a specific directory; my-docs is the index name (you can also build from multiple directories or multiple files)
leann build my-docs --docs ./your_documents
# Build an index from documents
leann build my-docs --docs ./documents
# Search your documents
leann search my-docs "machine learning concepts"
@@ -485,29 +383,27 @@ leann list
```
**Key CLI features:**
- Auto-detects document formats (PDF, TXT, MD, DOCX, PPTX + code files)
- Auto-detects document formats (PDF, TXT, MD, DOCX)
- Smart text chunking with overlap
- Multiple LLM providers (Ollama, OpenAI, HuggingFace)
- Organized index storage in `.leann/indexes/` (project-local)
- Organized index storage in `~/.leann/indexes/`
- Support for advanced search parameters
<details>
<summary><strong>📋 Click to expand: Complete CLI Reference</strong></summary>
You can use `leann --help`, or `leann build --help`, `leann search --help`, `leann ask --help` to get the complete CLI reference.
**Build Command:**
```bash
leann build INDEX_NAME --docs DIRECTORY|FILE [DIRECTORY|FILE ...] [OPTIONS]
leann build INDEX_NAME --docs DIRECTORY [OPTIONS]
Options:
--backend {hnsw,diskann} Backend to use (default: hnsw)
--embedding-model MODEL Embedding model (default: facebook/contriever)
--graph-degree N Graph degree (default: 32)
--complexity N Build complexity (default: 64)
--force Force rebuild existing index
--compact / --no-compact Use compact storage (default: true). A `no-recompute` build requires `--no-compact`.
--recompute / --no-recompute Enable recomputation (default: true)
--graph-degree N Graph degree (default: 32)
--complexity N Build complexity (default: 64)
--force Force rebuild existing index
--compact Use compact storage (default: true)
--recompute Enable recomputation (default: true)
```
**Search Command:**
@@ -515,9 +411,9 @@ Options:
leann search INDEX_NAME QUERY [OPTIONS]
Options:
--top-k N Number of results (default: 5)
--complexity N Search complexity (default: 64)
--recompute / --no-recompute Enable/disable embedding recomputation (default: enabled). Don't run a `no-recompute` search against a `recompute` build.
--top-k N Number of results (default: 5)
--complexity N Search complexity (default: 64)
--recompute-embeddings Use recomputation for highest accuracy
--pruning-strategy {global,local,proportional}
```
@@ -548,17 +444,13 @@ Options:
- **Dynamic batching:** Efficiently batch embedding computations for GPU utilization
- **Two-level search:** Smart graph traversal that prioritizes promising nodes
**Backends:**
- **HNSW** (default): Ideal for most datasets with maximum storage savings through full recomputation
- **DiskANN**: Advanced option with superior search performance, using PQ-based graph traversal with real-time reranking for the best speed-accuracy trade-off
**Backends:** DiskANN or HNSW - pick what works for your data size.
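As a rough illustration of what selective recomputation during traversal means (a hypothetical sketch, not LEANN's actual internals): the searcher keeps a best-first frontier and only recomputes embeddings for neighbors of promising nodes, so embedding cost tracks the nodes visited rather than the corpus size.

```python
import heapq

import numpy as np


def selective_search(query_vec, graph, entry, embed_fn, top_k=10, complexity=32):
    """Minimal sketch of best-first traversal with on-demand embeddings.

    graph: dict mapping node id -> list of neighbor ids
    embed_fn: recomputes a node's embedding from its stored text; in LEANN
    these calls would be batched dynamically rather than made one at a time.
    """

    def dist(node_id):
        # Embedding is computed on demand instead of loaded from disk.
        return float(np.linalg.norm(embed_fn(node_id) - query_vec))

    visited = {entry}
    frontier = [(dist(entry), entry)]  # min-heap of (distance, node id)
    results = []
    while frontier and len(results) < complexity:
        d, node = heapq.heappop(frontier)
        results.append((d, node))
        # Only neighbors of promising (popped) nodes are ever embedded,
        # which is where the storage-for-compute trade comes from.
        for neighbor in graph[node]:
            if neighbor not in visited:
                visited.add(neighbor)
                heapq.heappush(frontier, (dist(neighbor), neighbor))
    results.sort()
    return [node for _, node in results[:top_k]]
```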
## Benchmarks
**[DiskANN vs HNSW Performance Comparison →](benchmarks/diskann_vs_hnsw_speed_comparison.py)** - Compare search performance between both backends
**[Simple Example: Compare LEANN vs FAISS →](benchmarks/compare_faiss_vs_leann.py)** - See storage savings in action
### 📊 Storage Comparison
📊 **[Simple Example: Compare LEANN vs FAISS →](examples/compare_faiss_vs_leann.py)**
### Storage Comparison
| System | DPR (2.1M) | Wiki (60M) | Chat (400K) | Email (780K) | Browser (38K) |
|--------|-------------|------------|-------------|--------------|---------------|
@@ -572,7 +464,8 @@ Options:
```bash
uv pip install -e ".[dev]" # Install dev dependencies
python benchmarks/run_evaluation.py # Will auto-download evaluation data and run benchmarks
python examples/run_evaluation.py data/indices/dpr/dpr_diskann # DPR dataset
python examples/run_evaluation.py data/indices/rpj_wiki/rpj_wiki.index # Wikipedia
```
The evaluation script downloads data automatically on first run. The last three results were tested with partial personal data, and you can reproduce them with your own data!
@@ -610,15 +503,9 @@ MIT License - see [LICENSE](LICENSE) for details.
## 🙏 Acknowledgments
Core Contributors: [Yichuan Wang](https://yichuan-w.github.io/) & [Zhifei Li](https://github.com/andylizf).
This work is done at [**Berkeley Sky Computing Lab**](https://sky.cs.berkeley.edu/)
---
We welcome more contributors! Feel free to open issues or submit PRs.
This work is done at [**Berkeley Sky Computing Lab**](https://sky.cs.berkeley.edu/).
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=yichuan-w/LEANN&type=Date)](https://www.star-history.com/#yichuan-w/LEANN&Date)
<p align="center">
<strong>⭐ Star us on GitHub if Leann is useful for your research or applications!</strong>
</p>

View File

@@ -1,324 +0,0 @@
"""
Base class for unified RAG examples interface.
Provides common parameters and functionality for all RAG examples.
"""
import argparse
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any
import dotenv
from leann.api import LeannBuilder, LeannChat
from llama_index.core.node_parser import SentenceSplitter
dotenv.load_dotenv()
class BaseRAGExample(ABC):
"""Base class for all RAG examples with unified interface."""
def __init__(
self,
name: str,
description: str,
default_index_name: str,
):
self.name = name
self.description = description
self.default_index_name = default_index_name
self.parser = self._create_parser()
def _create_parser(self) -> argparse.ArgumentParser:
"""Create argument parser with common parameters."""
parser = argparse.ArgumentParser(
description=self.description, formatter_class=argparse.RawDescriptionHelpFormatter
)
# Core parameters (all examples share these)
core_group = parser.add_argument_group("Core Parameters")
core_group.add_argument(
"--index-dir",
type=str,
default=f"./{self.default_index_name}",
help=f"Directory to store the index (default: ./{self.default_index_name})",
)
core_group.add_argument(
"--query",
type=str,
default=None,
help="Query to run (if not provided, will run in interactive mode)",
)
# Allow subclasses to override default max_items
max_items_default = getattr(self, "max_items_default", -1)
core_group.add_argument(
"--max-items",
type=int,
default=max_items_default,
help="Maximum number of items to process -1 for all, means index all documents, and you should set it to a reasonable number if you have a large dataset and try at the first time)",
)
core_group.add_argument(
"--force-rebuild", action="store_true", help="Force rebuild index even if it exists"
)
# Embedding parameters
embedding_group = parser.add_argument_group("Embedding Parameters")
# Allow subclasses to override default embedding_model
embedding_model_default = getattr(self, "embedding_model_default", "facebook/contriever")
embedding_group.add_argument(
"--embedding-model",
type=str,
default=embedding_model_default,
help=f"Embedding model to use (default: {embedding_model_default}), we provide facebook/contriever, text-embedding-3-small,mlx-community/Qwen3-Embedding-0.6B-8bit or nomic-embed-text",
)
embedding_group.add_argument(
"--embedding-mode",
type=str,
default="sentence-transformers",
choices=["sentence-transformers", "openai", "mlx", "ollama"],
help="Embedding backend mode (default: sentence-transformers), we provide sentence-transformers, openai, mlx, or ollama",
)
# LLM parameters
llm_group = parser.add_argument_group("LLM Parameters")
llm_group.add_argument(
"--llm",
type=str,
default="openai",
choices=["openai", "ollama", "hf", "simulated"],
help="LLM backend: openai, ollama, or hf (default: openai)",
)
llm_group.add_argument(
"--llm-model",
type=str,
default=None,
help="Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct",
)
llm_group.add_argument(
"--llm-host",
type=str,
default="http://localhost:11434",
help="Host for Ollama API (default: http://localhost:11434)",
)
llm_group.add_argument(
"--thinking-budget",
type=str,
choices=["low", "medium", "high"],
default=None,
help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
)
# Search parameters
search_group = parser.add_argument_group("Search Parameters")
search_group.add_argument(
"--top-k", type=int, default=20, help="Number of results to retrieve (default: 20)"
)
search_group.add_argument(
"--search-complexity",
type=int,
default=32,
help="Search complexity for graph traversal (default: 64)",
)
# Index building parameters
index_group = parser.add_argument_group("Index Building Parameters")
index_group.add_argument(
"--backend-name",
type=str,
default="hnsw",
choices=["hnsw", "diskann"],
help="Backend to use for index (default: hnsw)",
)
index_group.add_argument(
"--graph-degree",
type=int,
default=32,
help="Graph degree for index construction (default: 32)",
)
index_group.add_argument(
"--build-complexity",
type=int,
default=64,
help="Build complexity for index construction (default: 64)",
)
index_group.add_argument(
"--no-compact",
action="store_true",
help="Disable compact index storage",
)
index_group.add_argument(
"--no-recompute",
action="store_true",
help="Disable embedding recomputation",
)
# Add source-specific parameters
self._add_specific_arguments(parser)
return parser
@abstractmethod
def _add_specific_arguments(self, parser: argparse.ArgumentParser):
"""Add source-specific arguments. Override in subclasses."""
pass
@abstractmethod
async def load_data(self, args) -> list[str]:
"""Load data from the source. Returns list of text chunks."""
pass
def get_llm_config(self, args) -> dict[str, Any]:
"""Get LLM configuration based on arguments."""
config = {"type": args.llm}
if args.llm == "openai":
config["model"] = args.llm_model or "gpt-4o"
elif args.llm == "ollama":
config["model"] = args.llm_model or "llama3.2:1b"
config["host"] = args.llm_host
elif args.llm == "hf":
config["model"] = args.llm_model or "Qwen/Qwen2.5-1.5B-Instruct"
elif args.llm == "simulated":
# Simulated LLM doesn't need additional configuration
pass
return config
async def build_index(self, args, texts: list[str]) -> str:
"""Build LEANN index from texts."""
index_path = str(Path(args.index_dir) / f"{self.default_index_name}.leann")
print(f"\n[Building Index] Creating {self.name} index...")
print(f"Total text chunks: {len(texts)}")
builder = LeannBuilder(
backend_name=args.backend_name,
embedding_model=args.embedding_model,
embedding_mode=args.embedding_mode,
graph_degree=args.graph_degree,
complexity=args.build_complexity,
is_compact=not args.no_compact,
is_recompute=not args.no_recompute,
num_threads=1, # Force single-threaded mode
)
# Add texts in batches for better progress tracking
batch_size = 1000
for i in range(0, len(texts), batch_size):
batch = texts[i : i + batch_size]
for text in batch:
builder.add_text(text)
print(f"Added {min(i + batch_size, len(texts))}/{len(texts)} texts...")
print("Building index structure...")
builder.build_index(index_path)
print(f"Index saved to: {index_path}")
return index_path
async def run_interactive_chat(self, args, index_path: str):
"""Run interactive chat with the index."""
chat = LeannChat(
index_path,
llm_config=self.get_llm_config(args),
system_prompt=f"You are a helpful assistant that answers questions about {self.name} data.",
complexity=args.search_complexity,
)
print(f"\n[Interactive Mode] Chat with your {self.name} data!")
print("Type 'quit' or 'exit' to stop.\n")
while True:
try:
query = input("You: ").strip()
if query.lower() in ["quit", "exit", "q"]:
print("Goodbye!")
break
if not query:
continue
# Prepare LLM kwargs with thinking budget if specified
llm_kwargs = {}
if hasattr(args, "thinking_budget") and args.thinking_budget:
llm_kwargs["thinking_budget"] = args.thinking_budget
response = chat.ask(
query,
top_k=args.top_k,
complexity=args.search_complexity,
llm_kwargs=llm_kwargs,
)
print(f"\nAssistant: {response}\n")
except KeyboardInterrupt:
print("\nGoodbye!")
break
except Exception as e:
print(f"Error: {e}")
async def run_single_query(self, args, index_path: str, query: str):
"""Run a single query against the index."""
chat = LeannChat(
index_path,
llm_config=self.get_llm_config(args),
system_prompt=f"You are a helpful assistant that answers questions about {self.name} data.",
complexity=args.search_complexity,
)
print(f"\n[Query]: \033[36m{query}\033[0m")
# Prepare LLM kwargs with thinking budget if specified
llm_kwargs = {}
if hasattr(args, "thinking_budget") and args.thinking_budget:
llm_kwargs["thinking_budget"] = args.thinking_budget
response = chat.ask(
query, top_k=args.top_k, complexity=args.search_complexity, llm_kwargs=llm_kwargs
)
print(f"\n[Response]: \033[36m{response}\033[0m")
async def run(self):
"""Main entry point for the example."""
args = self.parser.parse_args()
# Check if index exists
index_path = str(Path(args.index_dir) / f"{self.default_index_name}.leann")
index_exists = Path(args.index_dir).exists()
if not index_exists or args.force_rebuild:
# Load data and build index
print(f"\n{'Rebuilding' if index_exists else 'Building'} index...")
texts = await self.load_data(args)
if not texts:
print("No data found to index!")
return
index_path = await self.build_index(args, texts)
else:
print(f"\nUsing existing index in {args.index_dir}")
# Run query or interactive mode
if args.query:
await self.run_single_query(args, index_path, args.query)
else:
await self.run_interactive_chat(args, index_path)
def create_text_chunks(documents, chunk_size=256, chunk_overlap=25) -> list[str]:
"""Helper function to create text chunks from documents."""
node_parser = SentenceSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
separator=" ",
paragraph_separator="\n\n",
)
all_texts = []
for doc in documents:
nodes = node_parser.get_nodes_from_documents([doc])
if nodes:
all_texts.extend(node.get_content() for node in nodes)
return all_texts

View File

@@ -1,170 +0,0 @@
"""
Browser History RAG example using the unified interface.
Supports Chrome browser history.
"""
import os
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from base_rag_example import BaseRAGExample, create_text_chunks
from history_data.history import ChromeHistoryReader  # absolute import so this script also runs directly
class BrowserRAG(BaseRAGExample):
"""RAG example for Chrome browser history."""
def __init__(self):
# Set default values BEFORE calling super().__init__
self.embedding_model_default = (
"sentence-transformers/all-MiniLM-L6-v2" # Fast 384-dim model
)
super().__init__(
name="Browser History",
description="Process and query Chrome browser history with LEANN",
default_index_name="google_history_index",
)
def _add_specific_arguments(self, parser):
"""Add browser-specific arguments."""
browser_group = parser.add_argument_group("Browser Parameters")
browser_group.add_argument(
"--chrome-profile",
type=str,
default=None,
help="Path to Chrome profile directory (auto-detected if not specified)",
)
browser_group.add_argument(
"--auto-find-profiles",
action="store_true",
default=True,
help="Automatically find all Chrome profiles (default: True)",
)
browser_group.add_argument(
"--chunk-size", type=int, default=256, help="Text chunk size (default: 256)"
)
browser_group.add_argument(
"--chunk-overlap", type=int, default=128, help="Text chunk overlap (default: 128)"
)
def _get_chrome_base_path(self) -> Path:
"""Get the base Chrome profile path based on OS."""
if sys.platform == "darwin":
return Path.home() / "Library" / "Application Support" / "Google" / "Chrome"
elif sys.platform.startswith("linux"):
return Path.home() / ".config" / "google-chrome"
elif sys.platform == "win32":
return Path(os.environ["LOCALAPPDATA"]) / "Google" / "Chrome" / "User Data"
else:
raise ValueError(f"Unsupported platform: {sys.platform}")
def _find_chrome_profiles(self) -> list[Path]:
"""Auto-detect all Chrome profiles."""
base_path = self._get_chrome_base_path()
if not base_path.exists():
return []
profiles = []
# Check Default profile
default_profile = base_path / "Default"
if default_profile.exists() and (default_profile / "History").exists():
profiles.append(default_profile)
# Check numbered profiles
for item in base_path.iterdir():
if item.is_dir() and item.name.startswith("Profile "):
if (item / "History").exists():
profiles.append(item)
return profiles
async def load_data(self, args) -> list[str]:
"""Load browser history and convert to text chunks."""
# Determine Chrome profiles
if args.chrome_profile and not args.auto_find_profiles:
profile_dirs = [Path(args.chrome_profile)]
else:
print("Auto-detecting Chrome profiles...")
profile_dirs = self._find_chrome_profiles()
# If specific profile given, filter to just that one
if args.chrome_profile:
profile_path = Path(args.chrome_profile)
profile_dirs = [p for p in profile_dirs if p == profile_path]
if not profile_dirs:
print("No Chrome profiles found!")
print("Please specify --chrome-profile manually")
return []
print(f"Found {len(profile_dirs)} Chrome profiles")
# Create reader
reader = ChromeHistoryReader()
# Process each profile
all_documents = []
total_processed = 0
for i, profile_dir in enumerate(profile_dirs):
print(f"\nProcessing profile {i + 1}/{len(profile_dirs)}: {profile_dir.name}")
try:
# Apply max_items limit per profile
max_per_profile = -1
if args.max_items > 0:
remaining = args.max_items - total_processed
if remaining <= 0:
break
max_per_profile = remaining
# Load history
documents = reader.load_data(
chrome_profile_path=str(profile_dir),
max_count=max_per_profile,
)
if documents:
all_documents.extend(documents)
total_processed += len(documents)
print(f"Processed {len(documents)} history entries from this profile")
except Exception as e:
print(f"Error processing {profile_dir}: {e}")
continue
if not all_documents:
print("No browser history found to process!")
return []
print(f"\nTotal history entries processed: {len(all_documents)}")
# Convert to text chunks
all_texts = create_text_chunks(
all_documents, chunk_size=args.chunk_size, chunk_overlap=args.chunk_overlap
)
return all_texts
if __name__ == "__main__":
import asyncio
# Example queries for browser history RAG
print("\n🌐 Browser History RAG Example")
print("=" * 50)
print("\nExample queries you can try:")
print("- 'What websites did I visit about machine learning?'")
print("- 'Find my search history about programming'")
print("- 'What YouTube videos did I watch recently?'")
print("- 'Show me websites about travel planning'")
print("\nNote: Make sure Chrome is closed before running\n")
rag = BrowserRAG()
asyncio.run(rag.run())

View File

@@ -1,108 +0,0 @@
"""
Document RAG example using the unified interface.
Supports PDF, TXT, MD, and other document formats.
"""
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from base_rag_example import BaseRAGExample, create_text_chunks
from llama_index.core import SimpleDirectoryReader
class DocumentRAG(BaseRAGExample):
"""RAG example for document processing (PDF, TXT, MD, etc.)."""
def __init__(self):
super().__init__(
name="Document",
description="Process and query documents (PDF, TXT, MD, etc.) with LEANN",
default_index_name="test_doc_files",
)
def _add_specific_arguments(self, parser):
"""Add document-specific arguments."""
doc_group = parser.add_argument_group("Document Parameters")
doc_group.add_argument(
"--data-dir",
type=str,
default="data",
help="Directory containing documents to index (default: data)",
)
doc_group.add_argument(
"--file-types",
nargs="+",
default=None,
help="Filter by file types (e.g., .pdf .txt .md). If not specified, all supported types are processed",
)
doc_group.add_argument(
"--chunk-size", type=int, default=256, help="Text chunk size (default: 256)"
)
doc_group.add_argument(
"--chunk-overlap", type=int, default=128, help="Text chunk overlap (default: 128)"
)
async def load_data(self, args) -> list[str]:
"""Load documents and convert to text chunks."""
print(f"Loading documents from: {args.data_dir}")
if args.file_types:
print(f"Filtering by file types: {args.file_types}")
else:
print("Processing all supported file types")
# Check if data directory exists
data_path = Path(args.data_dir)
if not data_path.exists():
raise ValueError(f"Data directory not found: {args.data_dir}")
# Load documents
reader_kwargs = {
"recursive": True,
"encoding": "utf-8",
}
if args.file_types:
reader_kwargs["required_exts"] = args.file_types
documents = SimpleDirectoryReader(args.data_dir, **reader_kwargs).load_data(
show_progress=True
)
if not documents:
print(f"No documents found in {args.data_dir} with extensions {args.file_types}")
return []
print(f"Loaded {len(documents)} documents")
# Convert to text chunks
all_texts = create_text_chunks(
documents, chunk_size=args.chunk_size, chunk_overlap=args.chunk_overlap
)
# Apply max_items limit if specified
if args.max_items > 0 and len(all_texts) > args.max_items:
print(f"Limiting to {args.max_items} chunks (from {len(all_texts)})")
all_texts = all_texts[: args.max_items]
return all_texts
if __name__ == "__main__":
import asyncio
# Example queries for document RAG
print("\n📄 Document RAG Example")
print("=" * 50)
print("\nExample queries you can try:")
print("- 'What are the main techniques LEANN uses?'")
print("- 'What is the technique DLPM?'")
print("- 'Who does Elizabeth Bennet marry?'")
print(
"- 'What is the problem of developing pan gu model Huawei meets? (盘古大模型开发中遇到什么问题?)'"
)
print("\nOr run without --query for interactive mode\n")
rag = DocumentRAG()
asyncio.run(rag.run())

View File

@@ -1,156 +0,0 @@
"""
Email RAG example using the unified interface.
Supports Apple Mail on macOS.
"""
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from base_rag_example import BaseRAGExample, create_text_chunks
from email_data.LEANN_email_reader import EmlxReader  # absolute import so this script also runs directly
class EmailRAG(BaseRAGExample):
"""RAG example for Apple Mail processing."""
def __init__(self):
# Set default values BEFORE calling super().__init__
self.max_items_default = -1 # Process all emails by default
self.embedding_model_default = (
"sentence-transformers/all-MiniLM-L6-v2" # Fast 384-dim model
)
super().__init__(
name="Email",
description="Process and query Apple Mail emails with LEANN",
default_index_name="mail_index",
)
def _add_specific_arguments(self, parser):
"""Add email-specific arguments."""
email_group = parser.add_argument_group("Email Parameters")
email_group.add_argument(
"--mail-path",
type=str,
default=None,
help="Path to Apple Mail directory (auto-detected if not specified)",
)
email_group.add_argument(
"--include-html", action="store_true", help="Include HTML content in email processing"
)
email_group.add_argument(
"--chunk-size", type=int, default=256, help="Text chunk size (default: 256)"
)
email_group.add_argument(
"--chunk-overlap", type=int, default=25, help="Text chunk overlap (default: 25)"
)
def _find_mail_directories(self) -> list[Path]:
"""Auto-detect all Apple Mail directories."""
mail_base = Path.home() / "Library" / "Mail"
if not mail_base.exists():
return []
# Find all Messages directories
messages_dirs = []
for item in mail_base.rglob("Messages"):
if item.is_dir():
messages_dirs.append(item)
return messages_dirs
async def load_data(self, args) -> list[str]:
"""Load emails and convert to text chunks."""
# Determine mail directories
if args.mail_path:
messages_dirs = [Path(args.mail_path)]
else:
print("Auto-detecting Apple Mail directories...")
messages_dirs = self._find_mail_directories()
if not messages_dirs:
print("No Apple Mail directories found!")
print("Please specify --mail-path manually")
return []
print(f"Found {len(messages_dirs)} mail directories")
# Create reader
reader = EmlxReader(include_html=args.include_html)
# Process each directory
all_documents = []
total_processed = 0
for i, messages_dir in enumerate(messages_dirs):
print(f"\nProcessing directory {i + 1}/{len(messages_dirs)}: {messages_dir}")
try:
# Count emlx files
emlx_files = list(messages_dir.glob("*.emlx"))
print(f"Found {len(emlx_files)} email files")
# Apply max_items limit per directory
max_per_dir = -1 # Default to process all
if args.max_items > 0:
remaining = args.max_items - total_processed
if remaining <= 0:
break
max_per_dir = remaining
# If args.max_items == -1, max_per_dir stays -1 (process all)
# Load emails - fix the parameter passing
documents = reader.load_data(
input_dir=str(messages_dir),
max_count=max_per_dir,
)
if documents:
all_documents.extend(documents)
total_processed += len(documents)
print(f"Processed {len(documents)} emails from this directory")
except Exception as e:
print(f"Error processing {messages_dir}: {e}")
continue
if not all_documents:
print("No emails found to process!")
return []
print(f"\nTotal emails processed: {len(all_documents)}")
print("now starting to split into text chunks ... take some time")
# Convert to text chunks
# Email reader uses chunk_overlap=25 as in original
all_texts = create_text_chunks(
all_documents, chunk_size=args.chunk_size, chunk_overlap=args.chunk_overlap
)
return all_texts
if __name__ == "__main__":
import asyncio
# Check platform
if sys.platform != "darwin":
print("\n⚠️ Warning: This example is designed for macOS (Apple Mail)")
print(" Windows/Linux support coming soon!\n")
# Example queries for email RAG
print("\n📧 Email RAG Example")
print("=" * 50)
print("\nExample queries you can try:")
print("- 'What did my boss say about deadlines?'")
print("- 'Find emails about travel expenses'")
print("- 'Show me emails from last month about the project'")
print("- 'What food did I order from DoorDash?'")
print("\nNote: You may need to grant Full Disk Access to your terminal\n")
rag = EmailRAG()
asyncio.run(rag.run())

View File

@@ -1,189 +0,0 @@
"""
WeChat History RAG example using the unified interface.
Supports WeChat chat history export and search.
"""
import subprocess
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from base_rag_example import BaseRAGExample
from history_data.wechat_history import WeChatHistoryReader  # absolute import so this script also runs directly
class WeChatRAG(BaseRAGExample):
"""RAG example for WeChat chat history."""
def __init__(self):
# Set default values BEFORE calling super().__init__
self.max_items_default = -1 # Match original default
self.embedding_model_default = (
"sentence-transformers/all-MiniLM-L6-v2" # Fast 384-dim model
)
super().__init__(
name="WeChat History",
description="Process and query WeChat chat history with LEANN",
default_index_name="wechat_history_magic_test_11Debug_new",
)
def _add_specific_arguments(self, parser):
"""Add WeChat-specific arguments."""
wechat_group = parser.add_argument_group("WeChat Parameters")
wechat_group.add_argument(
"--export-dir",
type=str,
default="./wechat_export",
help="Directory to store WeChat exports (default: ./wechat_export)",
)
wechat_group.add_argument(
"--force-export",
action="store_true",
help="Force re-export of WeChat data even if exports exist",
)
wechat_group.add_argument(
"--chunk-size", type=int, default=192, help="Text chunk size (default: 192)"
)
wechat_group.add_argument(
"--chunk-overlap", type=int, default=64, help="Text chunk overlap (default: 64)"
)
def _export_wechat_data(self, export_dir: Path) -> bool:
"""Export WeChat data using wechattweak-cli."""
print("Exporting WeChat data...")
# Check if WeChat is running
try:
result = subprocess.run(["pgrep", "WeChat"], capture_output=True, text=True)
if result.returncode != 0:
print("WeChat is not running. Please start WeChat first.")
return False
except Exception:
pass # pgrep might not be available on all systems
# Create export directory
export_dir.mkdir(parents=True, exist_ok=True)
# Run export command
cmd = ["packages/wechat-exporter/wechattweak-cli", "export", str(export_dir)]
try:
print(f"Running: {' '.join(cmd)}")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
print("WeChat data exported successfully!")
return True
else:
print(f"Export failed: {result.stderr}")
return False
except FileNotFoundError:
print("\nError: wechattweak-cli not found!")
print("Please install it first:")
print(" sudo packages/wechat-exporter/wechattweak-cli install")
return False
except Exception as e:
print(f"Export error: {e}")
return False
async def load_data(self, args) -> list[str]:
"""Load WeChat history and convert to text chunks."""
# Initialize WeChat reader with export capabilities
reader = WeChatHistoryReader()
# Find existing exports or create new ones using the centralized method
export_dirs = reader.find_or_export_wechat_data(args.export_dir)
if not export_dirs:
print("Failed to find or export WeChat data. Trying to find any existing exports...")
# Try to find any existing exports in common locations
export_dirs = reader.find_wechat_export_dirs()
if not export_dirs:
print("No WeChat data found. Please ensure WeChat exports exist.")
return []
# Load documents from all found export directories
all_documents = []
total_processed = 0
for i, export_dir in enumerate(export_dirs):
print(f"\nProcessing WeChat export {i + 1}/{len(export_dirs)}: {export_dir}")
try:
# Apply max_items limit per export
max_per_export = -1
if args.max_items > 0:
remaining = args.max_items - total_processed
if remaining <= 0:
break
max_per_export = remaining
documents = reader.load_data(
wechat_export_dir=str(export_dir),
max_count=max_per_export,
concatenate_messages=True, # Enable message concatenation for better context
)
if documents:
print(f"Loaded {len(documents)} chat documents from {export_dir}")
all_documents.extend(documents)
total_processed += len(documents)
else:
print(f"No documents loaded from {export_dir}")
except Exception as e:
print(f"Error processing {export_dir}: {e}")
continue
if not all_documents:
print("No documents loaded from any source. Exiting.")
return []
print(f"\nTotal loaded {len(all_documents)} chat documents from {len(export_dirs)} exports")
print("now starting to split into text chunks ... take some time")
# Convert to text chunks with contact information
all_texts = []
from llama_index.core.node_parser import SentenceSplitter
# Create the splitter once and reuse it for every document
text_splitter = SentenceSplitter(
chunk_size=args.chunk_size, chunk_overlap=args.chunk_overlap
)
for doc in all_documents:
# Split the document into chunks
nodes = text_splitter.get_nodes_from_documents([doc])
for node in nodes:
# Add contact information to each chunk
contact_name = doc.metadata.get("contact_name", "Unknown")
text = f"[Contact] means the message is from: {contact_name}\n" + node.get_content()
all_texts.append(text)
print(f"Created {len(all_texts)} text chunks from {len(all_documents)} documents")
return all_texts
if __name__ == "__main__":
import asyncio
# Check platform
if sys.platform != "darwin":
print("\n⚠️ Warning: WeChat export is only supported on macOS")
print(" You can still query existing exports on other platforms\n")
# Example queries for WeChat RAG
print("\n💬 WeChat History RAG Example")
print("=" * 50)
print("\nExample queries you can try:")
print("- 'Show me conversations about travel plans'")
print("- 'Find group chats about weekend activities'")
print("- '我想买魔术师约翰逊的球衣,给我一些对应聊天记录?'")
print("- 'What did we discuss about the project last month?'")
print("\nNote: WeChat must be running for export to work\n")
rag = WeChatRAG()
asyncio.run(rag.run())

View File

Binary file not shown.


View File

Binary file not shown.


View File

@@ -1,148 +0,0 @@
import argparse
import os
import time
from pathlib import Path
from leann import LeannBuilder, LeannSearcher
def _meta_exists(index_path: str) -> bool:
p = Path(index_path)
return (p.parent / f"{p.stem}.meta.json").exists()
def ensure_index(index_path: str, backend_name: str, num_docs: int, is_recompute: bool) -> None:
if _meta_exists(index_path):
return
kwargs = {}
if backend_name == "hnsw":
kwargs["is_compact"] = is_recompute
builder = LeannBuilder(
backend_name=backend_name,
embedding_model=os.getenv("LEANN_EMBED_MODEL", "facebook/contriever"),
embedding_mode=os.getenv("LEANN_EMBED_MODE", "sentence-transformers"),
graph_degree=32,
complexity=64,
is_recompute=is_recompute,
num_threads=4,
**kwargs,
)
for i in range(num_docs):
builder.add_text(
f"This is a test document number {i}. It contains some repeated text for benchmarking."
)
builder.build_index(index_path)
def _bench_group(
index_path: str,
recompute: bool,
query: str,
repeats: int,
complexity: int = 32,
top_k: int = 10,
) -> float:
# Independent searcher per group; fixed port when recompute
searcher = LeannSearcher(index_path=index_path)
# Warm-up once
_ = searcher.search(
query,
top_k=top_k,
complexity=complexity,
recompute_embeddings=recompute,
)
def _once() -> float:
t0 = time.time()
_ = searcher.search(
query,
top_k=top_k,
complexity=complexity,
recompute_embeddings=recompute,
)
return time.time() - t0
if repeats <= 1:
t = _once()
else:
vals = [_once() for _ in range(repeats)]
vals.sort()
t = vals[len(vals) // 2]
searcher.cleanup()
return t
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--num-docs", type=int, default=5000)
parser.add_argument("--repeats", type=int, default=3)
parser.add_argument("--complexity", type=int, default=32)
args = parser.parse_args()
base = Path.cwd() / ".leann" / "indexes" / f"bench_n{args.num_docs}"
base.mkdir(parents=True, exist_ok=True)
# ---------- Build HNSW variants ----------
hnsw_r = str(base / f"hnsw_recompute_n{args.num_docs}.leann")
hnsw_nr = str(base / f"hnsw_norecompute_n{args.num_docs}.leann")
ensure_index(hnsw_r, "hnsw", args.num_docs, True)
ensure_index(hnsw_nr, "hnsw", args.num_docs, False)
# ---------- Build DiskANN variants ----------
diskann_r = str(base / "diskann_r.leann")
diskann_nr = str(base / "diskann_nr.leann")
ensure_index(diskann_r, "diskann", args.num_docs, True)
ensure_index(diskann_nr, "diskann", args.num_docs, False)
# ---------- Helpers ----------
def _size_for(prefix: str) -> int:
p = Path(prefix)
base_dir = p.parent
stem = p.stem
total = 0
for f in base_dir.iterdir():
if f.is_file() and f.name.startswith(stem):
total += f.stat().st_size
return total
# ---------- HNSW benchmark ----------
t_hnsw_r = _bench_group(
hnsw_r, True, "test document number 42", repeats=args.repeats, complexity=args.complexity
)
t_hnsw_nr = _bench_group(
hnsw_nr, False, "test document number 42", repeats=args.repeats, complexity=args.complexity
)
size_hnsw_r = _size_for(hnsw_r)
size_hnsw_nr = _size_for(hnsw_nr)
print("Benchmark results (HNSW):")
print(f" recompute=True: search_time={t_hnsw_r:.3f}s, size={size_hnsw_r / 1024 / 1024:.1f}MB")
print(
f" recompute=False: search_time={t_hnsw_nr:.3f}s, size={size_hnsw_nr / 1024 / 1024:.1f}MB"
)
print(" Expectation: no-recompute should be faster but larger on disk.")
# ---------- DiskANN benchmark ----------
t_diskann_r = _bench_group(
diskann_r, True, "DiskANN R test doc 123", repeats=args.repeats, complexity=args.complexity
)
t_diskann_nr = _bench_group(
diskann_nr,
False,
"DiskANN NR test doc 123",
repeats=args.repeats,
complexity=args.complexity,
)
size_diskann_r = _size_for(diskann_r)
size_diskann_nr = _size_for(diskann_nr)
print("\nBenchmark results (DiskANN):")
print(f" build(recompute=True, partition): size={size_diskann_r / 1024 / 1024:.1f}MB")
print(f" build(recompute=False): size={size_diskann_nr / 1024 / 1024:.1f}MB")
print(f" search recompute=True (final rerank): {t_diskann_r:.3f}s")
print(f" search recompute=False (PQ only): {t_diskann_nr:.3f}s")
if __name__ == "__main__":
main()

View File

@@ -1,286 +0,0 @@
#!/usr/bin/env python3
"""
DiskANN vs HNSW Search Performance Comparison
This benchmark compares search performance between DiskANN and HNSW backends:
- DiskANN: With graph partitioning enabled (is_recompute=True)
- HNSW: With recompute enabled (is_recompute=True)
- Tests performance across different dataset sizes
- Measures search latency, recall, and index size
"""
import gc
import multiprocessing as mp
import tempfile
import time
from pathlib import Path
from typing import Any
import numpy as np
# Prefer 'fork' start method to avoid POSIX semaphore leaks on macOS
try:
mp.set_start_method("fork", force=True)
except Exception:
pass
def create_test_texts(n_docs: int) -> list[str]:
"""Create synthetic test documents for benchmarking."""
np.random.seed(42)
topics = [
"machine learning and artificial intelligence",
"natural language processing and text analysis",
"computer vision and image recognition",
"data science and statistical analysis",
"deep learning and neural networks",
"information retrieval and search engines",
"database systems and data management",
"software engineering and programming",
"cybersecurity and network protection",
"cloud computing and distributed systems",
]
texts = []
for i in range(n_docs):
topic = topics[i % len(topics)]
variation = np.random.randint(1, 100)
text = (
f"This is document {i} about {topic}. Content variation {variation}. "
f"Additional information about {topic} with details and examples. "
f"Technical discussion of {topic} including implementation aspects."
)
texts.append(text)
return texts
def benchmark_backend(
backend_name: str, texts: list[str], test_queries: list[str], backend_kwargs: dict[str, Any]
) -> dict[str, float]:
"""Benchmark a specific backend with the given configuration."""
from leann.api import LeannBuilder, LeannSearcher
print(f"\n🔧 Testing {backend_name.upper()} backend...")
with tempfile.TemporaryDirectory() as temp_dir:
index_path = str(Path(temp_dir) / f"benchmark_{backend_name}.leann")
# Build index
print(f"📦 Building {backend_name} index with {len(texts)} documents...")
start_time = time.time()
builder = LeannBuilder(
backend_name=backend_name,
embedding_model="facebook/contriever",
embedding_mode="sentence-transformers",
**backend_kwargs,
)
for text in texts:
builder.add_text(text)
builder.build_index(index_path)
build_time = time.time() - start_time
# Measure index size
index_dir = Path(index_path).parent
index_files = list(index_dir.glob(f"{Path(index_path).stem}.*"))
total_size = sum(f.stat().st_size for f in index_files if f.is_file())
size_mb = total_size / (1024 * 1024)
print(f" ✅ Build completed in {build_time:.2f}s, index size: {size_mb:.1f}MB")
# Search benchmark
print("🔍 Running search benchmark...")
searcher = LeannSearcher(index_path)
search_times = []
all_results = []
for query in test_queries:
start_time = time.time()
results = searcher.search(query, top_k=5)
search_time = time.time() - start_time
search_times.append(search_time)
all_results.append(results)
avg_search_time = np.mean(search_times) * 1000 # Convert to ms
print(f" ✅ Average search time: {avg_search_time:.1f}ms")
# Check for valid scores (detect -inf issues)
all_scores = [
result.score
for results in all_results
for result in results
if result.score is not None
]
valid_scores = [
score for score in all_scores if score != float("-inf") and score != float("inf")
]
score_validity_rate = len(valid_scores) / len(all_scores) if all_scores else 0
# Clean up (ensure embedding server shutdown and object GC)
try:
if hasattr(searcher, "cleanup"):
searcher.cleanup()
del searcher
del builder
gc.collect()
except Exception as e:
print(f"⚠️ Warning: Resource cleanup error: {e}")
return {
"build_time": build_time,
"avg_search_time_ms": avg_search_time,
"index_size_mb": size_mb,
"score_validity_rate": score_validity_rate,
}
def run_comparison(n_docs: int = 500, n_queries: int = 10):
"""Run performance comparison between DiskANN and HNSW."""
print("🚀 Starting DiskANN vs HNSW Performance Comparison")
print(f"📊 Dataset: {n_docs} documents, {n_queries} test queries")
# Create test data
texts = create_test_texts(n_docs)
test_queries = [
"machine learning algorithms",
"natural language processing",
"computer vision techniques",
"data analysis methods",
"neural network architectures",
"database query optimization",
"software development practices",
"security vulnerabilities",
"cloud infrastructure",
"distributed computing",
][:n_queries]
# HNSW benchmark
hnsw_results = benchmark_backend(
backend_name="hnsw",
texts=texts,
test_queries=test_queries,
backend_kwargs={
"is_recompute": True, # Enable recompute for fair comparison
"M": 16,
"efConstruction": 200,
},
)
# DiskANN benchmark
diskann_results = benchmark_backend(
backend_name="diskann",
texts=texts,
test_queries=test_queries,
backend_kwargs={
"is_recompute": True, # Enable graph partitioning
"num_neighbors": 32,
"search_list_size": 50,
},
)
# Performance comparison
print("\n📈 Performance Comparison Results")
print(f"{'=' * 60}")
print(f"{'Metric':<25} {'HNSW':<15} {'DiskANN':<15} {'Speedup':<10}")
print(f"{'-' * 60}")
# Build time comparison
build_speedup = hnsw_results["build_time"] / diskann_results["build_time"]
print(
f"{'Build Time (s)':<25} {hnsw_results['build_time']:<15.2f} {diskann_results['build_time']:<15.2f} {build_speedup:<10.2f}x"
)
# Search time comparison
search_speedup = hnsw_results["avg_search_time_ms"] / diskann_results["avg_search_time_ms"]
print(
f"{'Search Time (ms)':<25} {hnsw_results['avg_search_time_ms']:<15.1f} {diskann_results['avg_search_time_ms']:<15.1f} {search_speedup:<10.2f}x"
)
# Index size comparison
size_ratio = diskann_results["index_size_mb"] / hnsw_results["index_size_mb"]
print(
f"{'Index Size (MB)':<25} {hnsw_results['index_size_mb']:<15.1f} {diskann_results['index_size_mb']:<15.1f} {size_ratio:<10.2f}x"
)
# Score validity
print(
f"{'Score Validity (%)':<25} {hnsw_results['score_validity_rate'] * 100:<15.1f} {diskann_results['score_validity_rate'] * 100:<15.1f}"
)
print(f"{'=' * 60}")
print("\n🎯 Summary:")
if search_speedup > 1:
print(f" DiskANN is {search_speedup:.2f}x faster than HNSW for search")
else:
print(f" HNSW is {1 / search_speedup:.2f}x faster than DiskANN for search")
if size_ratio > 1:
print(f" DiskANN uses {size_ratio:.2f}x more storage than HNSW")
else:
print(f" DiskANN uses {1 / size_ratio:.2f}x less storage than HNSW")
print(
f" Both backends achieved {min(hnsw_results['score_validity_rate'], diskann_results['score_validity_rate']) * 100:.1f}% score validity"
)
if __name__ == "__main__":
import sys
exit_code = 0
try:
# Handle help request
if len(sys.argv) > 1 and sys.argv[1] in ["-h", "--help", "help"]:
print("DiskANN vs HNSW Performance Comparison")
print("=" * 50)
print(f"Usage: python {sys.argv[0]} [n_docs] [n_queries]")
print()
print("Arguments:")
print(" n_docs Number of documents to index (default: 500)")
print(" n_queries Number of test queries to run (default: 10)")
print()
print("Examples:")
print(" python benchmarks/diskann_vs_hnsw_speed_comparison.py")
print(" python benchmarks/diskann_vs_hnsw_speed_comparison.py 1000")
print(" python benchmarks/diskann_vs_hnsw_speed_comparison.py 2000 20")
sys.exit(0)
# Parse command line arguments
n_docs = int(sys.argv[1]) if len(sys.argv) > 1 else 500
n_queries = int(sys.argv[2]) if len(sys.argv) > 2 else 10
print("DiskANN vs HNSW Performance Comparison")
print("=" * 50)
print(f"Dataset: {n_docs} documents, {n_queries} queries")
print()
run_comparison(n_docs=n_docs, n_queries=n_queries)
except KeyboardInterrupt:
print("\n⚠️ Benchmark interrupted by user")
exit_code = 130
except Exception as e:
print(f"\n❌ Benchmark failed: {e}")
exit_code = 1
finally:
# Ensure clean exit (forceful to prevent rare hangs from atexit/threads)
try:
gc.collect()
print("\n🧹 Cleanup completed")
# Flush stdio to ensure message is visible before hard-exit
try:
import sys as _sys
_sys.stdout.flush()
_sys.stderr.flush()
except Exception:
pass
except Exception:
pass
# Use os._exit to bypass atexit handlers that may hang in rare cases
import os as _os
_os._exit(exit_code)

View File

44
data/README.md Normal file
View File

@@ -0,0 +1,44 @@
---
license: mit
---
# LEANN-RAG Evaluation Data
This repository contains the necessary data to run the recall evaluation scripts for the [LEANN-RAG](https://huggingface.co/LEANN-RAG) project.
## Dataset Components
This dataset is structured into three main parts:
1. **Pre-built LEANN Indices**:
* `dpr/`: A pre-built index for the DPR dataset.
* `rpj_wiki/`: A pre-built index for the RPJ-Wiki dataset.
These indices were created using the `leann-core` library and are required by the `LeannSearcher`.
2. **Ground Truth Data**:
* `ground_truth/`: Contains the ground truth files (`flat_results_nq_k3.json`) for both the DPR and RPJ-Wiki datasets. These files map queries to the original passage IDs from the Natural Questions benchmark, evaluated using the Contriever model.
3. **Queries**:
* `queries/`: Contains the `nq_open.jsonl` file with the Natural Questions queries used for the evaluation.
## Usage
To use this data, you can download it locally using the `huggingface-hub` library. First, install the library:
```bash
pip install huggingface-hub
```
Then, you can download the entire dataset to a local directory (e.g., `data/`) with the following Python script:
```python
from huggingface_hub import snapshot_download
snapshot_download(
repo_id="LEANN-RAG/leann-rag-evaluation-data",
repo_type="dataset",
local_dir="data"
)
```
This will download all the necessary files into a local `data` folder, preserving the repository structure. The evaluation scripts in the main [LEANN-RAG Space](https://huggingface.co/LEANN-RAG) are configured to work with this data structure.

View File

@@ -1,123 +0,0 @@
# Thinking Budget Feature Implementation
## Overview
This document describes the implementation of the **thinking budget** feature for LEANN, which allows users to control the computational effort for reasoning models like GPT-Oss:20b.
## Feature Description
The thinking budget feature provides three levels of computational effort for reasoning models:
- **`low`**: Fast responses, basic reasoning (default for simple queries)
- **`medium`**: Balanced speed and reasoning depth
- **`high`**: Maximum reasoning effort, best for complex analytical questions
## Implementation Details
### 1. Command Line Interface
Added `--thinking-budget` parameter to both CLI and RAG examples:
```bash
# LEANN CLI
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high
# RAG Examples
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high
python apps/document_rag.py --llm openai --llm-model o3 --thinking-budget medium
```
### 2. LLM Backend Support
#### Ollama Backend (`packages/leann-core/src/leann/chat.py`)
```python
def ask(self, prompt: str, **kwargs) -> str:
# Handle thinking budget for reasoning models
options = kwargs.copy()
thinking_budget = kwargs.get("thinking_budget")
if thinking_budget:
options.pop("thinking_budget", None)
if thinking_budget in ["low", "medium", "high"]:
options["reasoning"] = {"effort": thinking_budget, "exclude": False}
```
**API Format**: Uses Ollama's `reasoning` parameter with `effort` and `exclude` fields.
#### OpenAI Backend (`packages/leann-core/src/leann/chat.py`)
```python
def ask(self, prompt: str, **kwargs) -> str:
# Handle thinking budget for reasoning models
thinking_budget = kwargs.get("thinking_budget")
if thinking_budget and thinking_budget in ["low", "medium", "high"]:
# Check if this is an o-series model
o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
if any(model in self.model for model in o_series_models):
params["reasoning_effort"] = thinking_budget
```
**API Format**: Uses OpenAI's `reasoning_effort` parameter for o-series models.
### 3. Parameter Propagation
The thinking budget parameter is properly propagated through the LEANN architecture:
1. **CLI** (`packages/leann-core/src/leann/cli.py`): Captures `--thinking-budget` argument
2. **Base RAG** (`apps/base_rag_example.py`): Adds parameter to argument parser
3. **LeannChat** (`packages/leann-core/src/leann/api.py`): Passes `llm_kwargs` to LLM
4. **LLM Interface**: Handles the parameter in backend-specific implementations
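A minimal sketch of that flow from Python, assuming `llm_kwargs` is forwarded from `LeannChat.ask` to the backend's `ask()` as described (the index name is illustrative):
```python
from leann.api import LeannChat

chat = LeannChat(index_path="my-index.leann")
response = chat.ask(
    "Compare the trade-offs discussed across these documents",
    llm_kwargs={"thinking_budget": "high"},  # forwarded to the LLM backend
)
print(response)
```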
## Files Modified
### Core Implementation
- `packages/leann-core/src/leann/chat.py`: Added thinking budget support to OllamaChat and OpenAIChat
- `packages/leann-core/src/leann/cli.py`: Added `--thinking-budget` argument
- `apps/base_rag_example.py`: Added thinking budget parameter to RAG examples
### Documentation
- `README.md`: Added thinking budget parameter to usage examples
- `docs/configuration-guide.md`: Added detailed documentation and usage guidelines
### Examples
- `examples/thinking_budget_demo.py`: Comprehensive demo script with usage examples
## Usage Examples
### Basic Usage
```bash
# High reasoning effort for complex questions
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high
# Medium reasoning for balanced performance
leann ask my-index --llm openai --model gpt-4o --thinking-budget medium
# Low reasoning for fast responses
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget low
```
### RAG Examples
```bash
# Email RAG with high reasoning
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high
# Document RAG with medium reasoning
python apps/document_rag.py --llm openai --llm-model gpt-4o --thinking-budget medium
```
## Supported Models
### Ollama Models
- **GPT-Oss:20b**: Primary target model with reasoning capabilities
- **Other reasoning models**: Any Ollama model that supports the `reasoning` parameter
### OpenAI Models
- **o3, o3-mini, o4-mini, o1**: o-series reasoning models with `reasoning_effort` parameter
- **GPT-OSS models**: Models that support reasoning capabilities
## Testing
The implementation includes comprehensive testing:
- Parameter handling verification
- Backend-specific API format validation
- CLI argument parsing tests
- Integration with existing LEANN architecture
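A sketch of what such a parameter-handling check can look like, mirroring the OllamaChat mapping shown above (the helper name is illustrative):
```python
def map_thinking_budget(kwargs: dict) -> dict:
    """Reproduce the Ollama options mapping for testing purposes."""
    options = kwargs.copy()
    budget = options.pop("thinking_budget", None)
    if budget in ("low", "medium", "high"):
        options["reasoning"] = {"effort": budget, "exclude": False}
    return options

# The budget is translated into Ollama's reasoning parameter...
assert map_thinking_budget({"thinking_budget": "high"})["reasoning"]["effort"] == "high"
# ...and requests without a budget are passed through untouched.
assert "reasoning" not in map_thinking_budget({"temperature": 0.0})
```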

View File

@@ -1,384 +0,0 @@
# LEANN Configuration Guide
This guide helps you optimize LEANN for different use cases and understand the trade-offs between various configuration options.
## Getting Started: Simple is Better
When first trying LEANN, start with a small dataset to quickly validate your approach:
**For document RAG**: The default `data/` directory works perfectly - it includes two AI research papers, the novel *Pride and Prejudice*, and a technical report
```bash
python -m apps.document_rag --query "What techniques does LEANN use?"
```
**For other data sources**: Limit the dataset size for quick testing
```bash
# WeChat: Test with recent messages only
python -m apps.wechat_rag --max-items 100 --query "What did we discuss about the project timeline?"
# Browser history: Last few days
python -m apps.browser_rag --max-items 500 --query "Find documentation about vector databases"
# Email: Recent inbox
python -m apps.email_rag --max-items 200 --query "Who sent updates about the deployment status?"
```
Once validated, scale up gradually:
- 100 documents → 1,000 → 10,000 → full dataset (`--max-items -1`)
- This helps identify issues early before committing to long processing times
## Embedding Model Selection: Understanding the Trade-offs
Based on our experience developing LEANN, embedding models fall into three categories:
### Small Models (< 100M parameters)
**Example**: `sentence-transformers/all-MiniLM-L6-v2` (22M params)
- **Pros**: Lightweight, fast for both indexing and inference
- **Cons**: Lower semantic understanding, may miss nuanced relationships
- **Use when**: Speed is critical, handling simple queries, interactive mode, or just experimenting with LEANN. If time is not a constraint, consider using a larger/better embedding model
### Medium Models (100M-500M parameters)
**Example**: `facebook/contriever` (110M params), `BAAI/bge-base-en-v1.5` (110M params)
- **Pros**: Balanced performance, good multilingual support, reasonable speed
- **Cons**: Requires more compute than small models
- **Use when**: Need quality results without extreme compute requirements, general-purpose RAG applications
### Large Models (500M+ parameters)
**Example**: `Qwen/Qwen3-Embedding-0.6B` (600M params), `intfloat/multilingual-e5-large` (560M params)
- **Pros**: Best semantic understanding, captures complex relationships, excellent multilingual support. **Qwen3-Embedding-0.6B achieves nearly OpenAI API performance!**
- **Cons**: Slower inference, longer index build times
- **Use when**: Quality is paramount and you have sufficient compute resources. **Highly recommended** for production use
### Quick Start: Cloud and Local Embedding Options
**OpenAI Embeddings (Fastest Setup)**
For immediate testing without local model downloads (also recommended if you [do not have a GPU](https://github.com/yichuan-w/LEANN/issues/43) and are not too concerned about document privacy; embeddings are computed and recomputed via the OpenAI API):
```bash
# Set OpenAI embeddings (requires OPENAI_API_KEY)
--embedding-mode openai --embedding-model text-embedding-3-small
```
**Ollama Embeddings (Privacy-Focused)**
For local embeddings with complete privacy:
```bash
# First, pull an embedding model
ollama pull nomic-embed-text
# Use Ollama embeddings
--embedding-mode ollama --embedding-model nomic-embed-text
```
<details>
<summary><strong>Cloud vs Local Trade-offs</strong></summary>
**OpenAI Embeddings** (`text-embedding-3-small/large`)
- **Pros**: No local compute needed, consistently fast, high quality
- **Cons**: Requires API key, costs money, data leaves your system, [known limitations with certain languages](https://yichuan-w.github.io/blog/lessons_learned_in_dev_leann/)
- **When to use**: Prototyping, non-sensitive data, need immediate results
**Local Embeddings**
- **Pros**: Complete privacy, no ongoing costs, full control, can sometimes outperform OpenAI embeddings
- **Cons**: Slower than cloud APIs, requires local compute resources
- **When to use**: Production systems, sensitive data, cost-sensitive applications
</details>
## Index Selection: Matching Your Scale
### HNSW (Hierarchical Navigable Small World)
**Best for**: Small to medium datasets (< 10M vectors) - **Default and recommended for extreme low storage**
- Full recomputation required
- High memory usage during build phase
- Excellent recall (95%+)
```bash
# Optimal for most use cases
--backend-name hnsw --graph-degree 32 --build-complexity 64
```
### DiskANN
**Best for**: Large datasets, especially when you want `recompute=True`.
**Key advantages:**
- **Faster search** on large datasets (3x+ speedup vs HNSW in many cases)
- **Smart storage**: `recompute=True` enables automatic graph partitioning for smaller indexes
- **Better scaling**: Designed for 100k+ documents
**Recompute behavior:**
- `recompute=True` (recommended): Pure PQ traversal + final reranking - faster and enables partitioning
- `recompute=False`: PQ + partial real distances during traversal - slower but higher accuracy
```bash
# Recommended for most use cases
--backend-name diskann --graph-degree 32 --build-complexity 64
```
**Performance Benchmark**: Run `uv run benchmarks/diskann_vs_hnsw_speed_comparison.py` to compare DiskANN and HNSW on your system.
## LLM Selection: Engine and Model Comparison
### LLM Engines
**OpenAI** (`--llm openai`)
- **Pros**: Best quality, consistent performance, no local resources needed
- **Cons**: Costs money ($0.15-2.5 per million tokens), requires internet, data privacy concerns
- **Models**: `gpt-4o-mini` (fast, cheap), `gpt-4o` (best quality), `o3` (reasoning), `o3-mini` (reasoning, cheaper)
- **Thinking Budget**: Use `--thinking-budget low/medium/high` for o-series reasoning models (o3, o3-mini, o4-mini)
- **Note**: Our current default, but we recommend switching to Ollama for most use cases
**Ollama** (`--llm ollama`)
- **Pros**: Fully local, free, privacy-preserving, good model variety
- **Cons**: Requires local GPU/CPU resources, slower than cloud APIs, and requires installing the extra [ollama app](https://github.com/ollama/ollama?tab=readme-ov-file#ollama) and pre-downloading models via `ollama pull`
- **Models**: `qwen3:0.6b` (ultra-fast), `qwen3:1.7b` (balanced), `qwen3:4b` (good quality), `qwen3:7b` (high quality), `deepseek-r1:1.5b` (reasoning)
- **Thinking Budget**: Use `--thinking-budget low/medium/high` for reasoning models like GPT-Oss:20b
**HuggingFace** (`--llm hf`)
- **Pros**: Free tier available, huge model selection, direct model loading (vs Ollama's server-based approach)
- **Cons**: More complex initial setup
- **Models**: `Qwen/Qwen3-1.7B-FP8`
## Parameter Tuning Guide
### Search Complexity Parameters
**`--build-complexity`** (index building)
- Controls thoroughness during index construction
- Higher = better recall but slower build
- Recommendations:
- 32: Quick prototyping
- 64: Balanced (default)
- 128: Production systems
- 256: Maximum quality
**`--search-complexity`** (query time)
- Controls search thoroughness
- Higher = better results but slower
- Recommendations:
- 16: Fast/Interactive search
- 32: High quality with diversity
- 64+: Maximum accuracy
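For example, a thorough build paired with a fast interactive search might look like this (flag values are illustrative, not prescriptive):
```bash
# Build once with higher quality, then keep queries snappy
leann build my-index --backend-name hnsw --build-complexity 128
leann search my-index "your query" --search-complexity 16
```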
### Top-K Selection
**`--top-k`** (number of retrieved chunks)
- More chunks = better context but slower LLM processing
- Should always be smaller than `--search-complexity`
- Guidelines:
- 10-20: General questions (default: 20)
- 30+: Complex multi-hop reasoning requiring comprehensive context
**Trade-off formula**:
- Retrieval time ∝ log(n) × search_complexity
- LLM processing time ∝ top_k × chunk_size
- Total context = top_k × chunk_size tokens
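As a rough sketch of these proportionalities (a toy model with unit constants, not measured numbers):
```python
import math

def estimate_costs(n_docs: int, search_complexity: int, top_k: int, chunk_size: int = 256):
    retrieval = math.log(n_docs) * search_complexity  # retrieval time ∝ log(n) × search_complexity
    llm = top_k * chunk_size                          # LLM processing ∝ top_k × chunk_size
    context_tokens = top_k * chunk_size               # total context fed to the LLM
    return retrieval, llm, context_tokens

# Doubling top_k leaves retrieval untouched but doubles LLM-side work
print(estimate_costs(1_000_000, search_complexity=32, top_k=20))
print(estimate_costs(1_000_000, search_complexity=32, top_k=40))
```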
### Thinking Budget for Reasoning Models
**`--thinking-budget`** (reasoning effort level)
- Controls the computational effort for reasoning models
- Options: `low`, `medium`, `high`
- Guidelines:
- `low`: Fast responses, basic reasoning (default for simple queries)
- `medium`: Balanced speed and reasoning depth
- `high`: Maximum reasoning effort, best for complex analytical questions
- **Supported Models**:
- **Ollama**: `gpt-oss:20b`, `gpt-oss:120b`
- **OpenAI**: `o3`, `o3-mini`, `o4-mini`, `o1` (o-series reasoning models)
- **Note**: Models without reasoning support will show a warning and proceed without reasoning parameters
- **Example**: `--thinking-budget high` for complex analytical questions
**📖 For detailed usage examples and implementation details, check out [Thinking Budget Documentation](THINKING_BUDGET_FEATURE.md)**
**💡 Quick Examples:**
```bash
# OpenAI o-series reasoning model
python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
--index-dir hnswbuild --backend hnsw \
--llm openai --llm-model o3 --thinking-budget medium
# Ollama reasoning model
python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
--index-dir hnswbuild --backend hnsw \
--llm ollama --llm-model gpt-oss:20b --thinking-budget high
```
### Graph Degree (HNSW/DiskANN)
**`--graph-degree`**
- Number of connections per node in the graph
- Higher = better recall but more memory
- HNSW: 16-32 (default: 32)
- DiskANN: 32-128 (default: 64)
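For instance, following the defaults above (values are illustrative):
```bash
# HNSW: lighter graph, fine for small to medium corpora
leann build my-index --backend-name hnsw --graph-degree 32
# DiskANN: denser graph for larger corpora (more memory, better recall)
leann build my-index --backend-name diskann --graph-degree 64
```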
## Performance Optimization Checklist
### If Embedding is Too Slow
1. **Switch to smaller model**:
```bash
# From large model
--embedding-model Qwen/Qwen3-Embedding-0.6B
# To small model
--embedding-model sentence-transformers/all-MiniLM-L6-v2
```
2. **Limit dataset size for testing**:
```bash
--max-items 1000 # Process first 1k items only
```
3. **Use MLX on Apple Silicon** (optional optimization):
```bash
--embedding-mode mlx --embedding-model mlx-community/Qwen3-Embedding-0.6B-8bit
```
MLX may not be the best choice: in our tests it offered only a 1.3x speedup over HF, so Ollama may be the better option for embedding generation
4. **Use Ollama**
```bash
--embedding-mode ollama --embedding-model nomic-embed-text
```
To discover additional embedding models in Ollama, check out https://ollama.com/search?c=embedding or read more about embedding models at https://ollama.com/blog/embedding-models. Be sure to pick a model size that works best for your hardware.
### If Search Quality is Poor
1. **Increase retrieval count**:
```bash
--top-k 30 # Retrieve more candidates
```
2. **Upgrade embedding model**:
```bash
# For English
--embedding-model BAAI/bge-base-en-v1.5
# For multilingual
--embedding-model intfloat/multilingual-e5-large
```
## Understanding the Trade-offs
Every configuration choice involves trade-offs:
| Factor | Small/Fast | Large/Quality |
|--------|------------|---------------|
| Embedding Model | `all-MiniLM-L6-v2` | `Qwen/Qwen3-Embedding-0.6B` |
| Chunk Size | 512 tokens | 128 tokens |
| Index Type | HNSW | DiskANN |
| LLM | `qwen3:1.7b` | `gpt-4o` |
The key is finding the right balance for your specific use case. Start small and simple, measure performance, then scale up only where needed.
## Low-resource setups
If you don't have a local GPU, or builds and searches are too slow, use one or more of the options below.
### 1) Use OpenAI embeddings (no local compute)
Fastest path with zero local GPU requirements. Set your API key and use OpenAI embeddings during build and search:
```bash
export OPENAI_API_KEY=sk-...
# Build with OpenAI embeddings
leann build my-index \
--embedding-mode openai \
--embedding-model text-embedding-3-small
# Search with OpenAI embeddings (recompute at query time)
leann search my-index "your query" \
--recompute
```
### 2) Run remote builds with SkyPilot (cloud GPU)
Offload embedding generation and index building to a GPU VM using [SkyPilot](https://skypilot.readthedocs.io/en/latest/). A template is provided at `sky/leann-build.yaml`.
```bash
# One-time: install and configure SkyPilot
pip install skypilot
# Launch with defaults (L4:1) and mount ./data to ~/leann-data; the build runs automatically
sky launch -c leann-gpu sky/leann-build.yaml
# Override parameters via -e key=value (optional)
sky launch -c leann-gpu sky/leann-build.yaml \
-e index_name=my-index \
-e backend=hnsw \
-e embedding_mode=sentence-transformers \
-e embedding_model=Qwen/Qwen3-Embedding-0.6B
# Copy the built index back to your local .leann (use rsync)
rsync -Pavz leann-gpu:~/.leann/indexes/my-index ./.leann/indexes/
```
### 3) Disable recomputation to trade storage for speed
If you need lower latency and have more storage/memory, disable recomputation. This stores full embeddings and avoids recomputing at search time.
```bash
# Build without recomputation (HNSW requires non-compact in this mode)
leann build my-index --no-recompute --no-compact
# Search without recomputation
leann search my-index "your query" --no-recompute
```
When to use:
- Extreme low latency requirements (high QPS, interactive assistants)
- Read-heavy workloads where storage is cheaper than latency
- No always-available GPU
Constraints:
- HNSW: when `--no-recompute` is set, LEANN automatically disables compact mode during build
- DiskANN: supported; `--no-recompute` skips selective recompute during search
Storage impact:
- Storing N embeddings of dimension D with float32 requires approximately N × D × 4 bytes
- Example: 1,000,000 chunks × 768 dims × 4 bytes ≈ 2.86 GB (plus graph/metadata)
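A quick sanity check of that estimate in Python:
```python
n_chunks, dim, bytes_per_float32 = 1_000_000, 768, 4
size_gb = n_chunks * dim * bytes_per_float32 / 1024**3
print(f"{size_gb:.2f} GB")  # ≈ 2.86 GB, excluding graph/metadata
```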
Converting an existing index (rebuild required):
```bash
# Rebuild in-place (ensure you still have original docs or can regenerate chunks)
leann build my-index --force --no-recompute --no-compact
```
Python API usage:
```python
from leann import LeannSearcher
searcher = LeannSearcher("/path/to/my-index.leann")
results = searcher.search("your query", top_k=10, recompute_embeddings=False)
```
Trade-offs:
- Lower latency and fewer network hops at query time
- Significantly higher storage (10–100× vs selective recomputation)
- Slightly larger memory footprint during build and search
Quick benchmark results (`benchmarks/benchmark_no_recompute.py` with 5k texts, complexity=32):
- HNSW
```text
recompute=True: search_time=0.818s, size=1.1MB
recompute=False: search_time=0.012s, size=16.6MB
```
- DiskANN
```text
recompute=True: search_time=0.041s, size=5.9MB
recompute=False: search_time=0.013s, size=24.6MB
```
Conclusion:
- **HNSW**: `no-recompute` is significantly faster (no embedding recomputation) but requires much more storage (stores all embeddings)
- **DiskANN**: `no-recompute` uses PQ + partial real distances during traversal (slower but higher accuracy), while `recompute=True` uses pure PQ traversal + final reranking (faster traversal, enables build-time partitioning for smaller storage)
## Further Reading
- [Lessons Learned Developing LEANN](https://yichuan-w.github.io/blog/lessons_learned_in_dev_leann/)
- [LEANN Technical Paper](https://arxiv.org/abs/2506.08276)
- [DiskANN Original Paper](https://papers.nips.cc/paper/2019/file/09853c7fb1d3f8ee67a61b6bf4a7f8e6-Paper.pdf)
- [SSD-based Graph Partitioning](https://github.com/SonglinLife/SSD_BASED_PLAN)

View File

@@ -5,7 +5,7 @@
- **🔄 Real-time Embeddings** - Eliminate heavy embedding storage with dynamic computation, using optimized ZMQ servers and a search paradigm that overlaps and batches work on top of a highly optimized embedding engine
- **📈 Scalable Architecture** - Handles millions of documents on consumer hardware; the larger your dataset, the more LEANN can save
- **🎯 Graph Pruning** - Advanced techniques to minimize the storage overhead of vector search to a limited footprint
- **🏗️ Pluggable Backends** - HNSW/FAISS (default), with optional DiskANN for large-scale deployments
- **🏗️ Pluggable Backends** - DiskANN, HNSW/FAISS with unified API
## 🛠️ Technical Highlights
- **🔄 Recompute Mode** - Highest accuracy scenarios while eliminating vector storage overhead
@@ -13,7 +13,7 @@
- **🚀 High-throughput Embedding Pipeline** - Optimized batched processing for maximum efficiency
- **🎯 Two-level Search** - Novel coarse-to-fine search overlap for accelerated query processing (optional)
- **💾 Memory-mapped Indices** - Fast startup with raw text mapping to reduce memory overhead
- **🚀 MLX Support** - Ultra-fast recompute/build with quantized embedding models, accelerating building and search ([minimal example](../examples/mlx_demo.py))
- **🚀 MLX Support** - Ultra-fast recompute/build with quantized embedding models, accelerating building and search ([minimal example](test/build_mlx_index.py))
## 🎨 Developer Experience

View File

@@ -72,4 +72,4 @@ Using the wrong distance metric with normalized embeddings can lead to:
- **Incorrect ranking** of search results
- **Suboptimal performance** compared to using the correct metric
For more details on why this happens, see our analysis in the [embedding detection code](../packages/leann-core/src/leann/api.py) which automatically handles normalized embeddings and MIPS distance metric issues.
For more details on why this happens, see our analysis of [OpenAI embeddings with MIPS](../examples/main_cli_example.py).

View File

@@ -2,8 +2,8 @@
## 🎯 Q2 2025
- [X] HNSW backend integration
- [X] DiskANN backend with MIPS/L2/Cosine support
- [X] HNSW backend integration
- [X] Real-time embedding pipeline
- [X] Memory-efficient graph pruning

View File

@@ -62,7 +62,7 @@ def test_faiss_hnsw():
try:
result = subprocess.run(
[sys.executable, "benchmarks/faiss_only.py"],
[sys.executable, "examples/faiss_only.py"],
capture_output=True,
text=True,
timeout=300,
@@ -115,7 +115,7 @@ def test_leann_hnsw():
# Load and parse documents
documents = SimpleDirectoryReader(
"data",
"examples/data",
recursive=True,
encoding="utf-8",
required_exts=[".pdf", ".txt", ".md"],

158
examples/document_search.py Normal file
View File

@@ -0,0 +1,158 @@
#!/usr/bin/env python3
"""
Document search demo with recompute mode
"""
import shutil
import time
from pathlib import Path
# Import backend packages to trigger plugin registration
try:
import leann_backend_diskann # noqa: F401
import leann_backend_hnsw # noqa: F401
print("INFO: Backend packages imported successfully.")
except ImportError as e:
print(f"WARNING: Could not import backend packages. Error: {e}")
# Import upper-level API from leann-core
from leann.api import LeannBuilder, LeannChat, LeannSearcher
def load_sample_documents():
"""Create sample documents for demonstration"""
docs = [
{
"title": "Intro to Python",
"content": "Python is a high-level, interpreted language known for simplicity.",
},
{
"title": "ML Basics",
"content": "Machine learning builds systems that learn from data.",
},
{
"title": "Data Structures",
"content": "Data structures like arrays, lists, and graphs organize data.",
},
]
return docs
def main():
print("==========================================================")
print("=== Leann Document Search Demo (DiskANN + Recompute) ===")
print("==========================================================")
INDEX_DIR = Path("./test_indices")
INDEX_PATH = str(INDEX_DIR / "documents.diskann")
BACKEND_TO_TEST = "diskann"
if INDEX_DIR.exists():
print(f"--- Cleaning up old index directory: {INDEX_DIR} ---")
shutil.rmtree(INDEX_DIR)
# --- 1. Build index ---
print(f"\n[PHASE 1] Building index using '{BACKEND_TO_TEST}' backend...")
builder = LeannBuilder(backend_name=BACKEND_TO_TEST, graph_degree=32, complexity=64)
documents = load_sample_documents()
print(f"Loaded {len(documents)} sample documents.")
for doc in documents:
builder.add_text(doc["content"], metadata={"title": doc["title"]})
builder.build_index(INDEX_PATH)
print("\nIndex built!")
# --- 2. Basic search demo ---
print(f"\n[PHASE 2] Basic search using '{BACKEND_TO_TEST}' backend...")
searcher = LeannSearcher(index_path=INDEX_PATH)
query = "What is machine learning?"
print(f"\nQuery: '{query}'")
print("\n--- Basic search mode (PQ computation) ---")
start_time = time.time()
results = searcher.search(query, top_k=2)
basic_time = time.time() - start_time
print(f"⏱️ Basic search time: {basic_time:.3f} seconds")
print(">>> Basic search results <<<")
for i, res in enumerate(results, 1):
print(
f" {i}. ID: {res.id}, Score: {res.score:.4f}, Text: '{res.text}', Metadata: {res.metadata}"
)
# --- 3. Recompute search demo ---
print("\n[PHASE 3] Recompute search using embedding server...")
print("\n--- Recompute search mode (get real embeddings via network) ---")
# Configure recompute parameters
recompute_params = {
"recompute_beighbor_embeddings": True, # Enable network recomputation
"USE_DEFERRED_FETCH": False, # Don't use deferred fetch
"skip_search_reorder": True, # Skip search reordering
"dedup_node_dis": True, # Enable node distance deduplication
"prune_ratio": 0.1, # Pruning ratio 10%
"batch_recompute": False, # Don't use batch recomputation
"global_pruning": False, # Don't use global pruning
"zmq_port": 5555, # ZMQ port
"embedding_model": "sentence-transformers/all-mpnet-base-v2",
}
print("Recompute parameter configuration:")
for key, value in recompute_params.items():
print(f" {key}: {value}")
print("\n🔄 Executing Recompute search...")
try:
start_time = time.time()
recompute_results = searcher.search(query, top_k=2, **recompute_params)
recompute_time = time.time() - start_time
print(f"⏱️ Recompute search time: {recompute_time:.3f} seconds")
print(">>> Recompute search results <<<")
for i, res in enumerate(recompute_results, 1):
print(
f" {i}. ID: {res.id}, Score: {res.score:.4f}, Text: '{res.text}', Metadata: {res.metadata}"
)
# Compare results
print("\n--- Result comparison ---")
print(f"Basic search time: {basic_time:.3f} seconds")
print(f"Recompute time: {recompute_time:.3f} seconds")
print("\nBasic search vs Recompute results:")
for i in range(min(len(results), len(recompute_results))):
basic_score = results[i].score
recompute_score = recompute_results[i].score
score_diff = abs(basic_score - recompute_score)
print(
f" Position {i + 1}: PQ={basic_score:.4f}, Recompute={recompute_score:.4f}, Difference={score_diff:.4f}"
)
if recompute_time > basic_time:
print("✅ Recompute mode working correctly (more accurate but slower)")
else:
print("i Recompute time is unusually fast, network recomputation may not be enabled")
except Exception as e:
print(f"❌ Recompute search failed: {e}")
print("This usually indicates an embedding server connection issue")
# --- 4. Chat demo ---
print("\n[PHASE 4] Starting chat session...")
chat = LeannChat(index_path=INDEX_PATH)
chat_response = chat.ask(query)
print(f"You: {query}")
print(f"Leann: {chat_response}")
print("\n==========================================================")
print("✅ Demo finished successfully!")
print("==========================================================")
if __name__ == "__main__":
main()

View File

@@ -52,11 +52,6 @@ class EmlxReader(BaseReader):
docs: list[Document] = []
max_count = load_kwargs.get("max_count", 1000)
count = 0
total_files = 0
successful_files = 0
failed_files = 0
print(f"Starting to process directory: {input_dir}")
# Walk through the directory recursively
for dirpath, dirnames, filenames in os.walk(input_dir):
@@ -64,12 +59,10 @@ class EmlxReader(BaseReader):
dirnames[:] = [d for d in dirnames if not d.startswith(".")]
for filename in filenames:
# Check if we've reached the max count (skip if max_count == -1)
if max_count > 0 and count >= max_count:
if count >= max_count:
break
if filename.endswith(".emlx"):
total_files += 1
filepath = os.path.join(dirpath, filename)
try:
# Read the .emlx file
@@ -105,26 +98,17 @@ class EmlxReader(BaseReader):
and not self.include_html
):
continue
try:
payload = part.get_payload(decode=True)
if payload:
body += payload.decode("utf-8", errors="ignore")
except Exception as e:
print(f"Error decoding payload: {e}")
continue
body += part.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
# break
else:
try:
payload = msg.get_payload(decode=True)
if payload:
body = payload.decode("utf-8", errors="ignore")
except Exception as e:
print(f"Error decoding single part payload: {e}")
body = ""
body = msg.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
# Only create document if we have some content
if body.strip() or subject != "No Subject":
# Create document content with metadata embedded in text
doc_content = f"""
# Create document content with metadata embedded in text
doc_content = f"""
[File]: {filename}
[From]: {from_addr}
[To]: {to_addr}
@@ -134,34 +118,18 @@ class EmlxReader(BaseReader):
{body}
"""
# No separate metadata - everything is in the text
doc = Document(text=doc_content, metadata={})
docs.append(doc)
count += 1
successful_files += 1
# Print first few successful files for debugging
if successful_files <= 3:
print(
f"Successfully loaded: {filename} - Subject: {subject[:50]}..."
)
# No separate metadata - everything is in the text
doc = Document(text=doc_content, metadata={})
docs.append(doc)
count += 1
except Exception as e:
failed_files += 1
if failed_files <= 5: # Only print first few errors
print(f"Error parsing email from {filepath}: {e}")
print(f"Error parsing email from {filepath}: {e}")
continue
except Exception as e:
failed_files += 1
if failed_files <= 5: # Only print first few errors
print(f"Error reading file {filepath}: {e}")
print(f"Error reading file {filepath}: {e}")
continue
print("Processing summary:")
print(f" Total .emlx files found: {total_files}")
print(f" Successfully loaded: {successful_files}")
print(f" Failed to load: {failed_files}")
print(f" Final documents: {len(docs)}")
print(f"Loaded {len(docs)} email documents")
return docs

View File

@@ -65,7 +65,7 @@ def main():
tracker.checkpoint("After Faiss index creation")
documents = SimpleDirectoryReader(
"data",
"examples/data",
recursive=True,
encoding="utf-8",
required_exts=[".pdf", ".txt", ".md"],

View File

@@ -0,0 +1,362 @@
import argparse
import asyncio
import os
try:
import dotenv
dotenv.load_dotenv()
except ModuleNotFoundError:
# python-dotenv is not installed; skip loading environment variables
dotenv = None
from pathlib import Path
from leann.api import LeannBuilder, LeannChat
from llama_index.core.node_parser import SentenceSplitter
# dotenv.load_dotenv() # handled above if python-dotenv is available
# Default Chrome profile path
DEFAULT_CHROME_PROFILE = os.path.expanduser("~/Library/Application Support/Google/Chrome/Default")
def create_leann_index_from_multiple_chrome_profiles(
profile_dirs: list[Path],
index_path: str = "chrome_history_index.leann",
max_count: int = -1,
embedding_model: str = "facebook/contriever",
embedding_mode: str = "sentence-transformers",
):
"""
Create LEANN index from multiple Chrome profile data sources.
Args:
profile_dirs: List of Path objects pointing to Chrome profile directories
index_path: Path to save the LEANN index
max_count: Maximum number of history entries to process per profile
embedding_model: The embedding model to use
embedding_mode: The embedding backend mode
"""
print("Creating LEANN index from multiple Chrome profile data sources...")
# Load documents using ChromeHistoryReader from history_data
from history_data.history import ChromeHistoryReader
reader = ChromeHistoryReader()
INDEX_DIR = Path(index_path).parent
if not INDEX_DIR.exists():
print("--- Index directory not found, building new index ---")
all_documents = []
total_processed = 0
# Process each Chrome profile directory
for i, profile_dir in enumerate(profile_dirs):
print(f"\nProcessing Chrome profile {i + 1}/{len(profile_dirs)}: {profile_dir}")
try:
documents = reader.load_data(
chrome_profile_path=str(profile_dir), max_count=max_count
)
if documents:
print(f"Loaded {len(documents)} history documents from {profile_dir}")
all_documents.extend(documents)
total_processed += len(documents)
# Check if we've reached the max count
if max_count > 0 and total_processed >= max_count:
print(f"Reached max count of {max_count} documents")
break
else:
print(f"No documents loaded from {profile_dir}")
except Exception as e:
print(f"Error processing {profile_dir}: {e}")
continue
if not all_documents:
print("No documents loaded from any source. Exiting.")
# Highlight (in red) that all Chrome browsers must be closed before running this script
print(
"\033[91mYou need to close all Chrome browsers before running this script\033[0m"
)
return None
print(
f"\nTotal loaded {len(all_documents)} history documents from {len(profile_dirs)} profiles"
)
# Create text splitter with 256 chunk size
text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=128)
# Convert Documents to text strings and chunk them
all_texts = []
for doc in all_documents:
# Split the document into chunks
nodes = text_splitter.get_nodes_from_documents([doc])
for node in nodes:
text = node.get_content()
# text = '[Title] ' + doc.metadata["title"] + '\n' + text
all_texts.append(text)
print(f"Created {len(all_texts)} text chunks from {len(all_documents)} documents")
# Create LEANN index directory
INDEX_DIR.mkdir(exist_ok=True)
print("--- Building new LEANN index ---")
print("\n[PHASE 1] Building Leann index...")
# Use HNSW backend for better macOS compatibility
# LeannBuilder will automatically detect normalized embeddings and set appropriate distance metric
builder = LeannBuilder(
backend_name="hnsw",
embedding_model=embedding_model,
embedding_mode=embedding_mode,
graph_degree=32,
complexity=64,
is_compact=True,
is_recompute=True,
num_threads=1, # Force single-threaded mode
)
print(f"Adding {len(all_texts)} history chunks to index...")
for chunk_text in all_texts:
builder.add_text(chunk_text)
builder.build_index(index_path)
print(f"\nLEANN index built at {index_path}!")
else:
print(f"--- Using existing index at {INDEX_DIR} ---")
return index_path
def create_leann_index(
profile_path: str | None = None,
index_path: str = "chrome_history_index.leann",
max_count: int = 1000,
embedding_model: str = "facebook/contriever",
embedding_mode: str = "sentence-transformers",
):
"""
Create LEANN index from Chrome history data.
Args:
profile_path: Path to the Chrome profile directory (optional, uses default if None)
index_path: Path to save the LEANN index
max_count: Maximum number of history entries to process
embedding_model: The embedding model to use
embedding_mode: The embedding backend mode
"""
print("Creating LEANN index from Chrome history data...")
INDEX_DIR = Path(index_path).parent
if not INDEX_DIR.exists():
print("--- Index directory not found, building new index ---")
INDEX_DIR.mkdir(exist_ok=True)
print("--- Building new LEANN index ---")
print("\n[PHASE 1] Building Leann index...")
# Load documents using ChromeHistoryReader from history_data
from history_data.history import ChromeHistoryReader
reader = ChromeHistoryReader()
documents = reader.load_data(chrome_profile_path=profile_path, max_count=max_count)
if not documents:
print("No documents loaded. Exiting.")
return None
print(f"Loaded {len(documents)} history documents")
# Create text splitter with 256 chunk size
text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=25)
# Convert Documents to text strings and chunk them
all_texts = []
for doc in documents:
# Split the document into chunks
nodes = text_splitter.get_nodes_from_documents([doc])
for node in nodes:
all_texts.append(node.get_content())
print(f"Created {len(all_texts)} text chunks from {len(documents)} documents")
# Use HNSW backend for better macOS compatibility
# LeannBuilder will automatically detect normalized embeddings and set appropriate distance metric
builder = LeannBuilder(
backend_name="hnsw",
embedding_model=embedding_model,
embedding_mode=embedding_mode,
graph_degree=32,
complexity=64,
is_compact=True,
is_recompute=True,
num_threads=1, # Force single-threaded mode
)
print(f"Adding {len(all_texts)} history chunks to index...")
for chunk_text in all_texts:
builder.add_text(chunk_text)
builder.build_index(index_path)
print(f"\nLEANN index built at {index_path}!")
else:
print(f"--- Using existing index at {INDEX_DIR} ---")
return index_path
async def query_leann_index(index_path: str, query: str):
"""
Query the LEANN index.
Args:
index_path: Path to the LEANN index
query: The query string
"""
print("\n[PHASE 2] Starting Leann chat session...")
chat = LeannChat(index_path=index_path)
print(f"You: {query}")
chat_response = chat.ask(
query,
top_k=10,
recompute_beighbor_embeddings=True,
complexity=32,
beam_width=1,
llm_config={
"type": "openai",
"model": "gpt-4o",
"api_key": os.getenv("OPENAI_API_KEY"),
},
llm_kwargs={"temperature": 0.0, "max_tokens": 1000},
)
print(f"Leann chat response: \033[36m{chat_response}\033[0m")
async def main():
# Parse command line arguments
parser = argparse.ArgumentParser(
description="LEANN Chrome History Reader - Create and query browser history index"
)
parser.add_argument(
"--chrome-profile",
type=str,
default=DEFAULT_CHROME_PROFILE,
help=f"Path to Chrome profile directory (default: {DEFAULT_CHROME_PROFILE}), usually you dont need to change this",
)
parser.add_argument(
"--index-dir",
type=str,
default="./google_history_index",
help="Directory to store the LEANN index (default: ./chrome_history_index_leann_test)",
)
parser.add_argument(
"--max-entries",
type=int,
default=1000,
help="Maximum number of history entries to process (default: 1000)",
)
parser.add_argument(
"--query",
type=str,
default=None,
help="Single query to run (default: runs example queries)",
)
parser.add_argument(
"--auto-find-profiles",
action="store_true",
default=True,
help="Automatically find all Chrome profiles (default: True)",
)
parser.add_argument(
"--embedding-model",
type=str,
default="facebook/contriever",
help="The embedding model to use (e.g., 'facebook/contriever', 'text-embedding-3-small')",
)
parser.add_argument(
"--embedding-mode",
type=str,
default="sentence-transformers",
choices=["sentence-transformers", "openai", "mlx"],
help="The embedding backend mode",
)
parser.add_argument(
"--use-existing-index",
action="store_true",
help="Use existing index without rebuilding",
)
args = parser.parse_args()
INDEX_DIR = Path(args.index_dir)
INDEX_PATH = str(INDEX_DIR / "chrome_history.leann")
print(f"Using Chrome profile: {args.chrome_profile}")
print(f"Index directory: {INDEX_DIR}")
print(f"Max entries: {args.max_entries}")
if args.use_existing_index:
# Use existing index without rebuilding
if not Path(INDEX_PATH).exists():
print(f"Error: Index file not found at {INDEX_PATH}")
return
print(f"Using existing index at {INDEX_PATH}")
index_path = INDEX_PATH
else:
# Find Chrome profile directories
from history_data.history import ChromeHistoryReader
if args.auto_find_profiles:
profile_dirs = ChromeHistoryReader.find_chrome_profiles()
if not profile_dirs:
print("No Chrome profiles found automatically. Exiting.")
return
else:
# Use single specified profile
profile_path = Path(args.chrome_profile)
if not profile_path.exists():
print(f"Chrome profile not found: {profile_path}")
return
profile_dirs = [profile_path]
# Create or load the LEANN index from all sources
index_path = create_leann_index_from_multiple_chrome_profiles(
profile_dirs, INDEX_PATH, args.max_entries, args.embedding_model, args.embedding_mode
)
if index_path:
if args.query:
# Run single query
await query_leann_index(index_path, args.query)
else:
# Example queries
queries = [
"What websites did I visit about machine learning?",
"Find my search history about programming",
]
for query in queries:
print("\n" + "=" * 60)
await query_leann_index(index_path, query)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -97,11 +97,6 @@ class ChromeHistoryReader(BaseReader):
except Exception as e:
print(f"Error reading Chrome history: {e}")
# add you may need to close your browser to make the database file available
# also highlight in red
print(
"\033[91mYou may need to close your browser to make the database file available\033[0m"
)
return docs
return docs

View File

@@ -411,8 +411,8 @@ Messages ({len(messages)} messages, {message_group["total_length"]} chars):
wechat_export_dir = load_kwargs.get("wechat_export_dir", None)
include_non_text = load_kwargs.get("include_non_text", False)
concatenate_messages = load_kwargs.get("concatenate_messages", False)
max_length = load_kwargs.get("max_length", 1000)
time_window_minutes = load_kwargs.get("time_window_minutes", 30)
load_kwargs.get("max_length", 1000)
load_kwargs.get("time_window_minutes", 30)
# Default WeChat export path
if wechat_export_dir is None:
@@ -460,9 +460,9 @@ Messages ({len(messages)} messages, {message_group["total_length"]} chars):
# Concatenate messages based on rules
message_groups = self._concatenate_messages(
readable_messages,
max_length=max_length,
time_window_minutes=time_window_minutes,
overlap_messages=0, # No overlap between groups
max_length=-1,
time_window_minutes=-1,
overlap_messages=0,  # No overlap between groups
)
# Create documents from concatenated groups
@@ -532,9 +532,7 @@ Message: {readable_text if readable_text else message_text}
"""
# Create document with embedded metadata
doc = Document(
text=doc_content, metadata={"contact_name": contact_name}
)
doc = Document(text=doc_content, metadata={})
docs.append(doc)
count += 1
@@ -562,8 +560,8 @@ Message: {readable_text if readable_text else message_text}
# Look for common export directory names
possible_dirs = [
Path("./wechat_export_test"),
Path("./wechat_export"),
Path("./wechat_export_direct"),
Path("./wechat_chat_history"),
Path("./chat_export"),
]

View File

@@ -0,0 +1,342 @@
import argparse
import asyncio
import os
import sys
from pathlib import Path
import dotenv
# Add the project root to Python path so we can import from examples
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
from leann.api import LeannBuilder, LeannChat
from llama_index.core.node_parser import SentenceSplitter
dotenv.load_dotenv()
# Auto-detect user's mail path
def get_mail_path():
"""Get the mail path for the current user"""
home_dir = os.path.expanduser("~")
return os.path.join(home_dir, "Library", "Mail")
# Default mail path for macOS
DEFAULT_MAIL_PATH = "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data"
def create_leann_index_from_multiple_sources(
messages_dirs: list[Path],
index_path: str = "mail_index.leann",
max_count: int = -1,
include_html: bool = False,
embedding_model: str = "facebook/contriever",
):
"""
Create LEANN index from multiple mail data sources.
Args:
messages_dirs: List of Path objects pointing to Messages directories
index_path: Path to save the LEANN index
max_count: Maximum number of emails to process per directory
include_html: Whether to include HTML content in email processing
"""
print("Creating LEANN index from multiple mail data sources...")
# Load documents using EmlxReader from LEANN_email_reader
from examples.email_data.LEANN_email_reader import EmlxReader
reader = EmlxReader(include_html=include_html)
# from email_data.email import EmlxMboxReader
# from pathlib import Path
# reader = EmlxMboxReader()
INDEX_DIR = Path(index_path).parent
if not INDEX_DIR.exists():
print("--- Index directory not found, building new index ---")
all_documents = []
total_processed = 0
# Process each Messages directory
for i, messages_dir in enumerate(messages_dirs):
print(f"\nProcessing Messages directory {i + 1}/{len(messages_dirs)}: {messages_dir}")
try:
documents = reader.load_data(messages_dir)
if documents:
print(f"Loaded {len(documents)} email documents from {messages_dir}")
all_documents.extend(documents)
total_processed += len(documents)
# Check if we've reached the max count
if max_count > 0 and total_processed >= max_count:
print(f"Reached max count of {max_count} documents")
break
else:
print(f"No documents loaded from {messages_dir}")
except Exception as e:
print(f"Error processing {messages_dir}: {e}")
continue
if not all_documents:
print("No documents loaded from any source. Exiting.")
return None
print(
f"\nLoaded {len(all_documents)} email documents from {len(messages_dirs)} directories; splitting them into chunks"
)
# Create text splitter with 256 chunk size
text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=25)
# Convert Documents to text strings and chunk them
all_texts = []
for doc in all_documents:
# Split the document into chunks
nodes = text_splitter.get_nodes_from_documents([doc])
for node in nodes:
text = node.get_content()
# text = '[subject] ' + doc.metadata["subject"] + '\n' + text
all_texts.append(text)
print(
f"Finished splitting {len(all_documents)} documents into {len(all_texts)} text chunks"
)
# Create LEANN index directory
INDEX_DIR.mkdir(exist_ok=True)
print("--- Building new LEANN index ---")
print("\n[PHASE 1] Building Leann index...")
# Use HNSW backend for better macOS compatibility
builder = LeannBuilder(
backend_name="hnsw",
embedding_model=embedding_model,
graph_degree=32,
complexity=64,
is_compact=True,
is_recompute=True,
num_threads=1, # Force single-threaded mode
)
print(f"Adding {len(all_texts)} email chunks to index...")
for chunk_text in all_texts:
builder.add_text(chunk_text)
builder.build_index(index_path)
print(f"\nLEANN index built at {index_path}!")
else:
print(f"--- Using existing index at {INDEX_DIR} ---")
return index_path
def create_leann_index(
mail_path: str,
index_path: str = "mail_index.leann",
max_count: int = 1000,
include_html: bool = False,
embedding_model: str = "facebook/contriever",
):
"""
Create LEANN index from mail data.
Args:
mail_path: Path to the mail directory
index_path: Path to save the LEANN index
max_count: Maximum number of emails to process
include_html: Whether to include HTML content in email processing
"""
print("Creating LEANN index from mail data...")
INDEX_DIR = Path(index_path).parent
if not INDEX_DIR.exists():
print("--- Index directory not found, building new index ---")
INDEX_DIR.mkdir(exist_ok=True)
print("--- Building new LEANN index ---")
print("\n[PHASE 1] Building Leann index...")
# Load documents using EmlxReader from LEANN_email_reader
from examples.email_data.LEANN_email_reader import EmlxReader
reader = EmlxReader(include_html=include_html)
# from email_data.email import EmlxMboxReader
# from pathlib import Path
# reader = EmlxMboxReader()
documents = reader.load_data(Path(mail_path))
if not documents:
print("No documents loaded. Exiting.")
return None
print(f"Loaded {len(documents)} email documents")
# Create text splitter with 256 chunk size
text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=128)
# Convert Documents to text strings and chunk them
all_texts = []
for doc in documents:
# Split the document into chunks
nodes = text_splitter.get_nodes_from_documents([doc])
for node in nodes:
all_texts.append(node.get_content())
print(f"Created {len(all_texts)} text chunks from {len(documents)} documents")
# Use HNSW backend for better macOS compatibility
builder = LeannBuilder(
backend_name="hnsw",
embedding_model=embedding_model,
graph_degree=32,
complexity=64,
is_compact=True,
is_recompute=True,
num_threads=1, # Force single-threaded mode
)
print(f"Adding {len(all_texts)} email chunks to index...")
for chunk_text in all_texts:
builder.add_text(chunk_text)
builder.build_index(index_path)
print(f"\nLEANN index built at {index_path}!")
else:
print(f"--- Using existing index at {INDEX_DIR} ---")
return index_path
async def query_leann_index(index_path: str, query: str):
"""
Query the LEANN index.
Args:
index_path: Path to the LEANN index
query: The query string
"""
print("\n[PHASE 2] Starting Leann chat session...")
chat = LeannChat(index_path=index_path, llm_config={"type": "openai", "model": "gpt-4o"})
print(f"You: {query}")
import time
start_time = time.time()
chat_response = chat.ask(
query,
top_k=20,
recompute_embeddings=True,
complexity=32,
beam_width=1,
)
end_time = time.time()
# print(f"Time taken: {end_time - start_time} seconds")
# highlight the answer
print(f"Leann chat response: \033[36m{chat_response}\033[0m")
async def main():
# Parse command line arguments
parser = argparse.ArgumentParser(description="LEANN Mail Reader - Create and query email index")
# The mail path is auto-detected below, so no --mail-path argument is needed
parser.add_argument(
"--index-dir",
type=str,
default="./mail_index",
help="Directory to store the LEANN index (default: ./mail_index_leann_raw_text_all_dicts)",
)
parser.add_argument(
"--max-emails",
type=int,
default=1000,
help="Maximum number of emails to process (-1 means all)",
)
parser.add_argument(
"--query",
type=str,
default="Give me some funny advertisement about apple or other companies",
help="Single query to run (default: runs example queries)",
)
parser.add_argument(
"--include-html",
action="store_true",
default=False,
help="Include HTML content in email processing (default: False)",
)
parser.add_argument(
"--embedding-model",
type=str,
default="facebook/contriever",
help="Embedding model to use (default: facebook/contriever)",
)
args = parser.parse_args()
print(f"args: {args}")
# Automatically find all Messages directories under the current user's Mail directory
from examples.email_data.LEANN_email_reader import find_all_messages_directories
mail_path = get_mail_path()
print(f"Searching for email data in: {mail_path}")
messages_dirs = find_all_messages_directories(mail_path)
# messages_dirs = find_all_messages_directories(DEFAULT_MAIL_PATH)
# messages_dirs = [DEFAULT_MAIL_PATH]
# messages_dirs = messages_dirs[:1]
print("len(messages_dirs): ", len(messages_dirs))
if not messages_dirs:
print("No Messages directories found. Exiting.")
return
INDEX_DIR = Path(args.index_dir)
INDEX_PATH = str(INDEX_DIR / "mail_documents.leann")
print(f"Index directory: {INDEX_DIR}")
print(f"Found {len(messages_dirs)} Messages directories.")
# Create or load the LEANN index from all sources
index_path = create_leann_index_from_multiple_sources(
messages_dirs,
INDEX_PATH,
args.max_emails,
args.include_html,
args.embedding_model,
)
if index_path:
if args.query:
# Run single query
await query_leann_index(index_path, args.query)
else:
# Example queries
queries = [
"Hows Berkeley Graduate Student Instructor",
"how's the icloud related advertisement saying",
"Whats the number of class recommend to take per semester for incoming EECS students",
]
for query in queries:
print("\n" + "=" * 60)
await query_leann_index(index_path, query)
if __name__ == "__main__":
asyncio.run(main())
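# Example invocations (the script filename is illustrative; adjust to wherever
# this file lives in your checkout):
#
#   python examples/mail_reader.py --index-dir ./mail_index --max-emails 500
#   python examples/mail_reader.py --query "Find my flight confirmations" --include-html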

View File

@@ -0,0 +1,135 @@
import argparse
import os
import sys
from pathlib import Path
# Add the project root to Python path so we can import from examples
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
import torch
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
# --- EMBEDDING MODEL ---
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# --- END EMBEDDING MODEL ---
# Import EmlxReader from the new module
from examples.email_data.LEANN_email_reader import EmlxReader
def create_and_save_index(
mail_path: str,
save_dir: str = "mail_index_embedded",
max_count: int = 1000,
include_html: bool = False,
):
print("Creating index from mail data with embedded metadata...")
documents = EmlxReader(include_html=include_html).load_data(mail_path, max_count=max_count)
if not documents:
print("No documents loaded. Exiting.")
return None
text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=25)
# Use facebook/contriever as the embedder
embed_model = HuggingFaceEmbedding(model_name="facebook/contriever")
# set on device
if torch.cuda.is_available():
embed_model._model.to("cuda")
# set mps
elif torch.backends.mps.is_available():
embed_model._model.to("mps")
else:
embed_model._model.to("cpu")
index = VectorStoreIndex.from_documents(
documents, transformations=[text_splitter], embed_model=embed_model
)
os.makedirs(save_dir, exist_ok=True)
index.storage_context.persist(persist_dir=save_dir)
print(f"Index saved to {save_dir}")
return index
def load_index(save_dir: str = "mail_index_embedded"):
try:
storage_context = StorageContext.from_defaults(persist_dir=save_dir)
index = VectorStoreIndex.from_vector_store(
storage_context.vector_store, storage_context=storage_context
)
print(f"Index loaded from {save_dir}")
return index
except Exception as e:
print(f"Error loading index: {e}")
return None
def query_index(index, query: str):
if index is None:
print("No index available for querying.")
return
query_engine = index.as_query_engine()
response = query_engine.query(query)
print(f"Query: {query}")
print(f"Response: {response}")
def main():
# Parse command line arguments
parser = argparse.ArgumentParser(
description="LlamaIndex Mail Reader - Create and query email index"
)
parser.add_argument(
"--mail-path",
type=str,
default="/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages",
help="Path to mail data directory",
)
parser.add_argument(
"--save-dir",
type=str,
default="mail_index_embedded",
help="Directory to store the index (default: mail_index_embedded)",
)
parser.add_argument(
"--max-emails",
type=int,
default=10000,
help="Maximum number of emails to process",
)
parser.add_argument(
"--include-html",
action="store_true",
default=False,
help="Include HTML content in email processing (default: False)",
)
args = parser.parse_args()
mail_path = args.mail_path
save_dir = args.save_dir
if os.path.exists(save_dir) and os.path.exists(os.path.join(save_dir, "vector_store.json")):
print("Loading existing index...")
index = load_index(save_dir)
else:
print("Creating new index...")
index = create_and_save_index(
mail_path,
save_dir,
max_count=args.max_emails,
include_html=args.include_html,
)
if index:
queries = [
"Hows Berkeley Graduate Student Instructor",
"how's the icloud related advertisement saying",
"Whats the number of class recommend to take per semester for incoming EECS students",
]
for query in queries:
print("\n" + "=" * 50)
query_index(index, query)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,146 @@
import argparse
import asyncio
from pathlib import Path
import dotenv
from leann.api import LeannBuilder, LeannChat
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
dotenv.load_dotenv()
async def main(args):
INDEX_DIR = Path(args.index_dir)
INDEX_PATH = str(INDEX_DIR / "pdf_documents.leann")
if not INDEX_DIR.exists():
node_parser = SentenceSplitter(
chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n"
)
print("Loading documents...")
documents = SimpleDirectoryReader(
args.data_dir,
recursive=True,
encoding="utf-8",
required_exts=[".pdf", ".txt", ".md"],
).load_data(show_progress=True)
print("Documents loaded.")
all_texts = []
for doc in documents:
nodes = node_parser.get_nodes_from_documents([doc])
if nodes:
all_texts.extend(node.get_content() for node in nodes)
print("--- Index directory not found, building new index ---")
print("\n[PHASE 1] Building Leann index...")
# LeannBuilder now automatically detects normalized embeddings and sets appropriate distance metric
print(f"Using {args.embedding_model} with {args.embedding_mode} mode")
# Use HNSW backend for better macOS compatibility
builder = LeannBuilder(
backend_name="hnsw",
embedding_model=args.embedding_model,
embedding_mode=args.embedding_mode,
# distance_metric is automatically set based on embedding model
graph_degree=32,
complexity=64,
is_compact=True,
is_recompute=True,
num_threads=1, # Force single-threaded mode
)
print(f"Loaded {len(all_texts)} text chunks from documents.")
for chunk_text in all_texts:
builder.add_text(chunk_text)
builder.build_index(INDEX_PATH)
print(f"\nLeann index built at {INDEX_PATH}!")
else:
print(f"--- Using existing index at {INDEX_DIR} ---")
print("\n[PHASE 2] Starting Leann chat session...")
# Build llm_config based on command line arguments
if args.llm == "simulated":
llm_config = {"type": "simulated"}
elif args.llm == "ollama":
llm_config = {"type": "ollama", "model": args.model, "host": args.host}
elif args.llm == "hf":
llm_config = {"type": "hf", "model": args.model}
elif args.llm == "openai":
llm_config = {"type": "openai", "model": args.model}
else:
raise ValueError(f"Unknown LLM type: {args.llm}")
print(f"Using LLM: {args.llm} with model: {args.model if args.llm != 'simulated' else 'N/A'}")
chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
# query = (
#     "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面,任务令一般在什么城市颁发"
# )  # i.e., "What is the Pangu model, what dark sides came up during its development, and in which city are task orders usually issued?"
query = args.query
print(f"You: {query}")
chat_response = chat.ask(query, top_k=20, recompute_embeddings=True, complexity=32)
print(f"Leann chat response: \033[36m{chat_response}\033[0m")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run Leann Chat with various LLM backends.")
parser.add_argument(
"--llm",
type=str,
default="openai",
choices=["simulated", "ollama", "hf", "openai"],
help="The LLM backend to use.",
)
parser.add_argument(
"--model",
type=str,
default="gpt-4o",
help="The model name to use (e.g., 'llama3:8b' for ollama, 'deepseek-ai/deepseek-llm-7b-chat' for hf, 'gpt-4o' for openai).",
)
parser.add_argument(
"--embedding-model",
type=str,
default="facebook/contriever",
help="The embedding model to use (e.g., 'facebook/contriever', 'text-embedding-3-small').",
)
parser.add_argument(
"--embedding-mode",
type=str,
default="sentence-transformers",
choices=["sentence-transformers", "openai", "mlx"],
help="The embedding backend mode.",
)
parser.add_argument(
"--host",
type=str,
default="http://localhost:11434",
help="The host for the Ollama API.",
)
parser.add_argument(
"--index-dir",
type=str,
default="./test_doc_files",
help="Directory where the Leann index will be stored.",
)
parser.add_argument(
"--data-dir",
type=str,
default="examples/data",
help="Directory containing documents to index (PDF, TXT, MD files).",
)
parser.add_argument(
"--query",
type=str,
default="Based on the paper, what are the main techniques LEANN explores to reduce the storage overhead and DLPM explore to achieve Fairness and Efiiciency trade-off?",
help="The query to ask the Leann chat system.",
)
args = parser.parse_args()
asyncio.run(main(args))
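# Example invocations (script path is illustrative; the demo elsewhere in this
# repo refers to apps/document_rag.py):
#
#   python apps/document_rag.py --llm ollama --model llama3:8b --data-dir examples/data
#   python apps/document_rag.py --llm openai --model gpt-4o --query "Summarize the main contributions"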

View File

@@ -0,0 +1,360 @@
#!/usr/bin/env python3
"""
Multi-Vector Aggregator for Fat Embeddings
==========================================
This module implements aggregation strategies for multi-vector embeddings,
similar to ColPali's approach where multiple patch vectors represent a single document.
Key features:
- MaxSim aggregation (take maximum similarity across patches)
- Voting-based aggregation (count patch matches)
- Weighted aggregation (attention-score weighted)
- Spatial clustering of matching patches
- Document-level result consolidation
"""
from collections import defaultdict
from dataclasses import dataclass
from typing import Any
import numpy as np
@dataclass
class PatchResult:
"""Represents a single patch search result."""
patch_id: int
image_name: str
image_path: str
coordinates: tuple[int, int, int, int] # (x1, y1, x2, y2)
score: float
attention_score: float
scale: float
metadata: dict[str, Any]
@dataclass
class AggregatedResult:
"""Represents an aggregated document-level result."""
image_name: str
image_path: str
doc_score: float
patch_count: int
best_patch: PatchResult
all_patches: list[PatchResult]
aggregation_method: str
spatial_clusters: list[list[PatchResult]] | None = None
class MultiVectorAggregator:
"""
Aggregates multiple patch-level results into document-level results.
"""
def __init__(
self,
aggregation_method: str = "maxsim",
spatial_clustering: bool = True,
cluster_distance_threshold: float = 100.0,
):
"""
Initialize the aggregator.
Args:
aggregation_method: "maxsim", "voting", "weighted", or "mean"
spatial_clustering: Whether to cluster spatially close patches
cluster_distance_threshold: Distance threshold for spatial clustering
"""
self.aggregation_method = aggregation_method
self.spatial_clustering = spatial_clustering
self.cluster_distance_threshold = cluster_distance_threshold
def aggregate_results(
self, search_results: list[dict[str, Any]], top_k: int = 10
) -> list[AggregatedResult]:
"""
Aggregate patch-level search results into document-level results.
Args:
search_results: List of search results from LeannSearcher
top_k: Number of top documents to return
Returns:
List of aggregated document results
"""
# Group results by image
image_groups = defaultdict(list)
for result in search_results:
metadata = result.metadata
if "image_name" in metadata and "patch_id" in metadata:
patch_result = PatchResult(
patch_id=metadata["patch_id"],
image_name=metadata["image_name"],
image_path=metadata["image_path"],
coordinates=tuple(metadata["coordinates"]),
score=result.score,
attention_score=metadata.get("attention_score", 0.0),
scale=metadata.get("scale", 1.0),
metadata=metadata,
)
image_groups[metadata["image_name"]].append(patch_result)
# Aggregate each image group
aggregated_results = []
for image_name, patches in image_groups.items():
if len(patches) == 0:
continue
agg_result = self._aggregate_image_patches(image_name, patches)
aggregated_results.append(agg_result)
# Sort by aggregated score and return top-k
aggregated_results.sort(key=lambda x: x.doc_score, reverse=True)
return aggregated_results[:top_k]
def _aggregate_image_patches(
self, image_name: str, patches: list[PatchResult]
) -> AggregatedResult:
"""Aggregate patches for a single image."""
if self.aggregation_method == "maxsim":
doc_score = max(patch.score for patch in patches)
best_patch = max(patches, key=lambda p: p.score)
elif self.aggregation_method == "voting":
# Count patches above threshold
threshold = np.percentile([p.score for p in patches], 75)
doc_score = sum(1 for patch in patches if patch.score >= threshold)
best_patch = max(patches, key=lambda p: p.score)
elif self.aggregation_method == "weighted":
# Weight by attention scores
total_weighted_score = sum(p.score * p.attention_score for p in patches)
total_weights = sum(p.attention_score for p in patches)
doc_score = total_weighted_score / max(total_weights, 1e-8)
best_patch = max(patches, key=lambda p: p.score * p.attention_score)
elif self.aggregation_method == "mean":
doc_score = np.mean([patch.score for patch in patches])
best_patch = max(patches, key=lambda p: p.score)
else:
raise ValueError(f"Unknown aggregation method: {self.aggregation_method}")
# Spatial clustering if enabled
spatial_clusters = None
if self.spatial_clustering:
spatial_clusters = self._cluster_patches_spatially(patches)
return AggregatedResult(
image_name=image_name,
image_path=patches[0].image_path,
doc_score=float(doc_score),
patch_count=len(patches),
best_patch=best_patch,
all_patches=sorted(patches, key=lambda p: p.score, reverse=True),
aggregation_method=self.aggregation_method,
spatial_clusters=spatial_clusters,
)
def _cluster_patches_spatially(self, patches: list[PatchResult]) -> list[list[PatchResult]]:
"""Cluster patches that are spatially close to each other."""
if len(patches) <= 1:
return [patches]
clusters = []
remaining_patches = patches.copy()
while remaining_patches:
# Start new cluster with highest scoring remaining patch
seed_patch = max(remaining_patches, key=lambda p: p.score)
current_cluster = [seed_patch]
remaining_patches.remove(seed_patch)
# Add nearby patches to cluster
added_to_cluster = True
while added_to_cluster:
added_to_cluster = False
for patch in remaining_patches.copy():
if self._is_patch_nearby(patch, current_cluster):
current_cluster.append(patch)
remaining_patches.remove(patch)
added_to_cluster = True
clusters.append(current_cluster)
return sorted(clusters, key=lambda cluster: max(p.score for p in cluster), reverse=True)
def _is_patch_nearby(self, patch: PatchResult, cluster: list[PatchResult]) -> bool:
"""Check if a patch is spatially close to any patch in the cluster."""
patch_center = self._get_patch_center(patch.coordinates)
for cluster_patch in cluster:
cluster_center = self._get_patch_center(cluster_patch.coordinates)
distance = np.sqrt(
(patch_center[0] - cluster_center[0]) ** 2
+ (patch_center[1] - cluster_center[1]) ** 2
)
if distance <= self.cluster_distance_threshold:
return True
return False
def _get_patch_center(self, coordinates: tuple[int, int, int, int]) -> tuple[float, float]:
"""Get center point of a patch."""
x1, y1, x2, y2 = coordinates
return ((x1 + x2) / 2, (y1 + y2) / 2)
def print_aggregated_results(
self, results: list[AggregatedResult], max_patches_per_doc: int = 3
):
"""Pretty print aggregated results."""
print(f"\n🔍 Aggregated Results (method: {self.aggregation_method})")
print("=" * 80)
for i, result in enumerate(results):
print(f"\n{i + 1}. {result.image_name}")
print(f" Doc Score: {result.doc_score:.4f} | Patches: {result.patch_count}")
print(f" Path: {result.image_path}")
# Show best patch
best = result.best_patch
print(
f" 🌟 Best Patch: #{best.patch_id} at {best.coordinates} (score: {best.score:.4f})"
)
# Show top patches
print(" 📍 Top Patches:")
for j, patch in enumerate(result.all_patches[:max_patches_per_doc]):
print(
f" {j + 1}. Patch #{patch.patch_id}: {patch.score:.4f} at {patch.coordinates}"
)
# Show spatial clusters if available
if result.spatial_clusters and len(result.spatial_clusters) > 1:
print(f" 🗂️ Spatial Clusters: {len(result.spatial_clusters)}")
for j, cluster in enumerate(result.spatial_clusters[:2]): # Show top 2 clusters
cluster_score = max(p.score for p in cluster)
print(
f" Cluster {j + 1}: {len(cluster)} patches (best: {cluster_score:.4f})"
)
def demo_aggregation():
"""Demonstrate the multi-vector aggregation functionality."""
print("=== Multi-Vector Aggregation Demo ===")
# Simulate some patch-level search results
# In real usage, these would come from LeannSearcher.search()
class MockResult:
def __init__(self, score, metadata):
self.score = score
self.metadata = metadata
# Simulate results for 2 images with multiple patches each
mock_results = [
# Image 1: cats_and_kitchen.jpg - 4 patches
MockResult(
0.85,
{
"image_name": "cats_and_kitchen.jpg",
"image_path": "/path/to/cats_and_kitchen.jpg",
"patch_id": 3,
"coordinates": [100, 50, 224, 174], # Kitchen area
"attention_score": 0.92,
"scale": 1.0,
},
),
MockResult(
0.78,
{
"image_name": "cats_and_kitchen.jpg",
"image_path": "/path/to/cats_and_kitchen.jpg",
"patch_id": 7,
"coordinates": [200, 300, 324, 424], # Cat area
"attention_score": 0.88,
"scale": 1.0,
},
),
MockResult(
0.72,
{
"image_name": "cats_and_kitchen.jpg",
"image_path": "/path/to/cats_and_kitchen.jpg",
"patch_id": 12,
"coordinates": [150, 100, 274, 224], # Appliances
"attention_score": 0.75,
"scale": 1.0,
},
),
MockResult(
0.65,
{
"image_name": "cats_and_kitchen.jpg",
"image_path": "/path/to/cats_and_kitchen.jpg",
"patch_id": 15,
"coordinates": [50, 250, 174, 374], # Furniture
"attention_score": 0.70,
"scale": 1.0,
},
),
# Image 2: city_street.jpg - 3 patches
MockResult(
0.68,
{
"image_name": "city_street.jpg",
"image_path": "/path/to/city_street.jpg",
"patch_id": 2,
"coordinates": [300, 100, 424, 224], # Buildings
"attention_score": 0.80,
"scale": 1.0,
},
),
MockResult(
0.62,
{
"image_name": "city_street.jpg",
"image_path": "/path/to/city_street.jpg",
"patch_id": 8,
"coordinates": [100, 350, 224, 474], # Street level
"attention_score": 0.75,
"scale": 1.0,
},
),
MockResult(
0.55,
{
"image_name": "city_street.jpg",
"image_path": "/path/to/city_street.jpg",
"patch_id": 11,
"coordinates": [400, 200, 524, 324], # Sky area
"attention_score": 0.60,
"scale": 1.0,
},
),
]
# Test different aggregation methods
methods = ["maxsim", "voting", "weighted", "mean"]
for method in methods:
print(f"\n{'=' * 20} {method.upper()} AGGREGATION {'=' * 20}")
aggregator = MultiVectorAggregator(
aggregation_method=method,
spatial_clustering=True,
cluster_distance_threshold=100.0,
)
aggregated = aggregator.aggregate_results(mock_results, top_k=5)
aggregator.print_aggregated_results(aggregated)
if __name__ == "__main__":
demo_aggregation()
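# Typical integration sketch (assumes a patch-level index whose per-patch
# metadata matches the mock results above, e.g. image_name/patch_id/coordinates):
#
#   from leann.api import LeannSearcher
#   searcher = LeannSearcher("patch_index.leann")
#   raw_results = searcher.search("kitchen with cats", top_k=50)
#   aggregator = MultiVectorAggregator(aggregation_method="maxsim")
#   docs = aggregator.aggregate_results(raw_results, top_k=5)
#   aggregator.print_aggregated_results(docs)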

View File

@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
OpenAI Embedding Example
Complete example showing how to build and search with OpenAI embeddings using HNSW backend.
"""
import os
from pathlib import Path
import dotenv
from leann.api import LeannBuilder, LeannSearcher
# Load environment variables
dotenv.load_dotenv()
def main():
# Check if OpenAI API key is available
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
print("ERROR: OPENAI_API_KEY environment variable not set")
return False
print(f"✅ OpenAI API key found: {api_key[:10]}...")
# Sample texts
sample_texts = [
"Machine learning is a powerful technology that enables computers to learn from data.",
"Natural language processing helps computers understand and generate human language.",
"Deep learning uses neural networks with multiple layers to solve complex problems.",
"Computer vision allows machines to interpret and understand visual information.",
"Reinforcement learning trains agents to make decisions through trial and error.",
"Data science combines statistics, math, and programming to extract insights from data.",
"Artificial intelligence aims to create machines that can perform human-like tasks.",
"Python is a popular programming language used extensively in data science and AI.",
"Neural networks are inspired by the structure and function of the human brain.",
"Big data refers to extremely large datasets that require special tools to process.",
]
INDEX_DIR = Path("./simple_openai_test_index")
INDEX_PATH = str(INDEX_DIR / "simple_test.leann")
print("\n=== Building Index with OpenAI Embeddings ===")
print(f"Index path: {INDEX_PATH}")
try:
# Use proper configuration for OpenAI embeddings
builder = LeannBuilder(
backend_name="hnsw",
embedding_model="text-embedding-3-small",
embedding_mode="openai",
# HNSW settings for OpenAI embeddings
M=16, # Smaller graph degree
efConstruction=64, # Smaller construction complexity
is_compact=True, # Enable compact storage for recompute
is_recompute=True, # MUST enable for OpenAI embeddings
num_threads=1,
)
print(f"Adding {len(sample_texts)} texts to the index...")
for i, text in enumerate(sample_texts):
metadata = {"id": f"doc_{i}", "topic": "AI"}
builder.add_text(text, metadata)
print("Building index...")
builder.build_index(INDEX_PATH)
print("✅ Index built successfully!")
except Exception as e:
print(f"❌ Error building index: {e}")
import traceback
traceback.print_exc()
return False
print("\n=== Testing Search ===")
try:
searcher = LeannSearcher(INDEX_PATH)
test_queries = [
"What is machine learning?",
"How do neural networks work?",
"Programming languages for data science",
]
for query in test_queries:
print(f"\n🔍 Query: '{query}'")
results = searcher.search(query, top_k=3)
print(f" Found {len(results)} results:")
for i, result in enumerate(results):
print(f" {i + 1}. Score: {result.score:.4f}")
print(f" Text: {result.text[:80]}...")
print("\n✅ Search test completed successfully!")
return True
except Exception as e:
print(f"❌ Error during search: {e}")
import traceback
traceback.print_exc()
return False
if __name__ == "__main__":
success = main()
if success:
print("\n🎉 Simple OpenAI index test completed successfully!")
else:
print("\n💥 Simple OpenAI index test failed!")

23
examples/resue_index.py Normal file
View File

@@ -0,0 +1,23 @@
import asyncio
from pathlib import Path
from leann.api import LeannChat
INDEX_DIR = Path("./test_pdf_index_huawei")
INDEX_PATH = str(INDEX_DIR / "pdf_documents.leann")
async def main():
print("\n[PHASE 2] Starting Leann chat session...")
chat = LeannChat(index_path=INDEX_PATH)
query = "What is the main idea of RL and give me 5 exapmle of classic RL algorithms?"
query = "Based on the paper, what are the main techniques LEANN explores to reduce the storage overhead and DLPM explore to achieve Fairness and Efiiciency trade-off?"
# query = "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面,任务令一般在什么城市颁发"
response = chat.ask(
query, top_k=20, recompute_embeddings=True, complexity=32, beam_width=1
)
print(f"\n[PHASE 2] Response: {response}")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -200,10 +200,10 @@ def main():
args = parser.parse_args()
# --- Path Configuration ---
# Assumes a project structure where the script is in 'benchmarks/'
# and evaluation data is in 'benchmarks/data/'.
script_dir = Path(__file__).resolve().parent
data_root = script_dir / "data"
# Assumes a project structure where the script is in 'examples/'
# and data is in 'data/' at the project root.
project_root = Path(__file__).resolve().parent.parent
data_root = project_root / "data"
# Download data based on mode
if args.mode == "build":
@@ -279,9 +279,7 @@ def main():
if not args.index_path:
print("No indices found. The data download should have included pre-built indices.")
print(
"Please check the benchmarks/data/indices/ directory or provide --index-path manually."
)
print("Please check the data/indices/ directory or provide --index-path manually.")
sys.exit(1)
# Detect dataset type from index path to select the correct ground truth

View File

@@ -1,6 +1,6 @@
"""
Simple demo showing basic leann usage
Run: uv run python examples/basic_demo.py
Run: uv run python examples/simple_demo.py
"""
import argparse
@@ -81,7 +81,7 @@ def main():
print()
print("Demo completed! Try running:")
print(" uv run python apps/document_rag.py")
print(" uv run python examples/document_search.py")
if __name__ == "__main__":

View File

@@ -0,0 +1,320 @@
import argparse
import asyncio
import os
from pathlib import Path
import dotenv
from leann.api import LeannBuilder, LeannChat
from llama_index.core.node_parser import SentenceSplitter
dotenv.load_dotenv()
# Default WeChat export directory
DEFAULT_WECHAT_EXPORT_DIR = "./wechat_export_direct"
def create_leann_index_from_multiple_wechat_exports(
export_dirs: list[Path],
index_path: str = "wechat_history_index.leann",
max_count: int = -1,
):
"""
Create LEANN index from multiple WeChat export data sources.
Args:
export_dirs: List of Path objects pointing to WeChat export directories
index_path: Path to save the LEANN index
max_count: Maximum number of chat entries to process per export
"""
print("Creating LEANN index from multiple WeChat export data sources...")
# Load documents using WeChatHistoryReader from history_data
from history_data.wechat_history import WeChatHistoryReader
reader = WeChatHistoryReader()
INDEX_DIR = Path(index_path).parent
if not INDEX_DIR.exists():
print("--- Index directory not found, building new index ---")
all_documents = []
total_processed = 0
# Process each WeChat export directory
for i, export_dir in enumerate(export_dirs):
print(f"\nProcessing WeChat export {i + 1}/{len(export_dirs)}: {export_dir}")
try:
documents = reader.load_data(
wechat_export_dir=str(export_dir),
max_count=max_count,
concatenate_messages=True,  # Concatenate messages into grouped documents
)
if documents:
print(f"Loaded {len(documents)} chat documents from {export_dir}")
all_documents.extend(documents)
total_processed += len(documents)
# Check if we've reached the max count
if max_count > 0 and total_processed >= max_count:
print(f"Reached max count of {max_count} documents")
break
else:
print(f"No documents loaded from {export_dir}")
except Exception as e:
print(f"Error processing {export_dir}: {e}")
continue
if not all_documents:
print("No documents loaded from any source. Exiting.")
return None
print(
f"\nLoaded {len(all_documents)} chat documents from {len(export_dirs)} exports; splitting them into chunks"
)
# Create text splitter with 192 chunk size
text_splitter = SentenceSplitter(chunk_size=192, chunk_overlap=64)
# Convert Documents to text strings and chunk them
all_texts = []
for doc in all_documents:
# Split the document into chunks
nodes = text_splitter.get_nodes_from_documents([doc])
for node in nodes:
text = (
"[Contact] means the message is from: "
+ doc.metadata["contact_name"]
+ "\n"
+ node.get_content()
)
all_texts.append(text)
print(
f"Finished splitting {len(all_documents)} documents into {len(all_texts)} text chunks"
)
# Create LEANN index directory
INDEX_DIR.mkdir(exist_ok=True)
print("--- Building new LEANN index ---")
print("\n[PHASE 1] Building Leann index...")
# Use HNSW backend for better macOS compatibility
builder = LeannBuilder(
backend_name="hnsw",
embedding_model="Qwen/Qwen3-Embedding-0.6B",
graph_degree=32,
complexity=64,
is_compact=True,
is_recompute=True,
num_threads=1, # Force single-threaded mode
)
print(f"Adding {len(all_texts)} chat chunks to index...")
for chunk_text in all_texts:
builder.add_text(chunk_text)
builder.build_index(index_path)
print(f"\nLEANN index built at {index_path}!")
else:
print(f"--- Using existing index at {INDEX_DIR} ---")
return index_path
def create_leann_index(
export_dir: str | None = None,
index_path: str = "wechat_history_index.leann",
max_count: int = 1000,
):
"""
Create LEANN index from WeChat chat history data.
Args:
export_dir: Path to the WeChat export directory (optional, uses default if None)
index_path: Path to save the LEANN index
max_count: Maximum number of chat entries to process
"""
print("Creating LEANN index from WeChat chat history data...")
INDEX_DIR = Path(index_path).parent
if not INDEX_DIR.exists():
print("--- Index directory not found, building new index ---")
INDEX_DIR.mkdir(exist_ok=True)
print("--- Building new LEANN index ---")
print("\n[PHASE 1] Building Leann index...")
# Load documents using WeChatHistoryReader from history_data
from history_data.wechat_history import WeChatHistoryReader
reader = WeChatHistoryReader()
documents = reader.load_data(
wechat_export_dir=export_dir,
max_count=max_count,
concatenate_messages=False, # Disable concatenation - one message per document
)
if not documents:
print("No documents loaded. Exiting.")
return None
print(f"Loaded {len(documents)} chat documents")
# Create text splitter with 256 chunk size
text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=25)
# Convert Documents to text strings and chunk them
all_texts = []
for doc in documents:
# Split the document into chunks
nodes = text_splitter.get_nodes_from_documents([doc])
for node in nodes:
all_texts.append(node.get_content())
print(f"Created {len(all_texts)} text chunks from {len(documents)} documents")
# Use HNSW backend for better macOS compatibility
builder = LeannBuilder(
backend_name="hnsw",
embedding_model="mlx-community/Qwen3-Embedding-0.6B-4bit-DWQ", # MLX-optimized model
graph_degree=32,
complexity=64,
is_compact=True,
is_recompute=True,
num_threads=1, # Force single-threaded mode
)
print(f"Adding {len(all_texts)} chat chunks to index...")
for chunk_text in all_texts:
builder.add_text(chunk_text)
builder.build_index(index_path)
print(f"\nLEANN index built at {index_path}!")
else:
print(f"--- Using existing index at {INDEX_DIR} ---")
return index_path
async def query_leann_index(index_path: str, query: str):
"""
Query the LEANN index.
Args:
index_path: Path to the LEANN index
query: The query string
"""
print("\n[PHASE 2] Starting Leann chat session...")
chat = LeannChat(index_path=index_path)
print(f"You: {query}")
chat_response = chat.ask(
query,
top_k=20,
recompute_embeddings=True,
complexity=16,
beam_width=1,
llm_config={
"type": "openai",
"model": "gpt-4o",
"api_key": os.getenv("OPENAI_API_KEY"),
},
llm_kwargs={"temperature": 0.0, "max_tokens": 1000},
)
print(f"Leann chat response: \033[36m{chat_response}\033[0m")
async def main():
"""Main function with integrated WeChat export functionality."""
# Parse command line arguments
parser = argparse.ArgumentParser(
description="LEANN WeChat History Reader - Create and query WeChat chat history index"
)
parser.add_argument(
"--export-dir",
type=str,
default=DEFAULT_WECHAT_EXPORT_DIR,
help=f"Directory to store WeChat exports (default: {DEFAULT_WECHAT_EXPORT_DIR})",
)
parser.add_argument(
"--index-dir",
type=str,
default="./wechat_history_magic_test_11Debug_new",
help="Directory to store the LEANN index (default: ./wechat_history_index_leann_test)",
)
parser.add_argument(
"--max-entries",
type=int,
default=50,
help="Maximum number of chat entries to process (default: 5000)",
)
parser.add_argument(
"--query",
type=str,
default=None,
help="Single query to run (default: runs example queries)",
)
parser.add_argument(
"--force-export",
action="store_true",
default=False,
help="Force re-export of WeChat data even if exports exist",
)
args = parser.parse_args()
INDEX_DIR = Path(args.index_dir)
INDEX_PATH = str(INDEX_DIR / "wechat_history.leann")
print(f"Using WeChat export directory: {args.export_dir}")
print(f"Index directory: {INDEX_DIR}")
print(f"Max entries: {args.max_entries}")
# Initialize WeChat reader with export capabilities
from history_data.wechat_history import WeChatHistoryReader
reader = WeChatHistoryReader()
# Find existing exports or create new ones using the centralized method
export_dirs = reader.find_or_export_wechat_data(args.export_dir)
if not export_dirs:
print("Failed to find or export WeChat data. Exiting.")
return
# Create or load the LEANN index from all sources
index_path = create_leann_index_from_multiple_wechat_exports(
export_dirs, INDEX_PATH, max_count=args.max_entries
)
if index_path:
if args.query:
# Run single query
await query_leann_index(index_path, args.query)
else:
# Example queries
queries = [
"我想买魔术师约翰逊的球衣,给我一些对应聊天记录?",
]
for query in queries:
print("\n" + "=" * 60)
await query_leann_index(index_path, query)
if __name__ == "__main__":
asyncio.run(main())
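# Example invocations (script filename is illustrative):
#
#   python examples/wechat_rag.py --max-entries 500 --force-export
#   python examples/wechat_rag.py --query "Find chats about buying a Magic Johnson jersey"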

View File

@@ -0,0 +1,8 @@
# packages/leann-backend-diskann/CMakeLists.txt (simplified version)
cmake_minimum_required(VERSION 3.20)
project(leann_backend_diskann_wrapper)
# Tell CMake to directly enter the DiskANN submodule and execute its own CMakeLists.txt
# DiskANN will handle everything itself, including compiling Python bindings
add_subdirectory(src/third_party/DiskANN)
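# For illustration only: a manual out-of-tree configure/build of this wrapper
# (the published wheels normally drive CMake through the Python build backend):
#   cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
#   cmake --build build -j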

View File

@@ -1,7 +1 @@
from . import diskann_backend as diskann_backend
from . import graph_partition
# Export main classes and functions
from .graph_partition import GraphPartitioner, partition_graph
__all__ = ["GraphPartitioner", "diskann_backend", "graph_partition", "partition_graph"]

View File

@@ -4,7 +4,7 @@ import os
import struct
import sys
from pathlib import Path
from typing import Any, Literal, Optional
from typing import Any, Literal
import numpy as np
import psutil
@@ -22,11 +22,6 @@ logger = logging.getLogger(__name__)
@contextlib.contextmanager
def suppress_cpp_output_if_needed():
"""Suppress C++ stdout/stderr based on LEANN_LOG_LEVEL"""
# In CI we avoid fiddling with low-level file descriptors to prevent aborts
if os.getenv("CI") == "true":
yield
return
log_level = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
# Only suppress if log level is WARNING or higher (ERROR, CRITICAL)
@@ -142,71 +137,6 @@ class DiskannBuilder(LeannBackendBuilderInterface):
def __init__(self, **kwargs):
self.build_params = kwargs
def _safe_cleanup_after_partition(self, index_dir: Path, index_prefix: str):
"""
Safely cleanup files after partition.
In partition mode, C++ doesn't read _disk.index content,
so we can delete it if all derived files exist.
"""
disk_index_file = index_dir / f"{index_prefix}_disk.index"
beam_search_file = index_dir / f"{index_prefix}_disk_beam_search.index"
# Required files that C++ partition mode needs
# Note: C++ generates these with _disk.index suffix
disk_suffix = "_disk.index"
required_files = [
f"{index_prefix}{disk_suffix}_medoids.bin", # Critical: assert fails if missing
# Note: _centroids.bin is not created in single-shot build - C++ handles this automatically
f"{index_prefix}_pq_pivots.bin", # PQ table
f"{index_prefix}_pq_compressed.bin", # PQ compressed vectors
]
# Check if all required files exist
missing_files = []
for filename in required_files:
file_path = index_dir / filename
if not file_path.exists():
missing_files.append(filename)
if missing_files:
logger.warning(
f"Cannot safely delete _disk.index - missing required files: {missing_files}"
)
logger.info("Keeping all original files for safety")
return
# Calculate space savings
space_saved = 0
files_to_delete = []
if disk_index_file.exists():
space_saved += disk_index_file.stat().st_size
files_to_delete.append(disk_index_file)
if beam_search_file.exists():
space_saved += beam_search_file.stat().st_size
files_to_delete.append(beam_search_file)
# Safe to delete!
for file_to_delete in files_to_delete:
try:
os.remove(file_to_delete)
logger.info(f"✅ Safely deleted: {file_to_delete.name}")
except Exception as e:
logger.warning(f"Failed to delete {file_to_delete.name}: {e}")
if space_saved > 0:
space_saved_mb = space_saved / (1024 * 1024)
logger.info(f"💾 Space saved: {space_saved_mb:.1f} MB")
# Show what files are kept
logger.info("📁 Kept essential files for partition mode:")
for filename in required_files:
file_path = index_dir / filename
if file_path.exists():
size_mb = file_path.stat().st_size / (1024 * 1024)
logger.info(f" - {filename} ({size_mb:.1f} MB)")
def build(self, data: np.ndarray, ids: list[str], index_path: str, **kwargs):
path = Path(index_path)
index_dir = path.parent
@@ -221,17 +151,6 @@ class DiskannBuilder(LeannBackendBuilderInterface):
_write_vectors_to_bin(data, index_dir / data_filename)
build_kwargs = {**self.build_params, **kwargs}
# Extract is_recompute from nested backend_kwargs if needed
is_recompute = build_kwargs.get("is_recompute", False)
if not is_recompute and "backend_kwargs" in build_kwargs:
is_recompute = build_kwargs["backend_kwargs"].get("is_recompute", False)
# Flatten all backend_kwargs parameters to top level for compatibility
if "backend_kwargs" in build_kwargs:
nested_params = build_kwargs.pop("backend_kwargs")
build_kwargs.update(nested_params)
metric_enum = _get_diskann_metrics().get(
build_kwargs.get("distance_metric", "mips").lower()
)
@@ -266,30 +185,6 @@ class DiskannBuilder(LeannBackendBuilderInterface):
build_kwargs.get("pq_disk_bytes", 0),
"",
)
# Auto-partition if is_recompute is enabled
if build_kwargs.get("is_recompute", False):
logger.info("is_recompute=True, starting automatic graph partitioning...")
from .graph_partition import partition_graph
# Partition the index using absolute paths
# Convert to absolute paths to avoid issues with working directory changes
absolute_index_dir = Path(index_dir).resolve()
absolute_index_prefix_path = str(absolute_index_dir / index_prefix)
disk_graph_path, partition_bin_path = partition_graph(
index_prefix_path=absolute_index_prefix_path,
output_dir=str(absolute_index_dir),
partition_prefix=index_prefix,
)
# Safe cleanup: In partition mode, C++ doesn't read _disk.index content
# but still needs the derived files (_medoids.bin, _centroids.bin, etc.)
self._safe_cleanup_after_partition(index_dir, index_prefix)
logger.info("✅ Graph partitioning completed successfully!")
logger.info(f" - Disk graph: {disk_graph_path}")
logger.info(f" - Partition file: {partition_bin_path}")
finally:
temp_data_file = index_dir / data_filename
if temp_data_file.exists():
@@ -318,26 +213,7 @@ class DiskannSearcher(BaseSearcher):
# For DiskANN, we need to reinitialize the index when zmq_port changes
# Store the initialization parameters for later use
# Note: C++ load method expects the BASE path (without _disk.index suffix)
# C++ internally constructs: index_prefix + "_disk.index"
index_name = self.index_path.stem # "simple_test.leann" -> "simple_test"
diskann_index_prefix = str(self.index_dir / index_name) # /path/to/simple_test
full_index_prefix = diskann_index_prefix # /path/to/simple_test (base path)
# Auto-detect partition files and set partition_prefix
partition_graph_file = self.index_dir / f"{index_name}_disk_graph.index"
partition_bin_file = self.index_dir / f"{index_name}_partition.bin"
partition_prefix = ""
if partition_graph_file.exists() and partition_bin_file.exists():
# C++ expects full path prefix, not just filename
partition_prefix = str(self.index_dir / index_name) # /path/to/simple_test
logger.info(
f"✅ Detected partition files, using partition_prefix='{partition_prefix}'"
)
else:
logger.debug("No partition files detected, using standard index files")
full_index_prefix = str(self.index_dir / self.index_path.stem)
self._init_params = {
"metric_enum": metric_enum,
"full_index_prefix": full_index_prefix,
@@ -345,14 +221,8 @@ class DiskannSearcher(BaseSearcher):
"num_nodes_to_cache": kwargs.get("num_nodes_to_cache", 0),
"cache_mechanism": 1,
"pq_prefix": "",
"partition_prefix": partition_prefix,
"partition_prefix": "",
}
# Log partition configuration for debugging
if partition_prefix:
logger.info(
f"✅ Detected partition files, using partition_prefix='{partition_prefix}'"
)
self._diskannpy = diskannpy
self._current_zmq_port = None
self._index = None
@@ -389,7 +259,7 @@ class DiskannSearcher(BaseSearcher):
prune_ratio: float = 0.0,
recompute_embeddings: bool = False,
pruning_strategy: Literal["global", "local", "proportional"] = "global",
zmq_port: Optional[int] = None,
zmq_port: int | None = None,
batch_recompute: bool = False,
dedup_node_dis: bool = False,
**kwargs,
@@ -441,14 +311,7 @@ class DiskannSearcher(BaseSearcher):
else: # "global"
use_global_pruning = True
# Strategy:
# - Traversal always uses PQ distances
# - If recompute_embeddings=True, do a single final rerank via deferred fetch
# (fetch embeddings for the final candidate set only)
# - Do not recompute neighbor distances along the path
use_deferred_fetch = True if recompute_embeddings else False
recompute_neighors = False  # Typo kept intentionally for backward compatibility.
# Perform search with suppressed C++ output based on log level
with suppress_cpp_output_if_needed():
labels, distances = self._index.batch_search(
query,
@@ -457,9 +320,9 @@ class DiskannSearcher(BaseSearcher):
complexity,
beam_width,
self.num_threads,
use_deferred_fetch,
kwargs.get("USE_DEFERRED_FETCH", False),
kwargs.get("skip_search_reorder", False),
recompute_neighors,
recompute_embeddings,
dedup_node_dis,
prune_ratio,
batch_recompute,

View File

@@ -10,7 +10,6 @@ import sys
import threading
import time
from pathlib import Path
from typing import Optional
import numpy as np
import zmq
@@ -33,7 +32,7 @@ if not logger.handlers:
def create_diskann_embedding_server(
passages_file: Optional[str] = None,
passages_file: str | None = None,
zmq_port: int = 5555,
model_name: str = "sentence-transformers/all-mpnet-base-v2",
embedding_mode: str = "sentence-transformers",
@@ -81,8 +80,7 @@ def create_diskann_embedding_server(
with open(passages_file) as f:
meta = json.load(f)
logger.info(f"Loading PassageManager with metadata_file_path: {passages_file}")
passages = PassageManager(meta["passage_sources"], metadata_file_path=passages_file)
passages = PassageManager(meta["passage_sources"])
logger.info(
f"Loaded PassageManager with {len(passages.global_offset_map)} passages from metadata"
)
@@ -103,9 +101,8 @@ def create_diskann_embedding_server(
socket.bind(f"tcp://*:{zmq_port}")
logger.info(f"DiskANN ZMQ REP server listening on port {zmq_port}")
socket.setsockopt(zmq.RCVTIMEO, 1000)
socket.setsockopt(zmq.SNDTIMEO, 1000)
socket.setsockopt(zmq.LINGER, 0)
socket.setsockopt(zmq.RCVTIMEO, 300000)
socket.setsockopt(zmq.SNDTIMEO, 300000)
while True:
try:
@@ -222,217 +219,30 @@ def create_diskann_embedding_server(
traceback.print_exc()
raise
def zmq_server_thread_with_shutdown(shutdown_event):
"""ZMQ server thread that respects shutdown signal.
This creates its own REP socket, binds to zmq_port, and periodically
checks shutdown_event using recv timeouts to exit cleanly.
"""
logger.info("DiskANN ZMQ server thread started with shutdown support")
context = zmq.Context()
rep_socket = context.socket(zmq.REP)
rep_socket.bind(f"tcp://*:{zmq_port}")
logger.info(f"DiskANN ZMQ REP server listening on port {zmq_port}")
# Set receive timeout so we can check shutdown_event periodically
rep_socket.setsockopt(zmq.RCVTIMEO, 1000) # 1 second timeout
rep_socket.setsockopt(zmq.SNDTIMEO, 1000)
rep_socket.setsockopt(zmq.LINGER, 0)
try:
while not shutdown_event.is_set():
try:
e2e_start = time.time()
# REP socket receives single-part messages
message = rep_socket.recv()
# Check for empty messages - REP socket requires response to every request
if not message:
logger.warning("Received empty message, sending empty response")
rep_socket.send(b"")
continue
# Try protobuf first (same logic as original)
texts = []
is_text_request = False
try:
req_proto = embedding_pb2.NodeEmbeddingRequest()
req_proto.ParseFromString(message)
node_ids = list(req_proto.node_ids)
# Look up texts by node IDs
for nid in node_ids:
try:
passage_data = passages.get_passage(str(nid))
txt = passage_data["text"]
if not txt:
raise RuntimeError(f"FATAL: Empty text for passage ID {nid}")
texts.append(txt)
except KeyError:
raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
logger.info(f"ZMQ received protobuf request for {len(node_ids)} node IDs")
except Exception:
# Fallback to msgpack for text requests
try:
import msgpack
request = msgpack.unpackb(message)
if isinstance(request, list) and all(
isinstance(item, str) for item in request
):
texts = request
is_text_request = True
logger.info(
f"ZMQ received msgpack text request for {len(texts)} texts"
)
else:
raise ValueError("Not a valid msgpack text request")
except Exception:
logger.error("Both protobuf and msgpack parsing failed!")
# Send error response
resp_proto = embedding_pb2.NodeEmbeddingResponse()
rep_socket.send(resp_proto.SerializeToString())
continue
# Process the request
embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
logger.info(f"Computed embeddings shape: {embeddings.shape}")
# Validation
if np.isnan(embeddings).any() or np.isinf(embeddings).any():
logger.error("NaN or Inf detected in embeddings!")
# Send error response
if is_text_request:
import msgpack
response_data = msgpack.packb([])
else:
resp_proto = embedding_pb2.NodeEmbeddingResponse()
response_data = resp_proto.SerializeToString()
rep_socket.send(response_data)
continue
# Prepare response based on request type
if is_text_request:
# For direct text requests, return msgpack
import msgpack
response_data = msgpack.packb(embeddings.tolist())
else:
# For protobuf requests, return protobuf
resp_proto = embedding_pb2.NodeEmbeddingResponse()
hidden_contiguous = np.ascontiguousarray(embeddings, dtype=np.float32)
resp_proto.embeddings_data = hidden_contiguous.tobytes()
resp_proto.dimensions.append(hidden_contiguous.shape[0])
resp_proto.dimensions.append(hidden_contiguous.shape[1])
response_data = resp_proto.SerializeToString()
# Send response back to the client
rep_socket.send(response_data)
e2e_end = time.time()
logger.info(f"⏱️ ZMQ E2E time: {e2e_end - e2e_start:.6f}s")
except zmq.Again:
# Timeout - check shutdown_event and continue
continue
except Exception as e:
if not shutdown_event.is_set():
logger.error(f"Error in ZMQ server loop: {e}")
try:
# Send error response for REP socket
resp_proto = embedding_pb2.NodeEmbeddingResponse()
rep_socket.send(resp_proto.SerializeToString())
except Exception:
pass
else:
logger.info("Shutdown in progress, ignoring ZMQ error")
break
finally:
try:
rep_socket.close(0)
except Exception:
pass
try:
context.term()
except Exception:
pass
logger.info("DiskANN ZMQ server thread exiting gracefully")
# Add shutdown coordination
shutdown_event = threading.Event()
def shutdown_zmq_server():
"""Gracefully shutdown ZMQ server."""
logger.info("Initiating graceful shutdown...")
shutdown_event.set()
if zmq_thread.is_alive():
logger.info("Waiting for ZMQ thread to finish...")
zmq_thread.join(timeout=5)
if zmq_thread.is_alive():
logger.warning("ZMQ thread did not finish in time")
# Clean up ZMQ resources
try:
# Note: socket and context are cleaned up by thread exit
logger.info("ZMQ resources cleaned up")
except Exception as e:
logger.warning(f"Error cleaning ZMQ resources: {e}")
# Clean up other resources
try:
import gc
gc.collect()
logger.info("Additional resources cleaned up")
except Exception as e:
logger.warning(f"Error cleaning additional resources: {e}")
logger.info("Graceful shutdown completed")
sys.exit(0)
# Register signal handlers within this function scope
import signal
def signal_handler(sig, frame):
logger.info(f"Received signal {sig}, shutting down gracefully...")
shutdown_zmq_server()
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Start ZMQ thread (NOT daemon!)
zmq_thread = threading.Thread(
target=lambda: zmq_server_thread_with_shutdown(shutdown_event),
daemon=False, # Not daemon - we want to wait for it
)
zmq_thread = threading.Thread(target=zmq_server_thread, daemon=True)
zmq_thread.start()
logger.info(f"Started DiskANN ZMQ server thread on port {zmq_port}")
# Keep the main thread alive
try:
while not shutdown_event.is_set():
time.sleep(0.1) # Check shutdown more frequently
while True:
time.sleep(1)
except KeyboardInterrupt:
logger.info("DiskANN Server shutting down...")
shutdown_zmq_server()
return
# If we reach here, shutdown was triggered by signal
logger.info("Main loop exited, process should be shutting down")
if __name__ == "__main__":
import signal
import sys
# Signal handlers are now registered within create_diskann_embedding_server
def signal_handler(sig, frame):
logger.info(f"Received signal {sig}, shutting down gracefully...")
sys.exit(0)
# Register signal handlers for graceful shutdown
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
parser = argparse.ArgumentParser(description="DiskANN Embedding service")
parser.add_argument("--zmq-port", type=int, default=5555, help="ZMQ port to run on")
@@ -451,7 +261,7 @@ if __name__ == "__main__":
"--embedding-mode",
type=str,
default="sentence-transformers",
choices=["sentence-transformers", "openai", "mlx", "ollama"],
choices=["sentence-transformers", "openai", "mlx"],
help="Embedding backend mode",
)
parser.add_argument(

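For orientation, the msgpack branch kept above accepts a plain list of strings and answers with the embeddings as nested lists (or an empty list on failure). A minimal client sketch against that contract; a running server on port 5555 is assumed, and the script is illustrative rather than part of the repo:
import msgpack
import numpy as np
import zmq
# REQ pairs with the server's REP socket created above.
sock = zmq.Context().socket(zmq.REQ)
sock.connect("tcp://localhost:5555")
# Direct text request: a msgpack-encoded list of strings.
sock.send(msgpack.packb(["hello world", "leann keeps indexes small"]))
reply = msgpack.unpackb(sock.recv())
# The text path returns embeddings.tolist(); [] signals a server-side error.
embeddings = np.array(reply, dtype=np.float32)
print(embeddings.shape)  # (2, dim) on success, (0,) on error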
View File

@@ -1,299 +0,0 @@
#!/usr/bin/env python3
"""
Graph Partition Module for LEANN DiskANN Backend
This module provides Python bindings for the graph partition functionality
of DiskANN, allowing users to partition disk-based indices for better
performance.
"""
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
class GraphPartitioner:
"""
A Python interface for DiskANN's graph partition functionality.
This class provides methods to partition disk-based indices for improved
search performance and memory efficiency.
"""
def __init__(self, build_type: str = "release"):
"""
Initialize the GraphPartitioner.
Args:
build_type: Build type for the executables ("debug" or "release")
"""
self.build_type = build_type
self._ensure_executables()
def _get_executable_path(self, name: str) -> str:
"""Get the path to a graph partition executable."""
# Get the directory where this Python module is located
module_dir = Path(__file__).parent
# Navigate to the graph_partition directory
graph_partition_dir = module_dir.parent / "third_party" / "DiskANN" / "graph_partition"
executable_path = graph_partition_dir / "build" / self.build_type / "graph_partition" / name
if not executable_path.exists():
raise FileNotFoundError(f"Executable {name} not found at {executable_path}")
return str(executable_path)
def _ensure_executables(self):
"""Ensure that the required executables are built."""
try:
self._get_executable_path("partitioner")
self._get_executable_path("index_relayout")
except FileNotFoundError:
# Try to build the executables automatically
print("Executables not found, attempting to build them...")
self._build_executables()
def _build_executables(self):
"""Build the required executables."""
graph_partition_dir = (
Path(__file__).parent.parent / "third_party" / "DiskANN" / "graph_partition"
)
original_dir = os.getcwd()
try:
os.chdir(graph_partition_dir)
# Clean any existing build
if (graph_partition_dir / "build").exists():
shutil.rmtree(graph_partition_dir / "build")
# Run the build script
cmd = ["./build.sh", self.build_type, "split_graph", "/tmp/dummy"]
subprocess.run(cmd, capture_output=True, text=True, cwd=graph_partition_dir)
# Check if executables were created
partitioner_path = self._get_executable_path("partitioner")
relayout_path = self._get_executable_path("index_relayout")
print(f"✅ Built partitioner: {partitioner_path}")
print(f"✅ Built index_relayout: {relayout_path}")
except Exception as e:
raise RuntimeError(f"Failed to build executables: {e}")
finally:
os.chdir(original_dir)
def partition_graph(
self,
index_prefix_path: str,
output_dir: Optional[str] = None,
partition_prefix: Optional[str] = None,
**kwargs,
) -> tuple[str, str]:
"""
Partition a disk-based index for improved performance.
Args:
index_prefix_path: Path to the index prefix (e.g., "/path/to/index")
output_dir: Output directory for results (defaults to parent of index_prefix_path)
partition_prefix: Prefix for output files (defaults to basename of index_prefix_path)
**kwargs: Additional parameters for graph partitioning:
- gp_times: Number of LDG partition iterations (default: 10)
- lock_nums: Number of lock nodes (default: 10)
- cut: Cut adjacency list degree (default: 100)
- scale_factor: Scale factor (default: 1)
- data_type: Data type (default: "float")
- thread_nums: Number of threads (default: 10)
Returns:
Tuple of (disk_graph_index_path, partition_bin_path)
Raises:
RuntimeError: If the partitioning process fails
"""
# Set default parameters
params = {
"gp_times": 10,
"lock_nums": 10,
"cut": 100,
"scale_factor": 1,
"data_type": "float",
"thread_nums": 10,
**kwargs,
}
# Determine output directory
if output_dir is None:
output_dir = str(Path(index_prefix_path).parent)
# Create output directory if it doesn't exist
Path(output_dir).mkdir(parents=True, exist_ok=True)
# Determine partition prefix
if partition_prefix is None:
partition_prefix = Path(index_prefix_path).name
# Get executable paths
partitioner_path = self._get_executable_path("partitioner")
relayout_path = self._get_executable_path("index_relayout")
# Create temporary directory for processing
with tempfile.TemporaryDirectory() as temp_dir:
# Change to the graph_partition directory for temporary files
graph_partition_dir = (
Path(__file__).parent.parent / "third_party" / "DiskANN" / "graph_partition"
)
original_dir = os.getcwd()
try:
os.chdir(graph_partition_dir)
# Create temporary data directory
temp_data_dir = Path(temp_dir) / "data"
temp_data_dir.mkdir(parents=True, exist_ok=True)
# Set up paths for temporary files
graph_path = temp_data_dir / "starling" / "_M_R_L_B" / "GRAPH"
graph_gp_path = (
graph_path
/ f"GP_TIMES_{params['gp_times']}_LOCK_{params['lock_nums']}_GP_USE_FREQ0_CUT{params['cut']}_SCALE{params['scale_factor']}"
)
graph_gp_path.mkdir(parents=True, exist_ok=True)
# Find input index file
old_index_file = f"{index_prefix_path}_disk_beam_search.index"
if not os.path.exists(old_index_file):
old_index_file = f"{index_prefix_path}_disk.index"
if not os.path.exists(old_index_file):
raise RuntimeError(f"Index file not found: {old_index_file}")
# Run partitioner
gp_file_path = graph_gp_path / "_part.bin"
partitioner_cmd = [
partitioner_path,
"--index_file",
old_index_file,
"--data_type",
params["data_type"],
"--gp_file",
str(gp_file_path),
"-T",
str(params["thread_nums"]),
"--ldg_times",
str(params["gp_times"]),
"--scale",
str(params["scale_factor"]),
"--mode",
"1",
]
print(f"Running partitioner: {' '.join(partitioner_cmd)}")
result = subprocess.run(
partitioner_cmd, capture_output=True, text=True, cwd=graph_partition_dir
)
if result.returncode != 0:
raise RuntimeError(
f"Partitioner failed with return code {result.returncode}.\n"
f"stdout: {result.stdout}\n"
f"stderr: {result.stderr}"
)
# Run relayout
part_tmp_index = graph_gp_path / "_part_tmp.index"
relayout_cmd = [
relayout_path,
old_index_file,
str(gp_file_path),
params["data_type"],
"1",
]
print(f"Running relayout: {' '.join(relayout_cmd)}")
result = subprocess.run(
relayout_cmd, capture_output=True, text=True, cwd=graph_partition_dir
)
if result.returncode != 0:
raise RuntimeError(
f"Relayout failed with return code {result.returncode}.\n"
f"stdout: {result.stdout}\n"
f"stderr: {result.stderr}"
)
# Copy results to output directory
disk_graph_path = Path(output_dir) / f"{partition_prefix}_disk_graph.index"
partition_bin_path = Path(output_dir) / f"{partition_prefix}_partition.bin"
shutil.copy2(part_tmp_index, disk_graph_path)
shutil.copy2(gp_file_path, partition_bin_path)
print(f"Results copied to: {output_dir}")
return str(disk_graph_path), str(partition_bin_path)
finally:
os.chdir(original_dir)
def get_partition_info(self, partition_bin_path: str) -> dict:
"""
Get information about a partition file.
Args:
partition_bin_path: Path to the partition binary file
Returns:
Dictionary containing partition information
"""
if not os.path.exists(partition_bin_path):
raise FileNotFoundError(f"Partition file not found: {partition_bin_path}")
# For now, return basic file information
# In the future, this could parse the binary file for detailed info
stat = os.stat(partition_bin_path)
return {
"file_size": stat.st_size,
"file_path": partition_bin_path,
"modified_time": stat.st_mtime,
}
def partition_graph(
index_prefix_path: str,
output_dir: Optional[str] = None,
partition_prefix: Optional[str] = None,
build_type: str = "release",
**kwargs,
) -> tuple[str, str]:
"""
Convenience function to partition a graph index.
Args:
index_prefix_path: Path to the index prefix
output_dir: Output directory (defaults to parent of index_prefix_path)
partition_prefix: Prefix for output files (defaults to basename of index_prefix_path)
build_type: Build type for executables ("debug" or "release")
**kwargs: Additional parameters for graph partitioning
Returns:
Tuple of (disk_graph_index_path, partition_bin_path)
"""
partitioner = GraphPartitioner(build_type=build_type)
return partitioner.partition_graph(index_prefix_path, output_dir, partition_prefix, **kwargs)
# Example usage:
if __name__ == "__main__":
# Example: partition an index
try:
disk_graph_path, partition_bin_path = partition_graph(
"/path/to/your/index_prefix", gp_times=10, lock_nums=10, cut=100
)
print("Partitioning completed successfully!")
print(f"Disk graph index: {disk_graph_path}")
print(f"Partition binary: {partition_bin_path}")
except Exception as e:
print(f"Partitioning failed: {e}")

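As a companion to the example above, a hedged sketch of get_partition_info; note that constructing GraphPartitioner will try to locate (or build) the partitioner executables, and the path below is a placeholder:
# Illustrative only: inspect a partition file produced by partition_graph.
partitioner = GraphPartitioner(build_type="release")
info = partitioner.get_partition_info("/path/to/your/index_prefix_partition.bin")
print(f"Partition file: {info['file_path']}")
print(f"Size: {info['file_size'] / (1024 * 1024):.1f} MB")
print(f"Last modified (epoch): {info['modified_time']}")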
View File

@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
[project]
name = "leann-backend-diskann"
version = "0.2.9"
dependencies = ["leann-core==0.2.9", "numpy", "protobuf>=3.19.0"]
version = "0.1.16"
dependencies = ["leann-core==0.1.16", "numpy", "protobuf>=3.19.0"]
[tool.scikit-build]
# Key: simplified CMake path
@@ -17,5 +17,3 @@ editable.mode = "redirect"
cmake.build-type = "Release"
build.verbose = true
build.tool-args = ["-j8"]
# Let CMake find packages via Homebrew prefix
cmake.define = {CMAKE_PREFIX_PATH = {env = "CMAKE_PREFIX_PATH"}, OpenMP_ROOT = {env = "OpenMP_ROOT"}}

View File

@@ -5,20 +5,11 @@ set(CMAKE_CXX_COMPILER_WORKS 1)
# Set OpenMP path for macOS
if(APPLE)
# Detect Homebrew installation path (Apple Silicon vs Intel)
if(EXISTS "/opt/homebrew/opt/libomp")
set(HOMEBREW_PREFIX "/opt/homebrew")
elseif(EXISTS "/usr/local/opt/libomp")
set(HOMEBREW_PREFIX "/usr/local")
else()
message(FATAL_ERROR "Could not find libomp installation. Please install with: brew install libomp")
endif()
set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include")
set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include")
set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I/opt/homebrew/opt/libomp/include")
set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I/opt/homebrew/opt/libomp/include")
set(OpenMP_C_LIB_NAMES "omp")
set(OpenMP_CXX_LIB_NAMES "omp")
set(OpenMP_omp_LIBRARY "${HOMEBREW_PREFIX}/opt/libomp/lib/libomp.dylib")
set(OpenMP_omp_LIBRARY "/opt/homebrew/opt/libomp/lib/libomp.dylib")
# Force use of system libc++ to avoid version mismatch
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")

View File

@@ -1,6 +1,5 @@
import argparse
import gc # Import garbage collector interface
import logging
import os
import struct
import sys
@@ -8,12 +7,6 @@ import time
import numpy as np
# Set up logging to avoid print buffer issues
logger = logging.getLogger(__name__)
LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
log_level = getattr(logging, LOG_LEVEL, logging.WARNING)
logger.setLevel(log_level)
# --- FourCCs (add more if needed) ---
INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b"IHNf", "little")
# Add other HNSW fourccs if you expect different storage types inside HNSW
@@ -250,8 +243,6 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
output_filename: Output CSR index file
prune_embeddings: Whether to prune embedding storage (write NULL storage marker)
"""
# Keep prints simple; rely on CI runner to flush output as needed
print(f"Starting conversion: {input_filename} -> {output_filename}")
start_time = time.time()
original_hnsw_data = {}

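The converter entry point shown in this hunk takes a Faiss-serialized HNSW index and rewrites it into the CSR layout, optionally replacing embedding storage with a NULL marker. A minimal invocation sketch; the import path is an assumption for illustration, adjust it to wherever this script lives in the repo:
# Hypothetical import path; the function name and signature match the hunk above.
from convert_to_csr import convert_hnsw_graph_to_csr

convert_hnsw_graph_to_csr(
    "documents.index",      # input: Faiss HNSW index file
    "documents.csr.index",  # output: CSR index for the HNSW backend
    prune_embeddings=True,  # write the NULL storage marker; recompute at search
)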
View File

@@ -2,7 +2,7 @@ import logging
import os
import shutil
from pathlib import Path
from typing import Any, Literal, Optional
from typing import Any, Literal
import numpy as np
from leann.interface import (
@@ -54,13 +54,12 @@ class HNSWBuilder(LeannBackendBuilderInterface):
self.efConstruction = self.build_params.setdefault("efConstruction", 200)
self.distance_metric = self.build_params.setdefault("distance_metric", "mips")
self.dimensions = self.build_params.get("dimensions")
if not self.is_recompute and self.is_compact:
# Auto-correct: non-recompute requires non-compact storage for HNSW
logger.warning(
"is_recompute=False requires non-compact HNSW. Forcing is_compact=False."
)
self.is_compact = False
self.build_params["is_compact"] = False
if not self.is_recompute:
if self.is_compact:
# TODO: support this case @andy
raise ValueError(
"is_recompute=False with is_compact=True is not supported yet. Set is_compact=False to use the original HNSW index."
)
def build(self, data: np.ndarray, ids: list[str], index_path: str, **kwargs):
from . import faiss # type: ignore
@@ -153,7 +152,7 @@ class HNSWSearcher(BaseSearcher):
self,
query: np.ndarray,
top_k: int,
zmq_port: Optional[int] = None,
zmq_port: int | None = None,
complexity: int = 64,
beam_width: int = 1,
prune_ratio: float = 0.0,
@@ -185,11 +184,9 @@ class HNSWSearcher(BaseSearcher):
"""
from . import faiss # type: ignore
if not recompute_embeddings and self.is_pruned:
raise RuntimeError(
"Recompute is required for pruned/compact HNSW index. "
"Re-run search with --recompute, or rebuild with --no-recompute and --no-compact."
)
if not recompute_embeddings:
if self.is_pruned:
raise RuntimeError("Recompute is required for pruned index.")
if recompute_embeddings:
if zmq_port is None:
raise ValueError("zmq_port must be provided if recompute_embeddings is True")

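The two guards above are complementary: the new builder path auto-corrects the invalid combination with a warning, while the searcher still hard-fails if a pruned index is searched without recompute. A hedged sketch of the builder side, using the leann.api surface shown later in this diff:
from leann.api import LeannBuilder

# is_recompute=False with is_compact=True is invalid for HNSW. The new code
# path warns and forces is_compact=False; the old path raised ValueError.
builder = LeannBuilder(
    backend_name="hnsw",
    embedding_model="facebook/contriever",
    is_recompute=False,
    is_compact=True,  # coerced to False with a UserWarning
)
builder.add_text("a sample passage to index")
builder.build_index("./demo.leann")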
View File

@@ -10,7 +10,6 @@ import sys
import threading
import time
from pathlib import Path
from typing import Optional
import msgpack
import numpy as np
@@ -34,7 +33,7 @@ if not logger.handlers:
def create_hnsw_embedding_server(
passages_file: Optional[str] = None,
passages_file: str | None = None,
zmq_port: int = 5555,
model_name: str = "sentence-transformers/all-mpnet-base-v2",
distance_metric: str = "mips",
@@ -82,317 +81,199 @@ def create_hnsw_embedding_server(
with open(passages_file) as f:
meta = json.load(f)
# Let PassageManager handle path resolution uniformly. It supports fallback order:
# 1) path/index_path; 2) *_relative; 3) standard siblings next to meta
passages = PassageManager(meta["passage_sources"], metadata_file_path=passages_file)
# Dimension from metadata for shaping responses
try:
embedding_dim: int = int(meta.get("dimensions", 0))
except Exception:
embedding_dim = 0
# Convert relative paths to absolute paths based on metadata file location
metadata_dir = Path(passages_file).parent.parent # Go up one level from the metadata file
passage_sources = []
for source in meta["passage_sources"]:
source_copy = source.copy()
# Convert relative paths to absolute paths
if not Path(source_copy["path"]).is_absolute():
source_copy["path"] = str(metadata_dir / source_copy["path"])
if not Path(source_copy["index_path"]).is_absolute():
source_copy["index_path"] = str(metadata_dir / source_copy["index_path"])
passage_sources.append(source_copy)
passages = PassageManager(passage_sources)
logger.info(
f"Loaded PassageManager with {len(passages.global_offset_map)} passages from metadata"
)
# (legacy ZMQ thread removed; using shutdown-capable server only)
def zmq_server_thread_with_shutdown(shutdown_event):
"""ZMQ server thread that respects shutdown signal.
Creates its own REP socket bound to zmq_port and polls with timeouts
to allow graceful shutdown.
"""
logger.info("ZMQ server thread started with shutdown support")
def zmq_server_thread():
"""ZMQ server thread"""
context = zmq.Context()
rep_socket = context.socket(zmq.REP)
rep_socket.bind(f"tcp://*:{zmq_port}")
logger.info(f"HNSW ZMQ REP server listening on port {zmq_port}")
rep_socket.setsockopt(zmq.RCVTIMEO, 1000)
# Keep sends from blocking during shutdown; fail fast and drop on close
rep_socket.setsockopt(zmq.SNDTIMEO, 1000)
rep_socket.setsockopt(zmq.LINGER, 0)
socket = context.socket(zmq.REP)
socket.bind(f"tcp://*:{zmq_port}")
logger.info(f"HNSW ZMQ server listening on port {zmq_port}")
# Track last request type/length for shape-correct fallbacks
last_request_type = "unknown" # 'text' | 'distance' | 'embedding' | 'unknown'
last_request_length = 0
socket.setsockopt(zmq.RCVTIMEO, 300000)
socket.setsockopt(zmq.SNDTIMEO, 300000)
try:
while not shutdown_event.is_set():
try:
e2e_start = time.time()
logger.debug("🔍 Waiting for ZMQ message...")
request_bytes = rep_socket.recv()
while True:
try:
message_bytes = socket.recv()
logger.debug(f"Received ZMQ request of size {len(message_bytes)} bytes")
# Rest of the processing logic (same as original)
request = msgpack.unpackb(request_bytes)
e2e_start = time.time()
request_payload = msgpack.unpackb(message_bytes)
if len(request) == 1 and request[0] == "__QUERY_MODEL__":
response_bytes = msgpack.packb([model_name])
rep_socket.send(response_bytes)
continue
# Handle direct text embedding request
if isinstance(request_payload, list) and len(request_payload) > 0:
# Check if this is a direct text request (list of strings)
if all(isinstance(item, str) for item in request_payload):
logger.info(
f"Processing direct text embedding request for {len(request_payload)} texts in {embedding_mode} mode"
)
# Handle direct text embedding request
if (
isinstance(request, list)
and request
and all(isinstance(item, str) for item in request)
):
last_request_type = "text"
last_request_length = len(request)
embeddings = compute_embeddings(request, model_name, mode=embedding_mode)
rep_socket.send(msgpack.packb(embeddings.tolist()))
# Use unified embedding computation (now with model caching)
embeddings = compute_embeddings(
request_payload, model_name, mode=embedding_mode
)
response = embeddings.tolist()
socket.send(msgpack.packb(response))
e2e_end = time.time()
logger.info(f"⏱️ Text embedding E2E time: {e2e_end - e2e_start:.6f}s")
continue
# Handle distance calculation request: [[ids], [query_vector]]
if (
isinstance(request, list)
and len(request) == 2
and isinstance(request[0], list)
and isinstance(request[1], list)
):
node_ids = request[0]
# Handle nested [[ids]] shape defensively
if len(node_ids) == 1 and isinstance(node_ids[0], list):
node_ids = node_ids[0]
query_vector = np.array(request[1], dtype=np.float32)
last_request_type = "distance"
last_request_length = len(node_ids)
# Handle distance calculation requests
if (
isinstance(request_payload, list)
and len(request_payload) == 2
and isinstance(request_payload[0], list)
and isinstance(request_payload[1], list)
):
node_ids = request_payload[0]
query_vector = np.array(request_payload[1], dtype=np.float32)
logger.debug("Distance calculation request received")
logger.debug(f" Node IDs: {node_ids}")
logger.debug(f" Query vector dim: {len(query_vector)}")
logger.debug("Distance calculation request received")
logger.debug(f" Node IDs: {node_ids}")
logger.debug(f" Query vector dim: {len(query_vector)}")
# Gather texts for found ids
texts: list[str] = []
found_indices: list[int] = []
for idx, nid in enumerate(node_ids):
try:
passage_data = passages.get_passage(str(nid))
txt = passage_data.get("text", "")
if isinstance(txt, str) and len(txt) > 0:
texts.append(txt)
found_indices.append(idx)
else:
logger.error(f"Empty text for passage ID {nid}")
except KeyError:
logger.error(f"Passage ID {nid} not found")
except Exception as e:
logger.error(f"Exception looking up passage ID {nid}: {e}")
# Prepare full-length response with large sentinel values
large_distance = 1e9
response_distances = [large_distance] * len(node_ids)
if texts:
try:
embeddings = compute_embeddings(
texts, model_name, mode=embedding_mode
)
logger.info(
f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
)
if distance_metric == "l2":
partial = np.sum(
np.square(embeddings - query_vector.reshape(1, -1)), axis=1
)
else: # mips or cosine
partial = -np.dot(embeddings, query_vector)
for pos, dval in zip(found_indices, partial.flatten().tolist()):
response_distances[pos] = float(dval)
except Exception as e:
logger.error(f"Distance computation error, using sentinels: {e}")
# Send response in expected shape [[distances]]
rep_socket.send(msgpack.packb([response_distances], use_single_float=True))
e2e_end = time.time()
logger.info(f"⏱️ Distance calculation E2E time: {e2e_end - e2e_start:.6f}s")
continue
# Fallback: treat as embedding-by-id request
if (
isinstance(request, list)
and len(request) == 1
and isinstance(request[0], list)
):
node_ids = request[0]
elif isinstance(request, list):
node_ids = request
else:
node_ids = []
last_request_type = "embedding"
last_request_length = len(node_ids)
logger.info(f"ZMQ received {len(node_ids)} node IDs for embedding fetch")
# Preallocate zero-filled flat data for robustness
if embedding_dim <= 0:
dims = [0, 0]
flat_data: list[float] = []
else:
dims = [len(node_ids), embedding_dim]
flat_data = [0.0] * (dims[0] * dims[1])
# Collect texts for found ids
texts: list[str] = []
found_indices: list[int] = []
for idx, nid in enumerate(node_ids):
# Get embeddings for node IDs
texts = []
for nid in node_ids:
try:
passage_data = passages.get_passage(str(nid))
txt = passage_data.get("text", "")
if isinstance(txt, str) and len(txt) > 0:
texts.append(txt)
found_indices.append(idx)
else:
logger.error(f"Empty text for passage ID {nid}")
txt = passage_data["text"]
texts.append(txt)
except KeyError:
logger.error(f"Passage with ID {nid} not found")
logger.error(f"Passage ID {nid} not found")
raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
except Exception as e:
logger.error(f"Exception looking up passage ID {nid}: {e}")
raise
if texts:
try:
embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
logger.info(
f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
)
# Process embeddings
embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
logger.info(
f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
)
if np.isnan(embeddings).any() or np.isinf(embeddings).any():
logger.error(
f"NaN or Inf detected in embeddings! Requested IDs: {node_ids[:5]}..."
)
dims = [0, embedding_dim]
flat_data = []
else:
emb_f32 = np.ascontiguousarray(embeddings, dtype=np.float32)
flat = emb_f32.flatten().tolist()
for j, pos in enumerate(found_indices):
start = pos * embedding_dim
end = start + embedding_dim
if end <= len(flat_data):
flat_data[start:end] = flat[
j * embedding_dim : (j + 1) * embedding_dim
]
except Exception as e:
logger.error(f"Embedding computation error, returning zeros: {e}")
# Calculate distances
if distance_metric == "l2":
distances = np.sum(
np.square(embeddings - query_vector.reshape(1, -1)), axis=1
)
else: # mips or cosine
distances = -np.dot(embeddings, query_vector)
response_payload = [dims, flat_data]
response_bytes = msgpack.packb(response_payload, use_single_float=True)
response_payload = distances.flatten().tolist()
response_bytes = msgpack.packb([response_payload], use_single_float=True)
logger.debug(f"Sending distance response with {len(distances)} distances")
rep_socket.send(response_bytes)
socket.send(response_bytes)
e2e_end = time.time()
logger.info(f"⏱️ ZMQ E2E time: {e2e_end - e2e_start:.6f}s")
except zmq.Again:
# Timeout - check shutdown_event and continue
logger.info(f"⏱️ Distance calculation E2E time: {e2e_end - e2e_start:.6f}s")
continue
except Exception as e:
if not shutdown_event.is_set():
logger.error(f"Error in ZMQ server loop: {e}")
# Shape-correct fallback
try:
if last_request_type == "distance":
large_distance = 1e9
fallback_len = max(0, int(last_request_length))
safe = [[large_distance] * fallback_len]
elif last_request_type == "embedding":
bsz = max(0, int(last_request_length))
dim = max(0, int(embedding_dim))
safe = (
[[bsz, dim], [0.0] * (bsz * dim)] if dim > 0 else [[0, 0], []]
)
elif last_request_type == "text":
safe = []  # direct text requests expect a flat list of embeddings
else:
safe = [[0, int(embedding_dim) if embedding_dim > 0 else 0], []]
rep_socket.send(msgpack.packb(safe, use_single_float=True))
except Exception:
pass
else:
logger.info("Shutdown in progress, ignoring ZMQ error")
break
finally:
try:
rep_socket.close(0)
except Exception:
pass
try:
context.term()
except Exception:
pass
logger.info("ZMQ server thread exiting gracefully")
# Standard embedding request (passage ID lookup)
if (
not isinstance(request_payload, list)
or len(request_payload) != 1
or not isinstance(request_payload[0], list)
):
logger.error(
f"Invalid MessagePack request format. Expected [[ids...]] or [texts...], got: {type(request_payload)}"
)
socket.send(msgpack.packb([[], []]))
continue
# Add shutdown coordination
shutdown_event = threading.Event()
node_ids = request_payload[0]
logger.debug(f"Request for {len(node_ids)} node embeddings")
def shutdown_zmq_server():
"""Gracefully shutdown ZMQ server."""
logger.info("Initiating graceful shutdown...")
shutdown_event.set()
# Look up texts by node IDs
texts = []
for nid in node_ids:
try:
passage_data = passages.get_passage(str(nid))
txt = passage_data["text"]
if not txt:
raise RuntimeError(f"FATAL: Empty text for passage ID {nid}")
texts.append(txt)
except KeyError:
raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
except Exception as e:
logger.error(f"Exception looking up passage ID {nid}: {e}")
raise
if zmq_thread.is_alive():
logger.info("Waiting for ZMQ thread to finish...")
zmq_thread.join(timeout=5)
if zmq_thread.is_alive():
logger.warning("ZMQ thread did not finish in time")
# Process embeddings
embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
logger.info(
f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
)
# Clean up ZMQ resources
try:
# Note: socket and context are cleaned up by thread exit
logger.info("ZMQ resources cleaned up")
except Exception as e:
logger.warning(f"Error cleaning ZMQ resources: {e}")
# Serialization and response
if np.isnan(embeddings).any() or np.isinf(embeddings).any():
logger.error(
f"NaN or Inf detected in embeddings! Requested IDs: {node_ids[:5]}..."
)
raise AssertionError()
# Clean up other resources
try:
import gc
hidden_contiguous_f32 = np.ascontiguousarray(embeddings, dtype=np.float32)
response_payload = [
list(hidden_contiguous_f32.shape),
hidden_contiguous_f32.flatten().tolist(),
]
response_bytes = msgpack.packb(response_payload, use_single_float=True)
gc.collect()
logger.info("Additional resources cleaned up")
except Exception as e:
logger.warning(f"Error cleaning additional resources: {e}")
socket.send(response_bytes)
e2e_end = time.time()
logger.info(f"⏱️ ZMQ E2E time: {e2e_end - e2e_start:.6f}s")
logger.info("Graceful shutdown completed")
sys.exit(0)
except zmq.Again:
logger.debug("ZMQ socket timeout, continuing to listen")
continue
except Exception as e:
logger.error(f"Error in ZMQ server loop: {e}")
import traceback
# Register signal handlers within this function scope
import signal
traceback.print_exc()
socket.send(msgpack.packb([[], []]))
def signal_handler(sig, frame):
logger.info(f"Received signal {sig}, shutting down gracefully...")
shutdown_zmq_server()
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# Pass shutdown_event to ZMQ thread
zmq_thread = threading.Thread(
target=lambda: zmq_server_thread_with_shutdown(shutdown_event),
daemon=False, # Not daemon - we want to wait for it
)
zmq_thread = threading.Thread(target=zmq_server_thread, daemon=True)
zmq_thread.start()
logger.info(f"Started HNSW ZMQ server thread on port {zmq_port}")
# Keep the main thread alive
try:
while not shutdown_event.is_set():
time.sleep(0.1) # Check shutdown more frequently
while True:
time.sleep(1)
except KeyboardInterrupt:
logger.info("HNSW Server shutting down...")
shutdown_zmq_server()
return
# If we reach here, shutdown was triggered by signal
logger.info("Main loop exited, process should be shutting down")
if __name__ == "__main__":
import signal
import sys
# Signal handlers are now registered within create_hnsw_embedding_server
def signal_handler(sig, frame):
logger.info(f"Received signal {sig}, shutting down gracefully...")
sys.exit(0)
# Register signal handlers for graceful shutdown
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
parser = argparse.ArgumentParser(description="HNSW Embedding service")
parser.add_argument("--zmq-port", type=int, default=5555, help="ZMQ port to run on")
@@ -414,7 +295,7 @@ if __name__ == "__main__":
"--embedding-mode",
type=str,
default="sentence-transformers",
choices=["sentence-transformers", "openai", "mlx", "ollama"],
choices=["sentence-transformers", "openai", "mlx"],
help="Embedding backend mode",
)

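Taken together, the REP loop above speaks four msgpack request shapes over a single socket. A client-side sketch of that protocol; it assumes a server on port 5555 and that passages with string IDs "0" and "1" exist in the index:
import msgpack
import zmq

sock = zmq.Context().socket(zmq.REQ)
sock.connect("tcp://localhost:5555")

def rpc(payload):
    sock.send(msgpack.packb(payload))
    return msgpack.unpackb(sock.recv())

# 1) Model handshake: ["__QUERY_MODEL__"] -> [model_name]
print(rpc(["__QUERY_MODEL__"]))
# 2) Direct text embedding: [str, ...] -> [[float, ...], ...]
embeddings = rpc(["what does leann prune?"])
# 3) Distance calculation: [[ids], [query_vector]] -> [[distances]]
distances = rpc([["0", "1"], embeddings[0]])[0]
# 4) Embedding fetch by passage ID: [[ids]] -> [dims, flat_data]
dims, flat = rpc([["0", "1"]])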
View File

@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
[project]
name = "leann-backend-hnsw"
version = "0.2.9"
version = "0.1.16"
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
dependencies = [
"leann-core==0.2.9",
"leann-core==0.1.16",
"numpy",
"pyzmq>=23.0.0",
"msgpack>=1.0.0",
@@ -22,8 +22,6 @@ cmake.build-type = "Release"
build.verbose = true
build.tool-args = ["-j8"]
# CMake definitions to optimize compilation and find Homebrew packages
# CMake definitions to optimize compilation
[tool.scikit-build.cmake.define]
CMAKE_BUILD_PARALLEL_LEVEL = "8"
CMAKE_PREFIX_PATH = {env = "CMAKE_PREFIX_PATH"}
OpenMP_ROOT = {env = "OpenMP_ROOT"}

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "leann-core"
version = "0.2.9"
version = "0.1.16"
description = "Core API and plugin system for LEANN"
readme = "README.md"
requires-python = ">=3.9"
@@ -31,10 +31,8 @@ dependencies = [
"PyPDF2>=3.0.0",
"pymupdf>=1.23.0",
"pdfplumber>=0.10.0",
"nbconvert>=7.0.0", # For .ipynb file support
"gitignore-parser>=0.1.12", # For proper .gitignore handling
"mlx>=0.26.3; sys_platform == 'darwin' and platform_machine == 'arm64'",
"mlx-lm>=0.26.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
"mlx>=0.26.3; sys_platform == 'darwin'",
"mlx-lm>=0.26.0; sys_platform == 'darwin'",
]
[project.optional-dependencies]
@@ -46,7 +44,6 @@ colab = [
[project.scripts]
leann = "leann.cli:main"
leann_mcp = "leann.mcp:main"
[tool.setuptools.packages.find]
where = ["src"]

View File

@@ -10,7 +10,7 @@ import time
import warnings
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal, Optional
from typing import Any, Literal
import numpy as np
@@ -33,7 +33,7 @@ def compute_embeddings(
model_name: str,
mode: str = "sentence-transformers",
use_server: bool = True,
port: Optional[int] = None,
port: int | None = None,
is_build=False,
) -> np.ndarray:
"""
@@ -115,62 +115,20 @@ class SearchResult:
class PassageManager:
def __init__(
self, passage_sources: list[dict[str, Any]], metadata_file_path: Optional[str] = None
):
def __init__(self, passage_sources: list[dict[str, Any]]):
self.offset_maps = {}
self.passage_files = {}
self.global_offset_map = {} # Combined map for fast lookup
# Derive index base name for standard sibling fallbacks, e.g., <index_name>.passages.*
index_name_base = None
if metadata_file_path:
meta_name = Path(metadata_file_path).name
if meta_name.endswith(".meta.json"):
index_name_base = meta_name[: -len(".meta.json")]
for source in passage_sources:
assert source["type"] == "jsonl", "only jsonl is supported"
passage_file = source.get("path", "")
index_file = source.get("index_path", "") # .idx file
passage_file = source["path"]
index_file = source["index_path"] # .idx file
# Fix path resolution - relative paths should be relative to metadata file directory
def _resolve_candidates(
primary: str,
relative_key: str,
default_name: Optional[str],
source_dict: dict[str, Any],
) -> list[Path]:
candidates: list[Path] = []
# 1) Primary as-is (absolute or relative)
if primary:
p = Path(primary)
candidates.append(p if p.is_absolute() else (Path.cwd() / p))
# 2) metadata-relative explicit relative key
if metadata_file_path and source_dict.get(relative_key):
candidates.append(Path(metadata_file_path).parent / source_dict[relative_key])
# 3) metadata-relative standard sibling filename
if metadata_file_path and default_name:
candidates.append(Path(metadata_file_path).parent / default_name)
return candidates
# Build candidate lists and pick first existing; otherwise keep last candidate for error message
idx_default = f"{index_name_base}.passages.idx" if index_name_base else None
idx_candidates = _resolve_candidates(
index_file, "index_path_relative", idx_default, source
)
pas_default = f"{index_name_base}.passages.jsonl" if index_name_base else None
pas_candidates = _resolve_candidates(passage_file, "path_relative", pas_default, source)
def _pick_existing(cands: list[Path]) -> str:
for c in cands:
if c.exists():
return str(c.resolve())
# Fall back to the last candidate (best guess) even if it does not exist; the existence check below will raise
return str(cands[-1].resolve()) if cands else ""
index_file = _pick_existing(idx_candidates)
passage_file = _pick_existing(pas_candidates)
# Fix path resolution for Colab and other environments
if not Path(index_file).is_absolute():
# If relative path, try to resolve it properly
index_file = str(Path(index_file).resolve())
if not Path(index_file).exists():
raise FileNotFoundError(f"Passage index file not found: {index_file}")
@@ -199,24 +157,12 @@ class LeannBuilder:
self,
backend_name: str,
embedding_model: str = "facebook/contriever",
dimensions: Optional[int] = None,
dimensions: int | None = None,
embedding_mode: str = "sentence-transformers",
**backend_kwargs,
):
self.backend_name = backend_name
# Normalize incompatible combinations early (for consistent metadata)
if backend_name == "hnsw":
is_recompute = backend_kwargs.get("is_recompute", True)
is_compact = backend_kwargs.get("is_compact", True)
if is_recompute is False and is_compact is True:
warnings.warn(
"HNSW with is_recompute=False requires non-compact storage. Forcing is_compact=False.",
UserWarning,
stacklevel=2,
)
backend_kwargs["is_compact"] = False
backend_factory: Optional[LeannBackendFactoryInterface] = BACKEND_REGISTRY.get(backend_name)
backend_factory: LeannBackendFactoryInterface | None = BACKEND_REGISTRY.get(backend_name)
if backend_factory is None:
raise ValueError(f"Backend '{backend_name}' not found or not registered.")
self.backend_factory = backend_factory
@@ -296,7 +242,7 @@ class LeannBuilder:
self.backend_kwargs = backend_kwargs
self.chunks: list[dict[str, Any]] = []
def add_text(self, text: str, metadata: Optional[dict[str, Any]] = None):
def add_text(self, text: str, metadata: dict[str, Any] | None = None):
if metadata is None:
metadata = {}
passage_id = metadata.get("id", str(len(self.chunks)))
@@ -306,23 +252,6 @@ class LeannBuilder:
def build_index(self, index_path: str):
if not self.chunks:
raise ValueError("No chunks added.")
# Filter out invalid/empty text chunks early to keep passage and embedding counts aligned
valid_chunks: list[dict[str, Any]] = []
skipped = 0
for chunk in self.chunks:
text = chunk.get("text", "")
if isinstance(text, str) and text.strip():
valid_chunks.append(chunk)
else:
skipped += 1
if skipped > 0:
print(
f"Warning: Skipping {skipped} empty/invalid text chunk(s). Processing {len(valid_chunks)} valid chunks"
)
self.chunks = valid_chunks
if not self.chunks:
raise ValueError("All provided chunks are empty or invalid. Nothing to index.")
if self.dimensions is None:
self.dimensions = len(
compute_embeddings(
@@ -385,12 +314,8 @@ class LeannBuilder:
"passage_sources": [
{
"type": "jsonl",
# Preserve existing relative file names (backward-compatible)
"path": passages_file.name,
"index_path": offset_file.name,
# Add optional redundant relative keys for remote build portability (non-breaking)
"path_relative": passages_file.name,
"index_path_relative": offset_file.name,
"path": str(passages_file),
"index_path": str(offset_file),
}
],
}
@@ -505,12 +430,8 @@ class LeannBuilder:
"passage_sources": [
{
"type": "jsonl",
# Preserve existing relative file names (backward-compatible)
"path": passages_file.name,
"index_path": offset_file.name,
# Add optional redundant relative keys for remote build portability (non-breaking)
"path_relative": passages_file.name,
"index_path_relative": offset_file.name,
"path": str(passages_file),
"index_path": str(offset_file),
}
],
"built_from_precomputed_embeddings": True,
@@ -538,24 +459,14 @@ class LeannSearcher:
self.meta_path_str = f"{index_path}.meta.json"
if not Path(self.meta_path_str).exists():
parent_dir = Path(index_path).parent
print(
f"Leann metadata file not found at {self.meta_path_str}, and you may need to rm -rf {parent_dir}"
)
# Highlight the cleanup hint in red in the FileNotFoundError message
raise FileNotFoundError(
f"Leann metadata file not found at {self.meta_path_str}, \033[91m you may need to rm -rf {parent_dir}\033[0m"
)
raise FileNotFoundError(f"Leann metadata file not found at {self.meta_path_str}")
with open(self.meta_path_str, encoding="utf-8") as f:
self.meta_data = json.load(f)
backend_name = self.meta_data["backend_name"]
self.embedding_model = self.meta_data["embedding_model"]
# Support both old and new format
self.embedding_mode = self.meta_data.get("embedding_mode", "sentence-transformers")
# Delegate portability handling to PassageManager
self.passage_manager = PassageManager(
self.meta_data.get("passage_sources", []), metadata_file_path=self.meta_path_str
)
self.passage_manager = PassageManager(self.meta_data.get("passage_sources", []))
backend_factory = BACKEND_REGISTRY.get(backend_name)
if backend_factory is None:
raise ValueError(f"Backend '{backend_name}' not found.")
@@ -582,16 +493,6 @@ class LeannSearcher:
logger.info(f" Top_k: {top_k}")
logger.info(f" Additional kwargs: {kwargs}")
# Smart top_k detection and adjustment
total_docs = len(self.passage_manager.global_offset_map)
original_top_k = top_k
if top_k > total_docs:
top_k = total_docs
logger.warning(
f" ⚠️ Requested top_k ({original_top_k}) exceeds total documents ({total_docs})"
)
logger.warning(f" ✅ Auto-adjusted top_k to {top_k} to match available documents")
zmq_port = None
start_time = time.time()
@@ -628,15 +529,15 @@ class LeannSearcher:
zmq_port=zmq_port,
**kwargs,
)
time.time() - start_time
# logger.info(f" Search time: {search_time} seconds")
logger.info(f" Backend returned: labels={len(results.get('labels', [[]])[0])} results")
enriched_results = []
if "labels" in results and "distances" in results:
logger.info(f" Processing {len(results['labels'][0])} passage IDs:")
# Python 3.9 does not support zip(strict=...); lengths are expected to match
for i, (string_id, dist) in enumerate(
zip(results["labels"][0], results["distances"][0])
zip(results["labels"][0], results["distances"][0], strict=False)
):
try:
passage_data = self.passage_manager.get_passage(string_id)
@@ -662,49 +563,19 @@ class LeannSearcher:
)
except KeyError:
RED = "\033[91m"
RESET = "\033[0m"
logger.error(
f" {RED}{RESET} [{i + 1:2d}] ID: '{string_id}' -> {RED}ERROR: Passage not found!{RESET}"
)
# Define color codes outside the loop for final message
GREEN = "\033[92m"
RESET = "\033[0m"
logger.info(f" {GREEN}✓ Final enriched results: {len(enriched_results)} passages{RESET}")
return enriched_results
def cleanup(self):
"""Explicitly cleanup embedding server resources.
This method should be called after you're done using the searcher,
especially in test environments or batch processing scenarios.
"""
if hasattr(self.backend_impl, "embedding_server_manager"):
self.backend_impl.embedding_server_manager.stop_server()
# Enable automatic cleanup patterns
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
try:
self.cleanup()
except Exception:
pass
def __del__(self):
try:
self.cleanup()
except Exception:
# Avoid noisy errors during interpreter shutdown
pass
class LeannChat:
def __init__(
self,
index_path: str,
llm_config: Optional[dict[str, Any]] = None,
llm_config: dict[str, Any] | None = None,
enable_warmup: bool = False,
**kwargs,
):
@@ -720,7 +591,7 @@ class LeannChat:
prune_ratio: float = 0.0,
recompute_embeddings: bool = True,
pruning_strategy: Literal["global", "local", "proportional"] = "global",
llm_kwargs: Optional[dict[str, Any]] = None,
llm_kwargs: dict[str, Any] | None = None,
expected_zmq_port: int = 5557,
**search_kwargs,
):
@@ -748,10 +619,7 @@ class LeannChat:
"Please provide the best answer you can based on this context and your knowledge."
)
ask_time = time.time()
ans = self.llm.ask(prompt, **llm_kwargs)
ask_time = time.time() - ask_time
logger.info(f" Ask time: {ask_time} seconds")
return ans
def start_interactive(self):
@@ -768,28 +636,3 @@ class LeannChat:
except (KeyboardInterrupt, EOFError):
print("\nGoodbye!")
break
def cleanup(self):
"""Explicitly cleanup embedding server resources.
This method should be called after you're done using the chat interface,
especially in test environments or batch processing scenarios.
"""
if hasattr(self.searcher, "cleanup"):
self.searcher.cleanup()
# Enable automatic cleanup patterns
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
try:
self.cleanup()
except Exception:
pass
def __del__(self):
try:
self.cleanup()
except Exception:
pass

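The __enter__/__exit__/__del__ hooks in this hunk exist so the embedding server is stopped deterministically. A short usage sketch against the classes in this file; "./demo.leann" and the llm_config keys are assumptions for illustration:
from leann.api import LeannChat, LeannSearcher

# Context manager guarantees cleanup() -> embedding_server_manager.stop_server().
with LeannSearcher("./demo.leann") as searcher:
    for result in searcher.search("how does leann prune embeddings?", top_k=5):
        print(result)

with LeannChat("./demo.leann", llm_config={"type": "simulated"}) as chat:
    print(chat.ask("summarize the indexed documents"))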
View File

@@ -8,7 +8,7 @@ import difflib
import logging
import os
from abc import ABC, abstractmethod
from typing import Any, Optional
from typing import Any
import torch
@@ -17,12 +17,12 @@ logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def check_ollama_models(host: str) -> list[str]:
def check_ollama_models() -> list[str]:
"""Check available Ollama models and return a list"""
try:
import requests
response = requests.get(f"{host}/api/tags", timeout=5)
response = requests.get("http://localhost:11434/api/tags", timeout=5)
if response.status_code == 200:
data = response.json()
return [model["name"] for model in data.get("models", [])]
@@ -309,12 +309,10 @@ def search_hf_models(query: str, limit: int = 10) -> list[str]:
return search_hf_models_fuzzy(query, limit)
def validate_model_and_suggest(
model_name: str, llm_type: str, host: str = "http://localhost:11434"
) -> Optional[str]:
def validate_model_and_suggest(model_name: str, llm_type: str) -> str | None:
"""Validate model name and provide suggestions if invalid"""
if llm_type == "ollama":
available_models = check_ollama_models(host)
available_models = check_ollama_models()
if available_models and model_name not in available_models:
error_msg = f"Model '{model_name}' not found in your local Ollama installation."
@@ -360,11 +358,7 @@ def validate_model_and_suggest(
error_msg += f"\n\nModel '{model_name}' was not found in Ollama's library."
if suggestions:
error_msg += (
"\n\nDid you mean one of these installed models?\n"
+ "\nTry to use ollama pull to install the model you need\n"
)
error_msg += "\n\nDid you mean one of these installed models?\n"
for i, suggestion in enumerate(suggestions, 1):
error_msg += f" {i}. {suggestion}\n"
else:
@@ -422,6 +416,7 @@ class LLMInterface(ABC):
top_k=10,
complexity=64,
beam_width=8,
USE_DEFERRED_FETCH=True,
skip_search_reorder=True,
recompute_beighbor_embeddings=True,
dedup_node_dis=True,
@@ -433,6 +428,7 @@ class LLMInterface(ABC):
Supported kwargs:
- complexity (int): Search complexity parameter (default: 32)
- beam_width (int): Beam width for search (default: 4)
- USE_DEFERRED_FETCH (bool): Enable deferred fetch mode (default: False)
- skip_search_reorder (bool): Skip search reorder step (default: False)
- recompute_beighbor_embeddings (bool): Enable ZMQ embedding server for neighbor recomputation (default: False)
- dedup_node_dis (bool): Deduplicate nodes by distance (default: False)
@@ -469,7 +465,7 @@ class OllamaChat(LLMInterface):
requests.get(host)
# Pre-check model availability with helpful suggestions
model_error = validate_model_and_suggest(model, "ollama", host)
model_error = validate_model_and_suggest(model, "ollama")
if model_error:
raise ValueError(model_error)
@@ -489,35 +485,11 @@ class OllamaChat(LLMInterface):
import requests
full_url = f"{self.host}/api/generate"
# Handle thinking budget for reasoning models
options = kwargs.copy()
thinking_budget = kwargs.get("thinking_budget")
if thinking_budget:
# Remove thinking_budget from options as it's not a standard Ollama option
options.pop("thinking_budget", None)
# Only apply reasoning parameters to models that support it
reasoning_supported_models = [
"gpt-oss:20b",
"gpt-oss:120b",
"deepseek-r1",
"deepseek-coder",
]
if thinking_budget in ["low", "medium", "high"]:
if any(model in self.model.lower() for model in reasoning_supported_models):
options["reasoning"] = {"effort": thinking_budget, "exclude": False}
logger.info(f"Applied reasoning effort={thinking_budget} to model {self.model}")
else:
logger.warning(
f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
)
payload = {
"model": self.model,
"prompt": prompt,
"stream": False, # Keep it simple for now
"options": options,
"options": kwargs,
}
logger.debug(f"Sending request to Ollama: {payload}")
try:
@@ -683,7 +655,7 @@ class HFChat(LLMInterface):
class OpenAIChat(LLMInterface):
"""LLM interface for OpenAI models."""
def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
def __init__(self, model: str = "gpt-4o", api_key: str | None = None):
self.model = model
self.api_key = api_key or os.getenv("OPENAI_API_KEY")
@@ -708,38 +680,11 @@ class OpenAIChat(LLMInterface):
params = {
"model": self.model,
"messages": [{"role": "user", "content": prompt}],
"max_tokens": kwargs.get("max_tokens", 1000),
"temperature": kwargs.get("temperature", 0.7),
**{k: v for k, v in kwargs.items() if k not in ["max_tokens", "temperature"]},
}
# Handle max_tokens vs max_completion_tokens based on model
max_tokens = kwargs.get("max_tokens", 1000)
if "o3" in self.model or "o4" in self.model or "o1" in self.model:
# o-series models use max_completion_tokens
params["max_completion_tokens"] = max_tokens
params["temperature"] = 1.0
else:
# Other models use max_tokens
params["max_tokens"] = max_tokens
# Handle thinking budget for reasoning models
thinking_budget = kwargs.get("thinking_budget")
if thinking_budget and thinking_budget in ["low", "medium", "high"]:
# Check if this is an o-series model (partial match for model names)
o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
if any(model in self.model for model in o_series_models):
# Use the correct OpenAI reasoning parameter format
params["reasoning_effort"] = thinking_budget
logger.info(f"Applied reasoning_effort={thinking_budget} to model {self.model}")
else:
logger.warning(
f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
)
# Add other kwargs (excluding thinking_budget as it's handled above)
for k, v in kwargs.items():
if k not in ["max_tokens", "temperature", "thinking_budget"]:
params[k] = v
logger.info(f"Sending request to OpenAI with model {self.model}")
try:
@@ -759,7 +704,7 @@ class SimulatedChat(LLMInterface):
return "This is a simulated answer from the LLM based on the retrieved context."
def get_llm(llm_config: Optional[dict[str, Any]] = None) -> LLMInterface:
def get_llm(llm_config: dict[str, Any] | None = None) -> LLMInterface:
"""
Factory function to get an LLM interface based on configuration.

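For reference, the thinking-budget plumbing removed in this hunk routed one kwarg to two provider-specific parameters. A hedged sketch of the caller side; the module path and llm_config keys are assumptions, and the model names are examples:
from leann.llm_interface import get_llm  # import path assumed for illustration

# OpenAI o-series: thinking_budget maps to reasoning_effort, and the token cap
# is sent as max_completion_tokens rather than max_tokens.
llm = get_llm({"type": "openai", "model": "o3-mini"})
print(llm.ask("Prove sqrt(2) is irrational.", thinking_budget="medium"))

# Ollama reasoning models (e.g. gpt-oss:20b): thinking_budget becomes
# options["reasoning"] = {"effort": ..., "exclude": False}; unsupported models
# log a warning and proceed without it.
llm = get_llm({"type": "ollama", "model": "gpt-oss:20b"})
print(llm.ask("Prove sqrt(2) is irrational.", thinking_budget="high"))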
View File

@@ -1,11 +1,9 @@
import argparse
import asyncio
from pathlib import Path
from typing import Union
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from tqdm import tqdm
from .api import LeannBuilder, LeannChat, LeannSearcher
@@ -43,23 +41,13 @@ def extract_pdf_text_with_pdfplumber(file_path: str) -> str:
class LeannCLI:
def __init__(self):
# Always use project-local .leann directory (like .git)
self.indexes_dir = Path.cwd() / ".leann" / "indexes"
self.indexes_dir = Path.home() / ".leann" / "indexes"
self.indexes_dir.mkdir(parents=True, exist_ok=True)
# Default parser for documents
self.node_parser = SentenceSplitter(
chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n"
)
# Code-optimized parser
self.code_parser = SentenceSplitter(
chunk_size=512, # Larger chunks for code context
chunk_overlap=50, # Less overlap to preserve function boundaries
separator="\n", # Split by lines for code
paragraph_separator="\n\n", # Preserve logical code blocks
)
def get_index_path(self, index_name: str) -> str:
index_dir = self.indexes_dir / index_name
return str(index_dir / "documents.leann")
@@ -72,18 +60,14 @@ class LeannCLI:
def create_parser(self) -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
prog="leann",
description="The smallest vector index in the world. RAG Everything with LEANN!",
description="LEANN - Local Enhanced AI Navigation",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
leann build my-docs --docs ./documents # Build index from directory
leann build my-code --docs ./src ./tests ./config # Build index from multiple directories
leann build my-files --docs ./file1.py ./file2.txt ./docs/ # Build index from files and directories
leann build my-mixed --docs ./readme.md ./src/ ./config.json # Build index from mixed files/dirs
leann build my-ppts --docs ./ --file-types .pptx,.pdf # Index only PowerPoint and PDF files
leann search my-docs "query" # Search in my-docs index
leann ask my-docs "question" # Ask my-docs index
leann list # List all stored indexes
leann build my-docs --docs ./documents # Build index named my-docs
leann search my-docs "query" # Search in my-docs index
leann ask my-docs "question" # Ask my-docs index
leann list # List all stored indexes
""",
)
@@ -91,112 +75,32 @@ Examples:
# Build command
build_parser = subparsers.add_parser("build", help="Build document index")
build_parser.add_argument("index_name", help="Index name")
build_parser.add_argument("--docs", type=str, required=True, help="Documents directory")
build_parser.add_argument(
"index_name", nargs="?", help="Index name (default: current directory name)"
)
build_parser.add_argument(
"--docs",
type=str,
nargs="+",
default=["."],
help="Documents directories and/or files (default: current directory)",
)
build_parser.add_argument(
"--backend",
type=str,
default="hnsw",
choices=["hnsw", "diskann"],
help="Backend to use (default: hnsw)",
)
build_parser.add_argument(
"--embedding-model",
type=str,
default="facebook/contriever",
help="Embedding model (default: facebook/contriever)",
)
build_parser.add_argument(
"--embedding-mode",
type=str,
default="sentence-transformers",
choices=["sentence-transformers", "openai", "mlx", "ollama"],
help="Embedding backend mode (default: sentence-transformers)",
)
build_parser.add_argument(
"--force", "-f", action="store_true", help="Force rebuild existing index"
)
build_parser.add_argument(
"--graph-degree", type=int, default=32, help="Graph degree (default: 32)"
)
build_parser.add_argument(
"--complexity", type=int, default=64, help="Build complexity (default: 64)"
"--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
)
build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
build_parser.add_argument("--graph-degree", type=int, default=32)
build_parser.add_argument("--complexity", type=int, default=64)
build_parser.add_argument("--num-threads", type=int, default=1)
build_parser.add_argument(
"--compact",
action=argparse.BooleanOptionalAction,
default=True,
help="Use compact storage (default: true). Must be `no-compact` for `no-recompute` build.",
)
build_parser.add_argument(
"--recompute",
action=argparse.BooleanOptionalAction,
default=True,
help="Enable recomputation (default: true)",
)
build_parser.add_argument(
"--file-types",
type=str,
help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). If not specified, uses default supported types.",
)
build_parser.add_argument(
"--doc-chunk-size",
type=int,
default=256,
help="Document chunk size in tokens/characters (default: 256)",
)
build_parser.add_argument(
"--doc-chunk-overlap",
type=int,
default=128,
help="Document chunk overlap (default: 128)",
)
build_parser.add_argument(
"--code-chunk-size",
type=int,
default=512,
help="Code chunk size in tokens/lines (default: 512)",
)
build_parser.add_argument(
"--code-chunk-overlap",
type=int,
default=50,
help="Code chunk overlap (default: 50)",
)
build_parser.add_argument("--compact", action="store_true", default=True)
build_parser.add_argument("--recompute", action="store_true", default=True)
# Search command
search_parser = subparsers.add_parser("search", help="Search documents")
search_parser.add_argument("index_name", help="Index name")
search_parser.add_argument("query", help="Search query")
search_parser.add_argument(
"--top-k", type=int, default=5, help="Number of results (default: 5)"
)
search_parser.add_argument(
"--complexity", type=int, default=64, help="Search complexity (default: 64)"
)
search_parser.add_argument("--top-k", type=int, default=5)
search_parser.add_argument("--complexity", type=int, default=64)
search_parser.add_argument("--beam-width", type=int, default=1)
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
search_parser.add_argument(
"--recompute",
dest="recompute_embeddings",
action=argparse.BooleanOptionalAction,
default=True,
help="Enable/disable embedding recomputation (default: enabled). Should not do a `no-recompute` search in a `recompute` build.",
)
search_parser.add_argument("--recompute-embeddings", action="store_true")
search_parser.add_argument(
"--pruning-strategy",
choices=["global", "local", "proportional"],
default="global",
help="Pruning strategy (default: global)",
)
# Ask command
@@ -207,513 +111,102 @@ Examples:
type=str,
default="ollama",
choices=["simulated", "ollama", "hf", "openai"],
help="LLM provider (default: ollama)",
)
ask_parser.add_argument(
"--model", type=str, default="qwen3:8b", help="Model name (default: qwen3:8b)"
)
ask_parser.add_argument("--model", type=str, default="qwen3:8b")
ask_parser.add_argument("--host", type=str, default="http://localhost:11434")
ask_parser.add_argument(
"--interactive", "-i", action="store_true", help="Interactive chat mode"
)
ask_parser.add_argument(
"--top-k", type=int, default=20, help="Retrieval count (default: 20)"
)
ask_parser.add_argument("--interactive", "-i", action="store_true")
ask_parser.add_argument("--top-k", type=int, default=20)
ask_parser.add_argument("--complexity", type=int, default=32)
ask_parser.add_argument("--beam-width", type=int, default=1)
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
ask_parser.add_argument(
"--recompute",
dest="recompute_embeddings",
action=argparse.BooleanOptionalAction,
default=True,
help="Enable/disable embedding recomputation during ask (default: enabled)",
)
ask_parser.add_argument("--recompute-embeddings", action="store_true")
ask_parser.add_argument(
"--pruning-strategy",
choices=["global", "local", "proportional"],
default="global",
)
ask_parser.add_argument(
"--thinking-budget",
type=str,
choices=["low", "medium", "high"],
default=None,
help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
)
# List command
subparsers.add_parser("list", help="List all indexes")
return parser
def register_project_dir(self):
"""Register current project directory in global registry"""
global_registry = Path.home() / ".leann" / "projects.json"
global_registry.parent.mkdir(exist_ok=True)
current_dir = str(Path.cwd())
# Load existing registry
projects = []
if global_registry.exists():
try:
import json
with open(global_registry) as f:
projects = json.load(f)
except Exception:
projects = []
# Add current directory if not already present
if current_dir not in projects:
projects.append(current_dir)
# Save registry
import json
with open(global_registry, "w") as f:
json.dump(projects, f, indent=2)
def _build_gitignore_parser(self, docs_dir: str):
"""Build gitignore parser using gitignore-parser library."""
from gitignore_parser import parse_gitignore
# Try to parse the root .gitignore
gitignore_path = Path(docs_dir) / ".gitignore"
if gitignore_path.exists():
try:
# gitignore-parser automatically handles all subdirectory .gitignore files!
matches = parse_gitignore(str(gitignore_path))
print(f"📋 Loaded .gitignore from {docs_dir} (includes all subdirectories)")
return matches
except Exception as e:
print(f"Warning: Could not parse .gitignore: {e}")
else:
print("📋 No .gitignore found")
# Fallback: basic pattern matching for essential files
essential_patterns = {".git", ".DS_Store", "__pycache__", "node_modules", ".venv", "venv"}
def basic_matches(file_path):
path_parts = Path(file_path).parts
return any(part in essential_patterns for part in path_parts)
return basic_matches
def _should_exclude_file(self, relative_path: Path, gitignore_matches) -> bool:
"""Check if a file should be excluded using gitignore parser."""
return gitignore_matches(str(relative_path))
def _is_git_submodule(self, path: Path) -> bool:
"""Check if a path is a git submodule."""
try:
# Find the git repo root
current_dir = Path.cwd()
while current_dir != current_dir.parent:
if (current_dir / ".git").exists():
gitmodules_path = current_dir / ".gitmodules"
if gitmodules_path.exists():
# Read .gitmodules to check if this path is a submodule
gitmodules_content = gitmodules_path.read_text()
# Convert path to relative to git root
try:
relative_path = path.resolve().relative_to(current_dir)
# Check if this path appears in .gitmodules
return f"path = {relative_path}" in gitmodules_content
except ValueError:
# Path is not under git root
return False
break
current_dir = current_dir.parent
return False
except Exception:
# If anything goes wrong, assume it's not a submodule
return False
def list_indexes(self):
print("Stored LEANN indexes:")
# Get all project directories with .leann
global_registry = Path.home() / ".leann" / "projects.json"
all_projects = []
if global_registry.exists():
try:
import json
with open(global_registry) as f:
all_projects = json.load(f)
except Exception:
pass
# Filter to only existing directories with .leann
valid_projects = []
for project_dir in all_projects:
project_path = Path(project_dir)
if project_path.exists() and (project_path / ".leann" / "indexes").exists():
valid_projects.append(project_path)
# Add current project if it has .leann but not in registry
current_path = Path.cwd()
if (current_path / ".leann" / "indexes").exists() and current_path not in valid_projects:
valid_projects.append(current_path)
if not valid_projects:
print(
"No indexes found. Use 'leann build <name> --docs <dir> [<dir2> ...]' to create one."
)
if not self.indexes_dir.exists():
print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
return
total_indexes = 0
current_dir = Path.cwd()
index_dirs = [d for d in self.indexes_dir.iterdir() if d.is_dir()]
for project_path in valid_projects:
indexes_dir = project_path / ".leann" / "indexes"
if not indexes_dir.exists():
continue
if not index_dirs:
print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
return
index_dirs = [d for d in indexes_dir.iterdir() if d.is_dir()]
if not index_dirs:
continue
print(f"Found {len(index_dirs)} indexes:")
for i, index_dir in enumerate(index_dirs, 1):
index_name = index_dir.name
status = "" if self.index_exists(index_name) else ""
# Show project header
if project_path == current_dir:
print(f"\n📁 Current project ({project_path}):")
print(f" {i}. {index_name} [{status}]")
if self.index_exists(index_name):
index_dir / "documents.leann.meta.json"
size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
1024 * 1024
)
print(f" Size: {size_mb:.1f} MB")
if index_dirs:
example_name = index_dirs[0].name
print("\nUsage:")
print(f' leann search {example_name} "your query"')
print(f" leann ask {example_name} --interactive")
def load_documents(self, docs_dir: str):
print(f"Loading documents from {docs_dir}...")
# Try to use better PDF parsers first
documents = []
docs_path = Path(docs_dir)
for file_path in docs_path.rglob("*.pdf"):
print(f"Processing PDF: {file_path}")
# Try PyMuPDF first (best quality)
text = extract_pdf_text_with_pymupdf(str(file_path))
if text is None:
# Try pdfplumber
text = extract_pdf_text_with_pdfplumber(str(file_path))
if text:
# Create a simple document structure
from llama_index.core import Document
doc = Document(text=text, metadata={"source": str(file_path)})
documents.append(doc)
else:
print(f"\n📂 {project_path}:")
for index_dir in index_dirs:
total_indexes += 1
index_name = index_dir.name
meta_file = index_dir / "documents.leann.meta.json"
status = "" if meta_file.exists() else ""
print(f" {total_indexes}. {index_name} [{status}]")
if status == "":
size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
1024 * 1024
)
print(f" Size: {size_mb:.1f} MB")
if total_indexes > 0:
print(f"\nTotal: {total_indexes} indexes across {len(valid_projects)} projects")
print("\nUsage (current project only):")
# Show example from current project
current_indexes_dir = current_dir / ".leann" / "indexes"
if current_indexes_dir.exists():
current_index_dirs = [d for d in current_indexes_dir.iterdir() if d.is_dir()]
if current_index_dirs:
example_name = current_index_dirs[0].name
print(f' leann search {example_name} "your query"')
print(f" leann ask {example_name} --interactive")
def load_documents(
self, docs_paths: Union[str, list], custom_file_types: Union[str, None] = None
):
# Handle both single path (string) and multiple paths (list) for backward compatibility
if isinstance(docs_paths, str):
docs_paths = [docs_paths]
# Separate files and directories
files = []
directories = []
for path in docs_paths:
path_obj = Path(path)
if path_obj.is_file():
files.append(str(path_obj))
elif path_obj.is_dir():
# Check if this is a git submodule - if so, skip it
if self._is_git_submodule(path_obj):
print(f"⚠️ Skipping git submodule: {path}")
continue
directories.append(str(path_obj))
else:
print(f"⚠️ Warning: Path '{path}' does not exist, skipping...")
continue
# Print summary of what we're processing
total_items = len(files) + len(directories)
items_desc = []
if files:
items_desc.append(f"{len(files)} file{'s' if len(files) > 1 else ''}")
if directories:
items_desc.append(
f"{len(directories)} director{'ies' if len(directories) > 1 else 'y'}"
)
print(f"Loading documents from {' and '.join(items_desc)} ({total_items} total):")
if files:
print(f" 📄 Files: {', '.join([Path(f).name for f in files])}")
if directories:
print(f" 📁 Directories: {', '.join(directories)}")
if custom_file_types:
print(f"Using custom file types: {custom_file_types}")
all_documents = []
# First, process individual files if any
if files:
print(f"\n🔄 Processing {len(files)} individual file{'s' if len(files) > 1 else ''}...")
# Load individual files using SimpleDirectoryReader with input_files
# Note: We skip gitignore filtering for explicitly specified files
try:
# Group files by their parent directory for efficient loading
from collections import defaultdict
files_by_dir = defaultdict(list)
for file_path in files:
parent_dir = str(Path(file_path).parent)
files_by_dir[parent_dir].append(file_path)
# Load files from each parent directory
for parent_dir, file_list in files_by_dir.items():
print(
f" Loading {len(file_list)} file{'s' if len(file_list) > 1 else ''} from {parent_dir}"
)
try:
file_docs = SimpleDirectoryReader(
parent_dir,
input_files=file_list,
filename_as_id=True,
).load_data()
all_documents.extend(file_docs)
print(
f" ✅ Loaded {len(file_docs)} document{'s' if len(file_docs) > 1 else ''}"
)
except Exception as e:
print(f" ❌ Warning: Could not load files from {parent_dir}: {e}")
except Exception as e:
print(f"❌ Error processing individual files: {e}")
# Define file extensions to process
if custom_file_types:
# Parse custom file types from comma-separated string
code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
# Ensure extensions start with a dot
code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions]
else:
# Use default supported file types
code_extensions = [
# Original document types
".txt",
".md",
".docx",
".pptx",
# Code files for Claude Code integration
".py",
".js",
".ts",
".jsx",
".tsx",
".java",
".cpp",
".c",
".h",
".hpp",
".cs",
".go",
".rs",
".rb",
".php",
".swift",
".kt",
".scala",
".r",
".sql",
".sh",
".bash",
".zsh",
".fish",
".ps1",
".bat",
# Config and markup files
".json",
".yaml",
".yml",
".xml",
".toml",
".ini",
".cfg",
".conf",
".html",
".css",
".scss",
".less",
".vue",
".svelte",
# Data science
".ipynb",
".R",
".py",
".jl",
]
# Process each directory
if directories:
print(
f"\n🔄 Processing {len(directories)} director{'ies' if len(directories) > 1 else 'y'}..."
)
for docs_dir in directories:
print(f"Processing directory: {docs_dir}")
# Build gitignore parser for each directory
gitignore_matches = self._build_gitignore_parser(docs_dir)
# Try to use better PDF parsers first, but only if PDFs are requested
documents = []
docs_path = Path(docs_dir)
# Check if we should process PDFs
should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types
if should_process_pdfs:
for file_path in docs_path.rglob("*.pdf"):
# Check if file matches any exclude pattern
try:
relative_path = file_path.relative_to(docs_path)
if self._should_exclude_file(relative_path, gitignore_matches):
continue
except ValueError:
# Skip files that can't be made relative to docs_path
print(f"⚠️ Skipping file outside directory scope: {file_path}")
continue
print(f"Processing PDF: {file_path}")
# Try PyMuPDF first (best quality)
text = extract_pdf_text_with_pymupdf(str(file_path))
if text is None:
# Try pdfplumber
text = extract_pdf_text_with_pdfplumber(str(file_path))
if text:
# Create a simple document structure
from llama_index.core import Document
doc = Document(text=text, metadata={"source": str(file_path)})
documents.append(doc)
else:
# Fallback to default reader
print(f"Using default reader for {file_path}")
try:
default_docs = SimpleDirectoryReader(
str(file_path.parent),
filename_as_id=True,
required_exts=[file_path.suffix],
).load_data()
documents.extend(default_docs)
except Exception as e:
print(f"Warning: Could not process {file_path}: {e}")
# Load other file types with default reader
try:
# Create a custom file filter function using our PathSpec
def file_filter(
file_path: str, docs_dir=docs_dir, gitignore_matches=gitignore_matches
) -> bool:
"""Return True if file should be included (not excluded)"""
try:
docs_path_obj = Path(docs_dir)
file_path_obj = Path(file_path)
relative_path = file_path_obj.relative_to(docs_path_obj)
return not self._should_exclude_file(relative_path, gitignore_matches)
except (ValueError, OSError):
return True # Include files that can't be processed
other_docs = SimpleDirectoryReader(
docs_dir,
recursive=True,
encoding="utf-8",
required_exts=code_extensions,
file_extractor={}, # Use default extractors
# Fallback to default reader
print(f"Using default reader for {file_path}")
default_docs = SimpleDirectoryReader(
str(file_path.parent),
filename_as_id=True,
).load_data(show_progress=True)
required_exts=[file_path.suffix],
).load_data()
documents.extend(default_docs)
# Filter documents after loading based on gitignore rules
filtered_docs = []
for doc in other_docs:
file_path = doc.metadata.get("file_path", "")
if file_filter(file_path):
filtered_docs.append(doc)
documents.extend(filtered_docs)
except ValueError as e:
if "No files found" in str(e):
print(f"No additional files found for other supported types in {docs_dir}.")
else:
raise e
all_documents.extend(documents)
print(f"Loaded {len(documents)} documents from {docs_dir}")
documents = all_documents
# Load other file types with default reader
other_docs = SimpleDirectoryReader(
docs_dir,
recursive=True,
encoding="utf-8",
required_exts=[".txt", ".md", ".docx"],
).load_data(show_progress=True)
documents.extend(other_docs)
all_texts = []
# Define code file extensions for intelligent chunking
code_file_exts = {
".py",
".js",
".ts",
".jsx",
".tsx",
".java",
".cpp",
".c",
".h",
".hpp",
".cs",
".go",
".rs",
".rb",
".php",
".swift",
".kt",
".scala",
".r",
".sql",
".sh",
".bash",
".zsh",
".fish",
".ps1",
".bat",
".json",
".yaml",
".yml",
".xml",
".toml",
".ini",
".cfg",
".conf",
".html",
".css",
".scss",
".less",
".vue",
".svelte",
".ipynb",
".R",
".jl",
}
print("start chunking documents")
# Add progress bar for document chunking
for doc in tqdm(documents, desc="Chunking documents", unit="doc"):
# Check if this is a code file based on source path
source_path = doc.metadata.get("source", "")
is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)
# Use appropriate parser based on file type
parser = self.code_parser if is_code_file else self.node_parser
nodes = parser.get_nodes_from_documents([doc])
for doc in documents:
nodes = self.node_parser.get_nodes_from_documents([doc])
for node in nodes:
all_texts.append(node.get_content())
@@ -721,67 +214,16 @@ Examples:
return all_texts
async def build_index(self, args):
docs_paths = args.docs
# Use current directory name if index_name not provided
if args.index_name:
index_name = args.index_name
else:
index_name = Path.cwd().name
print(f"Using current directory name as index: '{index_name}'")
docs_dir = args.docs
index_name = args.index_name
index_dir = self.indexes_dir / index_name
index_path = self.get_index_path(index_name)
# Display all paths being indexed with file/directory distinction
files = [p for p in docs_paths if Path(p).is_file()]
directories = [p for p in docs_paths if Path(p).is_dir()]
print(f"📂 Indexing {len(docs_paths)} path{'s' if len(docs_paths) > 1 else ''}:")
if files:
print(f" 📄 Files ({len(files)}):")
for i, file_path in enumerate(files, 1):
print(f" {i}. {Path(file_path).resolve()}")
if directories:
print(f" 📁 Directories ({len(directories)}):")
for i, dir_path in enumerate(directories, 1):
print(f" {i}. {Path(dir_path).resolve()}")
if index_dir.exists() and not args.force:
print(f"Index '{index_name}' already exists. Use --force to rebuild.")
return
# Configure chunking based on CLI args before loading documents
# Guard against invalid configurations
doc_chunk_size = max(1, int(args.doc_chunk_size))
doc_chunk_overlap = max(0, int(args.doc_chunk_overlap))
if doc_chunk_overlap >= doc_chunk_size:
print(
f"⚠️ Adjusting doc chunk overlap from {doc_chunk_overlap} to {doc_chunk_size - 1} (must be < chunk size)"
)
doc_chunk_overlap = doc_chunk_size - 1
code_chunk_size = max(1, int(args.code_chunk_size))
code_chunk_overlap = max(0, int(args.code_chunk_overlap))
if code_chunk_overlap >= code_chunk_size:
print(
f"⚠️ Adjusting code chunk overlap from {code_chunk_overlap} to {code_chunk_size - 1} (must be < chunk size)"
)
code_chunk_overlap = code_chunk_size - 1
self.node_parser = SentenceSplitter(
chunk_size=doc_chunk_size,
chunk_overlap=doc_chunk_overlap,
separator=" ",
paragraph_separator="\n\n",
)
self.code_parser = SentenceSplitter(
chunk_size=code_chunk_size,
chunk_overlap=code_chunk_overlap,
separator="\n",
paragraph_separator="\n\n",
)
all_texts = self.load_documents(docs_paths, args.file_types)
all_texts = self.load_documents(docs_dir)
if not all_texts:
print("No documents found")
return
@@ -793,7 +235,6 @@ Examples:
builder = LeannBuilder(
backend_name=args.backend,
embedding_model=args.embedding_model,
embedding_mode=args.embedding_mode,
graph_degree=args.graph_degree,
complexity=args.complexity,
is_compact=args.compact,
@@ -807,9 +248,6 @@ Examples:
builder.build_index(index_path)
print(f"Index built at {index_path}")
# Register this project directory in global registry
self.register_project_dir()
async def search_documents(self, args):
index_name = args.index_name
query = args.query
@@ -817,7 +255,7 @@ Examples:
if not self.index_exists(index_name):
print(
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
)
return
@@ -844,7 +282,7 @@ Examples:
if not self.index_exists(index_name):
print(
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
)
return
@@ -870,11 +308,6 @@ Examples:
if not user_input:
continue
# Prepare LLM kwargs with thinking budget if specified
llm_kwargs = {}
if args.thinking_budget:
llm_kwargs["thinking_budget"] = args.thinking_budget
response = chat.ask(
user_input,
top_k=args.top_k,
@@ -883,17 +316,11 @@ Examples:
prune_ratio=args.prune_ratio,
recompute_embeddings=args.recompute_embeddings,
pruning_strategy=args.pruning_strategy,
llm_kwargs=llm_kwargs,
)
print(f"LEANN: {response}")
else:
query = input("Enter your question: ").strip()
if query:
# Prepare LLM kwargs with thinking budget if specified
llm_kwargs = {}
if args.thinking_budget:
llm_kwargs["thinking_budget"] = args.thinking_budget
response = chat.ask(
query,
top_k=args.top_k,
@@ -902,7 +329,6 @@ Examples:
prune_ratio=args.prune_ratio,
recompute_embeddings=args.recompute_embeddings,
pruning_strategy=args.pruning_strategy,
llm_kwargs=llm_kwargs,
)
print(f"LEANN: {response}")

View File

@@ -35,7 +35,7 @@ def compute_embeddings(
Args:
texts: List of texts to compute embeddings for
model_name: Model name
mode: Computation mode ('sentence-transformers', 'openai', 'mlx', 'ollama')
mode: Computation mode ('sentence-transformers', 'openai', 'mlx')
is_build: Whether this is a build operation (shows progress bar)
batch_size: Batch size for processing
adaptive_optimization: Whether to use adaptive optimization based on batch size
@@ -55,8 +55,6 @@ def compute_embeddings(
return compute_embeddings_openai(texts, model_name)
elif mode == "mlx":
return compute_embeddings_mlx(texts, model_name)
elif mode == "ollama":
return compute_embeddings_ollama(texts, model_name, is_build=is_build)
else:
raise ValueError(f"Unsupported embedding mode: {mode}")
@@ -244,16 +242,6 @@ def compute_embeddings_openai(texts: list[str], model_name: str) -> np.ndarray:
except ImportError as e:
raise ImportError(f"OpenAI package not installed: {e}")
# Validate input list
if not texts:
raise ValueError("Cannot compute embeddings for empty text list")
# Extra validation: abort early if any item is empty/whitespace
invalid_count = sum(1 for t in texts if not isinstance(t, str) or not t.strip())
if invalid_count > 0:
raise ValueError(
f"Found {invalid_count} empty/invalid text(s) in input. Upstream should filter before calling OpenAI."
)
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
raise RuntimeError("OPENAI_API_KEY environment variable not set")
@@ -273,16 +261,8 @@ def compute_embeddings_openai(texts: list[str], model_name: str) -> np.ndarray:
print(f"len of texts: {len(texts)}")
# OpenAI has limits on batch size and input length
max_batch_size = 800 # Conservative batch size because the token limit is 300K
max_batch_size = 1000 # Conservative batch size
all_embeddings = []
# get the avg len of texts
avg_len = sum(len(text) for text in texts) / len(texts)
print(f"avg len of texts: {avg_len}")
# if texts are long on average, use a smaller batch size to stay under token limits
if avg_len > 300:
    max_batch_size = 500
try:
from tqdm import tqdm
@@ -385,286 +365,3 @@ def compute_embeddings_mlx(chunks: list[str], model_name: str, batch_size: int =
# Stack numpy arrays
return np.stack(all_embeddings)
def compute_embeddings_ollama(
texts: list[str], model_name: str, is_build: bool = False, host: str = "http://localhost:11434"
) -> np.ndarray:
"""
Compute embeddings using Ollama API with simplified batch processing.
Uses batch size of 32 for MPS/CPU and 128 for CUDA to optimize performance.
Args:
texts: List of texts to compute embeddings for
model_name: Ollama model name (e.g., "nomic-embed-text", "mxbai-embed-large")
is_build: Whether this is a build operation (shows progress bar)
host: Ollama host URL (default: http://localhost:11434)
Returns:
Normalized embeddings array, shape: (len(texts), embedding_dim)
"""
try:
import requests
except ImportError:
raise ImportError(
"The 'requests' library is required for Ollama embeddings. Install with: uv pip install requests"
)
if not texts:
raise ValueError("Cannot compute embeddings for empty text list")
logger.info(
f"Computing embeddings for {len(texts)} texts using Ollama API, model: '{model_name}'"
)
# Check if Ollama is running
try:
response = requests.get(f"{host}/api/version", timeout=5)
response.raise_for_status()
except requests.exceptions.ConnectionError:
error_msg = (
f"❌ Could not connect to Ollama at {host}.\n\n"
"Please ensure Ollama is running:\n"
" • macOS/Linux: ollama serve\n"
" • Windows: Make sure Ollama is running in the system tray\n\n"
"Installation: https://ollama.com/download"
)
raise RuntimeError(error_msg)
except Exception as e:
raise RuntimeError(f"Unexpected error connecting to Ollama: {e}")
# Check if model exists and provide helpful suggestions
try:
response = requests.get(f"{host}/api/tags", timeout=5)
response.raise_for_status()
models = response.json()
model_names = [model["name"] for model in models.get("models", [])]
# Filter for embedding models (models that support embeddings)
embedding_models = []
suggested_embedding_models = [
"nomic-embed-text",
"mxbai-embed-large",
"bge-m3",
"all-minilm",
"snowflake-arctic-embed",
]
for model in model_names:
# Check if it's an embedding model (by name patterns or known models)
base_name = model.split(":")[0]
if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5"]):
embedding_models.append(model)
# Check if model exists (handle versioned names) and resolve to full name
resolved_model_name = None
for name in model_names:
# Exact match
if model_name == name:
resolved_model_name = name
break
# Match without version tag (use the versioned name)
elif model_name == name.split(":")[0]:
resolved_model_name = name
break
if not resolved_model_name:
error_msg = f"❌ Model '{model_name}' not found in local Ollama.\n\n"
# Suggest pulling the model
error_msg += "📦 To install this embedding model:\n"
error_msg += f" ollama pull {model_name}\n\n"
# Show available embedding models
if embedding_models:
error_msg += "✅ Available embedding models:\n"
for model in embedding_models[:5]:
error_msg += f"{model}\n"
if len(embedding_models) > 5:
error_msg += f" ... and {len(embedding_models) - 5} more\n"
else:
error_msg += "💡 Popular embedding models to install:\n"
for model in suggested_embedding_models[:3]:
error_msg += f" • ollama pull {model}\n"
error_msg += "\n📚 Browse more: https://ollama.com/library"
raise ValueError(error_msg)
# Use the resolved model name for all subsequent operations
if resolved_model_name != model_name:
logger.info(f"Resolved model name '{model_name}' to '{resolved_model_name}'")
model_name = resolved_model_name
# Verify the model supports embeddings by testing it
try:
test_response = requests.post(
f"{host}/api/embeddings", json={"model": model_name, "prompt": "test"}, timeout=10
)
if test_response.status_code != 200:
error_msg = (
f"⚠️ Model '{model_name}' exists but may not support embeddings.\n\n"
f"Please use an embedding model like:\n"
)
for model in suggested_embedding_models[:3]:
error_msg += f"{model}\n"
raise ValueError(error_msg)
except requests.exceptions.RequestException:
# If test fails, continue anyway - model might still work
pass
except requests.exceptions.RequestException as e:
logger.warning(f"Could not verify model existence: {e}")
# Determine batch size based on device availability
# Check for CUDA/MPS availability using torch if available
batch_size = 32 # Default for MPS/CPU
try:
import torch
if torch.cuda.is_available():
batch_size = 128 # CUDA gets larger batch size
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
batch_size = 32 # MPS gets smaller batch size
except ImportError:
# If torch is not available, use conservative batch size
batch_size = 32
logger.info(f"Using batch size: {batch_size}")
def get_batch_embeddings(batch_texts):
"""Get embeddings for a batch of texts."""
all_embeddings = []
failed_indices = []
for i, text in enumerate(batch_texts):
max_retries = 3
retry_count = 0
# Truncate very long texts to avoid API issues
truncated_text = text[:8000] if len(text) > 8000 else text
while retry_count < max_retries:
try:
response = requests.post(
f"{host}/api/embeddings",
json={"model": model_name, "prompt": truncated_text},
timeout=30,
)
response.raise_for_status()
result = response.json()
embedding = result.get("embedding")
if embedding is None:
raise ValueError(f"No embedding returned for text {i}")
if not isinstance(embedding, list) or len(embedding) == 0:
raise ValueError(f"Invalid embedding format for text {i}")
all_embeddings.append(embedding)
break
except requests.exceptions.Timeout:
retry_count += 1
if retry_count >= max_retries:
logger.warning(f"Timeout for text {i} after {max_retries} retries")
failed_indices.append(i)
all_embeddings.append(None)
break
except Exception as e:
retry_count += 1
if retry_count >= max_retries:
logger.error(f"Failed to get embedding for text {i}: {e}")
failed_indices.append(i)
all_embeddings.append(None)
break
return all_embeddings, failed_indices
# Process texts in batches
all_embeddings = []
all_failed_indices = []
# Setup progress bar if needed
show_progress = is_build or len(texts) > 10
try:
if show_progress:
from tqdm import tqdm
except ImportError:
show_progress = False
# Process batches
num_batches = (len(texts) + batch_size - 1) // batch_size
if show_progress:
batch_iterator = tqdm(range(num_batches), desc="Computing Ollama embeddings")
else:
batch_iterator = range(num_batches)
for batch_idx in batch_iterator:
start_idx = batch_idx * batch_size
end_idx = min(start_idx + batch_size, len(texts))
batch_texts = texts[start_idx:end_idx]
batch_embeddings, batch_failed = get_batch_embeddings(batch_texts)
# Adjust failed indices to global indices
global_failed = [start_idx + idx for idx in batch_failed]
all_failed_indices.extend(global_failed)
all_embeddings.extend(batch_embeddings)
# Handle failed embeddings
if all_failed_indices:
if len(all_failed_indices) == len(texts):
raise RuntimeError("Failed to compute any embeddings")
logger.warning(
f"Failed to compute embeddings for {len(all_failed_indices)}/{len(texts)} texts"
)
# Use zero embeddings as fallback for failed ones
valid_embedding = next((e for e in all_embeddings if e is not None), None)
if valid_embedding:
embedding_dim = len(valid_embedding)
for i, embedding in enumerate(all_embeddings):
if embedding is None:
all_embeddings[i] = [0.0] * embedding_dim
# Remove None values
all_embeddings = [e for e in all_embeddings if e is not None]
if not all_embeddings:
raise RuntimeError("No valid embeddings were computed")
# Validate embedding dimensions
expected_dim = len(all_embeddings[0])
inconsistent_dims = []
for i, embedding in enumerate(all_embeddings):
if len(embedding) != expected_dim:
inconsistent_dims.append((i, len(embedding)))
if inconsistent_dims:
error_msg = f"Ollama returned inconsistent embedding dimensions. Expected {expected_dim}, but got:\n"
for idx, dim in inconsistent_dims[:10]: # Show first 10 inconsistent ones
error_msg += f" - Text {idx}: {dim} dimensions\n"
if len(inconsistent_dims) > 10:
error_msg += f" ... and {len(inconsistent_dims) - 10} more\n"
error_msg += f"\nThis is likely an Ollama API bug with model '{model_name}'. Please try:\n"
error_msg += "1. Restart Ollama service: 'ollama serve'\n"
error_msg += f"2. Re-pull the model: 'ollama pull {model_name}'\n"
error_msg += (
"3. Use sentence-transformers instead: --embedding-mode sentence-transformers\n"
)
error_msg += "4. Report this issue to Ollama: https://github.com/ollama/ollama/issues"
raise ValueError(error_msg)
# Convert to numpy array and normalize
embeddings = np.array(all_embeddings, dtype=np.float32)
# Normalize embeddings (L2 normalization)
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
embeddings = embeddings / (norms + 1e-8) # Add small epsilon to avoid division by zero
logger.info(f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}")
return embeddings
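# Minimal usage sketch, assuming Ollama is serving locally and the model has
# been pulled ("nomic-embed-text" is one of the suggested embedding models):
#   vecs = compute_embeddings_ollama(["hello", "world"], "nomic-embed-text")
#   vecs.shape  # -> (2, embedding_dim); rows are L2-normalized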

View File

@@ -6,9 +6,8 @@ import subprocess
import sys
import time
from pathlib import Path
from typing import Optional
# Lightweight, self-contained server manager with no cross-process inspection
import psutil
# Set up logging based on environment variable
LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
@@ -43,7 +42,130 @@ def _check_port(port: int) -> bool:
return s.connect_ex(("localhost", port)) == 0
# Note: All cross-process scanning helpers removed for simplicity
def _check_process_matches_config(
port: int, expected_model: str, expected_passages_file: str
) -> bool:
"""
Check if the process using the port matches our expected model and passages file.
Returns True if matches, False otherwise.
"""
try:
for proc in psutil.process_iter(["pid", "cmdline"]):
if not _is_process_listening_on_port(proc, port):
continue
cmdline = proc.info["cmdline"]
if not cmdline:
continue
return _check_cmdline_matches_config(
cmdline, port, expected_model, expected_passages_file
)
logger.debug(f"No process found listening on port {port}")
return False
except Exception as e:
logger.warning(f"Could not check process on port {port}: {e}")
return False
def _is_process_listening_on_port(proc, port: int) -> bool:
"""Check if a process is listening on the given port."""
try:
connections = proc.net_connections()
for conn in connections:
if conn.laddr.port == port and conn.status == psutil.CONN_LISTEN:
return True
return False
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
return False
def _check_cmdline_matches_config(
cmdline: list, port: int, expected_model: str, expected_passages_file: str
) -> bool:
"""Check if command line matches our expected configuration."""
cmdline_str = " ".join(cmdline)
logger.debug(f"Found process on port {port}: {cmdline_str}")
# Check if it's our embedding server
is_embedding_server = any(
server_type in cmdline_str
for server_type in [
"embedding_server",
"leann_backend_diskann.embedding_server",
"leann_backend_hnsw.hnsw_embedding_server",
]
)
if not is_embedding_server:
logger.debug(f"Process on port {port} is not our embedding server")
return False
# Check model name
model_matches = _check_model_in_cmdline(cmdline, expected_model)
# Check passages file if provided
passages_matches = _check_passages_in_cmdline(cmdline, expected_passages_file)
result = model_matches and passages_matches
logger.debug(
f"model_matches: {model_matches}, passages_matches: {passages_matches}, overall: {result}"
)
return result
def _check_model_in_cmdline(cmdline: list, expected_model: str) -> bool:
"""Check if the command line contains the expected model."""
if "--model-name" not in cmdline:
return False
model_idx = cmdline.index("--model-name")
if model_idx + 1 >= len(cmdline):
return False
actual_model = cmdline[model_idx + 1]
return actual_model == expected_model
def _check_passages_in_cmdline(cmdline: list, expected_passages_file: str) -> bool:
"""Check if the command line contains the expected passages file."""
if "--passages-file" not in cmdline:
return False # Expected but not found
passages_idx = cmdline.index("--passages-file")
if passages_idx + 1 >= len(cmdline):
return False
actual_passages = cmdline[passages_idx + 1]
expected_path = Path(expected_passages_file).resolve()
actual_path = Path(actual_passages).resolve()
return actual_path == expected_path
def _find_compatible_port_or_next_available(
start_port: int, model_name: str, passages_file: str, max_attempts: int = 100
) -> tuple[int, bool]:
"""
Find a port that either has a compatible server or is available.
Returns (port, is_compatible) where is_compatible indicates if we found a matching server.
"""
for port in range(start_port, start_port + max_attempts):
if not _check_port(port):
# Port is available
return port, False
# Port is in use, check if it's compatible
if _check_process_matches_config(port, model_name, passages_file):
logger.info(f"Found compatible server on port {port}")
return port, True
else:
logger.info(f"Port {port} has incompatible server, trying next port...")
raise RuntimeError(
f"Could not find compatible or available port in range {start_port}-{start_port + max_attempts}"
)
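# Example, assuming a server may already be listening near the starting port
# (port number and paths are illustrative):
#   port, compatible = _find_compatible_port_or_next_available(
#       5557, "facebook/contriever", "/tmp/passages.jsonl"
#   )
#   # compatible=True -> reuse that server; False -> `port` is free for a new one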
class EmbeddingServerManager:
@@ -60,18 +182,9 @@ class EmbeddingServerManager:
e.g., "leann_backend_diskann.embedding_server"
"""
self.backend_module_name = backend_module_name
self.server_process: Optional[subprocess.Popen] = None
self.server_port: Optional[int] = None
# Track last-started config for in-process reuse only
self._server_config: Optional[dict] = None
self.server_process: subprocess.Popen | None = None
self.server_port: int | None = None
self._atexit_registered = False
# Also register a weakref finalizer to ensure cleanup when manager is GC'ed
try:
import weakref
self._finalizer = weakref.finalize(self, self._finalize_process)
except Exception:
self._finalizer = None
def start_server(
self,
@@ -81,24 +194,26 @@ class EmbeddingServerManager:
**kwargs,
) -> tuple[bool, int]:
"""Start the embedding server."""
# passages_file may be present in kwargs for server CLI, but we don't need it here
passages_file = kwargs.get("passages_file")
# If this manager already has a live server, just reuse it
if self.server_process and self.server_process.poll() is None and self.server_port:
logger.info("Reusing in-process server")
return True, self.server_port
# Check if we have a compatible server already running
if self._has_compatible_running_server(model_name, passages_file):
logger.info("Found compatible running server!")
return True, port
# For Colab environment, use a different strategy
if _is_colab_environment():
logger.info("Detected Colab environment, using alternative startup strategy")
return self._start_server_colab(port, model_name, embedding_mode, **kwargs)
# Always pick a fresh available port
try:
actual_port = _get_available_port(port)
except RuntimeError:
logger.error("No available ports found")
return False, port
# Find a compatible port or next available
actual_port, is_compatible = _find_compatible_port_or_next_available(
port, model_name, passages_file
)
if is_compatible:
logger.info(f"Found compatible server on port {actual_port}")
return True, actual_port
# Start a new server
return self._start_new_server(actual_port, model_name, embedding_mode, **kwargs)
@@ -131,7 +246,17 @@ class EmbeddingServerManager:
logger.error(f"Failed to start embedding server in Colab: {e}")
return False, actual_port
# Note: No compatibility check needed; manager is per-searcher and configs are stable per instance
def _has_compatible_running_server(self, model_name: str, passages_file: str) -> bool:
"""Check if we have a compatible running server."""
if not (self.server_process and self.server_process.poll() is None and self.server_port):
return False
if _check_process_matches_config(self.server_port, model_name, passages_file):
logger.info(f"Existing server process (PID {self.server_process.pid}) is compatible")
return True
logger.info("Existing server process is incompatible. Should start a new server.")
return False
def _start_new_server(
self, port: int, model_name: str, embedding_mode: str, **kwargs
@@ -178,61 +303,22 @@ class EmbeddingServerManager:
project_root = Path(__file__).parent.parent.parent.parent.parent
logger.info(f"Command: {' '.join(command)}")
# In CI environment, redirect stdout to avoid buffer deadlock but keep stderr for debugging
# Embedding servers use many print statements that can fill stdout buffers
is_ci = os.environ.get("CI") == "true"
if is_ci:
stdout_target = subprocess.DEVNULL
stderr_target = None # Keep stderr for error debugging in CI
logger.info(
"CI environment detected, redirecting embedding server stdout to DEVNULL, keeping stderr"
)
else:
stdout_target = None # Direct to console for visible logs
stderr_target = None # Direct to console for visible logs
# Start embedding server subprocess
# Let server output go directly to console
# The server will respect LEANN_LOG_LEVEL environment variable
self.server_process = subprocess.Popen(
command,
cwd=project_root,
stdout=stdout_target,
stderr=stderr_target,
stdout=None, # Direct to console
stderr=None, # Direct to console
)
self.server_port = port
# Record config for in-process reuse
try:
self._server_config = {
"model_name": command[command.index("--model-name") + 1]
if "--model-name" in command
else "",
"passages_file": command[command.index("--passages-file") + 1]
if "--passages-file" in command
else "",
"embedding_mode": command[command.index("--embedding-mode") + 1]
if "--embedding-mode" in command
else "sentence-transformers",
}
except Exception:
self._server_config = {
"model_name": "",
"passages_file": "",
"embedding_mode": "sentence-transformers",
}
logger.info(f"Server process started with PID: {self.server_process.pid}")
# Register atexit callback only when we actually start a process
if not self._atexit_registered:
# Always attempt best-effort finalize at interpreter exit
atexit.register(self._finalize_process)
# Use a lambda to avoid issues with bound methods
atexit.register(lambda: self.stop_server() if self.server_process else None)
self._atexit_registered = True
# Touch finalizer so it knows there is a live process
if getattr(self, "_finalizer", None) is not None and not self._finalizer.alive:
try:
import weakref
self._finalizer = weakref.finalize(self, self._finalize_process)
except Exception:
pass
def _wait_for_server_ready(self, port: int) -> tuple[bool, int]:
"""Wait for the server to be ready."""
@@ -257,35 +343,24 @@ class EmbeddingServerManager:
if not self.server_process:
return
if self.server_process and self.server_process.poll() is not None:
if self.server_process.poll() is not None:
# Process already terminated
self.server_process = None
self.server_port = None
self._server_config = None
return
logger.info(
f"Terminating server process (PID: {self.server_process.pid}) for backend {self.backend_module_name}..."
)
# Use simple termination first; if the server installed signal handlers,
# it will exit cleanly. Otherwise escalate to kill after a short wait.
try:
self.server_process.terminate()
except Exception:
pass
self.server_process.terminate()
try:
self.server_process.wait(timeout=5) # Give more time for graceful shutdown
logger.info(f"Server process {self.server_process.pid} terminated gracefully.")
self.server_process.wait(timeout=3)
logger.info(f"Server process {self.server_process.pid} terminated.")
except subprocess.TimeoutExpired:
logger.warning(
f"Server process {self.server_process.pid} did not terminate within 5 seconds, force killing..."
f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
)
try:
self.server_process.kill()
except Exception:
pass
self.server_process.kill()
try:
self.server_process.wait(timeout=2)
logger.info(f"Server process {self.server_process.pid} killed successfully.")
@@ -293,33 +368,15 @@ class EmbeddingServerManager:
logger.error(
f"Failed to kill server process {self.server_process.pid} - it may be hung"
)
# Don't hang indefinitely
# Clean up process resources with timeout to avoid CI hang
# Clean up process resources to prevent resource tracker warnings
try:
# Use shorter timeout in CI environments
is_ci = os.environ.get("CI") == "true"
timeout = 3 if is_ci else 10
self.server_process.wait(timeout=timeout)
logger.info(f"Server process {self.server_process.pid} cleanup completed")
except subprocess.TimeoutExpired:
logger.warning(f"Process cleanup timeout after {timeout}s, proceeding anyway")
except Exception as e:
logger.warning(f"Error during process cleanup: {e}")
finally:
self.server_process = None
self.server_port = None
self._server_config = None
def _finalize_process(self) -> None:
"""Best-effort cleanup used by weakref.finalize/atexit."""
try:
self.stop_server()
self.server_process.wait() # Ensure process is fully cleaned up
except Exception:
pass
def _adopt_existing_server(self, *args, **kwargs) -> None:
# Removed: cross-process adoption no longer supported
return
self.server_process = None
def _launch_server_process_colab(self, command: list, port: int) -> None:
"""Launch the server process with Colab-specific settings."""
@@ -335,16 +392,10 @@ class EmbeddingServerManager:
self.server_port = port
logger.info(f"Colab server process started with PID: {self.server_process.pid}")
# Register atexit callback (unified)
# Register atexit callback
if not self._atexit_registered:
atexit.register(self._finalize_process)
atexit.register(lambda: self.stop_server() if self.server_process else None)
self._atexit_registered = True
# Record config for in-process reuse is best-effort in Colab mode
self._server_config = {
"model_name": "",
"passages_file": "",
"embedding_mode": "sentence-transformers",
}
def _wait_for_server_ready_colab(self, port: int) -> tuple[bool, int]:
"""Wait for the server to be ready with Colab-specific timeout."""

View File

@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Any, Literal, Optional
from typing import Any, Literal
import numpy as np
@@ -34,9 +34,7 @@ class LeannBackendSearcherInterface(ABC):
pass
@abstractmethod
def _ensure_server_running(
self, passages_source_file: str, port: Optional[int], **kwargs
) -> int:
def _ensure_server_running(self, passages_source_file: str, port: int | None, **kwargs) -> int:
"""Ensure server is running"""
pass
@@ -50,7 +48,7 @@ class LeannBackendSearcherInterface(ABC):
prune_ratio: float = 0.0,
recompute_embeddings: bool = False,
pruning_strategy: Literal["global", "local", "proportional"] = "global",
zmq_port: Optional[int] = None,
zmq_port: int | None = None,
**kwargs,
) -> dict[str, Any]:
"""Search for nearest neighbors
@@ -76,7 +74,7 @@ class LeannBackendSearcherInterface(ABC):
self,
query: str,
use_server_if_available: bool = True,
zmq_port: Optional[int] = None,
zmq_port: int | None = None,
) -> np.ndarray:
"""Compute embedding for a query string

View File

@@ -1,153 +0,0 @@
#!/usr/bin/env python3
import json
import subprocess
import sys
def handle_request(request):
if request.get("method") == "initialize":
return {
"jsonrpc": "2.0",
"id": request.get("id"),
"result": {
"capabilities": {"tools": {}},
"protocolVersion": "2024-11-05",
"serverInfo": {"name": "leann-mcp", "version": "1.0.0"},
},
}
elif request.get("method") == "tools/list":
return {
"jsonrpc": "2.0",
"id": request.get("id"),
"result": {
"tools": [
{
"name": "leann_search",
"description": """🔍 Search code using natural language - like having a coding assistant who knows your entire codebase!
🎯 **Perfect for**:
- "How does authentication work?" → finds auth-related code
- "Error handling patterns" → locates try-catch blocks and error logic
- "Database connection setup" → finds DB initialization code
- "API endpoint definitions" → locates route handlers
- "Configuration management" → finds config files and usage
💡 **Pro tip**: Use this before making any changes to understand existing patterns and conventions.""",
"inputSchema": {
"type": "object",
"properties": {
"index_name": {
"type": "string",
"description": "Name of the LEANN index to search. Use 'leann_list' first to see available indexes.",
},
"query": {
"type": "string",
"description": "Search query - can be natural language (e.g., 'how to handle errors') or technical terms (e.g., 'async function definition')",
},
"top_k": {
"type": "integer",
"default": 5,
"minimum": 1,
"maximum": 20,
"description": "Number of search results to return. Use 5-10 for focused results, 15-20 for comprehensive exploration.",
},
"complexity": {
"type": "integer",
"default": 32,
"minimum": 16,
"maximum": 128,
"description": "Search complexity level. Use 16-32 for fast searches (recommended), 64+ for higher precision when needed.",
},
},
"required": ["index_name", "query"],
},
},
{
"name": "leann_list",
"description": "📋 Show all your indexed codebases - your personal code library! Use this to see what's available for search.",
"inputSchema": {"type": "object", "properties": {}},
},
]
},
}
elif request.get("method") == "tools/call":
tool_name = request["params"]["name"]
args = request["params"].get("arguments", {})
try:
if tool_name == "leann_search":
# Validate required parameters
if not args.get("index_name") or not args.get("query"):
return {
"jsonrpc": "2.0",
"id": request.get("id"),
"result": {
"content": [
{
"type": "text",
"text": "Error: Both index_name and query are required",
}
]
},
}
# Build simplified command
cmd = [
"leann",
"search",
args["index_name"],
args["query"],
f"--top-k={args.get('top_k', 5)}",
f"--complexity={args.get('complexity', 32)}",
]
result = subprocess.run(cmd, capture_output=True, text=True)
elif tool_name == "leann_list":
result = subprocess.run(["leann", "list"], capture_output=True, text=True)
return {
"jsonrpc": "2.0",
"id": request.get("id"),
"result": {
"content": [
{
"type": "text",
"text": result.stdout
if result.returncode == 0
else f"Error: {result.stderr}",
}
]
},
}
except Exception as e:
return {
"jsonrpc": "2.0",
"id": request.get("id"),
"error": {"code": -1, "message": str(e)},
}
def main():
for line in sys.stdin:
try:
request = json.loads(line.strip())
response = handle_request(request)
if response:
print(json.dumps(response))
sys.stdout.flush()
except Exception as e:
error_response = {
"jsonrpc": "2.0",
"id": None,
"error": {"code": -1, "message": str(e)},
}
print(json.dumps(error_response))
sys.stdout.flush()
if __name__ == "__main__":
main()
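# Example stdin request this loop handles (shape follows handle_request above):
#   {"jsonrpc": "2.0", "id": 1, "method": "tools/call",
#    "params": {"name": "leann_search",
#               "arguments": {"index_name": "my-project", "query": "auth flow"}}}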

View File

@@ -1,7 +1,7 @@
import json
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Literal, Optional
from typing import Any, Literal
import numpy as np
@@ -169,7 +169,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
prune_ratio: float = 0.0,
recompute_embeddings: bool = False,
pruning_strategy: Literal["global", "local", "proportional"] = "global",
zmq_port: Optional[int] = None,
zmq_port: int | None = None,
**kwargs,
) -> dict[str, Any]:
"""

View File

@@ -1,147 +0,0 @@
# 🔥 LEANN Claude Code Integration
Transform your development workflow with intelligent code assistance using LEANN's semantic search directly in Claude Code.
## Prerequisites
Install LEANN globally for MCP integration (with default backend):
```bash
uv tool install leann-core --with leann
```
This installs the `leann` CLI into an isolated tool environment and includes both backends so `leann build` works out-of-the-box.
## 🚀 Quick Setup
Add the LEANN MCP server to Claude Code. Choose the scope based on how widely you want it available. Below is the command to install it globally; if you prefer a local install, skip this step:
```bash
# Global (recommended): available in all projects for your user
claude mcp add --scope user leann-server -- leann_mcp
```
- `leann-server`: the display name of the MCP server in Claude Code (you can change it).
- `leann_mcp`: the Python entry point installed with LEANN that starts the MCP server.
Verify it is registered globally:
```bash
claude mcp list | cat
```
## 🛠️ Available Tools
Once connected, you'll have access to these powerful semantic search tools in Claude Code:
- **`leann_list`** - List all available indexes across your projects
- **`leann_search`** - Perform semantic searches across code and documents
## 🎯 Quick Start Example
```bash
# Add locally if you did not add it globally (current folder only; default if --scope is omitted)
claude mcp add leann-server -- leann_mcp
# Build an index for your project (change to your actual path)
# See the advanced examples below for more ways to configure indexing
# Set the index name (replace 'my-project' with your own)
leann build my-project --docs $(git ls-files)
# Start Claude Code
claude
```
## 🚀 Advanced Usage Examples for Building Indexes
### Index Entire Git Repository
```bash
# Index all tracked files in your Git repository.
# Note: submodules are currently skipped; we can add them back if needed.
leann build my-repo --docs $(git ls-files) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
# Index only tracked Python files from Git.
leann build my-python-code --docs $(git ls-files "*.py") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
# If you encounter empty requests caused by empty files (e.g., __init__.py), exclude zero-byte files. Thanks @ww2283 for pointing [that](https://github.com/yichuan-w/LEANN/issues/48) out
leann build leann-prospec-lig --docs $(find ./src -name "*.py" -not -empty) --embedding-mode openai --embedding-model text-embedding-3-small
```
### Multiple Directories and Files
```bash
# Index multiple directories
leann build my-codebase --docs ./src ./tests ./docs ./config --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
# Mix files and directories
leann build my-project --docs ./README.md ./src/ ./package.json ./docs/ --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
# Specific files only
leann build my-configs --docs ./tsconfig.json ./package.json ./webpack.config.js --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
```
### Advanced Git Integration
```bash
# Index recently modified files
leann build recent-changes --docs $(git diff --name-only HEAD~10..HEAD) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
# Index files matching pattern
leann build frontend --docs $(git ls-files "*.tsx" "*.ts" "*.jsx" "*.js") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
# Index documentation and config files
leann build docs-and-configs --docs $(git ls-files "*.md" "*.yml" "*.yaml" "*.json" "*.toml") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
```
## **Try this in Claude Code:**
```
Help me understand this codebase. List available indexes and search for authentication patterns.
```
<p align="center">
<img src="../../assets/claude_code_leann.png" alt="LEANN in Claude Code" width="80%">
</p>
If you see a prompt asking whether to proceed with LEANN, you can now use it in your chat!
## 🧠 How It Works
The integration consists of three key components working seamlessly together:
- **`leann`** - Core CLI tool for indexing and searching (installed globally via `uv tool install`)
- **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration
- **Claude Code** - Calls `leann_mcp`, which executes `leann` commands and returns intelligent results
## 📁 File Support
LEANN understands **30+ file types** including:
- **Programming**: Python, JavaScript, TypeScript, Java, Go, Rust, C++, C#
- **Data**: SQL, YAML, JSON, CSV, XML
- **Documentation**: Markdown, TXT, PDF
- **And many more!**
## 💾 Storage & Organization
- **Project indexes**: Stored in `.leann/` directory (just like `.git`)
- **Global registry**: Project tracking at `~/.leann/projects.json`
- **Multi-project support**: Switch between different codebases seamlessly
- **Portable**: Transfer indexes between machines with minimal overhead
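
The global registry mentioned above is a plain JSON list of project paths, so it can be inspected directly. A minimal sketch (assuming the registry exists and follows the format written by `leann build`):

```python
import json
from pathlib import Path

registry = Path.home() / ".leann" / "projects.json"
if registry.exists():
    for project in json.loads(registry.read_text()):
        print(project)  # one absolute project path per entry
```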
## 🗑️ Uninstalling
To remove the LEANN MCP server from Claude Code:
```bash
claude mcp remove leann-server
```
To remove LEANN from the current environment:
```bash
uv pip uninstall leann leann-backend-hnsw leann-core
```
To remove the global tool install (e.g., before a version update):
```bash
uv tool list | cat
uv tool uninstall leann-core
command -v leann || echo "leann gone"
command -v leann_mcp || echo "leann_mcp gone"
```

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "leann"
version = "0.2.9"
version = "0.1.16"
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
readme = "README.md"
requires-python = ">=3.9"

View File

@@ -1 +0,0 @@
__all__ = []

View File

@@ -136,9 +136,5 @@ def export_sqlite(
connection.commit()
def main():
app()
if __name__ == "__main__":
main()
app()

View File

@@ -10,7 +10,6 @@ requires-python = ">=3.9"
dependencies = [
"leann-core",
"leann-backend-hnsw",
"typer>=0.12.3",
"numpy>=1.26.0",
"torch",
"tqdm",
@@ -33,7 +32,7 @@ dependencies = [
"pypdfium2>=4.30.0",
# LlamaIndex core and readers - updated versions
"llama-index>=0.12.44",
"llama-index-readers-file>=0.4.0", # Essential for PDF parsing
"llama-index-readers-file>=0.4.0", # Essential for PDF parsing
# "llama-index-readers-docling", # Requires Python >= 3.10
# "llama-index-node-parser-docling", # Requires Python >= 3.10
"llama-index-vector-stores-faiss>=0.4.0",
@@ -41,13 +40,9 @@ dependencies = [
# Other dependencies
"ipykernel==6.29.5",
"msgpack>=1.1.1",
"mlx>=0.26.3; sys_platform == 'darwin' and platform_machine == 'arm64'",
"mlx-lm>=0.26.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
"mlx>=0.26.3; sys_platform == 'darwin'",
"mlx-lm>=0.26.0; sys_platform == 'darwin'",
"psutil>=5.8.0",
"pybind11>=3.0.0",
"pathspec>=0.12.1",
"nbconvert>=7.16.6",
"gitignore-parser>=0.1.12",
]
[project.optional-dependencies]
@@ -56,7 +51,7 @@ dev = [
"pytest-cov>=4.0",
"pytest-xdist>=3.0", # For parallel test execution
"black>=23.0",
"ruff==0.12.7", # Fixed version to ensure consistent formatting across all environments
"ruff>=0.1.0",
"matplotlib",
"huggingface-hub>=0.20.0",
"pre-commit>=3.5.0",
@@ -85,11 +80,6 @@ documents = [
[tool.setuptools]
py-modules = []
packages = ["wechat_exporter"]
package-dir = { "wechat_exporter" = "packages/wechat-exporter" }
[project.scripts]
wechat-exporter = "wechat_exporter.main:main"
[tool.uv.sources]
@@ -98,7 +88,7 @@ leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = tr
leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }
[tool.ruff]
target-version = "py39"
target-version = "py310"
line-length = 100
extend-exclude = [
"third_party",
@@ -144,14 +134,6 @@ dev = [
"ruff>=0.12.4",
]
[tool.lychee]
accept = ["200", "403", "429", "503"]
timeout = 20
max_retries = 2
exclude = ["localhost", "127.0.0.1", "example.com"]
exclude_path = [".git/", ".venv/", "__pycache__/", "third_party/"]
scheme = ["https", "http"]
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
@@ -161,7 +143,7 @@ markers = [
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
"openai: marks tests that require OpenAI API key",
]
timeout = 300 # Reduced from 600s (10min) to 300s (5min) for CI safety
timeout = 600
addopts = [
"-v",
"--tb=short",

View File

@@ -1,76 +0,0 @@
name: leann-build
resources:
# Choose a GPU for fast embeddings (examples: L4, A10G, A100). CPU also works but is slower.
accelerators: L4:1
# Optionally pin a cloud, otherwise SkyPilot will auto-select
# cloud: aws
disk_size: 100
envs:
# Build parameters (override with: sky launch -c leann-gpu sky/leann-build.yaml -e key=value)
index_name: my-index
docs: ./data
backend: hnsw # hnsw | diskann
complexity: 64
graph_degree: 32
num_threads: 8
# Embedding selection
embedding_mode: sentence-transformers # sentence-transformers | openai | mlx | ollama
embedding_model: facebook/contriever
# Storage/latency knobs
recompute: true # true => selective recomputation (recommended)
compact: true # for HNSW only
# Optional pass-through
extra_args: ""
# Rebuild control
force: true
# Sync local paths to the remote VM. Adjust as needed.
file_mounts:
# Example: mount your local data directory used for building
~/leann-data: ${docs}
setup: |
set -e
# Install uv (package manager)
curl -LsSf https://astral.sh/uv/install.sh | sh
export PATH="$HOME/.local/bin:$PATH"
# Ensure modern libstdc++ for FAISS (GLIBCXX >= 3.4.30)
sudo apt-get update -y
sudo apt-get install -y libstdc++6 libgomp1
# Also upgrade conda's libstdc++ in base env (Skypilot images include conda)
if command -v conda >/dev/null 2>&1; then
conda install -y -n base -c conda-forge libstdcxx-ng
fi
# Install LEANN CLI and backends into the user environment
uv pip install --upgrade pip
uv pip install leann-core leann-backend-hnsw leann-backend-diskann
run: |
export PATH="$HOME/.local/bin:$PATH"
# Derive flags from env
recompute_flag=""
if [ "${recompute}" = "false" ] || [ "${recompute}" = "0" ]; then
recompute_flag="--no-recompute"
fi
force_flag=""
if [ "${force}" = "true" ] || [ "${force}" = "1" ]; then
force_flag="--force"
fi
# Build command
python -m leann.cli build ${index_name} \
--docs ~/leann-data \
--backend ${backend} \
--complexity ${complexity} \
--graph-degree ${graph_degree} \
--num-threads ${num_threads} \
--embedding-mode ${embedding_mode} \
--embedding-model ${embedding_model} \
${recompute_flag} ${force_flag} ${extra_args}
# Print where the index is stored for downstream rsync
echo "INDEX_OUT_DIR=~/.leann/indexes/${index_name}"

View File

@@ -0,0 +1,161 @@
import email
import os
from typing import Any
from llama_index.core import Document, VectorStoreIndex
from llama_index.core.readers.base import BaseReader
class EmlxReader(BaseReader):
"""
Apple Mail .emlx file reader.
Reads individual .emlx files from Apple Mail's storage format.
"""
def __init__(self) -> None:
"""Initialize."""
pass
def load_data(self, input_dir: str, **load_kwargs: Any) -> list[Document]:
"""
Load data from the input directory containing .emlx files.
Args:
input_dir: Directory containing .emlx files
**load_kwargs:
max_count (int): Maximum number of messages to read.
"""
docs: list[Document] = []
max_count = load_kwargs.get("max_count", 1000)
count = 0
# Walk through the directory recursively
for dirpath, dirnames, filenames in os.walk(input_dir):
# Skip hidden directories
dirnames[:] = [d for d in dirnames if not d.startswith(".")]
for filename in filenames:
if count >= max_count:
break
if filename.endswith(".emlx"):
filepath = os.path.join(dirpath, filename)
try:
# Read the .emlx file
with open(filepath, encoding="utf-8", errors="ignore") as f:
content = f.read()
# .emlx files have a length prefix followed by the email content
# The first line contains the length, followed by the email
lines = content.split("\n", 1)
if len(lines) >= 2:
email_content = lines[1]
# Parse the email using Python's email module
try:
msg = email.message_from_string(email_content)
# Extract email metadata
subject = msg.get("Subject", "No Subject")
from_addr = msg.get("From", "Unknown")
to_addr = msg.get("To", "Unknown")
date = msg.get("Date", "Unknown")
# Extract email body
body = ""
if msg.is_multipart():
for part in msg.walk():
if (
part.get_content_type() == "text/plain"
or part.get_content_type() == "text/html"
):
body += part.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
# break
else:
body = msg.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
# Create document content
doc_content = f"""
From: {from_addr}
To: {to_addr}
Subject: {subject}
Date: {date}
{body}
"""
# Create metadata
metadata = {
"file_path": filepath,
"subject": subject,
"from": from_addr,
"to": to_addr,
"date": date,
"filename": filename,
}
if count == 0:
# Debug: inspect the first message and its MIME structure
print("--------------------------------")
print("dir path", dirpath)
print(metadata)
print(doc_content)
if msg.is_multipart():
for part in msg.walk():
print("content type:", part.get_content_type())
else:
print(body)
print("--------------------------------")
doc = Document(text=doc_content, metadata=metadata)
docs.append(doc)
count += 1
except Exception as e:
print(f"!!!!!!! Error parsing email from {filepath}: {e} !!!!!!!!")
continue
except Exception as e:
print(f"!!!!!!! Error reading file !!!!!!!! {filepath}: {e}")
continue
print(f"Loaded {len(docs)} email documents")
return docs
# Use the custom EmlxReader instead of MboxReader
documents = EmlxReader().load_data(
"/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages",
max_count=1000,
) # Returns list of documents
# Configure the index with larger chunk size to handle long metadata
from llama_index.core.node_parser import SentenceSplitter
# Create a custom text splitter with larger chunk size
text_splitter = SentenceSplitter(chunk_size=2048, chunk_overlap=200)
index = VectorStoreIndex.from_documents(
documents, transformations=[text_splitter]
) # Initialize index with documents
query_engine = index.as_query_engine()
res = query_engine.query("Hows Berkeley Graduate Student Instructor")
print(res)
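
The reader above discards the .emlx byte-count prefix after splitting on the first newline. For robustness, the prefix can be honored explicitly, since everything after that many bytes is an XML plist of Mail metadata rather than message content; a minimal sketch (the helper name `read_emlx_message` is ours):

```python
import email
from email.message import Message


def read_emlx_message(filepath: str) -> Message:
    """Parse one .emlx file, honoring its byte-count prefix."""
    with open(filepath, "rb") as f:
        raw = f.read()
    prefix, _, rest = raw.partition(b"\n")
    length = int(prefix.strip())  # first line: byte length of the message that follows
    return email.message_from_bytes(rest[:length])  # trailing plist is excluded
```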

View File

@@ -0,0 +1,219 @@
import email
import os
from typing import Any
from llama_index.core import Document, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.readers.base import BaseReader
class EmlxReader(BaseReader):
"""
Apple Mail .emlx file reader.
Reads individual .emlx files from Apple Mail's storage format.
"""
def __init__(self) -> None:
"""Initialize."""
pass
def load_data(self, input_dir: str, **load_kwargs: Any) -> list[Document]:
"""
Load data from the input directory containing .emlx files.
Args:
input_dir: Directory containing .emlx files
**load_kwargs:
max_count (int): Maximum number of messages to read.
"""
docs: list[Document] = []
max_count = load_kwargs.get("max_count", 1000)
count = 0
# Walk through the directory recursively
for dirpath, dirnames, filenames in os.walk(input_dir):
# Skip hidden directories
dirnames[:] = [d for d in dirnames if not d.startswith(".")]
for filename in filenames:
if count >= max_count:
break
if filename.endswith(".emlx"):
filepath = os.path.join(dirpath, filename)
try:
# Read the .emlx file
with open(filepath, encoding="utf-8", errors="ignore") as f:
content = f.read()
# .emlx files have a length prefix followed by the email content
# The first line contains the length, followed by the email
lines = content.split("\n", 1)
if len(lines) >= 2:
email_content = lines[1]
# Parse the email using Python's email module
try:
msg = email.message_from_string(email_content)
# Extract email metadata
subject = msg.get("Subject", "No Subject")
from_addr = msg.get("From", "Unknown")
to_addr = msg.get("To", "Unknown")
date = msg.get("Date", "Unknown")
# Extract email body
body = ""
if msg.is_multipart():
for part in msg.walk():
if part.get_content_type() == "text/plain":
body = part.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
break
else:
body = msg.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
# Create document content
doc_content = f"""
From: {from_addr}
To: {to_addr}
Subject: {subject}
Date: {date}
{body}
"""
# Create metadata
metadata = {
"file_path": filepath,
"subject": subject,
"from": from_addr,
"to": to_addr,
"date": date,
"filename": filename,
}
doc = Document(text=doc_content, metadata=metadata)
docs.append(doc)
count += 1
except Exception as e:
print(f"Error parsing email from {filepath}: {e}")
continue
except Exception as e:
print(f"Error reading file {filepath}: {e}")
continue
print(f"Loaded {len(docs)} email documents")
return docs
def create_and_save_index(mail_path: str, save_dir: str = "mail_index", max_count: int = 1000):
"""
Create the index from mail data and save it to disk.
Args:
mail_path: Path to the mail directory
save_dir: Directory to save the index
max_count: Maximum number of emails to process
"""
print("Creating index from mail data...")
# Load documents
documents = EmlxReader().load_data(mail_path, max_count=max_count)
if not documents:
print("No documents loaded. Exiting.")
return None
# Create text splitter
text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=0)
# Create index
index = VectorStoreIndex.from_documents(documents, transformations=[text_splitter])
# Save the index
os.makedirs(save_dir, exist_ok=True)
index.storage_context.persist(persist_dir=save_dir)
print(f"Index saved to {save_dir}")
return index
def load_index(save_dir: str = "mail_index"):
"""
Load the saved index from disk.
Args:
save_dir: Directory where the index is saved
Returns:
Loaded index or None if loading fails
"""
try:
# Load storage context
storage_context = StorageContext.from_defaults(persist_dir=save_dir)
# Load index
index = VectorStoreIndex.from_vector_store(
storage_context.vector_store, storage_context=storage_context
)
print(f"Index loaded from {save_dir}")
return index
except Exception as e:
print(f"Error loading index: {e}")
return None
def query_index(index, query: str):
"""
Query the loaded index.
Args:
index: The loaded index
query: The query string
"""
if index is None:
print("No index available for querying.")
return
query_engine = index.as_query_engine()
response = query_engine.query(query)
print(f"Query: {query}")
print(f"Response: {response}")
def main():
mail_path = "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages"
save_dir = "mail_index"
# Check if index already exists
if os.path.exists(save_dir) and os.path.exists(os.path.join(save_dir, "vector_store.json")):
print("Loading existing index...")
index = load_index(save_dir)
else:
print("Creating new index...")
index = create_and_save_index(mail_path, save_dir, max_count=1000)
if index:
# Example queries
queries = [
"Hows Berkeley Graduate Student Instructor",
"What emails mention GSR appointments?",
"Find emails about deadlines",
]
for query in queries:
print("\n" + "=" * 50)
query_index(index, query)
if __name__ == "__main__":
main()
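
As an aside, the load path above reconstructs the index from the bare vector store; llama_index also exposes a one-call loader that restores the persisted index wholesale (a sketch using the standard `llama_index.core` API):

```python
from llama_index.core import StorageContext, load_index_from_storage

storage_context = StorageContext.from_defaults(persist_dir="mail_index")
index = load_index_from_storage(storage_context)  # restores the full VectorStoreIndex
```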

View File

@@ -0,0 +1,219 @@
import email
import os
from typing import Any
from llama_index.core import Document, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.readers.base import BaseReader
class EmlxReader(BaseReader):
"""
Apple Mail .emlx file reader with reduced metadata.
Reads individual .emlx files from Apple Mail's storage format.
"""
def __init__(self) -> None:
"""Initialize."""
pass
def load_data(self, input_dir: str, **load_kwargs: Any) -> list[Document]:
"""
Load data from the input directory containing .emlx files.
Args:
input_dir: Directory containing .emlx files
**load_kwargs:
max_count (int): Maximum number of messages to read.
"""
docs: list[Document] = []
max_count = load_kwargs.get("max_count", 1000)
count = 0
# Walk through the directory recursively
for dirpath, dirnames, filenames in os.walk(input_dir):
# Skip hidden directories
dirnames[:] = [d for d in dirnames if not d.startswith(".")]
for filename in filenames:
if count >= max_count:
break
if filename.endswith(".emlx"):
filepath = os.path.join(dirpath, filename)
try:
# Read the .emlx file
with open(filepath, encoding="utf-8", errors="ignore") as f:
content = f.read()
# .emlx files have a length prefix followed by the email content
# The first line contains the length, followed by the email
lines = content.split("\n", 1)
if len(lines) >= 2:
email_content = lines[1]
# Parse the email using Python's email module
try:
msg = email.message_from_string(email_content)
# Extract email metadata
subject = msg.get("Subject", "No Subject")
from_addr = msg.get("From", "Unknown")
to_addr = msg.get("To", "Unknown")
date = msg.get("Date", "Unknown")
# Extract email body
body = ""
if msg.is_multipart():
for part in msg.walk():
if part.get_content_type() == "text/plain":
body = part.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
break
else:
body = msg.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
# Create document content with metadata embedded in text
doc_content = f"""
From: {from_addr}
To: {to_addr}
Subject: {subject}
Date: {date}
{body}
"""
# Create minimal metadata (only essential info)
metadata = {
"subject": subject[:50], # Truncate subject
"from": from_addr[:30], # Truncate from
"date": date[:20], # Truncate date
"filename": filename, # Keep filename
}
doc = Document(text=doc_content, metadata=metadata)
docs.append(doc)
count += 1
except Exception as e:
print(f"Error parsing email from {filepath}: {e}")
continue
except Exception as e:
print(f"Error reading file {filepath}: {e}")
continue
print(f"Loaded {len(docs)} email documents")
return docs
def create_and_save_index(
mail_path: str, save_dir: str = "mail_index_small", max_count: int = 1000
):
"""
Create the index from mail data and save it to disk.
Args:
mail_path: Path to the mail directory
save_dir: Directory to save the index
max_count: Maximum number of emails to process
"""
print("Creating index from mail data with small chunks...")
# Load documents
documents = EmlxReader().load_data(mail_path, max_count=max_count)
if not documents:
print("No documents loaded. Exiting.")
return None
# Create text splitter with small chunk size
text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=50)
# Create index
index = VectorStoreIndex.from_documents(documents, transformations=[text_splitter])
# Save the index
os.makedirs(save_dir, exist_ok=True)
index.storage_context.persist(persist_dir=save_dir)
print(f"Index saved to {save_dir}")
return index
def load_index(save_dir: str = "mail_index_small"):
"""
Load the saved index from disk.
Args:
save_dir: Directory where the index is saved
Returns:
Loaded index or None if loading fails
"""
try:
# Load storage context
storage_context = StorageContext.from_defaults(persist_dir=save_dir)
# Load index
index = VectorStoreIndex.from_vector_store(
storage_context.vector_store, storage_context=storage_context
)
print(f"Index loaded from {save_dir}")
return index
except Exception as e:
print(f"Error loading index: {e}")
return None
def query_index(index, query: str):
"""
Query the loaded index.
Args:
index: The loaded index
query: The query string
"""
if index is None:
print("No index available for querying.")
return
query_engine = index.as_query_engine()
response = query_engine.query(query)
print(f"Query: {query}")
print(f"Response: {response}")
def main():
mail_path = "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages"
save_dir = "mail_index_small"
# Check if index already exists
if os.path.exists(save_dir) and os.path.exists(os.path.join(save_dir, "vector_store.json")):
print("Loading existing index...")
index = load_index(save_dir)
else:
print("Creating new index...")
index = create_and_save_index(mail_path, save_dir, max_count=1000)
if index:
# Example queries
queries = [
"Hows Berkeley Graduate Student Instructor",
"What emails mention GSR appointments?",
"Find emails about deadlines",
]
for query in queries:
print("\n" + "=" * 50)
query_index(index, query)
if __name__ == "__main__":
main()

test/mail_reader_test.py (new file, 154 lines)
View File

@@ -0,0 +1,154 @@
import email
import os
from typing import Any
from llama_index.core import Document, VectorStoreIndex
from llama_index.core.readers.base import BaseReader
class EmlxReader(BaseReader):
"""
Apple Mail .emlx file reader.
Reads individual .emlx files from Apple Mail's storage format.
"""
def __init__(self) -> None:
"""Initialize."""
pass
def load_data(self, input_dir: str, **load_kwargs: Any) -> list[Document]:
"""
Load data from the input directory containing .emlx files.
Args:
input_dir: Directory containing .emlx files
**load_kwargs:
max_count (int): Maximum number of messages to read.
"""
docs: list[Document] = []
max_count = load_kwargs.get("max_count", 1000)
count = 0
# Check if directory exists and is accessible
if not os.path.exists(input_dir):
print(f"Error: Directory '{input_dir}' does not exist")
return docs
if not os.access(input_dir, os.R_OK):
print(f"Error: Directory '{input_dir}' is not accessible (permission denied)")
print("This is likely due to macOS security restrictions on Mail app data")
return docs
print(f"Scanning directory: {input_dir}")
# Walk through the directory recursively
for dirpath, dirnames, filenames in os.walk(input_dir):
# Skip hidden directories
dirnames[:] = [d for d in dirnames if not d.startswith(".")]
for filename in filenames:
if count >= max_count:
break
if filename.endswith(".emlx"):
filepath = os.path.join(dirpath, filename)
print(f"Found .emlx file: {filepath}")
try:
# Read the .emlx file
with open(filepath, encoding="utf-8", errors="ignore") as f:
content = f.read()
# .emlx files have a length prefix followed by the email content
# The first line contains the length, followed by the email
lines = content.split("\n", 1)
if len(lines) >= 2:
email_content = lines[1]
# Parse the email using Python's email module
try:
msg = email.message_from_string(email_content)
# Extract email metadata
subject = msg.get("Subject", "No Subject")
from_addr = msg.get("From", "Unknown")
to_addr = msg.get("To", "Unknown")
date = msg.get("Date", "Unknown")
# Extract email body
body = ""
if msg.is_multipart():
for part in msg.walk():
if part.get_content_type() == "text/plain":
body = part.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
break
else:
body = msg.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
# Create document content
doc_content = f"""
From: {from_addr}
To: {to_addr}
Subject: {subject}
Date: {date}
{body}
"""
# Create metadata
metadata = {
"file_path": filepath,
"subject": subject,
"from": from_addr,
"to": to_addr,
"date": date,
"filename": filename,
}
doc = Document(text=doc_content, metadata=metadata)
docs.append(doc)
count += 1
except Exception as e:
print(f"Error parsing email from {filepath}: {e}")
continue
except Exception as e:
print(f"Error reading file {filepath}: {e}")
continue
print(f"Loaded {len(docs)} email documents")
return docs
def main():
# Use the current directory where the sample.emlx file is located
current_dir = os.path.dirname(os.path.abspath(__file__))
print("Testing EmlxReader with sample .emlx file...")
print(f"Scanning directory: {current_dir}")
# Use the custom EmlxReader
documents = EmlxReader().load_data(current_dir, max_count=1000)
if not documents:
print("No documents loaded. Make sure sample.emlx exists in the examples directory.")
return
print(f"\nSuccessfully loaded {len(documents)} document(s)")
# Initialize index with documents
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
print("\nTesting query: 'Hows Berkeley Graduate Student Instructor'")
res = query_engine.query("Hows Berkeley Graduate Student Instructor")
print(f"Response: {res}")
if __name__ == "__main__":
main()
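
To run this test without access to real Mail data, a `sample.emlx` can be synthesized in the same directory; a minimal sketch (all message contents are illustrative):

```python
# Write a synthetic .emlx: a byte-count line followed by the raw message
msg = (
    "From: alice@example.com\n"
    "To: bob@example.com\n"
    "Subject: Hows Berkeley Graduate Student Instructor\n"
    "Date: Mon, 01 Jan 2024 00:00:00 +0000\n"
    "\n"
    "GSI duties include leading discussion sections.\n"
).encode()

with open("sample.emlx", "wb") as f:
    f.write(str(len(msg)).encode() + b"\n" + msg)
```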

test/query_saved_index.py (new file, 105 lines)
View File

@@ -0,0 +1,105 @@
import os
from llama_index.core import StorageContext, VectorStoreIndex
def load_index(save_dir: str = "mail_index"):
"""
Load the saved index from disk.
Args:
save_dir: Directory where the index is saved
Returns:
Loaded index or None if loading fails
"""
try:
# Load storage context
storage_context = StorageContext.from_defaults(persist_dir=save_dir)
# Load index
index = VectorStoreIndex.from_vector_store(
storage_context.vector_store, storage_context=storage_context
)
print(f"Index loaded from {save_dir}")
return index
except Exception as e:
print(f"Error loading index: {e}")
return None
def query_index(index, query: str):
"""
Query the loaded index.
Args:
index: The loaded index
query: The query string
"""
if index is None:
print("No index available for querying.")
return
query_engine = index.as_query_engine()
response = query_engine.query(query)
print(f"\nQuery: {query}")
print(f"Response: {response}")
def main():
save_dir = "mail_index"
# Check if index exists
if not os.path.exists(save_dir) or not os.path.exists(
os.path.join(save_dir, "vector_store.json")
):
print(f"Index not found in {save_dir}")
print("Please run mail_reader_save_load.py first to create the index.")
return
# Load the index
index = load_index(save_dir)
if not index:
print("Failed to load index.")
return
print("\n" + "=" * 60)
print("Email Query Interface")
print("=" * 60)
print("Type 'quit' to exit")
print("Type 'help' for example queries")
print("=" * 60)
# Interactive query loop
while True:
try:
query = input("\nEnter your query: ").strip()
if query.lower() == "quit":
print("Goodbye!")
break
elif query.lower() == "help":
print("\nExample queries:")
print("- Hows Berkeley Graduate Student Instructor")
print("- What emails mention GSR appointments?")
print("- Find emails about deadlines")
print("- Search for emails from specific sender")
print("- Find emails about meetings")
continue
elif not query:
continue
query_index(index, query)
except KeyboardInterrupt:
print("\nGoodbye!")
break
except Exception as e:
print(f"Error processing query: {e}")
if __name__ == "__main__":
main()

View File

@@ -1,24 +1,9 @@
# 🧪 LEANN Benchmarks & Testing
# 🧪 Leann Sanity Checks
This directory contains performance benchmarks and comprehensive tests for the LEANN system, including backend comparisons and sanity checks across different configurations.
This directory contains comprehensive sanity checks for the Leann system, ensuring all components work correctly across different configurations.
## 📁 Test Files
### `diskann_vs_hnsw_speed_comparison.py`
Performance comparison between DiskANN and HNSW backends:
- ✅ **Search latency** comparison with both backends using recompute
- ✅ **Index size** and **build time** measurements
- ✅ **Score validity** testing (ensures no -inf scores)
- ✅ **Configurable dataset sizes** for different scales
```bash
# Quick comparison with 500 docs, 10 queries
python benchmarks/diskann_vs_hnsw_speed_comparison.py
# Large-scale comparison with 2000 docs, 20 queries
python benchmarks/diskann_vs_hnsw_speed_comparison.py 2000 20
```
### `test_distance_functions.py`
Tests all supported distance functions across DiskANN backend:
- ✅ **MIPS** (Maximum Inner Product Search)

View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python3
"""
Debug script to test ZMQ communication with the exact same setup as main_cli_example.py
"""
import os
import sys
import time
import zmq
sys.path.append("packages/leann-backend-diskann")
from leann_backend_diskann import embedding_pb2
def test_zmq_with_same_model():
print("=== Testing ZMQ with same model as main_cli_example.py ===")
# Test the exact same model that main_cli_example.py uses
model_name = "sentence-transformers/all-mpnet-base-v2"
# Start server with the same model
import subprocess
server_cmd = [
sys.executable,
"-m",
"packages.leann-backend-diskann.leann_backend_diskann.embedding_server",
"--zmq-port",
"5556", # Use different port to avoid conflicts
"--model-name",
model_name,
]
print(f"Starting server with command: {' '.join(server_cmd)}")
server_process = subprocess.Popen(
server_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
env={**os.environ, "PYTHONPATH": "packages/leann-backend-diskann"},  # make the backend importable in the child process
)
# Wait for server to start
print("Waiting for server to start...")
time.sleep(10)
# Check if server is running
if server_process.poll() is not None:
stdout, stderr = server_process.communicate()
print(f"Server failed to start. stdout: {stdout}")
print(f"Server failed to start. stderr: {stderr}")
return False
print(f"Server started with PID: {server_process.pid}")
try:
# Test client
context = zmq.Context()
socket = context.socket(zmq.REQ)
socket.connect("tcp://127.0.0.1:5556")
socket.setsockopt(zmq.RCVTIMEO, 30000) # 30 second timeout like C++
socket.setsockopt(zmq.SNDTIMEO, 30000)
# Create request with same format as C++
request = embedding_pb2.NodeEmbeddingRequest()
request.node_ids.extend([0, 1, 2, 3, 4]) # Test with some node IDs
print(f"Sending request with {len(request.node_ids)} node IDs...")
start_time = time.time()
# Send request
socket.send(request.SerializeToString())
# Receive response
response_data = socket.recv()
end_time = time.time()
print(f"Received response in {end_time - start_time:.3f} seconds")
print(f"Response size: {len(response_data)} bytes")
# Parse response
response = embedding_pb2.NodeEmbeddingResponse()
response.ParseFromString(response_data)
print(f"Response dimensions: {list(response.dimensions)}")
print(f"Embeddings data size: {len(response.embeddings_data)} bytes")
print(f"Missing IDs: {list(response.missing_ids)}")
# Calculate expected size
if len(response.dimensions) == 2:
batch_size = response.dimensions[0]
embedding_dim = response.dimensions[1]
expected_bytes = batch_size * embedding_dim * 4 # 4 bytes per float
print(f"Expected bytes: {expected_bytes}, Actual: {len(response.embeddings_data)}")
if len(response.embeddings_data) == expected_bytes:
print("✅ Response format is correct!")
return True
else:
print("❌ Response format mismatch!")
return False
else:
print("❌ Invalid response dimensions!")
return False
except Exception as e:
print(f"❌ Error during ZMQ test: {e}")
return False
finally:
# Clean up
server_process.terminate()
server_process.wait()
print("Server terminated")
if __name__ == "__main__":
success = test_zmq_with_same_model()
if success:
print("\n✅ ZMQ communication test passed!")
else:
print("\n❌ ZMQ communication test failed!")

View File

@@ -6,11 +6,10 @@ This directory contains automated tests for the LEANN project using pytest.
### `test_readme_examples.py`
Tests the examples shown in README.md:
- The basic example code that users see first (parametrized for both HNSW and DiskANN backends)
- The basic example code that users see first
- Import statements work correctly
- Different backend options (HNSW, DiskANN)
- Different LLM configuration options (parametrized for both backends)
- **All main README examples are tested with both HNSW and DiskANN backends using pytest parametrization**
- Different LLM configuration options
### `test_basic.py`
Basic functionality tests that verify:
@@ -19,23 +18,13 @@ Basic functionality tests that verify:
- Basic index building and searching works for both HNSW and DiskANN backends
- Uses parametrized tests to test both backends
### `test_document_rag.py`
Tests the document RAG example functionality:
### `test_main_cli.py`
Tests the main CLI example functionality:
- Tests with facebook/contriever embeddings
- Tests with OpenAI embeddings (if API key is available)
- Tests error handling with invalid parameters
- Verifies that normalized embeddings are detected and cosine distance is used
### `test_diskann_partition.py`
Tests DiskANN graph partitioning functionality:
- Tests DiskANN index building without partitioning (baseline)
- Tests automatic graph partitioning with `is_recompute=True`
- Verifies that partition files are created and large files are cleaned up for storage saving
- Tests search functionality with partitioned indices
- Validates medoid and max_base_norm file generation and usage
- Includes performance comparison between DiskANN (with partition) and HNSW
- **Note**: These tests are skipped in CI due to hardware requirements and computation time
## Running Tests
### Install test dependencies:
@@ -65,23 +54,15 @@ pytest tests/ -m "not openai"
# Skip slow tests
pytest tests/ -m "not slow"
# Run DiskANN partition tests (requires local machine, not CI)
pytest tests/test_diskann_partition.py
```
### Run with specific backend:
```bash
# Test only HNSW backend
pytest tests/test_basic.py::test_backend_basic[hnsw]
pytest tests/test_readme_examples.py::test_readme_basic_example[hnsw]
# Test only DiskANN backend
pytest tests/test_basic.py::test_backend_basic[diskann]
pytest tests/test_readme_examples.py::test_readme_basic_example[diskann]
# All DiskANN tests (parametrized + specialized partition tests)
pytest tests/ -k diskann
```
## CI/CD Integration

View File

@@ -64,9 +64,6 @@ def test_backend_basic(backend_name):
assert isinstance(results[0], SearchResult)
assert "topic 2" in results[0].text or "document" in results[0].text
# Ensure cleanup to avoid hanging background servers
searcher.cleanup()
@pytest.mark.skipif(
os.environ.get("CI") == "true", reason="Skip model tests in CI to avoid MPS memory issues"
@@ -93,5 +90,3 @@ def test_large_index():
searcher = LeannSearcher(index_path)
results = searcher.search(["word10 word20"], top_k=10)
assert len(results[0]) == 10
# Cleanup
searcher.cleanup()

View File

@@ -20,7 +20,7 @@ def test_package_imports():
def test_cli_help():
"""Test that CLI example shows help."""
result = subprocess.run(
[sys.executable, "apps/document_rag.py", "--help"], capture_output=True, text=True
[sys.executable, "examples/main_cli_example.py", "--help"], capture_output=True, text=True
)
assert result.returncode == 0

View File

@@ -1,369 +0,0 @@
"""
Test DiskANN graph partitioning functionality.
Tests the automatic graph partitioning feature that was implemented to save
storage space by partitioning large DiskANN indices and safely deleting
redundant files while maintaining search functionality.
"""
import os
import tempfile
from pathlib import Path
import pytest
@pytest.mark.skipif(
os.environ.get("CI") == "true",
reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
)
def test_diskann_without_partition():
"""Test DiskANN index building without partition (baseline)."""
from leann.api import LeannBuilder, LeannSearcher
with tempfile.TemporaryDirectory() as temp_dir:
index_path = str(Path(temp_dir) / "test_no_partition.leann")
# Test data - enough to trigger index building
texts = [
f"Document {i} discusses topic {i % 10} with detailed analysis of subject {i // 10}."
for i in range(500)
]
# Build without partition (is_recompute=False)
builder = LeannBuilder(
backend_name="diskann",
embedding_model="facebook/contriever",
embedding_mode="sentence-transformers",
num_neighbors=32,
search_list_size=50,
is_recompute=False, # No partition
)
for text in texts:
builder.add_text(text)
builder.build_index(index_path)
# Verify index was created
index_dir = Path(index_path).parent
assert index_dir.exists()
# Check that traditional DiskANN files exist
index_prefix = Path(index_path).stem
# Core DiskANN files (beam search index may not be created for small datasets)
required_files = [
f"{index_prefix}_disk.index",
f"{index_prefix}_pq_compressed.bin",
f"{index_prefix}_pq_pivots.bin",
]
# Check all generated files first for debugging
generated_files = [f.name for f in index_dir.glob(f"{index_prefix}*")]
print(f"Generated files: {generated_files}")
for required_file in required_files:
file_path = index_dir / required_file
assert file_path.exists(), f"Required file {required_file} not found"
# Ensure no partition files exist in non-partition mode
partition_files = [f"{index_prefix}_disk_graph.index", f"{index_prefix}_partition.bin"]
for partition_file in partition_files:
file_path = index_dir / partition_file
assert not file_path.exists(), (
f"Partition file {partition_file} should not exist in non-partition mode"
)
# Test search functionality
searcher = LeannSearcher(index_path)
results = searcher.search("topic 3 analysis", top_k=3)
assert len(results) > 0
assert all(result.score is not None and result.score != float("-inf") for result in results)
@pytest.mark.skipif(
os.environ.get("CI") == "true",
reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
)
def test_diskann_with_partition():
"""Test DiskANN index building with automatic graph partitioning."""
from leann.api import LeannBuilder
with tempfile.TemporaryDirectory() as temp_dir:
index_path = str(Path(temp_dir) / "test_with_partition.leann")
# Test data - enough to trigger partitioning
texts = [
f"Document {i} explores subject {i % 15} with comprehensive coverage of area {i // 15}."
for i in range(500)
]
# Build with partition (is_recompute=True)
builder = LeannBuilder(
backend_name="diskann",
embedding_model="facebook/contriever",
embedding_mode="sentence-transformers",
num_neighbors=32,
search_list_size=50,
is_recompute=True, # Enable automatic partitioning
)
for text in texts:
builder.add_text(text)
builder.build_index(index_path)
# Verify index was created
index_dir = Path(index_path).parent
assert index_dir.exists()
# Check that partition files exist
index_prefix = Path(index_path).stem
partition_files = [
f"{index_prefix}_disk_graph.index", # Partitioned graph
f"{index_prefix}_partition.bin", # Partition metadata
f"{index_prefix}_pq_compressed.bin",
f"{index_prefix}_pq_pivots.bin",
]
for partition_file in partition_files:
file_path = index_dir / partition_file
assert file_path.exists(), f"Expected partition file {partition_file} not found"
# Check that large files were cleaned up (storage saving goal)
large_files = [f"{index_prefix}_disk.index", f"{index_prefix}_disk_beam_search.index"]
for large_file in large_files:
file_path = index_dir / large_file
assert not file_path.exists(), (
f"Large file {large_file} should have been deleted for storage saving"
)
# Verify required auxiliary files for partition mode exist
required_files = [
f"{index_prefix}_disk.index_medoids.bin",
f"{index_prefix}_disk.index_max_base_norm.bin",
]
for req_file in required_files:
file_path = index_dir / req_file
assert file_path.exists(), (
f"Required auxiliary file {req_file} missing for partition mode"
)
@pytest.mark.skipif(
os.environ.get("CI") == "true",
reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
)
def test_diskann_partition_search_functionality():
"""Test that search works correctly with partitioned indices."""
from leann.api import LeannBuilder, LeannSearcher
with tempfile.TemporaryDirectory() as temp_dir:
index_path = str(Path(temp_dir) / "test_partition_search.leann")
# Create diverse test data
texts = [
"LEANN is a storage-efficient approximate nearest neighbor search system.",
"Graph partitioning helps reduce memory usage in large scale vector search.",
"DiskANN provides high-performance disk-based approximate nearest neighbor search.",
"Vector embeddings enable semantic search over unstructured text data.",
"Approximate nearest neighbor algorithms trade accuracy for speed and storage.",
] * 100 # Repeat to get enough data
# Build with partitioning
builder = LeannBuilder(
backend_name="diskann",
embedding_model="facebook/contriever",
embedding_mode="sentence-transformers",
is_recompute=True, # Enable partitioning
)
for text in texts:
builder.add_text(text)
builder.build_index(index_path)
# Test search with partitioned index
searcher = LeannSearcher(index_path)
# Test various queries
test_queries = [
("vector search algorithms", 5),
("LEANN storage efficiency", 3),
("graph partitioning memory", 4),
("approximate nearest neighbor", 7),
]
for query, top_k in test_queries:
results = searcher.search(query, top_k=top_k)
# Verify search results
assert len(results) == top_k, f"Expected {top_k} results for query '{query}'"
assert all(result.score is not None for result in results), (
"All results should have scores"
)
assert all(result.score != float("-inf") for result in results), (
"No result should have -inf score"
)
assert all(result.text is not None for result in results), (
"All results should have text"
)
# Scores should be in descending order (higher similarity first)
scores = [result.score for result in results]
assert scores == sorted(scores, reverse=True), (
"Results should be sorted by score descending"
)
@pytest.mark.skipif(
os.environ.get("CI") == "true",
reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
)
def test_diskann_medoid_and_norm_files():
"""Test that medoid and max_base_norm files are correctly generated and used."""
import struct
from leann.api import LeannBuilder, LeannSearcher
with tempfile.TemporaryDirectory() as temp_dir:
index_path = str(Path(temp_dir) / "test_medoid_norm.leann")
# Small but sufficient dataset
texts = [f"Test document {i} with content about subject {i % 10}." for i in range(200)]
builder = LeannBuilder(
backend_name="diskann",
embedding_model="facebook/contriever",
embedding_mode="sentence-transformers",
is_recompute=True,
)
for text in texts:
builder.add_text(text)
builder.build_index(index_path)
index_dir = Path(index_path).parent
index_prefix = Path(index_path).stem
# Test medoids file
medoids_file = index_dir / f"{index_prefix}_disk.index_medoids.bin"
assert medoids_file.exists(), "Medoids file should be generated"
# Read and validate medoids file format
with open(medoids_file, "rb") as f:
nshards = struct.unpack("<I", f.read(4))[0]
one_val = struct.unpack("<I", f.read(4))[0]
medoid_id = struct.unpack("<I", f.read(4))[0]
assert nshards == 1, "Single-shot build should have 1 shard"
assert one_val == 1, "Expected value should be 1"
assert medoid_id >= 0, "Medoid ID should be valid (not hardcoded 0)"
# Test max_base_norm file
norm_file = index_dir / f"{index_prefix}_disk.index_max_base_norm.bin"
assert norm_file.exists(), "Max base norm file should be generated"
# Read and validate norm file
with open(norm_file, "rb") as f:
npts = struct.unpack("<I", f.read(4))[0]
ndims = struct.unpack("<I", f.read(4))[0]
norm_val = struct.unpack("<f", f.read(4))[0]
assert npts == 1, "Should have 1 norm point"
assert ndims == 1, "Should have 1 dimension"
assert norm_val > 0, "Norm value should be positive"
assert norm_val != float("inf"), "Norm value should be finite"
# Test that search works with these files
searcher = LeannSearcher(index_path)
results = searcher.search("test subject", top_k=3)
# Verify that scores are not -inf (which indicates norm file was loaded correctly)
assert len(results) > 0
assert all(result.score != float("-inf") for result in results), (
"Scores should not be -inf when norm file is correct"
)
@pytest.mark.skipif(
os.environ.get("CI") == "true",
reason="Skip performance comparison in CI - requires significant compute time",
)
def test_diskann_vs_hnsw_performance():
"""Compare DiskANN (with partition) vs HNSW performance."""
import time
from leann.api import LeannBuilder, LeannSearcher
with tempfile.TemporaryDirectory() as temp_dir:
# Test data
texts = [
f"Performance test document {i} covering topic {i % 20} in detail." for i in range(1000)
]
query = "performance topic test"
# Test DiskANN with partitioning
diskann_path = str(Path(temp_dir) / "perf_diskann.leann")
diskann_builder = LeannBuilder(
backend_name="diskann",
embedding_model="facebook/contriever",
embedding_mode="sentence-transformers",
is_recompute=True,
)
for text in texts:
diskann_builder.add_text(text)
start_time = time.time()
diskann_builder.build_index(diskann_path)
# Test HNSW
hnsw_path = str(Path(temp_dir) / "perf_hnsw.leann")
hnsw_builder = LeannBuilder(
backend_name="hnsw",
embedding_model="facebook/contriever",
embedding_mode="sentence-transformers",
is_recompute=True,
)
for text in texts:
hnsw_builder.add_text(text)
start_time = time.time()
hnsw_builder.build_index(hnsw_path)
# Compare search performance
diskann_searcher = LeannSearcher(diskann_path)
hnsw_searcher = LeannSearcher(hnsw_path)
# Warm up searches
diskann_searcher.search(query, top_k=5)
hnsw_searcher.search(query, top_k=5)
# Timed searches
start_time = time.time()
diskann_results = diskann_searcher.search(query, top_k=10)
diskann_search_time = time.time() - start_time
start_time = time.time()
hnsw_results = hnsw_searcher.search(query, top_k=10)
hnsw_search_time = time.time() - start_time
# Basic assertions
assert len(diskann_results) == 10
assert len(hnsw_results) == 10
assert all(r.score != float("-inf") for r in diskann_results)
assert all(r.score != float("-inf") for r in hnsw_results)
# Performance ratio (informational)
if hnsw_search_time > 0:
speed_ratio = hnsw_search_time / diskann_search_time
print(f"DiskANN search time: {diskann_search_time:.4f}s")
print(f"HNSW search time: {hnsw_search_time:.4f}s")
print(f"DiskANN is {speed_ratio:.2f}x faster than HNSW")

View File

@@ -1,5 +1,5 @@
"""
Test document_rag functionality using pytest.
Test main_cli_example functionality using pytest.
"""
import os
@@ -14,20 +14,20 @@ import pytest
@pytest.fixture
def test_data_dir():
"""Return the path to test data directory."""
return Path("data")
return Path("examples/data")
@pytest.mark.skipif(
os.environ.get("CI") == "true", reason="Skip model tests in CI to avoid MPS memory issues"
)
def test_document_rag_simulated(test_data_dir):
"""Test document_rag with simulated LLM."""
def test_main_cli_simulated(test_data_dir):
"""Test main_cli with simulated LLM."""
with tempfile.TemporaryDirectory() as temp_dir:
# Use a subdirectory that doesn't exist yet to force index creation
index_dir = Path(temp_dir) / "test_index"
cmd = [
sys.executable,
"apps/document_rag.py",
"examples/main_cli_example.py",
"--llm",
"simulated",
"--embedding-model",
@@ -53,22 +53,19 @@ def test_document_rag_simulated(test_data_dir):
# Verify output
output = result.stdout + result.stderr
assert "Index saved to" in output or "Using existing index" in output
assert "Leann index built at" in output or "Using existing index" in output
assert "This is a simulated answer" in output
@pytest.mark.skipif(not os.environ.get("OPENAI_API_KEY"), reason="OpenAI API key not available")
@pytest.mark.skipif(
os.environ.get("CI") == "true", reason="Skip OpenAI tests in CI to avoid API costs"
)
def test_document_rag_openai(test_data_dir):
"""Test document_rag with OpenAI embeddings."""
def test_main_cli_openai(test_data_dir):
"""Test main_cli with OpenAI embeddings."""
with tempfile.TemporaryDirectory() as temp_dir:
# Use a subdirectory that doesn't exist yet to force index creation
index_dir = Path(temp_dir) / "test_index_openai"
cmd = [
sys.executable,
"apps/document_rag.py",
"examples/main_cli_example.py",
"--llm",
"simulated", # Use simulated LLM to avoid GPT-4 costs
"--embedding-model",
@@ -102,12 +99,12 @@ def test_document_rag_openai(test_data_dir):
)
def test_document_rag_error_handling(test_data_dir):
"""Test document_rag with invalid parameters."""
def test_main_cli_error_handling(test_data_dir):
"""Test main_cli with invalid parameters."""
with tempfile.TemporaryDirectory() as temp_dir:
cmd = [
sys.executable,
"apps/document_rag.py",
"examples/main_cli_example.py",
"--llm",
"invalid_llm_type",
"--index-dir",
@@ -120,4 +117,4 @@ def test_document_rag_error_handling(test_data_dir):
# Should fail with invalid LLM type
assert result.returncode != 0
assert "invalid choice" in result.stderr or "invalid_llm_type" in result.stderr
assert "Unknown LLM type" in result.stderr or "invalid_llm_type" in result.stderr

View File

@@ -10,33 +10,29 @@ from pathlib import Path
import pytest
@pytest.mark.parametrize("backend_name", ["hnsw", "diskann"])
def test_readme_basic_example(backend_name):
"""Test the basic example from README.md with both backends."""
def test_readme_basic_example():
"""Test the basic example from README.md."""
# Skip on macOS CI due to MPS environment issues with all-MiniLM-L6-v2
if os.environ.get("CI") == "true" and platform.system() == "Darwin":
pytest.skip("Skipping on macOS CI due to MPS environment issues with all-MiniLM-L6-v2")
# Skip DiskANN on CI (Linux runners) due to C++ extension memory/hardware constraints
if os.environ.get("CI") == "true" and backend_name == "diskann":
pytest.skip("Skip DiskANN tests in CI due to resource constraints and instability")
# This is the exact code from README (with smaller model for CI)
from leann import LeannBuilder, LeannChat, LeannSearcher
from leann.api import SearchResult
with tempfile.TemporaryDirectory() as temp_dir:
INDEX_PATH = str(Path(temp_dir) / f"demo_{backend_name}.leann")
INDEX_PATH = str(Path(temp_dir) / "demo.leann")
# Build an index
# In CI, use a smaller model to avoid memory issues
if os.environ.get("CI") == "true":
builder = LeannBuilder(
backend_name=backend_name,
backend_name="hnsw",
embedding_model="sentence-transformers/all-MiniLM-L6-v2", # Smaller model
dimensions=384, # Smaller dimensions
)
else:
builder = LeannBuilder(backend_name=backend_name)
builder = LeannBuilder(backend_name="hnsw")
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
builder.add_text("Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back")
builder.build_index(INDEX_PATH)
@@ -56,15 +52,9 @@ def test_readme_basic_example(backend_name):
# Verify search results
assert len(results) > 0
assert isinstance(results[0], SearchResult)
assert results[0].score != float("-inf"), (
f"should return valid scores, got {results[0].score}"
)
# The second text about banana-crocodile should be more relevant
assert "banana" in results[0].text or "crocodile" in results[0].text
# Ensure we cleanup background embedding server
searcher.cleanup()
# Chat with your data (using simulated LLM to avoid external dependencies)
chat = LeannChat(INDEX_PATH, llm_config={"type": "simulated"})
response = chat.ask("How much storage does LEANN save?", top_k=1)
@@ -72,8 +62,6 @@ def test_readme_basic_example(backend_name):
# Verify chat works
assert isinstance(response, str)
assert len(response) > 0
# Cleanup chat resources
chat.cleanup()
def test_readme_imports():
@@ -122,31 +110,26 @@ def test_backend_options():
assert len(list(Path(diskann_path).parent.glob(f"{Path(diskann_path).stem}.*"))) > 0
@pytest.mark.parametrize("backend_name", ["hnsw", "diskann"])
def test_llm_config_simulated(backend_name):
"""Test simulated LLM configuration option with both backends."""
def test_llm_config_simulated():
"""Test simulated LLM configuration option."""
# Skip on macOS CI due to MPS environment issues with all-MiniLM-L6-v2
if os.environ.get("CI") == "true" and platform.system() == "Darwin":
pytest.skip("Skipping on macOS CI due to MPS environment issues with all-MiniLM-L6-v2")
# Skip DiskANN tests in CI due to hardware requirements
if os.environ.get("CI") == "true" and backend_name == "diskann":
pytest.skip("Skip DiskANN tests in CI - requires specific hardware and large memory")
from leann import LeannBuilder, LeannChat
with tempfile.TemporaryDirectory() as temp_dir:
# Build a simple index
index_path = str(Path(temp_dir) / f"test_{backend_name}.leann")
index_path = str(Path(temp_dir) / "test.leann")
# Use smaller model in CI to avoid memory issues
if os.environ.get("CI") == "true":
builder = LeannBuilder(
backend_name=backend_name,
backend_name="hnsw",
embedding_model="sentence-transformers/all-MiniLM-L6-v2",
dimensions=384,
)
else:
builder = LeannBuilder(backend_name=backend_name)
builder = LeannBuilder(backend_name="hnsw")
builder.add_text("Test document for LLM testing")
builder.build_index(index_path)

uv.lock (generated, 431 lines)
View File

@@ -294,23 +294,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646 },
]
[[package]]
name = "bleach"
version = "6.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "webencodings" },
]
sdist = { url = "https://files.pythonhosted.org/packages/76/9a/0e33f5054c54d349ea62c277191c020c2d6ef1d65ab2cb1993f91ec846d1/bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f", size = 203083 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e", size = 163406 },
]
[package.optional-dependencies]
css = [
{ name = "tinycss2" },
]
[[package]]
name = "blinker"
version = "1.9.0"
@@ -1269,15 +1252,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
]
[[package]]
name = "fastjsonschema"
version = "2.21.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/8b/50/4b769ce1ac4071a1ef6d86b1a3fb56cdc3a37615e8c5519e1af96cdac366/fastjsonschema-2.21.1.tar.gz", hash = "sha256:794d4f0a58f848961ba16af7b9c85a3e88cd360df008c59aac6fc5ae9323b5d4", size = 373939 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/90/2b/0817a2b257fe88725c25589d89aec060581aabf668707a8d03b2e9e0cb2a/fastjsonschema-2.21.1-py3-none-any.whl", hash = "sha256:c9e5b7e908310918cf494a434eeb31384dd84a98b57a30bcb1f535015b554667", size = 23924 },
]
[[package]]
name = "filelock"
version = "3.18.0"
@@ -1504,12 +1478,6 @@ http = [
{ name = "aiohttp" },
]
[[package]]
name = "gitignore-parser"
version = "0.1.12"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/86/a8/faf07759672973362e3f1f9742281a90aec7846e8a4043c4df5652990054/gitignore_parser-0.1.12.tar.gz", hash = "sha256:78b22243adc0f02102c56c5e8c9a1d9121394142ca6143a90daa7f8d7a07a17e", size = 5407 }
[[package]]
name = "greenlet"
version = "3.2.3"
@@ -1682,7 +1650,7 @@ name = "importlib-metadata"
version = "8.7.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "zipp", marker = "python_full_version < '3.10'" },
{ name = "zipp" },
]
sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641 }
wheels = [
@@ -1959,33 +1927,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7d/4f/1195bbac8e0c2acc5f740661631d8d750dc38d4a32b23ee5df3cde6f4e0d/joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a", size = 307746 },
]
[[package]]
name = "jsonschema"
version = "4.25.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
{ name = "jsonschema-specifications" },
{ name = "referencing" },
{ name = "rpds-py" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d5/00/a297a868e9d0784450faa7365c2172a7d6110c763e30ba861867c32ae6a9/jsonschema-4.25.0.tar.gz", hash = "sha256:e63acf5c11762c0e6672ffb61482bdf57f0876684d8d249c0fe2d730d48bc55f", size = 356830 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fe/54/c86cd8e011fe98803d7e382fd67c0df5ceab8d2b7ad8c5a81524f791551c/jsonschema-4.25.0-py3-none-any.whl", hash = "sha256:24c2e8da302de79c8b9382fee3e76b355e44d2a4364bb207159ce10b517bd716", size = 89184 },
]
[[package]]
name = "jsonschema-specifications"
version = "2025.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "referencing" },
]
sdist = { url = "https://files.pythonhosted.org/packages/bf/ce/46fbd9c8119cfc3581ee5643ea49464d168028cfb5caff5fc0596d0cf914/jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608", size = 15513 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/01/0e/b27cdbaccf30b890c40ed1da9fd4a3593a5cf94dae54fb34f8a4b74fcd3f/jsonschema_specifications-2025.4.1-py3-none-any.whl", hash = "sha256:4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af", size = 18437 },
]
[[package]]
name = "jupyter-client"
version = "8.6.3"
@@ -2017,15 +1958,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2f/57/6bffd4b20b88da3800c5d691e0337761576ee688eb01299eae865689d2df/jupyter_core-5.8.1-py3-none-any.whl", hash = "sha256:c28d268fc90fb53f1338ded2eb410704c5449a358406e8a948b75706e24863d0", size = 28880 },
]
[[package]]
name = "jupyterlab-pygments"
version = "0.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/90/51/9187be60d989df97f5f0aba133fa54e7300f17616e065d1ada7d7646b6d6/jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d", size = 512900 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780", size = 15884 },
]
[[package]]
name = "kiwisolver"
version = "1.4.7"
@@ -2223,7 +2155,7 @@ wheels = [
[[package]]
name = "leann-backend-diskann"
version = "0.2.9"
version = "0.1.15"
source = { editable = "packages/leann-backend-diskann" }
dependencies = [
{ name = "leann-core" },
@@ -2235,14 +2167,14 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "leann-core", specifier = "==0.2.9" },
{ name = "leann-core", specifier = "==0.1.15" },
{ name = "numpy" },
{ name = "protobuf", specifier = ">=3.19.0" },
]
[[package]]
name = "leann-backend-hnsw"
version = "0.2.9"
version = "0.1.15"
source = { editable = "packages/leann-backend-hnsw" }
dependencies = [
{ name = "leann-core" },
@@ -2255,7 +2187,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "leann-core", specifier = "==0.2.9" },
{ name = "leann-core", specifier = "==0.1.15" },
{ name = "msgpack", specifier = ">=1.0.0" },
{ name = "numpy" },
{ name = "pyzmq", specifier = ">=23.0.0" },
@@ -2263,19 +2195,17 @@ requires-dist = [
[[package]]
name = "leann-core"
version = "0.2.9"
version = "0.1.15"
source = { editable = "packages/leann-core" }
dependencies = [
{ name = "accelerate" },
{ name = "gitignore-parser" },
{ name = "huggingface-hub" },
{ name = "llama-index-core" },
{ name = "llama-index-embeddings-huggingface" },
{ name = "llama-index-readers-file" },
{ name = "mlx", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" },
{ name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" },
{ name = "mlx", marker = "sys_platform == 'darwin'" },
{ name = "mlx-lm", marker = "sys_platform == 'darwin'" },
{ name = "msgpack" },
{ name = "nbconvert" },
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
{ name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -2297,15 +2227,13 @@ dependencies = [
requires-dist = [
{ name = "accelerate", specifier = ">=0.20.0" },
{ name = "accelerate", marker = "extra == 'colab'", specifier = ">=0.20.0,<1.0.0" },
{ name = "gitignore-parser", specifier = ">=0.1.12" },
{ name = "huggingface-hub", specifier = ">=0.20.0" },
{ name = "llama-index-core", specifier = ">=0.12.0" },
{ name = "llama-index-embeddings-huggingface", specifier = ">=0.5.5" },
{ name = "llama-index-readers-file", specifier = ">=0.4.0" },
{ name = "mlx", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'", specifier = ">=0.26.3" },
{ name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'", specifier = ">=0.26.0" },
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = ">=0.26.3" },
{ name = "mlx-lm", marker = "sys_platform == 'darwin'", specifier = ">=0.26.0" },
{ name = "msgpack", specifier = ">=1.0.0" },
{ name = "nbconvert", specifier = ">=7.0.0" },
{ name = "numpy", specifier = ">=1.20.0" },
{ name = "openai", specifier = ">=1.0.0" },
{ name = "pdfplumber", specifier = ">=0.10.0" },
@@ -2335,7 +2263,6 @@ dependencies = [
{ name = "evaluate" },
{ name = "flask" },
{ name = "flask-compress" },
{ name = "gitignore-parser" },
{ name = "ipykernel" },
{ name = "leann-backend-hnsw" },
{ name = "leann-core" },
@@ -2343,20 +2270,17 @@ dependencies = [
{ name = "llama-index-embeddings-huggingface" },
{ name = "llama-index-readers-file" },
{ name = "llama-index-vector-stores-faiss" },
{ name = "mlx", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" },
{ name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" },
{ name = "mlx", marker = "sys_platform == 'darwin'" },
{ name = "mlx-lm", marker = "sys_platform == 'darwin'" },
{ name = "msgpack" },
{ name = "nbconvert" },
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
{ name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
{ name = "ollama" },
{ name = "openai" },
{ name = "pathspec" },
{ name = "pdfplumber" },
{ name = "protobuf" },
{ name = "psutil" },
{ name = "pybind11" },
{ name = "pymupdf" },
{ name = "pypdf2" },
{ name = "pypdfium2" },
@@ -2412,7 +2336,6 @@ requires-dist = [
{ name = "evaluate" },
{ name = "flask" },
{ name = "flask-compress" },
{ name = "gitignore-parser", specifier = ">=0.1.12" },
{ name = "huggingface-hub", marker = "extra == 'dev'", specifier = ">=0.20.0" },
{ name = "ipykernel", specifier = "==6.29.5" },
{ name = "leann-backend-diskann", marker = "extra == 'diskann'", editable = "packages/leann-backend-diskann" },
@@ -2425,21 +2348,18 @@ requires-dist = [
{ name = "llama-index-readers-file", marker = "extra == 'test'", specifier = ">=0.4.0" },
{ name = "llama-index-vector-stores-faiss", specifier = ">=0.4.0" },
{ name = "matplotlib", marker = "extra == 'dev'" },
{ name = "mlx", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'", specifier = ">=0.26.3" },
{ name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'", specifier = ">=0.26.0" },
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = ">=0.26.3" },
{ name = "mlx-lm", marker = "sys_platform == 'darwin'", specifier = ">=0.26.0" },
{ name = "msgpack", specifier = ">=1.1.1" },
{ name = "nbconvert", specifier = ">=7.16.6" },
{ name = "numpy", specifier = ">=1.26.0" },
{ name = "ollama" },
{ name = "openai", specifier = ">=1.0.0" },
{ name = "openpyxl", marker = "extra == 'documents'", specifier = ">=3.1.0" },
{ name = "pandas", marker = "extra == 'documents'", specifier = ">=2.2.0" },
{ name = "pathspec", specifier = ">=0.12.1" },
{ name = "pdfplumber", specifier = ">=0.11.0" },
{ name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.5.0" },
{ name = "protobuf", specifier = "==4.25.3" },
{ name = "psutil", specifier = ">=5.8.0" },
{ name = "pybind11", specifier = ">=3.0.0" },
{ name = "pymupdf", specifier = ">=1.26.0" },
{ name = "pypdf2", specifier = ">=3.0.0" },
{ name = "pypdfium2", specifier = ">=4.30.0" },
@@ -2451,7 +2371,7 @@ requires-dist = [
{ name = "python-docx", marker = "extra == 'documents'", specifier = ">=0.8.11" },
{ name = "python-dotenv", marker = "extra == 'test'", specifier = ">=1.0.0" },
{ name = "requests", specifier = ">=2.25.0" },
{ name = "ruff", marker = "extra == 'dev'", specifier = "==0.12.7" },
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" },
{ name = "sentence-transformers", specifier = ">=2.2.0" },
{ name = "sentence-transformers", marker = "extra == 'test'", specifier = ">=2.2.0" },
{ name = "sglang" },
@@ -3074,18 +2994,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 },
]

[[package]]
name = "mistune"
version = "3.1.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c4/79/bda47f7dd7c3c55770478d6d02c9960c430b0cf1773b72366ff89126ea31/mistune-3.1.3.tar.gz", hash = "sha256:a7035c21782b2becb6be62f8f25d3df81ccb4d6fa477a6525b15af06539f02a0", size = 94347 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl", hash = "sha256:1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9", size = 53410 },
]

[[package]]
name = "mlx"
version = "0.27.1"
@@ -3356,62 +3264,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 },
]

[[package]]
name = "nbclient"
version = "0.10.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jupyter-client" },
{ name = "jupyter-core" },
{ name = "nbformat" },
{ name = "traitlets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/87/66/7ffd18d58eae90d5721f9f39212327695b749e23ad44b3881744eaf4d9e8/nbclient-0.10.2.tar.gz", hash = "sha256:90b7fc6b810630db87a6d0c2250b1f0ab4cf4d3c27a299b0cde78a4ed3fd9193", size = 62424 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/34/6d/e7fa07f03a4a7b221d94b4d586edb754a9b0dc3c9e2c93353e9fa4e0d117/nbclient-0.10.2-py3-none-any.whl", hash = "sha256:4ffee11e788b4a27fabeb7955547e4318a5298f34342a4bfd01f2e1faaeadc3d", size = 25434 },
]

[[package]]
name = "nbconvert"
version = "7.16.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "bleach", extra = ["css"] },
{ name = "defusedxml" },
{ name = "importlib-metadata", marker = "python_full_version < '3.10'" },
{ name = "jinja2" },
{ name = "jupyter-core" },
{ name = "jupyterlab-pygments" },
{ name = "markupsafe" },
{ name = "mistune" },
{ name = "nbclient" },
{ name = "nbformat" },
{ name = "packaging" },
{ name = "pandocfilters" },
{ name = "pygments" },
{ name = "traitlets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a3/59/f28e15fc47ffb73af68a8d9b47367a8630d76e97ae85ad18271b9db96fdf/nbconvert-7.16.6.tar.gz", hash = "sha256:576a7e37c6480da7b8465eefa66c17844243816ce1ccc372633c6b71c3c0f582", size = 857715 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cc/9a/cd673b2f773a12c992f41309ef81b99da1690426bd2f96957a7ade0d3ed7/nbconvert-7.16.6-py3-none-any.whl", hash = "sha256:1375a7b67e0c2883678c48e506dc320febb57685e5ee67faa51b18a90f3a712b", size = 258525 },
]

[[package]]
name = "nbformat"
version = "5.10.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "fastjsonschema" },
{ name = "jsonschema" },
{ name = "jupyter-core" },
{ name = "traitlets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/6d/fd/91545e604bc3dad7dca9ed03284086039b294c6b3d75c0d2fa45f9e9caf3/nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a", size = 142749 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b", size = 78454 },
]

[[package]]
name = "nest-asyncio"
version = "1.6.0"
@@ -3931,15 +3783,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2f/49/5c30646e96c684570925b772eac4eb0a8cb0ca590fa978f56c5d3ae73ea1/pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e", size = 11618011 },
]

[[package]]
name = "pandocfilters"
version = "1.5.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/70/6f/3dd4940bbe001c06a65f88e36bad298bc7a0de5036115639926b0c5c0458/pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e", size = 8454 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc", size = 8663 },
]

[[package]]
name = "parso"
version = "0.8.4"
@@ -4360,15 +4203,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/10/15/6b30e77872012bbfe8265d42a01d5b3c17ef0ac0f2fae531ad91b6a6c02e/pyarrow-21.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdc4c17afda4dab2a9c0b79148a43a7f4e1094916b3e18d8975bfd6d6d52241f", size = 26227521 },
]

[[package]]
name = "pybind11"
version = "3.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ef/83/698d120e257a116f2472c710932023ad779409adf2734d2e940f34eea2c5/pybind11-3.0.0.tar.gz", hash = "sha256:c3f07bce3ada51c3e4b76badfa85df11688d12c46111f9d242bc5c9415af7862", size = 544819 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/41/9c/85f50a5476832c3efc67b6d7997808388236ae4754bf53e1749b3bc27577/pybind11-3.0.0-py3-none-any.whl", hash = "sha256:7c5cac504da5a701b5163f0e6a7ba736c713a096a5378383c5b4b064b753f607", size = 292118 },
]

[[package]]
name = "pycparser"
version = "2.22"
@@ -4936,20 +4770,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/21/c8726b1738d72c7f1602a6720996c4c227754b12335ad84e7db1300f8363/pyzstd-0.17.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a67d7ef18715875b31127eb90075c03ced722fd87902b34bca4b807a2ce1e4d9", size = 241664 },
]

[[package]]
name = "referencing"
version = "0.36.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
{ name = "rpds-py" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775 },
]

[[package]]
name = "regex"
version = "2024.11.6"
@@ -5051,191 +4871,29 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847 },
]

[[package]]
name = "rpds-py"
version = "0.27.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1e/d9/991a0dee12d9fc53ed027e26a26a64b151d77252ac477e22666b9688bc16/rpds_py-0.27.0.tar.gz", hash = "sha256:8b23cf252f180cda89220b378d917180f29d313cd6a07b2431c0d3b776aae86f", size = 27420 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/75/2d/ad2e37dee3f45580f7fa0066c412a521f9bee53d2718b0e9436d308a1ecd/rpds_py-0.27.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:130c1ffa5039a333f5926b09e346ab335f0d4ec393b030a18549a7c7e7c2cea4", size = 371511 },
{ url = "https://files.pythonhosted.org/packages/f5/67/57b4b2479193fde9dd6983a13c2550b5f9c3bcdf8912dffac2068945eb14/rpds_py-0.27.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a4cf32a26fa744101b67bfd28c55d992cd19438aff611a46cac7f066afca8fd4", size = 354718 },
{ url = "https://files.pythonhosted.org/packages/a3/be/c2b95ec4b813eb11f3a3c3d22f22bda8d3a48a074a0519cde968c4d102cf/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64a0fe3f334a40b989812de70160de6b0ec7e3c9e4a04c0bbc48d97c5d3600ae", size = 381518 },
{ url = "https://files.pythonhosted.org/packages/a5/d2/5a7279bc2b93b20bd50865a2269016238cee45f7dc3cc33402a7f41bd447/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a0ff7ee28583ab30a52f371b40f54e7138c52ca67f8ca17ccb7ccf0b383cb5f", size = 396694 },
{ url = "https://files.pythonhosted.org/packages/65/e9/bac8b3714bd853c5bcb466e04acfb9a5da030d77e0ddf1dfad9afb791c31/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:15ea4d2e182345dd1b4286593601d766411b43f868924afe297570658c31a62b", size = 514813 },
{ url = "https://files.pythonhosted.org/packages/1d/aa/293115e956d7d13b7d2a9e9a4121f74989a427aa125f00ce4426ca8b7b28/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36184b44bf60a480863e51021c26aca3dfe8dd2f5eeabb33622b132b9d8b8b54", size = 402246 },
{ url = "https://files.pythonhosted.org/packages/88/59/2d6789bb898fb3e2f0f7b82b7bcf27f579ebcb6cc36c24f4e208f7f58a5b/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b78430703cfcf5f5e86eb74027a1ed03a93509273d7c705babb547f03e60016", size = 383661 },
{ url = "https://files.pythonhosted.org/packages/0c/55/add13a593a7a81243a9eed56d618d3d427be5dc1214931676e3f695dfdc1/rpds_py-0.27.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:dbd749cff1defbde270ca346b69b3baf5f1297213ef322254bf2a28537f0b046", size = 401691 },
{ url = "https://files.pythonhosted.org/packages/04/09/3e8b2aad494ffaca571e4e19611a12cc18fcfd756d9274f3871a2d822445/rpds_py-0.27.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bde37765564cd22a676dd8101b657839a1854cfaa9c382c5abf6ff7accfd4ae", size = 416529 },
{ url = "https://files.pythonhosted.org/packages/a4/6d/bd899234728f1d8f72c9610f50fdf1c140ecd0a141320e1f1d0f6b20595d/rpds_py-0.27.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1d66f45b9399036e890fb9c04e9f70c33857fd8f58ac8db9f3278cfa835440c3", size = 558673 },
{ url = "https://files.pythonhosted.org/packages/79/f4/f3e02def5193fb899d797c232f90d6f8f0f2b9eca2faef6f0d34cbc89b2e/rpds_py-0.27.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d85d784c619370d9329bbd670f41ff5f2ae62ea4519761b679d0f57f0f0ee267", size = 588426 },
{ url = "https://files.pythonhosted.org/packages/e3/0c/88e716cd8fd760e5308835fe298255830de4a1c905fd51760b9bb40aa965/rpds_py-0.27.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5df559e9e7644d9042f626f2c3997b555f347d7a855a15f170b253f6c5bfe358", size = 554552 },
{ url = "https://files.pythonhosted.org/packages/2b/a9/0a8243c182e7ac59b901083dff7e671feba6676a131bfff3f8d301cd2b36/rpds_py-0.27.0-cp310-cp310-win32.whl", hash = "sha256:b8a4131698b6992b2a56015f51646711ec5d893a0b314a4b985477868e240c87", size = 218081 },
{ url = "https://files.pythonhosted.org/packages/0f/e7/202ff35852312760148be9e08fe2ba6900aa28e7a46940a313eae473c10c/rpds_py-0.27.0-cp310-cp310-win_amd64.whl", hash = "sha256:cbc619e84a5e3ab2d452de831c88bdcad824414e9c2d28cd101f94dbdf26329c", size = 230077 },
{ url = "https://files.pythonhosted.org/packages/b4/c1/49d515434c1752e40f5e35b985260cf27af052593378580a2f139a5be6b8/rpds_py-0.27.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:dbc2ab5d10544eb485baa76c63c501303b716a5c405ff2469a1d8ceffaabf622", size = 371577 },
{ url = "https://files.pythonhosted.org/packages/e1/6d/bf2715b2fee5087fa13b752b5fd573f1a93e4134c74d275f709e38e54fe7/rpds_py-0.27.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7ec85994f96a58cf7ed288caa344b7fe31fd1d503bdf13d7331ead5f70ab60d5", size = 354959 },
{ url = "https://files.pythonhosted.org/packages/a3/5c/e7762808c746dd19733a81373c10da43926f6a6adcf4920a21119697a60a/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:190d7285cd3bb6d31d37a0534d7359c1ee191eb194c511c301f32a4afa5a1dd4", size = 381485 },
{ url = "https://files.pythonhosted.org/packages/40/51/0d308eb0b558309ca0598bcba4243f52c4cd20e15fe991b5bd75824f2e61/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c10d92fb6d7fd827e44055fcd932ad93dac6a11e832d51534d77b97d1d85400f", size = 396816 },
{ url = "https://files.pythonhosted.org/packages/5c/aa/2d585ec911d78f66458b2c91252134ca0c7c70f687a72c87283173dc0c96/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd2c1d27ebfe6a015cfa2005b7fe8c52d5019f7bbdd801bc6f7499aab9ae739e", size = 514950 },
{ url = "https://files.pythonhosted.org/packages/0b/ef/aced551cc1148179557aed84343073adadf252c91265263ee6203458a186/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4790c9d5dd565ddb3e9f656092f57268951398cef52e364c405ed3112dc7c7c1", size = 402132 },
{ url = "https://files.pythonhosted.org/packages/4b/ac/cf644803d8d417653fe2b3604186861d62ea6afaef1b2284045741baef17/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4300e15e7d03660f04be84a125d1bdd0e6b2f674bc0723bc0fd0122f1a4585dc", size = 383660 },
{ url = "https://files.pythonhosted.org/packages/c9/ec/caf47c55ce02b76cbaeeb2d3b36a73da9ca2e14324e3d75cf72b59dcdac5/rpds_py-0.27.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:59195dc244fc183209cf8a93406889cadde47dfd2f0a6b137783aa9c56d67c85", size = 401730 },
{ url = "https://files.pythonhosted.org/packages/0b/71/c1f355afdcd5b99ffc253422aa4bdcb04ccf1491dcd1bda3688a0c07fd61/rpds_py-0.27.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fae4a01ef8c4cb2bbe92ef2063149596907dc4a881a8d26743b3f6b304713171", size = 416122 },
{ url = "https://files.pythonhosted.org/packages/38/0f/f4b5b1eda724ed0e04d2b26d8911cdc131451a7ee4c4c020a1387e5c6ded/rpds_py-0.27.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e3dc8d4ede2dbae6c0fc2b6c958bf51ce9fd7e9b40c0f5b8835c3fde44f5807d", size = 558771 },
{ url = "https://files.pythonhosted.org/packages/93/c0/5f8b834db2289ab48d5cffbecbb75e35410103a77ac0b8da36bf9544ec1c/rpds_py-0.27.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c3782fb753aa825b4ccabc04292e07897e2fd941448eabf666856c5530277626", size = 587876 },
{ url = "https://files.pythonhosted.org/packages/d2/dd/1a1df02ab8eb970115cff2ae31a6f73916609b900dc86961dc382b8c2e5e/rpds_py-0.27.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:887ab1f12b0d227e9260558a4a2320024b20102207ada65c43e1ffc4546df72e", size = 554359 },
{ url = "https://files.pythonhosted.org/packages/a1/e4/95a014ab0d51ab6e3bebbdb476a42d992d2bbf9c489d24cff9fda998e925/rpds_py-0.27.0-cp311-cp311-win32.whl", hash = "sha256:5d6790ff400254137b81b8053b34417e2c46921e302d655181d55ea46df58cf7", size = 218084 },
{ url = "https://files.pythonhosted.org/packages/49/78/f8d5b71ec65a0376b0de31efcbb5528ce17a9b7fdd19c3763303ccfdedec/rpds_py-0.27.0-cp311-cp311-win_amd64.whl", hash = "sha256:e24d8031a2c62f34853756d9208eeafa6b940a1efcbfe36e8f57d99d52bb7261", size = 230085 },
{ url = "https://files.pythonhosted.org/packages/e7/d3/84429745184091e06b4cc70f8597408e314c2d2f7f5e13249af9ffab9e3d/rpds_py-0.27.0-cp311-cp311-win_arm64.whl", hash = "sha256:08680820d23df1df0a0260f714d12966bc6c42d02e8055a91d61e03f0c47dda0", size = 222112 },
{ url = "https://files.pythonhosted.org/packages/cd/17/e67309ca1ac993fa1888a0d9b2f5ccc1f67196ace32e76c9f8e1dbbbd50c/rpds_py-0.27.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:19c990fdf5acecbf0623e906ae2e09ce1c58947197f9bced6bbd7482662231c4", size = 362611 },
{ url = "https://files.pythonhosted.org/packages/93/2e/28c2fb84aa7aa5d75933d1862d0f7de6198ea22dfd9a0cca06e8a4e7509e/rpds_py-0.27.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6c27a7054b5224710fcfb1a626ec3ff4f28bcb89b899148c72873b18210e446b", size = 347680 },
{ url = "https://files.pythonhosted.org/packages/44/3e/9834b4c8f4f5fe936b479e623832468aa4bd6beb8d014fecaee9eac6cdb1/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09965b314091829b378b60607022048953e25f0b396c2b70e7c4c81bcecf932e", size = 384600 },
{ url = "https://files.pythonhosted.org/packages/19/78/744123c7b38865a965cd9e6f691fde7ef989a00a256fa8bf15b75240d12f/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:14f028eb47f59e9169bfdf9f7ceafd29dd64902141840633683d0bad5b04ff34", size = 400697 },
{ url = "https://files.pythonhosted.org/packages/32/97/3c3d32fe7daee0a1f1a678b6d4dfb8c4dcf88197fa2441f9da7cb54a8466/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6168af0be75bba990a39f9431cdfae5f0ad501f4af32ae62e8856307200517b8", size = 517781 },
{ url = "https://files.pythonhosted.org/packages/b2/be/28f0e3e733680aa13ecec1212fc0f585928a206292f14f89c0b8a684cad1/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab47fe727c13c09d0e6f508e3a49e545008e23bf762a245b020391b621f5b726", size = 406449 },
{ url = "https://files.pythonhosted.org/packages/95/ae/5d15c83e337c082d0367053baeb40bfba683f42459f6ebff63a2fd7e5518/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa01b3d5e3b7d97efab65bd3d88f164e289ec323a8c033c5c38e53ee25c007e", size = 386150 },
{ url = "https://files.pythonhosted.org/packages/bf/65/944e95f95d5931112829e040912b25a77b2e7ed913ea5fe5746aa5c1ce75/rpds_py-0.27.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:6c135708e987f46053e0a1246a206f53717f9fadfba27174a9769ad4befba5c3", size = 406100 },
{ url = "https://files.pythonhosted.org/packages/21/a4/1664b83fae02894533cd11dc0b9f91d673797c2185b7be0f7496107ed6c5/rpds_py-0.27.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc327f4497b7087d06204235199daf208fd01c82d80465dc5efa4ec9df1c5b4e", size = 421345 },
{ url = "https://files.pythonhosted.org/packages/7c/26/b7303941c2b0823bfb34c71378249f8beedce57301f400acb04bb345d025/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7e57906e38583a2cba67046a09c2637e23297618dc1f3caddbc493f2be97c93f", size = 561891 },
{ url = "https://files.pythonhosted.org/packages/9b/c8/48623d64d4a5a028fa99576c768a6159db49ab907230edddc0b8468b998b/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f4f69d7a4300fbf91efb1fb4916421bd57804c01ab938ab50ac9c4aa2212f03", size = 591756 },
{ url = "https://files.pythonhosted.org/packages/b3/51/18f62617e8e61cc66334c9fb44b1ad7baae3438662098efbc55fb3fda453/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b4c4fbbcff474e1e5f38be1bf04511c03d492d42eec0babda5d03af3b5589374", size = 557088 },
{ url = "https://files.pythonhosted.org/packages/bd/4c/e84c3a276e2496a93d245516be6b49e20499aa8ca1c94d59fada0d79addc/rpds_py-0.27.0-cp312-cp312-win32.whl", hash = "sha256:27bac29bbbf39601b2aab474daf99dbc8e7176ca3389237a23944b17f8913d97", size = 221926 },
{ url = "https://files.pythonhosted.org/packages/83/89/9d0fbcef64340db0605eb0a0044f258076f3ae0a3b108983b2c614d96212/rpds_py-0.27.0-cp312-cp312-win_amd64.whl", hash = "sha256:8a06aa1197ec0281eb1d7daf6073e199eb832fe591ffa329b88bae28f25f5fe5", size = 233235 },
{ url = "https://files.pythonhosted.org/packages/c9/b0/e177aa9f39cbab060f96de4a09df77d494f0279604dc2f509263e21b05f9/rpds_py-0.27.0-cp312-cp312-win_arm64.whl", hash = "sha256:e14aab02258cb776a108107bd15f5b5e4a1bbaa61ef33b36693dfab6f89d54f9", size = 223315 },
{ url = "https://files.pythonhosted.org/packages/81/d2/dfdfd42565a923b9e5a29f93501664f5b984a802967d48d49200ad71be36/rpds_py-0.27.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:443d239d02d9ae55b74015234f2cd8eb09e59fbba30bf60baeb3123ad4c6d5ff", size = 362133 },
{ url = "https://files.pythonhosted.org/packages/ac/4a/0a2e2460c4b66021d349ce9f6331df1d6c75d7eea90df9785d333a49df04/rpds_py-0.27.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b8a7acf04fda1f30f1007f3cc96d29d8cf0a53e626e4e1655fdf4eabc082d367", size = 347128 },
{ url = "https://files.pythonhosted.org/packages/35/8d/7d1e4390dfe09d4213b3175a3f5a817514355cb3524593380733204f20b9/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d0f92b78cfc3b74a42239fdd8c1266f4715b573204c234d2f9fc3fc7a24f185", size = 384027 },
{ url = "https://files.pythonhosted.org/packages/c1/65/78499d1a62172891c8cd45de737b2a4b84a414b6ad8315ab3ac4945a5b61/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ce4ed8e0c7dbc5b19352b9c2c6131dd23b95fa8698b5cdd076307a33626b72dc", size = 399973 },
{ url = "https://files.pythonhosted.org/packages/10/a1/1c67c1d8cc889107b19570bb01f75cf49852068e95e6aee80d22915406fc/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fde355b02934cc6b07200cc3b27ab0c15870a757d1a72fd401aa92e2ea3c6bfe", size = 515295 },
{ url = "https://files.pythonhosted.org/packages/df/27/700ec88e748436b6c7c4a2262d66e80f8c21ab585d5e98c45e02f13f21c0/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13bbc4846ae4c993f07c93feb21a24d8ec637573d567a924b1001e81c8ae80f9", size = 406737 },
{ url = "https://files.pythonhosted.org/packages/33/cc/6b0ee8f0ba3f2df2daac1beda17fde5cf10897a7d466f252bd184ef20162/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be0744661afbc4099fef7f4e604e7f1ea1be1dd7284f357924af12a705cc7d5c", size = 385898 },
{ url = "https://files.pythonhosted.org/packages/e8/7e/c927b37d7d33c0a0ebf249cc268dc2fcec52864c1b6309ecb960497f2285/rpds_py-0.27.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:069e0384a54f427bd65d7fda83b68a90606a3835901aaff42185fcd94f5a9295", size = 405785 },
{ url = "https://files.pythonhosted.org/packages/5b/d2/8ed50746d909dcf402af3fa58b83d5a590ed43e07251d6b08fad1a535ba6/rpds_py-0.27.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4bc262ace5a1a7dc3e2eac2fa97b8257ae795389f688b5adf22c5db1e2431c43", size = 419760 },
{ url = "https://files.pythonhosted.org/packages/d3/60/2b2071aee781cb3bd49f94d5d35686990b925e9b9f3e3d149235a6f5d5c1/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2fe6e18e5c8581f0361b35ae575043c7029d0a92cb3429e6e596c2cdde251432", size = 561201 },
{ url = "https://files.pythonhosted.org/packages/98/1f/27b67304272521aaea02be293fecedce13fa351a4e41cdb9290576fc6d81/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d93ebdb82363d2e7bec64eecdc3632b59e84bd270d74fe5be1659f7787052f9b", size = 591021 },
{ url = "https://files.pythonhosted.org/packages/db/9b/a2fadf823164dd085b1f894be6443b0762a54a7af6f36e98e8fcda69ee50/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0954e3a92e1d62e83a54ea7b3fdc9efa5d61acef8488a8a3d31fdafbfb00460d", size = 556368 },
{ url = "https://files.pythonhosted.org/packages/24/f3/6d135d46a129cda2e3e6d4c5e91e2cc26ea0428c6cf152763f3f10b6dd05/rpds_py-0.27.0-cp313-cp313-win32.whl", hash = "sha256:2cff9bdd6c7b906cc562a505c04a57d92e82d37200027e8d362518df427f96cd", size = 221236 },
{ url = "https://files.pythonhosted.org/packages/c5/44/65d7494f5448ecc755b545d78b188440f81da98b50ea0447ab5ebfdf9bd6/rpds_py-0.27.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc79d192fb76fc0c84f2c58672c17bbbc383fd26c3cdc29daae16ce3d927e8b2", size = 232634 },
{ url = "https://files.pythonhosted.org/packages/70/d9/23852410fadab2abb611733933401de42a1964ce6600a3badae35fbd573e/rpds_py-0.27.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b3a5c8089eed498a3af23ce87a80805ff98f6ef8f7bdb70bd1b7dae5105f6ac", size = 222783 },
{ url = "https://files.pythonhosted.org/packages/15/75/03447917f78512b34463f4ef11066516067099a0c466545655503bed0c77/rpds_py-0.27.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:90fb790138c1a89a2e58c9282fe1089638401f2f3b8dddd758499041bc6e0774", size = 359154 },
{ url = "https://files.pythonhosted.org/packages/6b/fc/4dac4fa756451f2122ddaf136e2c6aeb758dc6fdbe9ccc4bc95c98451d50/rpds_py-0.27.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:010c4843a3b92b54373e3d2291a7447d6c3fc29f591772cc2ea0e9f5c1da434b", size = 343909 },
{ url = "https://files.pythonhosted.org/packages/7b/81/723c1ed8e6f57ed9d8c0c07578747a2d3d554aaefc1ab89f4e42cfeefa07/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9ce7a9e967afc0a2af7caa0d15a3e9c1054815f73d6a8cb9225b61921b419bd", size = 379340 },
{ url = "https://files.pythonhosted.org/packages/98/16/7e3740413de71818ce1997df82ba5f94bae9fff90c0a578c0e24658e6201/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aa0bf113d15e8abdfee92aa4db86761b709a09954083afcb5bf0f952d6065fdb", size = 391655 },
{ url = "https://files.pythonhosted.org/packages/e0/63/2a9f510e124d80660f60ecce07953f3f2d5f0b96192c1365443859b9c87f/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb91d252b35004a84670dfeafadb042528b19842a0080d8b53e5ec1128e8f433", size = 513017 },
{ url = "https://files.pythonhosted.org/packages/2c/4e/cf6ff311d09776c53ea1b4f2e6700b9d43bb4e99551006817ade4bbd6f78/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:db8a6313dbac934193fc17fe7610f70cd8181c542a91382531bef5ed785e5615", size = 402058 },
{ url = "https://files.pythonhosted.org/packages/88/11/5e36096d474cb10f2a2d68b22af60a3bc4164fd8db15078769a568d9d3ac/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce96ab0bdfcef1b8c371ada2100767ace6804ea35aacce0aef3aeb4f3f499ca8", size = 383474 },
{ url = "https://files.pythonhosted.org/packages/db/a2/3dff02805b06058760b5eaa6d8cb8db3eb3e46c9e452453ad5fc5b5ad9fe/rpds_py-0.27.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:7451ede3560086abe1aa27dcdcf55cd15c96b56f543fb12e5826eee6f721f858", size = 400067 },
{ url = "https://files.pythonhosted.org/packages/67/87/eed7369b0b265518e21ea836456a4ed4a6744c8c12422ce05bce760bb3cf/rpds_py-0.27.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:32196b5a99821476537b3f7732432d64d93a58d680a52c5e12a190ee0135d8b5", size = 412085 },
{ url = "https://files.pythonhosted.org/packages/8b/48/f50b2ab2fbb422fbb389fe296e70b7a6b5ea31b263ada5c61377e710a924/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a029be818059870664157194e46ce0e995082ac49926f1423c1f058534d2aaa9", size = 555928 },
{ url = "https://files.pythonhosted.org/packages/98/41/b18eb51045d06887666c3560cd4bbb6819127b43d758f5adb82b5f56f7d1/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3841f66c1ffdc6cebce8aed64e36db71466f1dc23c0d9a5592e2a782a3042c79", size = 585527 },
{ url = "https://files.pythonhosted.org/packages/be/03/a3dd6470fc76499959b00ae56295b76b4bdf7c6ffc60d62006b1217567e1/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:42894616da0fc0dcb2ec08a77896c3f56e9cb2f4b66acd76fc8992c3557ceb1c", size = 554211 },
{ url = "https://files.pythonhosted.org/packages/bf/d1/ee5fd1be395a07423ac4ca0bcc05280bf95db2b155d03adefeb47d5ebf7e/rpds_py-0.27.0-cp313-cp313t-win32.whl", hash = "sha256:b1fef1f13c842a39a03409e30ca0bf87b39a1e2a305a9924deadb75a43105d23", size = 216624 },
{ url = "https://files.pythonhosted.org/packages/1c/94/4814c4c858833bf46706f87349c37ca45e154da7dbbec9ff09f1abeb08cc/rpds_py-0.27.0-cp313-cp313t-win_amd64.whl", hash = "sha256:183f5e221ba3e283cd36fdfbe311d95cd87699a083330b4f792543987167eff1", size = 230007 },
{ url = "https://files.pythonhosted.org/packages/0e/a5/8fffe1c7dc7c055aa02df310f9fb71cfc693a4d5ccc5de2d3456ea5fb022/rpds_py-0.27.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f3cd110e02c5bf17d8fb562f6c9df5c20e73029d587cf8602a2da6c5ef1e32cb", size = 362595 },
{ url = "https://files.pythonhosted.org/packages/bc/c7/4e4253fd2d4bb0edbc0b0b10d9f280612ca4f0f990e3c04c599000fe7d71/rpds_py-0.27.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8d0e09cf4863c74106b5265c2c310f36146e2b445ff7b3018a56799f28f39f6f", size = 347252 },
{ url = "https://files.pythonhosted.org/packages/f3/c8/3d1a954d30f0174dd6baf18b57c215da03cf7846a9d6e0143304e784cddc/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f689ab822f9b5eb6dfc69893b4b9366db1d2420f7db1f6a2adf2a9ca15ad64", size = 384886 },
{ url = "https://files.pythonhosted.org/packages/e0/52/3c5835f2df389832b28f9276dd5395b5a965cea34226e7c88c8fbec2093c/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e36c80c49853b3ffda7aa1831bf175c13356b210c73128c861f3aa93c3cc4015", size = 399716 },
{ url = "https://files.pythonhosted.org/packages/40/73/176e46992461a1749686a2a441e24df51ff86b99c2d34bf39f2a5273b987/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6de6a7f622860af0146cb9ee148682ff4d0cea0b8fd3ad51ce4d40efb2f061d0", size = 517030 },
{ url = "https://files.pythonhosted.org/packages/79/2a/7266c75840e8c6e70effeb0d38922a45720904f2cd695e68a0150e5407e2/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4045e2fc4b37ec4b48e8907a5819bdd3380708c139d7cc358f03a3653abedb89", size = 408448 },
{ url = "https://files.pythonhosted.org/packages/e6/5f/a7efc572b8e235093dc6cf39f4dbc8a7f08e65fdbcec7ff4daeb3585eef1/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9da162b718b12c4219eeeeb68a5b7552fbc7aadedf2efee440f88b9c0e54b45d", size = 387320 },
{ url = "https://files.pythonhosted.org/packages/a2/eb/9ff6bc92efe57cf5a2cb74dee20453ba444b6fdc85275d8c99e0d27239d1/rpds_py-0.27.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:0665be515767dc727ffa5f74bd2ef60b0ff85dad6bb8f50d91eaa6b5fb226f51", size = 407414 },
{ url = "https://files.pythonhosted.org/packages/fb/bd/3b9b19b00d5c6e1bd0f418c229ab0f8d3b110ddf7ec5d9d689ef783d0268/rpds_py-0.27.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:203f581accef67300a942e49a37d74c12ceeef4514874c7cede21b012613ca2c", size = 420766 },
{ url = "https://files.pythonhosted.org/packages/17/6b/521a7b1079ce16258c70805166e3ac6ec4ee2139d023fe07954dc9b2d568/rpds_py-0.27.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7873b65686a6471c0037139aa000d23fe94628e0daaa27b6e40607c90e3f5ec4", size = 562409 },
{ url = "https://files.pythonhosted.org/packages/8b/bf/65db5bfb14ccc55e39de8419a659d05a2a9cd232f0a699a516bb0991da7b/rpds_py-0.27.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:249ab91ceaa6b41abc5f19513cb95b45c6f956f6b89f1fe3d99c81255a849f9e", size = 590793 },
{ url = "https://files.pythonhosted.org/packages/db/b8/82d368b378325191ba7aae8f40f009b78057b598d4394d1f2cdabaf67b3f/rpds_py-0.27.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d2f184336bc1d6abfaaa1262ed42739c3789b1e3a65a29916a615307d22ffd2e", size = 558178 },
{ url = "https://files.pythonhosted.org/packages/f6/ff/f270bddbfbc3812500f8131b1ebbd97afd014cd554b604a3f73f03133a36/rpds_py-0.27.0-cp314-cp314-win32.whl", hash = "sha256:d3c622c39f04d5751408f5b801ecb527e6e0a471b367f420a877f7a660d583f6", size = 222355 },
{ url = "https://files.pythonhosted.org/packages/bf/20/fdab055b1460c02ed356a0e0b0a78c1dd32dc64e82a544f7b31c9ac643dc/rpds_py-0.27.0-cp314-cp314-win_amd64.whl", hash = "sha256:cf824aceaeffff029ccfba0da637d432ca71ab21f13e7f6f5179cd88ebc77a8a", size = 234007 },
{ url = "https://files.pythonhosted.org/packages/4d/a8/694c060005421797a3be4943dab8347c76c2b429a9bef68fb2c87c9e70c7/rpds_py-0.27.0-cp314-cp314-win_arm64.whl", hash = "sha256:86aca1616922b40d8ac1b3073a1ead4255a2f13405e5700c01f7c8d29a03972d", size = 223527 },
{ url = "https://files.pythonhosted.org/packages/1e/f9/77f4c90f79d2c5ca8ce6ec6a76cb4734ee247de6b3a4f337e289e1f00372/rpds_py-0.27.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:341d8acb6724c0c17bdf714319c393bb27f6d23d39bc74f94221b3e59fc31828", size = 359469 },
{ url = "https://files.pythonhosted.org/packages/c0/22/b97878d2f1284286fef4172069e84b0b42b546ea7d053e5fb7adb9ac6494/rpds_py-0.27.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6b96b0b784fe5fd03beffff2b1533dc0d85e92bab8d1b2c24ef3a5dc8fac5669", size = 343960 },
{ url = "https://files.pythonhosted.org/packages/b1/b0/dfd55b5bb480eda0578ae94ef256d3061d20b19a0f5e18c482f03e65464f/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c431bfb91478d7cbe368d0a699978050d3b112d7f1d440a41e90faa325557fd", size = 380201 },
{ url = "https://files.pythonhosted.org/packages/28/22/e1fa64e50d58ad2b2053077e3ec81a979147c43428de9e6de68ddf6aff4e/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20e222a44ae9f507d0f2678ee3dd0c45ec1e930f6875d99b8459631c24058aec", size = 392111 },
{ url = "https://files.pythonhosted.org/packages/49/f9/43ab7a43e97aedf6cea6af70fdcbe18abbbc41d4ae6cdec1bfc23bbad403/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:184f0d7b342967f6cda94a07d0e1fae177d11d0b8f17d73e06e36ac02889f303", size = 515863 },
{ url = "https://files.pythonhosted.org/packages/38/9b/9bd59dcc636cd04d86a2d20ad967770bf348f5eb5922a8f29b547c074243/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a00c91104c173c9043bc46f7b30ee5e6d2f6b1149f11f545580f5d6fdff42c0b", size = 402398 },
{ url = "https://files.pythonhosted.org/packages/71/bf/f099328c6c85667aba6b66fa5c35a8882db06dcd462ea214be72813a0dd2/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7a37dd208f0d658e0487522078b1ed68cd6bce20ef4b5a915d2809b9094b410", size = 384665 },
{ url = "https://files.pythonhosted.org/packages/a9/c5/9c1f03121ece6634818490bd3c8be2c82a70928a19de03467fb25a3ae2a8/rpds_py-0.27.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:92f3b3ec3e6008a1fe00b7c0946a170f161ac00645cde35e3c9a68c2475e8156", size = 400405 },
{ url = "https://files.pythonhosted.org/packages/b5/b8/e25d54af3e63ac94f0c16d8fe143779fe71ff209445a0c00d0f6984b6b2c/rpds_py-0.27.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a1b3db5fae5cbce2131b7420a3f83553d4d89514c03d67804ced36161fe8b6b2", size = 413179 },
{ url = "https://files.pythonhosted.org/packages/f9/d1/406b3316433fe49c3021546293a04bc33f1478e3ec7950215a7fce1a1208/rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5355527adaa713ab693cbce7c1e0ec71682f599f61b128cf19d07e5c13c9b1f1", size = 556895 },
{ url = "https://files.pythonhosted.org/packages/5f/bc/3697c0c21fcb9a54d46ae3b735eb2365eea0c2be076b8f770f98e07998de/rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:fcc01c57ce6e70b728af02b2401c5bc853a9e14eb07deda30624374f0aebfe42", size = 585464 },
{ url = "https://files.pythonhosted.org/packages/63/09/ee1bb5536f99f42c839b177d552f6114aa3142d82f49cef49261ed28dbe0/rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3001013dae10f806380ba739d40dee11db1ecb91684febb8406a87c2ded23dae", size = 555090 },
{ url = "https://files.pythonhosted.org/packages/7d/2c/363eada9e89f7059199d3724135a86c47082cbf72790d6ba2f336d146ddb/rpds_py-0.27.0-cp314-cp314t-win32.whl", hash = "sha256:0f401c369186a5743694dd9fc08cba66cf70908757552e1f714bfc5219c655b5", size = 218001 },
{ url = "https://files.pythonhosted.org/packages/e2/3f/d6c216ed5199c9ef79e2a33955601f454ed1e7420a93b89670133bca5ace/rpds_py-0.27.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8a1dca5507fa1337f75dcd5070218b20bc68cf8844271c923c1b79dfcbc20391", size = 230993 },
{ url = "https://files.pythonhosted.org/packages/a3/2e/82fee0cb7142bc32a9ce586eadd24a945257c016902d575bb377ad5feb10/rpds_py-0.27.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e0d7151a1bd5d0a203a5008fc4ae51a159a610cb82ab0a9b2c4d80241745582e", size = 371495 },
{ url = "https://files.pythonhosted.org/packages/f9/b5/b421756c7e5cc1d2bb438a34b16f750363d0d87caf2bfa6f2326423c42e5/rpds_py-0.27.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42ccc57ff99166a55a59d8c7d14f1a357b7749f9ed3584df74053fd098243451", size = 354823 },
{ url = "https://files.pythonhosted.org/packages/f9/4a/63337bbabfa38d4094144d0e689758e8452372fd3e45359b806fc1b4c022/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e377e4cf8795cdbdff75b8f0223d7b6c68ff4fef36799d88ccf3a995a91c0112", size = 381538 },
{ url = "https://files.pythonhosted.org/packages/33/8b/14eb61fb9a5bb830d28c548e3e67046fd04cae06c2ce6afe7f30aba7f7f0/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:79af163a4b40bbd8cfd7ca86ec8b54b81121d3b213b4435ea27d6568bcba3e9d", size = 396724 },
{ url = "https://files.pythonhosted.org/packages/03/54/47faf6aa4040443b108b24ae08e9db6fe6daaa8140b696f905833f325293/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2eff8ee57c5996b0d2a07c3601fb4ce5fbc37547344a26945dd9e5cbd1ed27a", size = 517084 },
{ url = "https://files.pythonhosted.org/packages/0b/88/a78dbacc9a96e3ea7e83d9bed8f272754e618c629ed6a9f8e2a506c84419/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7cf9bc4508efb18d8dff6934b602324eb9f8c6644749627ce001d6f38a490889", size = 402397 },
{ url = "https://files.pythonhosted.org/packages/6b/88/268c6422c0c3a0f01bf6e79086f6e4dbc6a2e60a6e95413ad17e3392ec0a/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05284439ebe7d9f5f5a668d4d8a0a1d851d16f7d47c78e1fab968c8ad30cab04", size = 383570 },
{ url = "https://files.pythonhosted.org/packages/9c/1a/34f5a2459b9752cc08e02c3845c8f570222f7dbd48c7baac4b827701a40e/rpds_py-0.27.0-cp39-cp39-manylinux_2_31_riscv64.whl", hash = "sha256:1321bce595ad70e80f97f998db37356b2e22cf98094eba6fe91782e626da2f71", size = 401771 },
{ url = "https://files.pythonhosted.org/packages/4e/9b/16979115f2ec783ca06454a141a0f32f082763ef874675c5f756e6e76fcd/rpds_py-0.27.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:737005088449ddd3b3df5a95476ee1c2c5c669f5c30eed909548a92939c0e12d", size = 416215 },
{ url = "https://files.pythonhosted.org/packages/81/0b/0305df88fb22db8efe81753ce4ec51b821555448fd94ec77ae4e5dfd57b7/rpds_py-0.27.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9b2a4e17bfd68536c3b801800941c95a1d4a06e3cada11c146093ba939d9638d", size = 558573 },
{ url = "https://files.pythonhosted.org/packages/84/9a/c48be4da43a556495cf66d6bf71a16e8e3e22ae8e724b678e430521d0702/rpds_py-0.27.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:dc6b0d5a1ea0318ef2def2b6a55dccf1dcaf77d605672347271ed7b829860765", size = 587956 },
{ url = "https://files.pythonhosted.org/packages/76/95/deb1111abde461330c4dad22b14347d064161fb7cb249746a06accc07633/rpds_py-0.27.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4c3f8a0d4802df34fcdbeb3dfe3a4d8c9a530baea8fafdf80816fcaac5379d83", size = 554493 },
{ url = "https://files.pythonhosted.org/packages/cb/16/5342d91917f26da91fc193932d9fbf422e2903aaee9bd3c6ecb4875ef17f/rpds_py-0.27.0-cp39-cp39-win32.whl", hash = "sha256:699c346abc73993962cac7bb4f02f58e438840fa5458a048d3a178a7a670ba86", size = 218302 },
{ url = "https://files.pythonhosted.org/packages/fb/a3/0346108a47efe41b50d8781688b7fb16b18d252053486c932d10b18977c9/rpds_py-0.27.0-cp39-cp39-win_amd64.whl", hash = "sha256:be806e2961cd390a89d6c3ce8c2ae34271cfcd05660f716257838bb560f1c3b6", size = 229977 },
{ url = "https://files.pythonhosted.org/packages/47/55/287068956f9ba1cb40896d291213f09fdd4527630709058b45a592bc09dc/rpds_py-0.27.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:46f48482c1a4748ab2773f75fffbdd1951eb59794e32788834b945da857c47a8", size = 371566 },
{ url = "https://files.pythonhosted.org/packages/a2/fb/443af59cbe552e89680bb0f1d1ba47f6387b92083e28a45b8c8863b86c5a/rpds_py-0.27.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:419dd9c98bcc9fb0242be89e0c6e922df333b975d4268faa90d58499fd9c9ebe", size = 355781 },
{ url = "https://files.pythonhosted.org/packages/ad/f0/35f48bb073b5ca42b1dcc55cb148f4a3bd4411a3e584f6a18d26f0ea8832/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d42a0ef2bdf6bc81e1cc2d49d12460f63c6ae1423c4f4851b828e454ccf6f1", size = 382575 },
{ url = "https://files.pythonhosted.org/packages/51/e1/5f5296a21d1189f0f116a938af2e346d83172bf814d373695e54004a936f/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e39169ac6aae06dd79c07c8a69d9da867cef6a6d7883a0186b46bb46ccfb0c3", size = 397435 },
{ url = "https://files.pythonhosted.org/packages/97/79/3af99b7852b2b55cad8a08863725cbe9dc14781bcf7dc6ecead0c3e1dc54/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:935afcdea4751b0ac918047a2df3f720212892347767aea28f5b3bf7be4f27c0", size = 514861 },
{ url = "https://files.pythonhosted.org/packages/df/3e/11fd6033708ed3ae0e6947bb94f762f56bb46bf59a1b16eef6944e8a62ee/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8de567dec6d451649a781633d36f5c7501711adee329d76c095be2178855b042", size = 402776 },
{ url = "https://files.pythonhosted.org/packages/b7/89/f9375ceaa996116de9cbc949874804c7874d42fb258c384c037a46d730b8/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:555ed147cbe8c8f76e72a4c6cd3b7b761cbf9987891b9448808148204aed74a5", size = 384665 },
{ url = "https://files.pythonhosted.org/packages/48/bf/0061e55c6f1f573a63c0f82306b8984ed3b394adafc66854a936d5db3522/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:d2cc2b34f9e1d31ce255174da82902ad75bd7c0d88a33df54a77a22f2ef421ee", size = 402518 },
{ url = "https://files.pythonhosted.org/packages/ae/dc/8d506676bfe87b3b683332ec8e6ab2b0be118a3d3595ed021e3274a63191/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cb0702c12983be3b2fab98ead349ac63a98216d28dda6f518f52da5498a27a1b", size = 416247 },
{ url = "https://files.pythonhosted.org/packages/2e/02/9a89eea1b75c69e81632de7963076e455b1e00e1cfb46dfdabb055fa03e3/rpds_py-0.27.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:ba783541be46f27c8faea5a6645e193943c17ea2f0ffe593639d906a327a9bcc", size = 559456 },
{ url = "https://files.pythonhosted.org/packages/38/4a/0f3ac4351957847c0d322be6ec72f916e43804a2c1d04e9672ea4a67c315/rpds_py-0.27.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:2406d034635d1497c596c40c85f86ecf2bf9611c1df73d14078af8444fe48031", size = 587778 },
{ url = "https://files.pythonhosted.org/packages/c2/8e/39d0d7401095bed5a5ad5ef304fae96383f9bef40ca3f3a0807ff5b68d9d/rpds_py-0.27.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:dea0808153f1fbbad772669d906cddd92100277533a03845de6893cadeffc8be", size = 555247 },
{ url = "https://files.pythonhosted.org/packages/e0/04/6b8311e811e620b9eaca67cd80a118ff9159558a719201052a7b2abb88bf/rpds_py-0.27.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d2a81bdcfde4245468f7030a75a37d50400ac2455c3a4819d9d550c937f90ab5", size = 230256 },
{ url = "https://files.pythonhosted.org/packages/59/64/72ab5b911fdcc48058359b0e786e5363e3fde885156116026f1a2ba9a5b5/rpds_py-0.27.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e6491658dd2569f05860bad645569145c8626ac231877b0fb2d5f9bcb7054089", size = 371658 },
{ url = "https://files.pythonhosted.org/packages/6c/4b/90ff04b4da055db53d8fea57640d8d5d55456343a1ec9a866c0ecfe10fd1/rpds_py-0.27.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:bec77545d188f8bdd29d42bccb9191682a46fb2e655e3d1fb446d47c55ac3b8d", size = 355529 },
{ url = "https://files.pythonhosted.org/packages/a4/be/527491fb1afcd86fc5ce5812eb37bc70428ee017d77fee20de18155c3937/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a4aebf8ca02bbb90a9b3e7a463bbf3bee02ab1c446840ca07b1695a68ce424", size = 382822 },
{ url = "https://files.pythonhosted.org/packages/e0/a5/dcdb8725ce11e6d0913e6fcf782a13f4b8a517e8acc70946031830b98441/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:44524b96481a4c9b8e6c46d6afe43fa1fb485c261e359fbe32b63ff60e3884d8", size = 397233 },
{ url = "https://files.pythonhosted.org/packages/33/f9/0947920d1927e9f144660590cc38cadb0795d78fe0d9aae0ef71c1513b7c/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45d04a73c54b6a5fd2bab91a4b5bc8b426949586e61340e212a8484919183859", size = 514892 },
{ url = "https://files.pythonhosted.org/packages/1d/ed/d1343398c1417c68f8daa1afce56ef6ce5cc587daaf98e29347b00a80ff2/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:343cf24de9ed6c728abefc5d5c851d5de06497caa7ac37e5e65dd572921ed1b5", size = 402733 },
{ url = "https://files.pythonhosted.org/packages/1d/0b/646f55442cd14014fb64d143428f25667a100f82092c90087b9ea7101c74/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aed8118ae20515974650d08eb724150dc2e20c2814bcc307089569995e88a14", size = 384447 },
{ url = "https://files.pythonhosted.org/packages/4b/15/0596ef7529828e33a6c81ecf5013d1dd33a511a3e0be0561f83079cda227/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:af9d4fd79ee1cc8e7caf693ee02737daabfc0fcf2773ca0a4735b356c8ad6f7c", size = 402502 },
{ url = "https://files.pythonhosted.org/packages/c3/8d/986af3c42f8454a6cafff8729d99fb178ae9b08a9816325ac7a8fa57c0c0/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f0396e894bd1e66c74ecbc08b4f6a03dc331140942c4b1d345dd131b68574a60", size = 416651 },
{ url = "https://files.pythonhosted.org/packages/e9/9a/b4ec3629b7b447e896eec574469159b5b60b7781d3711c914748bf32de05/rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:59714ab0a5af25d723d8e9816638faf7f4254234decb7d212715c1aa71eee7be", size = 559460 },
{ url = "https://files.pythonhosted.org/packages/61/63/d1e127b40c3e4733b3a6f26ae7a063cdf2bc1caa5272c89075425c7d397a/rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:88051c3b7d5325409f433c5a40328fcb0685fc04e5db49ff936e910901d10114", size = 588072 },
{ url = "https://files.pythonhosted.org/packages/04/7e/8ffc71a8f6833d9c9fb999f5b0ee736b8b159fd66968e05c7afc2dbcd57e/rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:181bc29e59e5e5e6e9d63b143ff4d5191224d355e246b5a48c88ce6b35c4e466", size = 555083 },
{ url = "https://files.pythonhosted.org/packages/a8/fc/ef6386838e0e91d6ba79b741ccce6ca987e89619aa86f418fecf381eba23/rpds_py-0.27.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9ad08547995a57e74fea6abaf5940d399447935faebbd2612b3b0ca6f987946b", size = 371849 },
{ url = "https://files.pythonhosted.org/packages/2c/f8/f30394aff811bc0f13fab8d8e4b9f880fcb678234eb0af7d2c4b6232f44f/rpds_py-0.27.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:61490d57e82e23b45c66f96184237994bfafa914433b8cd1a9bb57fecfced59d", size = 356437 },
{ url = "https://files.pythonhosted.org/packages/87/56/ed704fc668c9abc56d3686b723e4d6f2585597daf4b68b654ade7c97930d/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7cf5e726b6fa977e428a61880fb108a62f28b6d0c7ef675b117eaff7076df49", size = 382247 },
{ url = "https://files.pythonhosted.org/packages/48/55/6ef2c9b7caae3c1c360d9556a70979e16f21bfb1e94f50f481d224f3b8aa/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dc662bc9375a6a394b62dfd331874c434819f10ee3902123200dbcf116963f89", size = 397223 },
{ url = "https://files.pythonhosted.org/packages/63/04/8fc2059411daaca733155fc2613cc91dc728d7abe31fd0c0fa4c7ec5ff1a/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:299a245537e697f28a7511d01038c310ac74e8ea213c0019e1fc65f52c0dcb23", size = 516308 },
{ url = "https://files.pythonhosted.org/packages/a4/d0/b79d3fe07c47bfa989139e692f85371f5a0e1376696b173dabe7ac77b7d1/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:be3964f7312ea05ed283b20f87cb533fdc555b2e428cc7be64612c0b2124f08c", size = 401967 },
{ url = "https://files.pythonhosted.org/packages/cd/b1/55014f6da5ec8029d1d7d7d2a884b9d7ad7f217e05bb9cb782f06d8209c4/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33ba649a6e55ae3808e4c39e01580dc9a9b0d5b02e77b66bb86ef117922b1264", size = 384584 },
{ url = "https://files.pythonhosted.org/packages/86/34/5c5c1a8550ac172dd6cd53925c321363d94b2a1f0b3173743dbbfd87b8ec/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:81f81bbd7cdb4bdc418c09a73809abeda8f263a6bf8f9c7f93ed98b5597af39d", size = 401879 },
{ url = "https://files.pythonhosted.org/packages/35/07/009bbc659388c4c5a256f05f56df207633cda2f5d61a8d54c50c427e435e/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:11e8e28c0ba0373d052818b600474cfee2fafa6c9f36c8587d217b13ee28ca7d", size = 416908 },
{ url = "https://files.pythonhosted.org/packages/7a/cc/8949c13dc5a05d955cb88909bfac4004805974dec7b0d02543de55e43272/rpds_py-0.27.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:e3acb9c16530362aeaef4e84d57db357002dc5cbfac9a23414c3e73c08301ab2", size = 559105 },
{ url = "https://files.pythonhosted.org/packages/ea/40/574da2033b01d6e2e7fa3b021993321565c6634f9d0021707d210ce35b58/rpds_py-0.27.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:2e307cb5f66c59ede95c00e93cd84190a5b7f3533d7953690b2036780622ba81", size = 588335 },
{ url = "https://files.pythonhosted.org/packages/1d/83/72ed1ce357d8c63bde0bba2458a502e7cc4e150e272139161e1d205a9d67/rpds_py-0.27.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:f09c9d4c26fa79c1bad927efb05aca2391350b8e61c38cbc0d7d3c814e463124", size = 555094 },
{ url = "https://files.pythonhosted.org/packages/6f/15/fc639de53b3798340233f37959d252311b30d1834b65a02741e3373407fa/rpds_py-0.27.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:af22763a0a1eff106426a6e1f13c4582e0d0ad89c1493ab6c058236174cd6c6a", size = 230031 },
]

[[package]]
name = "ruff"
version = "0.12.7"
version = "0.12.5"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a1/81/0bd3594fa0f690466e41bd033bdcdf86cba8288345ac77ad4afbe5ec743a/ruff-0.12.7.tar.gz", hash = "sha256:1fc3193f238bc2d7968772c82831a4ff69252f673be371fb49663f0068b7ec71", size = 5197814 }
sdist = { url = "https://files.pythonhosted.org/packages/30/cd/01015eb5034605fd98d829c5839ec2c6b4582b479707f7c1c2af861e8258/ruff-0.12.5.tar.gz", hash = "sha256:b209db6102b66f13625940b7f8c7d0f18e20039bb7f6101fbdac935c9612057e", size = 5170722 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/d2/6cb35e9c85e7a91e8d22ab32ae07ac39cc34a71f1009a6f9e4a2a019e602/ruff-0.12.7-py3-none-linux_armv6l.whl", hash = "sha256:76e4f31529899b8c434c3c1dede98c4483b89590e15fb49f2d46183801565303", size = 11852189 },
{ url = "https://files.pythonhosted.org/packages/63/5b/a4136b9921aa84638f1a6be7fb086f8cad0fde538ba76bda3682f2599a2f/ruff-0.12.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:789b7a03e72507c54fb3ba6209e4bb36517b90f1a3569ea17084e3fd295500fb", size = 12519389 },
{ url = "https://files.pythonhosted.org/packages/a8/c9/3e24a8472484269b6b1821794141f879c54645a111ded4b6f58f9ab0705f/ruff-0.12.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e1c2a3b8626339bb6369116e7030a4cf194ea48f49b64bb505732a7fce4f4e3", size = 11743384 },
{ url = "https://files.pythonhosted.org/packages/26/7c/458dd25deeb3452c43eaee853c0b17a1e84169f8021a26d500ead77964fd/ruff-0.12.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32dec41817623d388e645612ec70d5757a6d9c035f3744a52c7b195a57e03860", size = 11943759 },
{ url = "https://files.pythonhosted.org/packages/7f/8b/658798472ef260ca050e400ab96ef7e85c366c39cf3dfbef4d0a46a528b6/ruff-0.12.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47ef751f722053a5df5fa48d412dbb54d41ab9b17875c6840a58ec63ff0c247c", size = 11654028 },
{ url = "https://files.pythonhosted.org/packages/a8/86/9c2336f13b2a3326d06d39178fd3448dcc7025f82514d1b15816fe42bfe8/ruff-0.12.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a828a5fc25a3efd3e1ff7b241fd392686c9386f20e5ac90aa9234a5faa12c423", size = 13225209 },
{ url = "https://files.pythonhosted.org/packages/76/69/df73f65f53d6c463b19b6b312fd2391dc36425d926ec237a7ed028a90fc1/ruff-0.12.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5726f59b171111fa6a69d82aef48f00b56598b03a22f0f4170664ff4d8298efb", size = 14182353 },
{ url = "https://files.pythonhosted.org/packages/58/1e/de6cda406d99fea84b66811c189b5ea139814b98125b052424b55d28a41c/ruff-0.12.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74e6f5c04c4dd4aba223f4fe6e7104f79e0eebf7d307e4f9b18c18362124bccd", size = 13631555 },
{ url = "https://files.pythonhosted.org/packages/6f/ae/625d46d5164a6cc9261945a5e89df24457dc8262539ace3ac36c40f0b51e/ruff-0.12.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d0bfe4e77fba61bf2ccadf8cf005d6133e3ce08793bbe870dd1c734f2699a3e", size = 12667556 },
{ url = "https://files.pythonhosted.org/packages/55/bf/9cb1ea5e3066779e42ade8d0cd3d3b0582a5720a814ae1586f85014656b6/ruff-0.12.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06bfb01e1623bf7f59ea749a841da56f8f653d641bfd046edee32ede7ff6c606", size = 12939784 },
{ url = "https://files.pythonhosted.org/packages/55/7f/7ead2663be5627c04be83754c4f3096603bf5e99ed856c7cd29618c691bd/ruff-0.12.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e41df94a957d50083fd09b916d6e89e497246698c3f3d5c681c8b3e7b9bb4ac8", size = 11771356 },
{ url = "https://files.pythonhosted.org/packages/17/40/a95352ea16edf78cd3a938085dccc55df692a4d8ba1b3af7accbe2c806b0/ruff-0.12.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4000623300563c709458d0ce170c3d0d788c23a058912f28bbadc6f905d67afa", size = 11612124 },
{ url = "https://files.pythonhosted.org/packages/4d/74/633b04871c669e23b8917877e812376827c06df866e1677f15abfadc95cb/ruff-0.12.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:69ffe0e5f9b2cf2b8e289a3f8945b402a1b19eff24ec389f45f23c42a3dd6fb5", size = 12479945 },
{ url = "https://files.pythonhosted.org/packages/be/34/c3ef2d7799c9778b835a76189c6f53c179d3bdebc8c65288c29032e03613/ruff-0.12.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a07a5c8ffa2611a52732bdc67bf88e243abd84fe2d7f6daef3826b59abbfeda4", size = 12998677 },
{ url = "https://files.pythonhosted.org/packages/77/ab/aca2e756ad7b09b3d662a41773f3edcbd262872a4fc81f920dc1ffa44541/ruff-0.12.7-py3-none-win32.whl", hash = "sha256:c928f1b2ec59fb77dfdf70e0419408898b63998789cc98197e15f560b9e77f77", size = 11756687 },
{ url = "https://files.pythonhosted.org/packages/b4/71/26d45a5042bc71db22ddd8252ca9d01e9ca454f230e2996bb04f16d72799/ruff-0.12.7-py3-none-win_amd64.whl", hash = "sha256:9c18f3d707ee9edf89da76131956aba1270c6348bfee8f6c647de841eac7194f", size = 12912365 },
{ url = "https://files.pythonhosted.org/packages/4c/9b/0b8aa09817b63e78d94b4977f18b1fcaead3165a5ee49251c5d5c245bb2d/ruff-0.12.7-py3-none-win_arm64.whl", hash = "sha256:dfce05101dbd11833a0776716d5d1578641b7fddb537fe7fa956ab85d1769b69", size = 11982083 },
{ url = "https://files.pythonhosted.org/packages/d4/de/ad2f68f0798ff15dd8c0bcc2889558970d9a685b3249565a937cd820ad34/ruff-0.12.5-py3-none-linux_armv6l.whl", hash = "sha256:1de2c887e9dec6cb31fcb9948299de5b2db38144e66403b9660c9548a67abd92", size = 11819133 },
{ url = "https://files.pythonhosted.org/packages/f8/fc/c6b65cd0e7fbe60f17e7ad619dca796aa49fbca34bb9bea5f8faf1ec2643/ruff-0.12.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d1ab65e7d8152f519e7dea4de892317c9da7a108da1c56b6a3c1d5e7cf4c5e9a", size = 12501114 },
{ url = "https://files.pythonhosted.org/packages/c5/de/c6bec1dce5ead9f9e6a946ea15e8d698c35f19edc508289d70a577921b30/ruff-0.12.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:962775ed5b27c7aa3fdc0d8f4d4433deae7659ef99ea20f783d666e77338b8cf", size = 11716873 },
{ url = "https://files.pythonhosted.org/packages/a1/16/cf372d2ebe91e4eb5b82a2275c3acfa879e0566a7ac94d331ea37b765ac8/ruff-0.12.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73b4cae449597e7195a49eb1cdca89fd9fbb16140c7579899e87f4c85bf82f73", size = 11958829 },
{ url = "https://files.pythonhosted.org/packages/25/bf/cd07e8f6a3a6ec746c62556b4c4b79eeb9b0328b362bb8431b7b8afd3856/ruff-0.12.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b13489c3dc50de5e2d40110c0cce371e00186b880842e245186ca862bf9a1ac", size = 11626619 },
{ url = "https://files.pythonhosted.org/packages/d8/c9/c2ccb3b8cbb5661ffda6925f81a13edbb786e623876141b04919d1128370/ruff-0.12.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1504fea81461cf4841778b3ef0a078757602a3b3ea4b008feb1308cb3f23e08", size = 13221894 },
{ url = "https://files.pythonhosted.org/packages/6b/58/68a5be2c8e5590ecdad922b2bcd5583af19ba648f7648f95c51c3c1eca81/ruff-0.12.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c7da4129016ae26c32dfcbd5b671fe652b5ab7fc40095d80dcff78175e7eddd4", size = 14163909 },
{ url = "https://files.pythonhosted.org/packages/bd/d1/ef6b19622009ba8386fdb792c0743f709cf917b0b2f1400589cbe4739a33/ruff-0.12.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca972c80f7ebcfd8af75a0f18b17c42d9f1ef203d163669150453f50ca98ab7b", size = 13583652 },
{ url = "https://files.pythonhosted.org/packages/62/e3/1c98c566fe6809a0c83751d825a03727f242cdbe0d142c9e292725585521/ruff-0.12.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dbbf9f25dfb501f4237ae7501d6364b76a01341c6f1b2cd6764fe449124bb2a", size = 12700451 },
{ url = "https://files.pythonhosted.org/packages/24/ff/96058f6506aac0fbc0d0fc0d60b0d0bd746240a0594657a2d94ad28033ba/ruff-0.12.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c47dea6ae39421851685141ba9734767f960113d51e83fd7bb9958d5be8763a", size = 12937465 },
{ url = "https://files.pythonhosted.org/packages/eb/d3/68bc5e7ab96c94b3589d1789f2dd6dd4b27b263310019529ac9be1e8f31b/ruff-0.12.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5076aa0e61e30f848846f0265c873c249d4b558105b221be1828f9f79903dc5", size = 11771136 },
{ url = "https://files.pythonhosted.org/packages/52/75/7356af30a14584981cabfefcf6106dea98cec9a7af4acb5daaf4b114845f/ruff-0.12.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a5a4c7830dadd3d8c39b1cc85386e2c1e62344f20766be6f173c22fb5f72f293", size = 11601644 },
{ url = "https://files.pythonhosted.org/packages/c2/67/91c71d27205871737cae11025ee2b098f512104e26ffd8656fd93d0ada0a/ruff-0.12.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:46699f73c2b5b137b9dc0fc1a190b43e35b008b398c6066ea1350cce6326adcb", size = 12478068 },
{ url = "https://files.pythonhosted.org/packages/34/04/b6b00383cf2f48e8e78e14eb258942fdf2a9bf0287fbf5cdd398b749193a/ruff-0.12.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a655a0a0d396f0f072faafc18ebd59adde8ca85fb848dc1b0d9f024b9c4d3bb", size = 12991537 },
{ url = "https://files.pythonhosted.org/packages/3e/b9/053d6445dc7544fb6594785056d8ece61daae7214859ada4a152ad56b6e0/ruff-0.12.5-py3-none-win32.whl", hash = "sha256:dfeb2627c459b0b78ca2bbdc38dd11cc9a0a88bf91db982058b26ce41714ffa9", size = 11751575 },
{ url = "https://files.pythonhosted.org/packages/bc/0f/ab16e8259493137598b9149734fec2e06fdeda9837e6f634f5c4e35916da/ruff-0.12.5-py3-none-win_amd64.whl", hash = "sha256:ae0d90cf5f49466c954991b9d8b953bd093c32c27608e409ae3564c63c5306a5", size = 12882273 },
{ url = "https://files.pythonhosted.org/packages/00/db/c376b0661c24cf770cb8815268190668ec1330eba8374a126ceef8c72d55/ruff-0.12.5-py3-none-win_arm64.whl", hash = "sha256:48cdbfc633de2c5c37d9f090ba3b352d1576b0015bfc3bc98eaf230275b7e805", size = 11951564 },
]

[[package]]
@@ -5850,18 +5508,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/70/22/e8fc1bf9cdecc439b7ddc28a45b976a8c699a38874c070749d855696368a/tiktoken-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:26242ca9dc8b58e875ff4ca078b9a94d2f0813e6a535dcd2205df5d49d927cc7", size = 894215 },
]

[[package]]
name = "tinycss2"
version = "1.4.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "webencodings" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7a/fd/7a5ee21fd08ff70d3d33a5781c255cbe779659bd03278feb98b19ee550f4/tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7", size = 87085 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610 },
]

[[package]]
name = "tokenizers"
version = "0.21.4"
@@ -6154,15 +5800,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 },
]

[[package]]
name = "webencodings"
version = "0.5.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0b/02/ae6ceac1baeda530866a85075641cec12989bd8d31af6d5ab4a3e8c92f47/webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", size = 9721 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774 },
]

[[package]]
name = "werkzeug"
version = "3.1.3"