chore: release v0.1.14

chore: consolidate essential fixes and add pre-commit hooks
- Add pre-commit configuration with ruff and black
- Fix lint CI job to use uv tool install instead of sync
- Add essential LlamaIndex dependencies to leann-core

Co-Authored-By: Yichuan Wang <73766326+yichuan-w@users.noreply.github.com>
2025-07-27 08:50:56 +00:00 · 2025-07-27 01:24:24 -07:00 · 2025-07-26 22:38:13 -07:00 · 2025-07-26 21:51:14 -07:00 · 2025-07-26 21:47:55 -07:00 · 2025-07-26 21:46:02 -07:00
59 changed files with 5954 additions and 5190 deletions
--- a/.github/workflows/build-reusable.yml
+++ b/.github/workflows/build-reusable.yml
@@ -10,7 +10,36 @@ on:
        default: ''

 jobs:
+  lint:  # gate job: `build` below declares `needs: lint`
+    name: Lint and Format Check
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.ref }}
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install ruff  # NOTE(review): unpinned here, while .pre-commit-config.yaml pins ruff v0.2.1 — confirm the drift is acceptable
+        run: |
+          uv tool install ruff
+
+      - name: Run ruff check
+        run: |
+          ruff check .
+
+      - name: Run ruff format check  # --check only reports; it does not rewrite files
+        run: |
+          ruff format --check .
+
  build:
+    needs: lint
    name: Build ${{ matrix.os }} Python ${{ matrix.python }}
    strategy:
      matrix:
--- a/.github/workflows/release-manual.yml
+++ b/.github/workflows/release-manual.yml
@@ -22,11 +22,17 @@ jobs:
      
      - name: Validate version
+        env:
+          # Hand the workflow input to the script through the environment instead of
+          # expanding it inside the script text, so its contents are never parsed as shell code.
+          INPUT_VERSION: ${{ inputs.version }}
        run: |
-          if ! [[ "${{ inputs.version }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
-            echo "❌ Invalid version format"
+          # Remove 'v' prefix if present for validation
+          VERSION_CLEAN="${INPUT_VERSION#v}"
+          if ! [[ "$VERSION_CLEAN" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+            echo "❌ Invalid version format. Expected format: X.Y.Z or vX.Y.Z"
            exit 1
          fi
-          echo "✅ Version format valid"
+          echo "✅ Version format valid: $INPUT_VERSION"
      
      - name: Update versions and push
        id: push
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,23 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks  # general file-hygiene hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+      - id: check-merge-conflict
+      - id: debug-statements
+
+  - repo: https://github.com/psf/black
+    rev: 24.1.1
+    hooks:
+      - id: black  # NOTE(review): ruff-format below is also a formatter — confirm both are meant to run
+        language_version: python3
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.2.1  # NOTE(review): the CI lint job installs ruff unpinned (`uv tool install ruff`) — versions may drift
+    hooks:
+      - id: ruff
+        args: [--fix]  # apply ruff's auto-fixes at commit time
+      - id: ruff-format
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@
    The smallest vector index in the world. RAG Everything with LEANN!
 </h2>

-LEANN is a revolutionary vector database that democratizes personal AI. Transform your laptop into a powerful RAG system that can index and search through millions of documents while using **97% less storage** than traditional solutions **without accuracy loss**.
+LEANN is an innovative vector database that democratizes personal AI. Transform your laptop into a powerful RAG system that can index and search through millions of documents while using **97% less storage** than traditional solutions **without accuracy loss**.

 LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. [Illustration Fig →](#️-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276)

--- a/demo.ipynb
+++ b/demo.ipynb
@@ -4,7 +4,11 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# Quick Start in 30s"
+    "# Quick Start in 30s\n",
+    "\n",
+    "**Home GitHub Repository:** [LEANN on GitHub](https://github.com/yichuan-w/LEANN)\n",
+    "\n",
+    "**Important for Colab users:** Set your runtime type to T4 GPU for optimal performance. Go to Runtime → Change runtime type → Hardware accelerator → T4 GPU."
   ]
  },
  {
@@ -14,11 +18,24 @@
   "outputs": [],
   "source": [
    "# install this if you are using colab\n",
-    "! pip install leann\n",
-    "\n",
+    "! uv pip install leann-core leann-backend-hnsw --no-deps\n",
+    "! uv pip install leann --no-deps\n",
    "# For Colab environment, we need to set some environment variables\n",
    "import os\n",
-    "os.environ['LEANN_LOG_LEVEL'] = 'INFO'  # Enable more detailed logging"
+    "\n",
+    "os.environ[\"LEANN_LOG_LEVEL\"] = \"INFO\"  # Enable more detailed logging"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "INDEX_DIR = Path(\"./\").resolve()\n",
+    "INDEX_PATH = str(INDEX_DIR / \"demo.leann\")"
   ]
  },
  {
@@ -32,17 +49,80 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Writing passages: 100%|██████████| 5/5 [00:00<00:00, 17077.79chunk/s]\n",
+      "Batches: 100%|██████████| 1/1 [00:00<00:00, 36.43it/s]\n",
+      "WARNING:leann_backend_hnsw.hnsw_backend:Converting data to float32, shape: (5, 768)\n",
+      "INFO:leann_backend_hnsw.hnsw_backend:INFO: Converting HNSW index to CSR-pruned format...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "M: 64 for level: 0\n",
+      "Starting conversion: index.index -> index.csr.tmp\n",
+      "[0.00s] Reading Index HNSW header...\n",
+      "[0.00s]   Header read: d=768, ntotal=5\n",
+      "[0.00s] Reading HNSW struct vectors...\n",
+      "  Reading vector (dtype=<class 'numpy.float64'>, fmt='d')... Count=6, Bytes=48\n",
+      "[0.00s]   Read assign_probas (6)\n",
+      "  Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=7, Bytes=28\n",
+      "[0.14s]   Read cum_nneighbor_per_level (7)\n",
+      "  Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=5, Bytes=20\n",
+      "[0.24s]   Read levels (5)\n",
+      "[0.33s]   Probing for compact storage flag...\n",
+      "[0.33s]   Found compact flag: False\n",
+      "[0.33s]   Compact flag is False, reading original format...\n",
+      "[0.33s]   Probing for potential extra byte before non-compact offsets...\n",
+      "[0.33s]   Found and consumed an unexpected 0x00 byte.\n",
+      "  Reading vector (dtype=<class 'numpy.uint64'>, fmt='Q')... Count=6, Bytes=48\n",
+      "[0.33s]   Read offsets (6)\n",
+      "[0.41s]   Attempting to read neighbors vector...\n",
+      "  Reading vector (dtype=<class 'numpy.int32'>, fmt='i')... Count=320, Bytes=1280\n",
+      "[0.41s]   Read neighbors (320)\n",
+      "[0.54s]   Read scalar params (ep=4, max_lvl=0)\n",
+      "[0.54s] Checking for storage data...\n",
+      "[0.54s]   Found storage fourcc: 49467849.\n",
+      "[0.54s] Converting to CSR format...\n",
+      "[0.54s]   Conversion loop finished.                        \n",
+      "[0.54s] Running validation checks...\n",
+      "    Checking total valid neighbor count...\n",
+      "    OK: Total valid neighbors = 20\n",
+      "    Checking final pointer indices...\n",
+      "    OK: Final pointers match data size.\n",
+      "[0.54s] Deleting original neighbors and offsets arrays...\n",
+      "    CSR Stats: |data|=20, |level_ptr|=10\n",
+      "[0.63s] Writing CSR HNSW graph data in FAISS-compatible order...\n",
+      "   Pruning embeddings: Writing NULL storage marker.\n",
+      "[0.71s] Conversion complete.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:leann_backend_hnsw.hnsw_backend:✅ CSR conversion successful.\n",
+      "INFO:leann_backend_hnsw.hnsw_backend:INFO: Replaced original index with CSR-pruned version at 'index.index'\n"
+     ]
+    }
+   ],
   "source": [
    "from leann.api import LeannBuilder\n",
    "\n",
    "builder = LeannBuilder(backend_name=\"hnsw\")\n",
    "builder.add_text(\"C# is a powerful programming language and it is good at game development\")\n",
-    "builder.add_text(\"Python is a powerful programming language and it is good at machine learning tasks\")\n",
+    "builder.add_text(\n",
+    "    \"Python is a powerful programming language and it is good at machine learning tasks\"\n",
+    ")\n",
    "builder.add_text(\"Machine learning transforms industries\")\n",
    "builder.add_text(\"Neural networks process complex data\")\n",
    "builder.add_text(\"Leann is a great storage saving engine for RAG on your MacBook\")\n",
-    "builder.build_index(\"knowledge.leann\")"
+    "builder.build_index(INDEX_PATH)"
   ]
  },
  {
@@ -56,11 +136,85 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:leann.api:🔍 LeannSearcher.search() called:\n",
+      "INFO:leann.api:  Query: 'programming languages'\n",
+      "INFO:leann.api:  Top_k: 2\n",
+      "INFO:leann.api:  Additional kwargs: {}\n",
+      "INFO:leann.embedding_server_manager:Port 5557 has incompatible server, trying next port...\n",
+      "INFO:leann.embedding_server_manager:Port 5558 has incompatible server, trying next port...\n",
+      "INFO:leann.embedding_server_manager:Port 5559 has incompatible server, trying next port...\n",
+      "INFO:leann.embedding_server_manager:Port 5560 has incompatible server, trying next port...\n",
+      "INFO:leann.embedding_server_manager:Port 5561 has incompatible server, trying next port...\n",
+      "INFO:leann.embedding_server_manager:Port 5562 has incompatible server, trying next port...\n",
+      "INFO:leann.embedding_server_manager:Starting embedding server on port 5563...\n",
+      "INFO:leann.embedding_server_manager:Command: /Users/yichuan/Desktop/code/test_leann_pip/LEANN/.venv/bin/python -m leann_backend_hnsw.hnsw_embedding_server --zmq-port 5563 --model-name facebook/contriever --passages-file /Users/yichuan/Desktop/code/test_leann_pip/LEANN/content/index.meta.json\n",
+      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "To disable this warning, you can either:\n",
+      "\t- Avoid using `tokenizers` before the fork if possible\n",
+      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
+      "INFO:leann.embedding_server_manager:Server process started with PID: 31699\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[read_HNSW - CSR NL v4] Reading metadata & CSR indices (manual offset)...\n",
+      "[read_HNSW NL v4] Read levels vector, size: 5\n",
+      "[read_HNSW NL v4] Reading Compact Storage format indices...\n",
+      "[read_HNSW NL v4] Read compact_level_ptr, size: 10\n",
+      "[read_HNSW NL v4] Read compact_node_offsets, size: 6\n",
+      "[read_HNSW NL v4] Read entry_point: 4, max_level: 0\n",
+      "[read_HNSW NL v4] Read storage fourcc: 0x6c6c756e\n",
+      "[read_HNSW NL v4 FIX] Detected FileIOReader. Neighbors size field offset: 326\n",
+      "[read_HNSW NL v4] Reading neighbors data into memory.\n",
+      "[read_HNSW NL v4] Read neighbors data, size: 20\n",
+      "[read_HNSW NL v4] Finished reading metadata and CSR indices.\n",
+      "INFO: Skipping external storage loading, since is_recompute is true.\n",
+      "INFO: Registering backend 'hnsw'\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"<frozen runpy>\", line 198, in _run_module_as_main\n",
+      "  File \"<frozen runpy>\", line 88, in _run_code\n",
+      "  File \"/Users/yichuan/Desktop/code/test_leann_pip/LEANN/.venv/lib/python3.11/site-packages/leann_backend_hnsw/hnsw_embedding_server.py\", line 323, in <module>\n",
+      "    create_hnsw_embedding_server(\n",
+      "  File \"/Users/yichuan/Desktop/code/test_leann_pip/LEANN/.venv/lib/python3.11/site-packages/leann_backend_hnsw/hnsw_embedding_server.py\", line 98, in create_hnsw_embedding_server\n",
+      "    passages = PassageManager(passage_sources)\n",
+      "               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/Users/yichuan/Desktop/code/test_leann_pip/LEANN/.venv/lib/python3.11/site-packages/leann/api.py\", line 127, in __init__\n",
+      "    raise FileNotFoundError(f\"Passage index file not found: {index_file}\")\n",
+      "FileNotFoundError: Passage index file not found: /Users/yichuan/Desktop/code/test_leann_pip/LEANN/index.passages.idx\n",
+      "ERROR:leann.embedding_server_manager:Server terminated during startup.\n"
+     ]
+    },
+    {
+     "ename": "RuntimeError",
+     "evalue": "Failed to start embedding server on port 5563",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mRuntimeError\u001b[39m                              Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mleann\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mapi\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LeannSearcher\n\u001b[32m      3\u001b[39m searcher = LeannSearcher(\u001b[33m\"\u001b[39m\u001b[33mindex\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m results = \u001b[43msearcher\u001b[49m\u001b[43m.\u001b[49m\u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mprogramming languages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_k\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m2\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m      5\u001b[39m results\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/code/test_leann_pip/LEANN/.venv/lib/python3.11/site-packages/leann/api.py:439\u001b[39m, in \u001b[36mLeannSearcher.search\u001b[39m\u001b[34m(self, query, top_k, complexity, beam_width, prune_ratio, recompute_embeddings, pruning_strategy, expected_zmq_port, **kwargs)\u001b[39m\n\u001b[32m    437\u001b[39m start_time = time.time()\n\u001b[32m    438\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m recompute_embeddings:\n\u001b[32m--> \u001b[39m\u001b[32m439\u001b[39m     zmq_port = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbackend_impl\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_ensure_server_running\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    440\u001b[39m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmeta_path_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    441\u001b[39m \u001b[43m        \u001b[49m\u001b[43mport\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexpected_zmq_port\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    442\u001b[39m \u001b[43m        \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    443\u001b[39m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    444\u001b[39m     \u001b[38;5;28;01mdel\u001b[39;00m expected_zmq_port\n\u001b[32m    445\u001b[39m zmq_time = time.time() - start_time\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/code/test_leann_pip/LEANN/.venv/lib/python3.11/site-packages/leann/searcher_base.py:81\u001b[39m, in \u001b[36mBaseSearcher._ensure_server_running\u001b[39m\u001b[34m(self, passages_source_file, port, **kwargs)\u001b[39m\n\u001b[32m     72\u001b[39m server_started, actual_port = \u001b[38;5;28mself\u001b[39m.embedding_server_manager.start_server(\n\u001b[32m     73\u001b[39m     port=port,\n\u001b[32m     74\u001b[39m     model_name=\u001b[38;5;28mself\u001b[39m.embedding_model,\n\u001b[32m   (...)\u001b[39m\u001b[32m     78\u001b[39m     enable_warmup=kwargs.get(\u001b[33m\"\u001b[39m\u001b[33menable_warmup\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m),\n\u001b[32m     79\u001b[39m )\n\u001b[32m     80\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m server_started:\n\u001b[32m---> \u001b[39m\u001b[32m81\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[32m     82\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFailed to start embedding server on port \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mactual_port\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m     83\u001b[39m     )\n\u001b[32m     85\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m actual_port\n",
+      "\u001b[31mRuntimeError\u001b[39m: Failed to start embedding server on port 5563"
+     ]
+    }
+   ],
   "source": [
    "from leann.api import LeannSearcher\n",
    "\n",
-    "searcher = LeannSearcher(\"knowledge.leann\")\n",
+    "searcher = LeannSearcher(INDEX_PATH)\n",
    "results = searcher.search(\"programming languages\", top_k=2)\n",
    "results"
   ]
@@ -85,11 +239,11 @@
    "    \"model\": \"Qwen/Qwen3-0.6B\",\n",
    "}\n",
    "\n",
-    "chat = LeannChat(index_path=\"knowledge.leann\", llm_config=llm_config)\n",
+    "chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)\n",
    "response = chat.ask(\n",
    "    \"Compare the two retrieved programming languages and tell me their advantages.\",\n",
    "    top_k=2,\n",
-    "    llm_kwargs={\"max_tokens\": 128}\n",
+    "    llm_kwargs={\"max_tokens\": 128},\n",
    ")\n",
    "response"
   ]
--- a/examples/compare_faiss_vs_leann.py
+++ b/examples/compare_faiss_vs_leann.py
@@ -3,14 +3,15 @@
 Memory comparison between Faiss HNSW and LEANN HNSW backend
 """

+import gc
 import logging
 import os
+import subprocess
 import sys
 import time
-import psutil
-import gc
-import subprocess
 from pathlib import Path
+
+import psutil
 from llama_index.core.node_parser import SentenceSplitter

 # Setup logging
@@ -83,9 +84,7 @@ def test_faiss_hnsw():

        for line in lines:
            if "Peak Memory:" in line:
-                peak_memory = float(
-                    line.split("Peak Memory:")[1].split("MB")[0].strip()
-                )
+                peak_memory = float(line.split("Peak Memory:")[1].split("MB")[0].strip())

        return {"peak_memory": peak_memory}

@@ -111,9 +110,8 @@ def test_leann_hnsw():

    tracker.checkpoint("After imports")

+    from leann.api import LeannBuilder
    from llama_index.core import SimpleDirectoryReader
-    from leann.api import LeannBuilder, LeannSearcher
-

    # Load and parse documents
    documents = SimpleDirectoryReader(
@@ -197,16 +195,14 @@ def test_leann_hnsw():
    runtime_start_mem = get_memory_usage()
    print(f"Before load memory: {runtime_start_mem:.1f} MB")
    tracker.checkpoint("Before load memory")
-    
+
    # Load searcher
    searcher = LeannSearcher(index_path)
    tracker.checkpoint("After searcher loading")

-
-
    print("Running search queries...")
    queries = [
-        "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面，任务令一般在什么城市颁发",
+        "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面,任务令一般在什么城市颁发",
        "What is LEANN and how does it work?",
        "华为诺亚方舟实验室的主要研究内容",
    ]
@@ -304,21 +300,15 @@ def main():

        print("\nLEANN vs Faiss Performance:")
        memory_saving = faiss_results["peak_memory"] - leann_results["peak_memory"]
-        print(
-            f"  Search Memory: {memory_ratio:.1f}x less ({memory_saving:.1f} MB saved)"
-        )
+        print(f"  Search Memory: {memory_ratio:.1f}x less ({memory_saving:.1f} MB saved)")

        # Storage comparison
        if leann_storage_size > faiss_storage_size:
            storage_ratio = leann_storage_size / faiss_storage_size
-            print(
-                f"  Storage Size: {storage_ratio:.1f}x larger (LEANN uses more storage)"
-            )
+            print(f"  Storage Size: {storage_ratio:.1f}x larger (LEANN uses more storage)")
        elif faiss_storage_size > leann_storage_size:
            storage_ratio = faiss_storage_size / leann_storage_size
-            print(
-                f"  Storage Size: {storage_ratio:.1f}x smaller (LEANN uses less storage)"
-            )
+            print(f"  Storage Size: {storage_ratio:.1f}x smaller (LEANN uses less storage)")
        else:
            print("  Storage Size: similar")
    else:
--- a/examples/document_search.py
+++ b/examples/document_search.py
@@ -3,37 +3,44 @@
 Document search demo with recompute mode
 """

-import os
-from pathlib import Path
 import shutil
 import time
+from pathlib import Path

 # Import backend packages to trigger plugin registration
 try:
-    import leann_backend_diskann
-    import leann_backend_hnsw
+    import leann_backend_diskann  # noqa: F401
+    import leann_backend_hnsw  # noqa: F401
+
    print("INFO: Backend packages imported successfully.")
 except ImportError as e:
    print(f"WARNING: Could not import backend packages. Error: {e}")

 # Import upper-level API from leann-core
-from leann.api import LeannBuilder, LeannSearcher, LeannChat
+from leann.api import LeannBuilder, LeannChat, LeannSearcher


 def load_sample_documents():
    """Create sample documents for demonstration"""
    docs = [
-        {"title": "Intro to Python", "content": "Python is a high-level, interpreted language known for simplicity."},
+        {
+            "title": "Intro to Python",
+            "content": "Python is a high-level, interpreted language known for simplicity.",
+        },
        {"title": "ML Basics", "content": "Machine learning builds systems that learn from data."},
-        {"title": "Data Structures", "content": "Data structures like arrays, lists, and graphs organize data."},
+        {
+            "title": "Data Structures",
+            "content": "Data structures like arrays, lists, and graphs organize data.",
+        },
    ]
    return docs

+
 def main():
    print("==========================================================")
    print("=== Leann Document Search Demo (DiskANN + Recompute) ===")
    print("==========================================================")
-    
+
    INDEX_DIR = Path("./test_indices")
    INDEX_PATH = str(INDEX_DIR / "documents.diskann")
    BACKEND_TO_TEST = "diskann"
@@ -44,94 +51,96 @@ def main():

    # --- 1. Build index ---
    print(f"\n[PHASE 1] Building index using '{BACKEND_TO_TEST}' backend...")
-    
-    builder = LeannBuilder(
-        backend_name=BACKEND_TO_TEST, 
-        graph_degree=32, 
-        complexity=64
-    )
-    
+
+    builder = LeannBuilder(backend_name=BACKEND_TO_TEST, graph_degree=32, complexity=64)
+
    documents = load_sample_documents()
    print(f"Loaded {len(documents)} sample documents.")
    for doc in documents:
        builder.add_text(doc["content"], metadata={"title": doc["title"]})
-        
+
    builder.build_index(INDEX_PATH)
-    print(f"\nIndex built!")
+    print("\nIndex built!")

    # --- 2. Basic search demo ---
    print(f"\n[PHASE 2] Basic search using '{BACKEND_TO_TEST}' backend...")
    searcher = LeannSearcher(index_path=INDEX_PATH)
-    
+
    query = "What is machine learning?"
    print(f"\nQuery: '{query}'")
-    
+
    print("\n--- Basic search mode (PQ computation) ---")
    start_time = time.time()
    results = searcher.search(query, top_k=2)
    basic_time = time.time() - start_time
-    
+
    print(f"⏱️  Basic search time: {basic_time:.3f} seconds")
    print(">>> Basic search results <<<")
    for i, res in enumerate(results, 1):
-        print(f"  {i}. ID: {res.id}, Score: {res.score:.4f}, Text: '{res.text}', Metadata: {res.metadata}")
+        print(
+            f"  {i}. ID: {res.id}, Score: {res.score:.4f}, Text: '{res.text}', Metadata: {res.metadata}"
+        )

    # --- 3. Recompute search demo ---
-    print(f"\n[PHASE 3] Recompute search using embedding server...")
-    
+    print("\n[PHASE 3] Recompute search using embedding server...")
+
    print("\n--- Recompute search mode (get real embeddings via network) ---")
-    
+
    # Configure recompute parameters
    recompute_params = {
        "recompute_beighbor_embeddings": True,  # Enable network recomputation
-        "USE_DEFERRED_FETCH": False,           # Don't use deferred fetch
-        "skip_search_reorder": True,           # Skip search reordering
-        "dedup_node_dis": True,               # Enable node distance deduplication
-        "prune_ratio": 0.1,                   # Pruning ratio 10%
-        "batch_recompute": False,             # Don't use batch recomputation
-        "global_pruning": False,              # Don't use global pruning
-        "zmq_port": 5555,                     # ZMQ port
-        "embedding_model": "sentence-transformers/all-mpnet-base-v2"
+        "USE_DEFERRED_FETCH": False,  # Don't use deferred fetch
+        "skip_search_reorder": True,  # Skip search reordering
+        "dedup_node_dis": True,  # Enable node distance deduplication
+        "prune_ratio": 0.1,  # Pruning ratio 10%
+        "batch_recompute": False,  # Don't use batch recomputation
+        "global_pruning": False,  # Don't use global pruning
+        "zmq_port": 5555,  # ZMQ port
+        "embedding_model": "sentence-transformers/all-mpnet-base-v2",
    }
-    
+
    print("Recompute parameter configuration:")
    for key, value in recompute_params.items():
        print(f"  {key}: {value}")
-    
-    print(f"\n🔄 Executing Recompute search...")
+
+    print("\n🔄 Executing Recompute search...")
    try:
        start_time = time.time()
        recompute_results = searcher.search(query, top_k=2, **recompute_params)
        recompute_time = time.time() - start_time
-        
+
        print(f"⏱️  Recompute search time: {recompute_time:.3f} seconds")
        print(">>> Recompute search results <<<")
        for i, res in enumerate(recompute_results, 1):
-            print(f"  {i}. ID: {res.id}, Score: {res.score:.4f}, Text: '{res.text}', Metadata: {res.metadata}")
-        
+            print(
+                f"  {i}. ID: {res.id}, Score: {res.score:.4f}, Text: '{res.text}', Metadata: {res.metadata}"
+            )
+
        # Compare results
-        print(f"\n--- Result comparison ---")
+        print("\n--- Result comparison ---")
        print(f"Basic search time: {basic_time:.3f} seconds")
        print(f"Recompute time: {recompute_time:.3f} seconds")
-        
+
        print("\nBasic search vs Recompute results:")
        for i in range(min(len(results), len(recompute_results))):
            basic_score = results[i].score
            recompute_score = recompute_results[i].score
            score_diff = abs(basic_score - recompute_score)
-            print(f"  Position {i+1}: PQ={basic_score:.4f}, Recompute={recompute_score:.4f}, Difference={score_diff:.4f}")
-        
+            print(
+                f"  Position {i + 1}: PQ={basic_score:.4f}, Recompute={recompute_score:.4f}, Difference={score_diff:.4f}"
+            )
+
        if recompute_time > basic_time:
-            print(f"✅ Recompute mode working correctly (more accurate but slower)")
+            print("✅ Recompute mode working correctly (more accurate but slower)")
        else:
-            print(f"ℹ️  Recompute time is unusually fast, network recomputation may not be enabled")
-            
+            print("ℹ️  Recompute time is unusually fast, network recomputation may not be enabled")
+
    except Exception as e:
        print(f"❌ Recompute search failed: {e}")
        print("This usually indicates an embedding server connection issue")

    # --- 4. Chat demo ---
-    print(f"\n[PHASE 4] Starting chat session...")
+    print("\n[PHASE 4] Starting chat session...")
    chat = LeannChat(index_path=INDEX_PATH)
    chat_response = chat.ask(query)
    print(f"You: {query}")
@@ -143,4 +152,4 @@ def main():


 if __name__ == "__main__":
-    main()
+    main()
--- a/examples/email_data/LEANN_email_reader.py
+++ b/examples/email_data/LEANN_email_reader.py
@@ -1,11 +1,13 @@
-import os
 import email
+import os
 from pathlib import Path
-from typing import List, Any
+from typing import Any
+
 from llama_index.core import Document
 from llama_index.core.readers.base import BaseReader

-def find_all_messages_directories(root: str = None) -> List[Path]:
+
+def find_all_messages_directories(root: str | None = None) -> list[Path]:
    """
    Recursively find all 'Messages' directories under the given root.
    Returns a list of Path objects.
@@ -14,86 +16,97 @@ def find_all_messages_directories(root: str = None) -> List[Path]:
        # Auto-detect user's mail path
        home_dir = os.path.expanduser("~")
        root = os.path.join(home_dir, "Library", "Mail")
-    
+
    messages_dirs = []
-    for dirpath, dirnames, filenames in os.walk(root):
+    for dirpath, _dirnames, _filenames in os.walk(root):
        if os.path.basename(dirpath) == "Messages":
            messages_dirs.append(Path(dirpath))
    return messages_dirs

+
 class EmlxReader(BaseReader):
    """
    Apple Mail .emlx file reader with embedded metadata.
-    
+
    Reads individual .emlx files from Apple Mail's storage format.
    """
-    
+
    def __init__(self, include_html: bool = False) -> None:
        """
        Initialize.
-        
+
        Args:
            include_html: Whether to include HTML content in the email body (default: False)
        """
        self.include_html = include_html
-    
-    def load_data(self, input_dir: str, **load_kwargs: Any) -> List[Document]:
+
+    def load_data(self, input_dir: str, **load_kwargs: Any) -> list[Document]:
        """
        Load data from the input directory containing .emlx files.
-        
+
        Args:
            input_dir: Directory containing .emlx files
            **load_kwargs:
                max_count (int): Maximum amount of messages to read.
        """
-        docs: List[Document] = []
-        max_count = load_kwargs.get('max_count', 1000)
+        docs: list[Document] = []
+        max_count = load_kwargs.get("max_count", 1000)
        count = 0
-        
+
        # Walk through the directory recursively
        for dirpath, dirnames, filenames in os.walk(input_dir):
            # Skip hidden directories
            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
-            
+
            for filename in filenames:
                if count >= max_count:
                    break
-                    
+
                if filename.endswith(".emlx"):
                    filepath = os.path.join(dirpath, filename)
                    try:
                        # Read the .emlx file
-                        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                        with open(filepath, encoding="utf-8", errors="ignore") as f:
                            content = f.read()
-                        
+
                        # .emlx files have a length prefix followed by the email content
                        # The first line contains the length, followed by the email
-                        lines = content.split('\n', 1)
+                        lines = content.split("\n", 1)
                        if len(lines) >= 2:
                            email_content = lines[1]
-                            
+
                            # Parse the email using Python's email module
                            try:
                                msg = email.message_from_string(email_content)
-                                
+
                                # Extract email metadata
-                                subject = msg.get('Subject', 'No Subject')
-                                from_addr = msg.get('From', 'Unknown')
-                                to_addr = msg.get('To', 'Unknown')
-                                date = msg.get('Date', 'Unknown')
-                                
+                                subject = msg.get("Subject", "No Subject")
+                                from_addr = msg.get("From", "Unknown")
+                                to_addr = msg.get("To", "Unknown")
+                                date = msg.get("Date", "Unknown")
+
                                # Extract email body
                                body = ""
                                if msg.is_multipart():
                                    for part in msg.walk():
-                                        if part.get_content_type() == "text/plain" or part.get_content_type() == "text/html":
-                                            if part.get_content_type() == "text/html" and not self.include_html:
+                                        if (
+                                            part.get_content_type() == "text/plain"
+                                            or part.get_content_type() == "text/html"
+                                        ):
+                                            if (
+                                                part.get_content_type() == "text/html"
+                                                and not self.include_html
+                                            ):
                                                continue
-                                            body += part.get_payload(decode=True).decode('utf-8', errors='ignore')
+                                            body += part.get_payload(decode=True).decode(
+                                                "utf-8", errors="ignore"
+                                            )
                                            # break
                                else:
-                                    body = msg.get_payload(decode=True).decode('utf-8', errors='ignore')
-                                
+                                    body = msg.get_payload(decode=True).decode(
+                                        "utf-8", errors="ignore"
+                                    )
+
                                # Create document content with metadata embedded in text
                                doc_content = f"""
 [File]: {filename}
@@ -104,19 +117,19 @@ class EmlxReader(BaseReader):
 [EMAIL BODY Start]:
 {body}
 """
-                                
+
                                # No separate metadata - everything is in the text
                                doc = Document(text=doc_content, metadata={})
                                docs.append(doc)
                                count += 1
-                                
+
                            except Exception as e:
                                print(f"Error parsing email from {filepath}: {e}")
                                continue
-                                
+
                    except Exception as e:
                        print(f"Error reading file {filepath}: {e}")
                        continue
-        
+
        print(f"Loaded {len(docs)} email documents")
-        return docs 
+        return docs
--- a/examples/email_data/email.py
+++ b/examples/email_data/email.py
@@ -7,9 +7,9 @@ Contains simple parser for mbox files.

 import logging
 from pathlib import Path
-from typing import Any, Dict, List, Optional
-from fsspec import AbstractFileSystem
+from typing import Any

+from fsspec import AbstractFileSystem
 from llama_index.core.readers.base import BaseReader
 from llama_index.core.schema import Document

@@ -27,11 +27,7 @@ class MboxReader(BaseReader):
    """

    DEFAULT_MESSAGE_FORMAT: str = (
-        "Date: {_date}\n"
-        "From: {_from}\n"
-        "To: {_to}\n"
-        "Subject: {_subject}\n"
-        "Content: {_content}"
+        "Date: {_date}\nFrom: {_from}\nTo: {_to}\nSubject: {_subject}\nContent: {_content}"
    )

    def __init__(
@@ -45,9 +41,7 @@ class MboxReader(BaseReader):
        try:
            from bs4 import BeautifulSoup  # noqa
        except ImportError:
-            raise ImportError(
-                "`beautifulsoup4` package not found: `pip install beautifulsoup4`"
-            )
+            raise ImportError("`beautifulsoup4` package not found: `pip install beautifulsoup4`")

        super().__init__(*args, **kwargs)
        self.max_count = max_count
@@ -56,9 +50,9 @@ class MboxReader(BaseReader):
    def load_data(
        self,
        file: Path,
-        extra_info: Optional[Dict] = None,
-        fs: Optional[AbstractFileSystem] = None,
-    ) -> List[Document]:
+        extra_info: dict | None = None,
+        fs: AbstractFileSystem | None = None,
+    ) -> list[Document]:
        """Parse file into string."""
        # Import required libraries
        import mailbox
@@ -74,7 +68,7 @@ class MboxReader(BaseReader):
            )

        i = 0
-        results: List[str] = []
+        results: list[str] = []
        # Load file using mailbox
        bytes_parser = BytesParser(policy=default).parse
        mbox = mailbox.mbox(file, factory=bytes_parser)  # type: ignore
@@ -124,7 +118,7 @@ class MboxReader(BaseReader):
 class EmlxMboxReader(MboxReader):
    """
    EmlxMboxReader - Modified MboxReader that handles directories of .emlx files.
-    
+
    Extends MboxReader to work with Apple Mail's .emlx format by:
    1. Reading .emlx files from a directory
    2. Converting them to mbox format in memory
@@ -134,13 +128,13 @@ class EmlxMboxReader(MboxReader):
    def load_data(
        self,
        directory: Path,
-        extra_info: Optional[Dict] = None,
-        fs: Optional[AbstractFileSystem] = None,
-    ) -> List[Document]:
+        extra_info: dict | None = None,
+        fs: AbstractFileSystem | None = None,
+    ) -> list[Document]:
        """Parse .emlx files from directory into strings using MboxReader logic."""
-        import tempfile
        import os
-        
+        import tempfile
+
        if fs:
            logger.warning(
                "fs was specified but EmlxMboxReader doesn't support loading "
@@ -150,37 +144,37 @@ class EmlxMboxReader(MboxReader):
        # Find all .emlx files in the directory
        emlx_files = list(directory.glob("*.emlx"))
        logger.info(f"Found {len(emlx_files)} .emlx files in {directory}")
-        
+
        if not emlx_files:
            logger.warning(f"No .emlx files found in {directory}")
            return []

        # Create a temporary mbox file
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.mbox', delete=False) as temp_mbox:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".mbox", delete=False) as temp_mbox:
            temp_mbox_path = temp_mbox.name
-            
+
            # Convert .emlx files to mbox format
            for emlx_file in emlx_files:
                try:
                    # Read the .emlx file
-                    with open(emlx_file, 'r', encoding='utf-8', errors='ignore') as f:
+                    with open(emlx_file, encoding="utf-8", errors="ignore") as f:
                        content = f.read()
-                    
+
                    # .emlx format: first line is length, rest is email content
-                    lines = content.split('\n', 1)
+                    lines = content.split("\n", 1)
                    if len(lines) >= 2:
                        email_content = lines[1]  # Skip the length line
-                        
+
                        # Write to mbox format (each message starts with "From " and ends with blank line)
                        temp_mbox.write(f"From {emlx_file.name} {email_content}\n\n")
-                    
+
                except Exception as e:
                    logger.warning(f"Failed to process {emlx_file}: {e}")
                    continue
-            
+
            # Close the temporary file so MboxReader can read it
            temp_mbox.close()
-            
+
            try:
                # Use the parent MboxReader's logic to parse the mbox file
                return super().load_data(Path(temp_mbox_path), extra_info, fs)
@@ -188,5 +182,5 @@ class EmlxMboxReader(MboxReader):
                # Clean up temporary file
                try:
                    os.unlink(temp_mbox_path)
-                except:
-                    pass
+                except OSError:
+                    pass
--- a/examples/faiss_only.py
+++ b/examples/faiss_only.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 """Test only Faiss HNSW"""

+import os
 import sys
 import time
+
 import psutil
-import gc
-import os


 def get_memory_usage():
@@ -37,20 +37,20 @@ def main():
        import faiss
    except ImportError:
        print("Faiss is not installed.")
-        print("Please install it with `uv pip install faiss-cpu` and you can  then run this script again")
+        print(
+            "Please install it with `uv pip install faiss-cpu` and you can  then run this script again"
+        )
        sys.exit(1)

    from llama_index.core import (
-        SimpleDirectoryReader,
-        VectorStoreIndex,
-        StorageContext,
        Settings,
-        node_parser,
-        Document,
+        SimpleDirectoryReader,
+        StorageContext,
+        VectorStoreIndex,
    )
    from llama_index.core.node_parser import SentenceSplitter
-    from llama_index.vector_stores.faiss import FaissVectorStore
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+    from llama_index.vector_stores.faiss import FaissVectorStore

    tracker = MemoryTracker("Faiss HNSW")
    tracker.checkpoint("Initial")
@@ -90,8 +90,9 @@ def main():
                vector_store=vector_store, persist_dir="./storage_faiss"
            )
            from llama_index.core import load_index_from_storage
+
            index = load_index_from_storage(storage_context=storage_context)
-            print(f"Index loaded from ./storage_faiss")
+            print("Index loaded from ./storage_faiss")
            tracker.checkpoint("After loading existing index")
            index_loaded = True
        except Exception as e:
@@ -99,19 +100,18 @@ def main():
            print("Cleaning up corrupted index and building new one...")
            # Clean up corrupted index
            import shutil
+
            if os.path.exists("./storage_faiss"):
                shutil.rmtree("./storage_faiss")
-    
+
    if not index_loaded:
        print("Building new Faiss HNSW index...")
-        
+
        # Use the correct Faiss building pattern from the example
        vector_store = FaissVectorStore(faiss_index=faiss_index)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        index = VectorStoreIndex.from_documents(
-            documents, 
-            storage_context=storage_context,
-            transformations=[node_parser]
+            documents, storage_context=storage_context, transformations=[node_parser]
        )
        tracker.checkpoint("After index building")

@@ -124,10 +124,10 @@ def main():
    runtime_start_mem = get_memory_usage()
    print(f"Before load memory: {runtime_start_mem:.1f} MB")
    tracker.checkpoint("Before load memory")
-    
+
    query_engine = index.as_query_engine(similarity_top_k=20)
    queries = [
-        "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面，任务令一般在什么城市颁发",
+        "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面,任务令一般在什么城市颁发",
        "What is LEANN and how does it work?",
        "华为诺亚方舟实验室的主要研究内容",
    ]
@@ -141,7 +141,7 @@ def main():

    runtime_end_mem = get_memory_usage()
    runtime_overhead = runtime_end_mem - runtime_start_mem
-    
+
    peak_memory = tracker.summary()
    print(f"Peak Memory: {peak_memory:.1f} MB")
    print(f"Runtime Memory Overhead: {runtime_overhead:.1f} MB")
--- a/examples/google_history_reader_leann.py
+++ b/examples/google_history_reader_leann.py
@@ -1,15 +1,17 @@
-import os
-import asyncio
 import argparse
+import asyncio
+import os
+
 try:
    import dotenv
+
    dotenv.load_dotenv()
 except ModuleNotFoundError:
    # python-dotenv is not installed; skip loading environment variables
    dotenv = None
 from pathlib import Path
-from typing import List, Any
-from leann.api import LeannBuilder, LeannSearcher, LeannChat
+
+from leann.api import LeannBuilder, LeannChat
 from llama_index.core.node_parser import SentenceSplitter

 # dotenv.load_dotenv()  # handled above if python-dotenv is available
@@ -17,42 +19,45 @@ from llama_index.core.node_parser import SentenceSplitter
 # Default Chrome profile path
 DEFAULT_CHROME_PROFILE = os.path.expanduser("~/Library/Application Support/Google/Chrome/Default")

-def create_leann_index_from_multiple_chrome_profiles(profile_dirs: List[Path], index_path: str = "chrome_history_index.leann", max_count: int = -1):
+
+def create_leann_index_from_multiple_chrome_profiles(
+    profile_dirs: list[Path], index_path: str = "chrome_history_index.leann", max_count: int = -1
+):
    """
    Create LEANN index from multiple Chrome profile data sources.
-    
+
    Args:
        profile_dirs: List of Path objects pointing to Chrome profile directories
        index_path: Path to save the LEANN index
        max_count: Maximum number of history entries to process per profile
    """
    print("Creating LEANN index from multiple Chrome profile data sources...")
-    
+
    # Load documents using ChromeHistoryReader from history_data
    from history_data.history import ChromeHistoryReader
+
    reader = ChromeHistoryReader()
-    
+
    INDEX_DIR = Path(index_path).parent
-    
+
    if not INDEX_DIR.exists():
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        all_documents = []
        total_processed = 0
-        
+
        # Process each Chrome profile directory
        for i, profile_dir in enumerate(profile_dirs):
-            print(f"\nProcessing Chrome profile {i+1}/{len(profile_dirs)}: {profile_dir}")
-            
+            print(f"\nProcessing Chrome profile {i + 1}/{len(profile_dirs)}: {profile_dir}")
+
            try:
                documents = reader.load_data(
-                    chrome_profile_path=str(profile_dir),
-                    max_count=max_count
+                    chrome_profile_path=str(profile_dir), max_count=max_count
                )
                if documents:
                    print(f"Loaded {len(documents)} history documents from {profile_dir}")
                    all_documents.extend(documents)
                    total_processed += len(documents)
-                    
+
                    # Check if we've reached the max count
                    if max_count > 0 and total_processed >= max_count:
                        print(f"Reached max count of {max_count} documents")
@@ -62,18 +67,22 @@ def create_leann_index_from_multiple_chrome_profiles(profile_dirs: List[Path], i
            except Exception as e:
                print(f"Error processing {profile_dir}: {e}")
                continue
-        
+
        if not all_documents:
            print("No documents loaded from any source. Exiting.")
            # highlight info that you need to close all chrome browser before running this script and high light the instruction!!
-            print("\033[91mYou need to close or quit all chrome browser before running this script\033[0m")
+            print(
+                "\033[91mYou need to close or quit all chrome browser before running this script\033[0m"
+            )
            return None
-        
-        print(f"\nTotal loaded {len(all_documents)} history documents from {len(profile_dirs)} profiles")
-        
+
+        print(
+            f"\nTotal loaded {len(all_documents)} history documents from {len(profile_dirs)} profiles"
+        )
+
        # Create text splitter with 256 chunk size
        text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=128)
-        
+
        # Convert Documents to text strings and chunk them
        all_texts = []
        for doc in all_documents:
@@ -83,45 +92,48 @@ def create_leann_index_from_multiple_chrome_profiles(profile_dirs: List[Path], i
                text = node.get_content()
                # text = '[Title] ' + doc.metadata["title"] + '\n' + text
                all_texts.append(text)
-        
+
        print(f"Created {len(all_texts)} text chunks from {len(all_documents)} documents")
-        
+
        # Create LEANN index directory
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        INDEX_DIR.mkdir(exist_ok=True)

-        print(f"--- Building new LEANN index ---")
-        
-        print(f"\n[PHASE 1] Building Leann index...")
+        print("--- Building new LEANN index ---")
+
+        print("\n[PHASE 1] Building Leann index...")

        # Use HNSW backend for better macOS compatibility
        builder = LeannBuilder(
            backend_name="hnsw",
-            embedding_model="text-embedding-3-small",
-            embedding_mode="openai",
-            
-            graph_degree=32, 
+            embedding_model="facebook/contriever",
+            graph_degree=32,
            complexity=64,
-            is_compact=False,
-            is_recompute=False,
-            num_threads=1  # Force single-threaded mode
+            is_compact=True,
+            is_recompute=True,
+            num_threads=1,  # Force single-threaded mode
        )

        print(f"Adding {len(all_texts)} history chunks to index...")
        for chunk_text in all_texts:
            builder.add_text(chunk_text)
-            
+
        builder.build_index(index_path)
        print(f"\nLEANN index built at {index_path}!")
    else:
        print(f"--- Using existing index at {INDEX_DIR} ---")
-    
+
    return index_path

-def create_leann_index(profile_path: str = None, index_path: str = "chrome_history_index.leann", max_count: int = 1000):
+
+def create_leann_index(
+    profile_path: str | None = None,
+    index_path: str = "chrome_history_index.leann",
+    max_count: int = 1000,
+):
    """
    Create LEANN index from Chrome history data.
-    
+
    Args:
        profile_path: Path to the Chrome profile directory (optional, uses default if None)
        index_path: Path to save the LEANN index
@@ -129,33 +141,31 @@ def create_leann_index(profile_path: str = None, index_path: str = "chrome_histo
    """
    print("Creating LEANN index from Chrome history data...")
    INDEX_DIR = Path(index_path).parent
-    
+
    if not INDEX_DIR.exists():
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        INDEX_DIR.mkdir(exist_ok=True)

-        print(f"--- Building new LEANN index ---")
-        
-        print(f"\n[PHASE 1] Building Leann index...")
+        print("--- Building new LEANN index ---")
+
+        print("\n[PHASE 1] Building Leann index...")

        # Load documents using ChromeHistoryReader from history_data
        from history_data.history import ChromeHistoryReader
+
        reader = ChromeHistoryReader()
-        
-        documents = reader.load_data(
-            chrome_profile_path=profile_path,
-            max_count=max_count
-        )
-        
+
+        documents = reader.load_data(chrome_profile_path=profile_path, max_count=max_count)
+
        if not documents:
            print("No documents loaded. Exiting.")
            return None
-        
+
        print(f"Loaded {len(documents)} history documents")
-        
+
        # Create text splitter with 256 chunk size
        text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=25)
-        
+
        # Convert Documents to text strings and chunk them
        all_texts = []
        for doc in documents:
@@ -163,54 +173,55 @@ def create_leann_index(profile_path: str = None, index_path: str = "chrome_histo
            nodes = text_splitter.get_nodes_from_documents([doc])
            for node in nodes:
                all_texts.append(node.get_content())
-        
+
        print(f"Created {len(all_texts)} text chunks from {len(documents)} documents")
-        
+
        # Create LEANN index directory
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        INDEX_DIR.mkdir(exist_ok=True)

-        print(f"--- Building new LEANN index ---")
-        
-        print(f"\n[PHASE 1] Building Leann index...")
+        print("--- Building new LEANN index ---")
+
+        print("\n[PHASE 1] Building Leann index...")

        # Use HNSW backend for better macOS compatibility
        builder = LeannBuilder(
            backend_name="hnsw",
            embedding_model="facebook/contriever",
-            graph_degree=32, 
+            graph_degree=32,
            complexity=64,
            is_compact=True,
            is_recompute=True,
-            num_threads=1  # Force single-threaded mode
+            num_threads=1,  # Force single-threaded mode
        )

        print(f"Adding {len(all_texts)} history chunks to index...")
        for chunk_text in all_texts:
            builder.add_text(chunk_text)
-            
+
        builder.build_index(index_path)
        print(f"\nLEANN index built at {index_path}!")
    else:
        print(f"--- Using existing index at {INDEX_DIR} ---")
-    
+
    return index_path

+
 async def query_leann_index(index_path: str, query: str):
    """
    Query the LEANN index.
-    
+
    Args:
        index_path: Path to the LEANN index
        query: The query string
    """
-    print(f"\n[PHASE 2] Starting Leann chat session...")
+    print("\n[PHASE 2] Starting Leann chat session...")
    chat = LeannChat(index_path=index_path)
-    
+
    print(f"You: {query}")
    chat_response = chat.ask(
-        query, 
-        top_k=10, 
+        query,
+        top_k=10,
        recompute_beighbor_embeddings=True,
        complexity=32,
        beam_width=1,
@@ -219,40 +230,60 @@ async def query_leann_index(index_path: str, query: str):
            "model": "gpt-4o",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
-        llm_kwargs={
-            "temperature": 0.0,
-            "max_tokens": 1000
-        }
+        llm_kwargs={"temperature": 0.0, "max_tokens": 1000},
    )

    print(f"Leann chat response: \033[36m{chat_response}\033[0m")

+
 async def main():
    # Parse command line arguments
-    parser = argparse.ArgumentParser(description='LEANN Chrome History Reader - Create and query browser history index')
-    parser.add_argument('--chrome-profile', type=str, default=DEFAULT_CHROME_PROFILE,
-                       help=f'Path to Chrome profile directory (default: {DEFAULT_CHROME_PROFILE}), usually you dont need to change this')
-    parser.add_argument('--index-dir', type=str, default="./google_history_index",
-                       help='Directory to store the LEANN index (default: ./chrome_history_index_leann_test)')
-    parser.add_argument('--max-entries', type=int, default=1000,
-                       help='Maximum number of history entries to process (default: 1000)')
-    parser.add_argument('--query', type=str, default=None,
-                       help='Single query to run (default: runs example queries)')
-    parser.add_argument('--auto-find-profiles', action='store_true', default=True,
-                       help='Automatically find all Chrome profiles (default: True)')
-    
+    parser = argparse.ArgumentParser(
+        description="LEANN Chrome History Reader - Create and query browser history index"
+    )
+    parser.add_argument(
+        "--chrome-profile",
+        type=str,
+        default=DEFAULT_CHROME_PROFILE,
+        help=f"Path to Chrome profile directory (default: {DEFAULT_CHROME_PROFILE}), usually you dont need to change this",
+    )
+    parser.add_argument(
+        "--index-dir",
+        type=str,
+        default="./google_history_index",
+        help="Directory to store the LEANN index (default: ./chrome_history_index_leann_test)",
+    )
+    parser.add_argument(
+        "--max-entries",
+        type=int,
+        default=1000,
+        help="Maximum number of history entries to process (default: 1000)",
+    )
+    parser.add_argument(
+        "--query",
+        type=str,
+        default=None,
+        help="Single query to run (default: runs example queries)",
+    )
+    parser.add_argument(
+        "--auto-find-profiles",
+        action="store_true",
+        default=True,
+        help="Automatically find all Chrome profiles (default: True)",
+    )
+
    args = parser.parse_args()
-    
+
    INDEX_DIR = Path(args.index_dir)
    INDEX_PATH = str(INDEX_DIR / "chrome_history.leann")
-    
+
    print(f"Using Chrome profile: {args.chrome_profile}")
    print(f"Index directory: {INDEX_DIR}")
    print(f"Max entries: {args.max_entries}")
-    
+
    # Find Chrome profile directories
    from history_data.history import ChromeHistoryReader
-    
+
    if args.auto_find_profiles:
        profile_dirs = ChromeHistoryReader.find_chrome_profiles()
        if not profile_dirs:
@@ -265,10 +296,12 @@ async def main():
            print(f"Chrome profile not found: {profile_path}")
            return
        profile_dirs = [profile_path]
-    
+
    # Create or load the LEANN index from all sources
-    index_path = create_leann_index_from_multiple_chrome_profiles(profile_dirs, INDEX_PATH, args.max_entries)
-    
+    index_path = create_leann_index_from_multiple_chrome_profiles(
+        profile_dirs, INDEX_PATH, args.max_entries
+    )
+
    if index_path:
        if args.query:
            # Run single query
@@ -277,12 +310,13 @@ async def main():
            # Example queries
            queries = [
                "What websites did I visit about machine learning?",
-                "Find my search history about programming"
+                "Find my search history about programming",
            ]
-            
+
            for query in queries:
-                print("\n" + "="*60)
+                print("\n" + "=" * 60)
                await query_leann_index(index_path, query)

+
 if __name__ == "__main__":
-    asyncio.run(main()) 
+    asyncio.run(main())
--- a/examples/history_data/init.py
+++ b/examples/history_data/init.py
@@ -1,3 +1,3 @@
 from .history import ChromeHistoryReader

-__all__ = ['ChromeHistoryReader'] 
+__all__ = ["ChromeHistoryReader"]
--- a/examples/history_data/history.py
+++ b/examples/history_data/history.py
@@ -1,77 +1,81 @@
-import sqlite3
 import os
+import sqlite3
 from pathlib import Path
-from typing import List, Any
+from typing import Any
+
 from llama_index.core import Document
 from llama_index.core.readers.base import BaseReader

+
 class ChromeHistoryReader(BaseReader):
    """
    Chrome browser history reader that extracts browsing data from SQLite database.
-    
+
    Reads Chrome history from the default Chrome profile location and creates documents
    with embedded metadata similar to the email reader structure.
    """
-    
+
    def __init__(self) -> None:
        """Initialize."""
        pass
-    
-    def load_data(self, input_dir: str = None, **load_kwargs: Any) -> List[Document]:
+
+    def load_data(self, input_dir: str | None = None, **load_kwargs: Any) -> list[Document]:
        """
        Load Chrome history data from the default Chrome profile location.
-        
+
        Args:
            input_dir: Not used for Chrome history (kept for compatibility)
            **load_kwargs:
                max_count (int): Maximum amount of history entries to read.
                chrome_profile_path (str): Custom path to Chrome profile directory.
        """
-        docs: List[Document] = []
-        max_count = load_kwargs.get('max_count', 1000)
-        chrome_profile_path = load_kwargs.get('chrome_profile_path', None)
-        
+        docs: list[Document] = []
+        max_count = load_kwargs.get("max_count", 1000)
+        chrome_profile_path = load_kwargs.get("chrome_profile_path", None)
+
        # Default Chrome profile path on macOS
        if chrome_profile_path is None:
-            chrome_profile_path = os.path.expanduser("~/Library/Application Support/Google/Chrome/Default")
-        
+            chrome_profile_path = os.path.expanduser(
+                "~/Library/Application Support/Google/Chrome/Default"
+            )
+
        history_db_path = os.path.join(chrome_profile_path, "History")
-        
+
        if not os.path.exists(history_db_path):
            print(f"Chrome history database not found at: {history_db_path}")
            return docs
-        
+
        try:
            # Connect to the Chrome history database
            print(f"Connecting to database: {history_db_path}")
            conn = sqlite3.connect(history_db_path)
            cursor = conn.cursor()
-            
+
            # Query to get browsing history with metadata (removed created_time column)
            query = """
-            SELECT 
+            SELECT
                datetime(last_visit_time/1000000-11644473600,'unixepoch','localtime') as last_visit,
-                url, 
-                title, 
-                visit_count, 
-                typed_count, 
+                url,
+                title,
+                visit_count,
+                typed_count,
                hidden
-            FROM urls 
+            FROM urls
            ORDER BY last_visit_time DESC
            """
-            
+
            print(f"Executing query on database: {history_db_path}")
            cursor.execute(query)
            rows = cursor.fetchall()
            print(f"Query returned {len(rows)} rows")
-            
+
            count = 0
            for row in rows:
                if count >= max_count and max_count > 0:
                    break
-                
+
                last_visit, url, title, visit_count, typed_count, hidden = row
-                
+
                # Create document content with metadata embedded in text
                doc_content = f"""
 [Title]: {title}
@@ -80,38 +84,38 @@ class ChromeHistoryReader(BaseReader):
 [Visit times]: {visit_count}
 [Typed times]: {typed_count}
 """
-                
+
                # Create document with embedded metadata
-                doc = Document(text=doc_content, metadata={ "title": title[0:150]})
+                doc = Document(text=doc_content, metadata={"title": title[0:150]})
                # if len(title) > 150:
                #     print(f"Title is too long: {title}")
                docs.append(doc)
                count += 1
-            
+
            conn.close()
            print(f"Loaded {len(docs)} Chrome history documents")
-            
+
        except Exception as e:
            print(f"Error reading Chrome history: {e}")
            return docs
-        
+
        return docs

    @staticmethod
-    def find_chrome_profiles() -> List[Path]:
+    def find_chrome_profiles() -> list[Path]:
        """
        Find all Chrome profile directories.
-        
+
        Returns:
            List of Path objects pointing to Chrome profile directories
        """
        chrome_base_path = Path(os.path.expanduser("~/Library/Application Support/Google/Chrome"))
        profile_dirs = []
-        
+
        if not chrome_base_path.exists():
            print(f"Chrome directory not found at: {chrome_base_path}")
            return profile_dirs
-        
+
        # Find all profile directories
        for profile_dir in chrome_base_path.iterdir():
            if profile_dir.is_dir() and profile_dir.name != "System Profile":
@@ -119,53 +123,59 @@ class ChromeHistoryReader(BaseReader):
                if history_path.exists():
                    profile_dirs.append(profile_dir)
                    print(f"Found Chrome profile: {profile_dir}")
-        
+
        print(f"Found {len(profile_dirs)} Chrome profiles")
        return profile_dirs

    @staticmethod
-    def export_history_to_file(output_file: str = "chrome_history_export.txt", max_count: int = 1000):
+    def export_history_to_file(
+        output_file: str = "chrome_history_export.txt", max_count: int = 1000
+    ):
        """
        Export Chrome history to a text file using the same SQL query format.
-        
+
        Args:
            output_file: Path to the output file
            max_count: Maximum number of entries to export
        """
-        chrome_profile_path = os.path.expanduser("~/Library/Application Support/Google/Chrome/Default")
+        chrome_profile_path = os.path.expanduser(
+            "~/Library/Application Support/Google/Chrome/Default"
+        )
        history_db_path = os.path.join(chrome_profile_path, "History")
-        
+
        if not os.path.exists(history_db_path):
            print(f"Chrome history database not found at: {history_db_path}")
            return
-        
+
        try:
            conn = sqlite3.connect(history_db_path)
            cursor = conn.cursor()
-            
+
            query = """
-            SELECT 
+            SELECT
                datetime(last_visit_time/1000000-11644473600,'unixepoch','localtime') as last_visit,
-                url, 
-                title, 
-                visit_count, 
-                typed_count, 
+                url,
+                title,
+                visit_count,
+                typed_count,
                hidden
-            FROM urls 
+            FROM urls
            ORDER BY last_visit_time DESC
            LIMIT ?
            """
-            
+
            cursor.execute(query, (max_count,))
            rows = cursor.fetchall()
-            
-            with open(output_file, 'w', encoding='utf-8') as f:
+
+            with open(output_file, "w", encoding="utf-8") as f:
                for row in rows:
                    last_visit, url, title, visit_count, typed_count, hidden = row
-                    f.write(f"{last_visit}\t{url}\t{title}\t{visit_count}\t{typed_count}\t{hidden}\n")
-            
+                    f.write(
+                        f"{last_visit}\t{url}\t{title}\t{visit_count}\t{typed_count}\t{hidden}\n"
+                    )
+
            conn.close()
            print(f"Exported {len(rows)} history entries to {output_file}")
-            
+
        except Exception as e:
-            print(f"Error exporting Chrome history: {e}") 
+            print(f"Error exporting Chrome history: {e}")
--- a/examples/history_data/wechat_history.py
+++ b/examples/history_data/wechat_history.py
@@ -2,30 +2,31 @@ import json
 import os
 import re
 import subprocess
-import sys
 import time
+from datetime import datetime
 from pathlib import Path
-from typing import List, Any, Dict, Optional
+from typing import Any
+
 from llama_index.core import Document
 from llama_index.core.readers.base import BaseReader
-from datetime import datetime
+

 class WeChatHistoryReader(BaseReader):
    """
    WeChat chat history reader that extracts chat data from exported JSON files.
-    
+
    Reads WeChat chat history from exported JSON files (from wechat-exporter tool)
    and creates documents with embedded metadata similar to the Chrome history reader structure.
-    
+
    Also includes utilities for automatic WeChat chat history export.
    """
-    
+
    def __init__(self) -> None:
        """Initialize."""
        self.packages_dir = Path(__file__).parent.parent.parent / "packages"
        self.wechat_exporter_dir = self.packages_dir / "wechat-exporter"
        self.wechat_decipher_dir = self.packages_dir / "wechat-decipher-macos"
-    
+
    def check_wechat_running(self) -> bool:
        """Check if WeChat is currently running."""
        try:
@@ -33,24 +34,30 @@ class WeChatHistoryReader(BaseReader):
            return result.returncode == 0
        except Exception:
            return False
-    
+
    def install_wechattweak(self) -> bool:
        """Install WeChatTweak CLI tool."""
        try:
            # Create wechat-exporter directory if it doesn't exist
            self.wechat_exporter_dir.mkdir(parents=True, exist_ok=True)
-            
+
            wechattweak_path = self.wechat_exporter_dir / "wechattweak-cli"
            if not wechattweak_path.exists():
                print("Downloading WeChatTweak CLI...")
-                subprocess.run([
-                    "curl", "-L", "-o", str(wechattweak_path),
-                    "https://github.com/JettChenT/WeChatTweak-CLI/releases/latest/download/wechattweak-cli"
-                ], check=True)
-            
+                subprocess.run(
+                    [
+                        "curl",
+                        "-L",
+                        "-o",
+                        str(wechattweak_path),
+                        "https://github.com/JettChenT/WeChatTweak-CLI/releases/latest/download/wechattweak-cli",
+                    ],
+                    check=True,
+                )
+
            # Make executable
            wechattweak_path.chmod(0o755)
-            
+
            # Install WeChatTweak
            print("Installing WeChatTweak...")
            subprocess.run(["sudo", str(wechattweak_path), "install"], check=True)
@@ -58,7 +65,7 @@ class WeChatHistoryReader(BaseReader):
        except Exception as e:
            print(f"Error installing WeChatTweak: {e}")
            return False
-    
+
    def restart_wechat(self):
        """Restart WeChat to apply WeChatTweak."""
        try:
@@ -69,302 +76,325 @@ class WeChatHistoryReader(BaseReader):
            time.sleep(5)  # Wait for WeChat to start
        except Exception as e:
            print(f"Error restarting WeChat: {e}")
-    
+
    def check_api_available(self) -> bool:
        """Check if WeChatTweak API is available."""
        try:
-            result = subprocess.run([
-                "curl", "-s", "http://localhost:48065/wechat/allcontacts"
-            ], capture_output=True, text=True, timeout=5)
+            result = subprocess.run(
+                ["curl", "-s", "http://localhost:48065/wechat/allcontacts"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
            return result.returncode == 0 and result.stdout.strip()
        except Exception:
            return False
-    

-
-    
    def _extract_readable_text(self, content: str) -> str:
        """
        Extract readable text from message content, removing XML and system messages.
-        
+
        Args:
            content: The raw message content (can be string or dict)
-            
+
        Returns:
            Cleaned, readable text
        """
        if not content:
            return ""
-        
+
        # Handle dictionary content (like quoted messages)
        if isinstance(content, dict):
            # Extract text from dictionary structure
            text_parts = []
-            if 'title' in content:
-                text_parts.append(str(content['title']))
-            if 'quoted' in content:
-                text_parts.append(str(content['quoted']))
-            if 'content' in content:
-                text_parts.append(str(content['content']))
-            if 'text' in content:
-                text_parts.append(str(content['text']))
-            
+            if "title" in content:
+                text_parts.append(str(content["title"]))
+            if "quoted" in content:
+                text_parts.append(str(content["quoted"]))
+            if "content" in content:
+                text_parts.append(str(content["content"]))
+            if "text" in content:
+                text_parts.append(str(content["text"]))
+
            if text_parts:
                return " | ".join(text_parts)
            else:
                # If we can't extract meaningful text from dict, return empty
                return ""
-        
+
        # Handle string content
        if not isinstance(content, str):
            return ""
-        
+
        # Remove common prefixes like "wxid_xxx:\n"
-        clean_content = re.sub(r'^wxid_[^:]+:\s*', '', content)
-        clean_content = re.sub(r'^[^:]+:\s*', '', clean_content)
-        
+        clean_content = re.sub(r"^wxid_[^:]+:\s*", "", content)
+        clean_content = re.sub(r"^[^:]+:\s*", "", clean_content)
+
        # If it's just XML or system message, return empty
-        if clean_content.strip().startswith('<') or 'recalled a message' in clean_content:
+        if clean_content.strip().startswith("<") or "recalled a message" in clean_content:
            return ""
-        
+
        return clean_content.strip()
-    
+
    def _is_text_message(self, content: str) -> bool:
        """
        Check if a message contains readable text content.
-        
+
        Args:
            content: The message content (can be string or dict)
-            
+
        Returns:
            True if the message contains readable text, False otherwise
        """
        if not content:
            return False
-        
+
        # Handle dictionary content
        if isinstance(content, dict):
            # Check if dict has any readable text fields
-            text_fields = ['title', 'quoted', 'content', 'text']
+            text_fields = ["title", "quoted", "content", "text"]
            for field in text_fields:
-                if field in content and content[field]:
+                if content.get(field):
                    return True
            return False
-        
+
        # Handle string content
        if not isinstance(content, str):
            return False
-        
+
        # Skip image messages (contain XML with img tags)
-        if '<img' in content and 'cdnurl' in content:
+        if "<img" in content and "cdnurl" in content:
            return False
-        
+
        # Skip emoji messages (contain emoji XML tags)
-        if '<emoji' in content and 'productid' in content:
+        if "<emoji" in content and "productid" in content:
            return False
-        
+
        # Skip voice messages
-        if '<voice' in content:
+        if "<voice" in content:
            return False
-        
+
        # Skip video messages
-        if '<video' in content:
+        if "<video" in content:
            return False
-        
+
        # Skip file messages
-        if '<appmsg' in content and 'appid' in content:
+        if "<appmsg" in content and "appid" in content:
            return False
-        
+
        # Skip system messages (like "recalled a message")
-        if 'recalled a message' in content:
+        if "recalled a message" in content:
            return False
-        
+
        # Check if there's actual readable text (not just XML or system messages)
        # Remove common prefixes like "wxid_xxx:\n" and check for actual content
-        clean_content = re.sub(r'^wxid_[^:]+:\s*', '', content)
-        clean_content = re.sub(r'^[^:]+:\s*', '', clean_content)
-        
+        clean_content = re.sub(r"^wxid_[^:]+:\s*", "", content)
+        clean_content = re.sub(r"^[^:]+:\s*", "", clean_content)
+
        # If after cleaning we have meaningful text, consider it readable
-        if len(clean_content.strip()) > 0 and not clean_content.strip().startswith('<'):
+        if len(clean_content.strip()) > 0 and not clean_content.strip().startswith("<"):
            return True
-        
+
        return False
-    
-    def _concatenate_messages(self, messages: List[Dict], max_length: int = 128, 
-                             time_window_minutes: int = 30, overlap_messages: int = 0) -> List[Dict]:
+
+    def _concatenate_messages(
+        self,
+        messages: list[dict],
+        max_length: int = 128,
+        time_window_minutes: int = 30,
+        overlap_messages: int = 0,
+    ) -> list[dict]:
        """
        Concatenate messages based on length and time rules.
-        
+
        Args:
            messages: List of message dictionaries
            max_length: Maximum length for concatenated message groups. Use -1 to disable length constraint.
            time_window_minutes: Time window in minutes to group messages together. Use -1 to disable time constraint.
            overlap_messages: Number of messages to overlap between consecutive groups
-            
+
        Returns:
            List of concatenated message groups
        """
        if not messages:
            return []
-        
+
        concatenated_groups = []
        current_group = []
        current_length = 0
        last_timestamp = None
-        
+
        for message in messages:
            # Extract message info
-            content = message.get('content', '')
-            message_text = message.get('message', '')
-            create_time = message.get('createTime', 0)
-            from_user = message.get('fromUser', '')
-            to_user = message.get('toUser', '')
-            is_sent_from_self = message.get('isSentFromSelf', False)
-            
+            content = message.get("content", "")
+            message_text = message.get("message", "")
+            create_time = message.get("createTime", 0)
+            message.get("fromUser", "")
+            message.get("toUser", "")
+            message.get("isSentFromSelf", False)
+
            # Extract readable text
            readable_text = self._extract_readable_text(content)
            if not readable_text:
                readable_text = message_text
-            
+
            # Skip empty messages
            if not readable_text.strip():
                continue
-            
+
            # Check time window constraint (only if time_window_minutes != -1)
            if time_window_minutes != -1 and last_timestamp is not None and create_time > 0:
                time_diff_minutes = (create_time - last_timestamp) / 60
                if time_diff_minutes > time_window_minutes:
                    # Time gap too large, start new group
                    if current_group:
-                        concatenated_groups.append({
-                            'messages': current_group,
-                            'total_length': current_length,
-                            'start_time': current_group[0].get('createTime', 0),
-                            'end_time': current_group[-1].get('createTime', 0)
-                        })
+                        concatenated_groups.append(
+                            {
+                                "messages": current_group,
+                                "total_length": current_length,
+                                "start_time": current_group[0].get("createTime", 0),
+                                "end_time": current_group[-1].get("createTime", 0),
+                            }
+                        )
                        # Keep last few messages for overlap
                        if overlap_messages > 0 and len(current_group) > overlap_messages:
                            current_group = current_group[-overlap_messages:]
-                            current_length = sum(len(self._extract_readable_text(msg.get('content', '')) or msg.get('message', '')) for msg in current_group)
+                            current_length = sum(
+                                len(
+                                    self._extract_readable_text(msg.get("content", ""))
+                                    or msg.get("message", "")
+                                )
+                                for msg in current_group
+                            )
                        else:
                            current_group = []
                            current_length = 0
-            
+
            # Check length constraint (only if max_length != -1)
            message_length = len(readable_text)
            if max_length != -1 and current_length + message_length > max_length and current_group:
                # Current group would exceed max length, save it and start new
-                concatenated_groups.append({
-                    'messages': current_group,
-                    'total_length': current_length,
-                    'start_time': current_group[0].get('createTime', 0),
-                    'end_time': current_group[-1].get('createTime', 0)
-                })
+                concatenated_groups.append(
+                    {
+                        "messages": current_group,
+                        "total_length": current_length,
+                        "start_time": current_group[0].get("createTime", 0),
+                        "end_time": current_group[-1].get("createTime", 0),
+                    }
+                )
                # Keep last few messages for overlap
                if overlap_messages > 0 and len(current_group) > overlap_messages:
                    current_group = current_group[-overlap_messages:]
-                    current_length = sum(len(self._extract_readable_text(msg.get('content', '')) or msg.get('message', '')) for msg in current_group)
+                    current_length = sum(
+                        len(
+                            self._extract_readable_text(msg.get("content", ""))
+                            or msg.get("message", "")
+                        )
+                        for msg in current_group
+                    )
                else:
                    current_group = []
                    current_length = 0
-            
+
            # Add message to current group
            current_group.append(message)
            current_length += message_length
            last_timestamp = create_time
-        
+
        # Add the last group if it exists
        if current_group:
-            concatenated_groups.append({
-                'messages': current_group,
-                'total_length': current_length,
-                'start_time': current_group[0].get('createTime', 0),
-                'end_time': current_group[-1].get('createTime', 0)
-            })
-        
+            concatenated_groups.append(
+                {
+                    "messages": current_group,
+                    "total_length": current_length,
+                    "start_time": current_group[0].get("createTime", 0),
+                    "end_time": current_group[-1].get("createTime", 0),
+                }
+            )
+
        return concatenated_groups
-    
-    def _create_concatenated_content(self, message_group: Dict, contact_name: str) -> str:
+
+    def _create_concatenated_content(self, message_group: dict, contact_name: str) -> str:
        """
        Create concatenated content from a group of messages.
-        
+
        Args:
            message_group: Dictionary containing messages and metadata
            contact_name: Name of the contact
-            
+
        Returns:
            Formatted concatenated content
        """
-        messages = message_group['messages']
-        start_time = message_group['start_time']
-        end_time = message_group['end_time']
-        
+        messages = message_group["messages"]
+        start_time = message_group["start_time"]
+        end_time = message_group["end_time"]
+
        # Format timestamps
        if start_time:
            try:
                start_timestamp = datetime.fromtimestamp(start_time)
-                start_time_str = start_timestamp.strftime('%Y-%m-%d %H:%M:%S')
-            except:
+                start_time_str = start_timestamp.strftime("%Y-%m-%d %H:%M:%S")
+            except (ValueError, OSError):
                start_time_str = str(start_time)
        else:
            start_time_str = "Unknown"
-        
+
        if end_time:
            try:
                end_timestamp = datetime.fromtimestamp(end_time)
-                end_time_str = end_timestamp.strftime('%Y-%m-%d %H:%M:%S')
-            except:
+                end_time_str = end_timestamp.strftime("%Y-%m-%d %H:%M:%S")
+            except (ValueError, OSError):
                end_time_str = str(end_time)
        else:
            end_time_str = "Unknown"
-        
+
        # Build concatenated message content
        message_parts = []
        for message in messages:
-            content = message.get('content', '')
-            message_text = message.get('message', '')
-            create_time = message.get('createTime', 0)
-            is_sent_from_self = message.get('isSentFromSelf', False)
-            
+            content = message.get("content", "")
+            message_text = message.get("message", "")
+            create_time = message.get("createTime", 0)
+            is_sent_from_self = message.get("isSentFromSelf", False)
+
            # Extract readable text
            readable_text = self._extract_readable_text(content)
            if not readable_text:
                readable_text = message_text
-            
+
            # Format individual message
            if create_time:
                try:
                    timestamp = datetime.fromtimestamp(create_time)
                    # change to YYYY-MM-DD HH:MM:SS
-                    time_str = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-                except:
+                    time_str = timestamp.strftime("%Y-%m-%d %H:%M:%S")
+                except (ValueError, OSError):
                    time_str = str(create_time)
            else:
                time_str = "Unknown"
-            
+
            sender = "[Me]" if is_sent_from_self else "[Contact]"
            message_parts.append(f"({time_str}) {sender}: {readable_text}")
-        
+
        concatenated_text = "\n".join(message_parts)
-        
+
        # Create final document content
        doc_content = f"""
 Contact: {contact_name}
 Time Range: {start_time_str} - {end_time_str}
-Messages ({len(messages)} messages, {message_group['total_length']} chars):
+Messages ({len(messages)} messages, {message_group["total_length"]} chars):

 {concatenated_text}
 """
-        # TODO @yichuan give better format and rich info here!    
+        # TODO @yichuan give better format and rich info here!
        doc_content = f"""
 {concatenated_text}
 """
        return doc_content, contact_name
-    
-    def load_data(self, input_dir: str = None, **load_kwargs: Any) -> List[Document]:
+
+    def load_data(self, input_dir: str | None = None, **load_kwargs: Any) -> list[Document]:
        """
        Load WeChat chat history data from exported JSON files.
-        
+
        Args:
            input_dir: Directory containing exported WeChat JSON files
            **load_kwargs:
@@ -376,97 +406,103 @@ Messages ({len(messages)} messages, {message_group['total_length']} chars):
                time_window_minutes (int): Time window in minutes to group messages together (default: 30).
                overlap_messages (int): Number of messages to overlap between consecutive groups (default: 2).
        """
-        docs: List[Document] = []
-        max_count = load_kwargs.get('max_count', 1000)
-        wechat_export_dir = load_kwargs.get('wechat_export_dir', None)
-        include_non_text = load_kwargs.get('include_non_text', False)
-        concatenate_messages = load_kwargs.get('concatenate_messages', False)
-        max_length = load_kwargs.get('max_length', 1000)
-        time_window_minutes = load_kwargs.get('time_window_minutes', 30)
-        
+        docs: list[Document] = []
+        max_count = load_kwargs.get("max_count", 1000)
+        wechat_export_dir = load_kwargs.get("wechat_export_dir", None)
+        include_non_text = load_kwargs.get("include_non_text", False)
+        concatenate_messages = load_kwargs.get("concatenate_messages", False)
+        load_kwargs.get("max_length", 1000)
+        load_kwargs.get("time_window_minutes", 30)
+
        # Default WeChat export path
        if wechat_export_dir is None:
            wechat_export_dir = "./wechat_export_test"
-        
+
        if not os.path.exists(wechat_export_dir):
            print(f"WeChat export directory not found at: {wechat_export_dir}")
            return docs
-        
+
        try:
            # Find all JSON files in the export directory
            json_files = list(Path(wechat_export_dir).glob("*.json"))
            print(f"Found {len(json_files)} WeChat chat history files")
-            
+
            count = 0
            for json_file in json_files:
                if count >= max_count and max_count > 0:
                    break
-                
+
                try:
-                    with open(json_file, 'r', encoding='utf-8') as f:
+                    with open(json_file, encoding="utf-8") as f:
                        chat_data = json.load(f)
-                    
+
                    # Extract contact name from filename
                    contact_name = json_file.stem
-                    
+
                    if concatenate_messages:
                        # Filter messages to only include readable text messages
                        readable_messages = []
                        for message in chat_data:
                            try:
-                                content = message.get('content', '')
+                                content = message.get("content", "")
                                if not include_non_text and not self._is_text_message(content):
                                    continue
-                                
+
                                readable_text = self._extract_readable_text(content)
                                if not readable_text and not include_non_text:
                                    continue
-                                
+
                                readable_messages.append(message)
                            except Exception as e:
                                print(f"Error processing message in {json_file}: {e}")
                                continue
-                        
+
                        # Concatenate messages based on rules
                        message_groups = self._concatenate_messages(
-                            readable_messages, 
-                            max_length=-1, 
+                            readable_messages,
+                            max_length=-1,
                            time_window_minutes=-1,
-                            overlap_messages=0  # Keep 2 messages overlap between groups
+                            overlap_messages=0,  # Keep 2 messages overlap between groups
                        )
-                        
+
                        # Create documents from concatenated groups
                        for message_group in message_groups:
                            if count >= max_count and max_count > 0:
                                break
-                            
-                            doc_content, contact_name  = self._create_concatenated_content(message_group, contact_name)
-                            doc = Document(text=doc_content, metadata={"contact_name": contact_name})
+
+                            doc_content, contact_name = self._create_concatenated_content(
+                                message_group, contact_name
+                            )
+                            doc = Document(
+                                text=doc_content, metadata={"contact_name": contact_name}
+                            )
                            docs.append(doc)
                            count += 1
-                        
-                        print(f"Created {len(message_groups)} concatenated message groups for {contact_name}")
-                        
+
+                        print(
+                            f"Created {len(message_groups)} concatenated message groups for {contact_name}"
+                        )
+
                    else:
                        # Original single-message processing
                        for message in chat_data:
                            if count >= max_count and max_count > 0:
                                break
-                            
+
                            # Extract message information
-                            from_user = message.get('fromUser', '')
-                            to_user = message.get('toUser', '')
-                            content = message.get('content', '')
-                            message_text = message.get('message', '')
-                            create_time = message.get('createTime', 0)
-                            is_sent_from_self = message.get('isSentFromSelf', False)
-                            
+                            message.get("fromUser", "")
+                            message.get("toUser", "")
+                            content = message.get("content", "")
+                            message_text = message.get("message", "")
+                            create_time = message.get("createTime", 0)
+                            is_sent_from_self = message.get("isSentFromSelf", False)
+
                            # Handle content that might be dict or string
                            try:
                                # Check if this is a readable text message
                                if not include_non_text and not self._is_text_message(content):
                                    continue
-                                
+
                                # Extract readable text
                                readable_text = self._extract_readable_text(content)
                                if not readable_text and not include_non_text:
@@ -475,17 +511,17 @@ Messages ({len(messages)} messages, {message_group['total_length']} chars):
                                # Skip messages that cause processing errors
                                print(f"Error processing message in {json_file}: {e}")
                                continue
-                            
+
                            # Convert timestamp to readable format
                            if create_time:
                                try:
                                    timestamp = datetime.fromtimestamp(create_time)
-                                    time_str = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-                                except:
+                                    time_str = timestamp.strftime("%Y-%m-%d %H:%M:%S")
+                                except (ValueError, OSError):
                                    time_str = str(create_time)
                            else:
                                time_str = "Unknown"
-                            
+
                            # Create document content with metadata header and contact info
                            doc_content = f"""
 Contact: {contact_name}
@@ -493,57 +529,64 @@ Is sent from self: {is_sent_from_self}
 Time: {time_str}
 Message: {readable_text if readable_text else message_text}
 """
-                            
+
                            # Create document with embedded metadata
                            doc = Document(text=doc_content, metadata={})
                            docs.append(doc)
                            count += 1
-                        
+
                except Exception as e:
                    print(f"Error reading {json_file}: {e}")
                    continue
-            
+
            print(f"Loaded {len(docs)} WeChat chat documents")
-            
+
        except Exception as e:
            print(f"Error reading WeChat history: {e}")
            return docs
-        
+
        return docs

    @staticmethod
-    def find_wechat_export_dirs() -> List[Path]:
+    def find_wechat_export_dirs() -> list[Path]:
        """
        Find all WeChat export directories.
-        
+
        Returns:
            List of Path objects pointing to WeChat export directories
        """
        export_dirs = []
-        
+
        # Look for common export directory names
        possible_dirs = [
            Path("./wechat_export_test"),
            Path("./wechat_export"),
            Path("./wechat_chat_history"),
-            Path("./chat_export")
+            Path("./chat_export"),
        ]
-        
+
        for export_dir in possible_dirs:
            if export_dir.exists() and export_dir.is_dir():
                json_files = list(export_dir.glob("*.json"))
                if json_files:
                    export_dirs.append(export_dir)
-                    print(f"Found WeChat export directory: {export_dir} with {len(json_files)} files")
-        
+                    print(
+                        f"Found WeChat export directory: {export_dir} with {len(json_files)} files"
+                    )
+
        print(f"Found {len(export_dirs)} WeChat export directories")
        return export_dirs

    @staticmethod
-    def export_chat_to_file(output_file: str = "wechat_chat_export.txt", max_count: int = 1000, export_dir: str = None, include_non_text: bool = False):
+    def export_chat_to_file(
+        output_file: str = "wechat_chat_export.txt",
+        max_count: int = 1000,
+        export_dir: str | None = None,
+        include_non_text: bool = False,
+    ):
        """
        Export WeChat chat history to a text file.
-        
+
        Args:
            output_file: Path to the output file
            max_count: Maximum number of entries to export
@@ -552,36 +595,36 @@ Message: {readable_text if readable_text else message_text}
        """
        if export_dir is None:
            export_dir = "./wechat_export_test"
-        
+
        if not os.path.exists(export_dir):
            print(f"WeChat export directory not found at: {export_dir}")
            return
-        
+
        try:
            json_files = list(Path(export_dir).glob("*.json"))
-            
-            with open(output_file, 'w', encoding='utf-8') as f:
+
+            with open(output_file, "w", encoding="utf-8") as f:
                count = 0
                for json_file in json_files:
                    if count >= max_count and max_count > 0:
                        break
-                    
+
                    try:
-                        with open(json_file, 'r', encoding='utf-8') as json_f:
+                        with open(json_file, encoding="utf-8") as json_f:
                            chat_data = json.load(json_f)
-                        
+
                        contact_name = json_file.stem
                        f.write(f"\n=== Chat with {contact_name} ===\n")
-                        
+
                        for message in chat_data:
                            if count >= max_count and max_count > 0:
                                break
-                            
-                            from_user = message.get('fromUser', '')
-                            content = message.get('content', '')
-                            message_text = message.get('message', '')
-                            create_time = message.get('createTime', 0)
-                            
+
+                            from_user = message.get("fromUser", "")
+                            content = message.get("content", "")
+                            message_text = message.get("message", "")
+                            create_time = message.get("createTime", 0)
+
                            # Skip non-text messages unless requested
                            if not include_non_text:
                                reader = WeChatHistoryReader()
@@ -591,83 +634,90 @@ Message: {readable_text if readable_text else message_text}
                                if not readable_text:
                                    continue
                                message_text = readable_text
-                            
+
                            if create_time:
                                try:
                                    timestamp = datetime.fromtimestamp(create_time)
-                                    time_str = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-                                except:
+                                    time_str = timestamp.strftime("%Y-%m-%d %H:%M:%S")
+                                except (ValueError, OSError):
                                    time_str = str(create_time)
                            else:
                                time_str = "Unknown"
-                            
+
                            f.write(f"[{time_str}] {from_user}: {message_text}\n")
                            count += 1
-                            
+
                    except Exception as e:
                        print(f"Error processing {json_file}: {e}")
                        continue
-            
+
            print(f"Exported {count} chat entries to {output_file}")
-            
+
        except Exception as e:
            print(f"Error exporting WeChat chat history: {e}")

-    def export_wechat_chat_history(self, export_dir: str = "./wechat_export_direct") -> Optional[Path]:
+    def export_wechat_chat_history(self, export_dir: str = "./wechat_export_direct") -> Path | None:
        """
        Export WeChat chat history using wechat-exporter tool.
-        
+
        Args:
            export_dir: Directory to save exported chat history
-            
+
        Returns:
            Path to export directory if successful, None otherwise
        """
        try:
            import subprocess
            import sys
-            
+
            # Create export directory
            export_path = Path(export_dir)
            export_path.mkdir(exist_ok=True)
-            
+
            print(f"Exporting WeChat chat history to {export_path}...")
-            
+
            # Check if wechat-exporter directory exists
            if not self.wechat_exporter_dir.exists():
                print(f"wechat-exporter directory not found at: {self.wechat_exporter_dir}")
                return None
-            
+
            # Install requirements if needed
            requirements_file = self.wechat_exporter_dir / "requirements.txt"
            if requirements_file.exists():
                print("Installing wechat-exporter requirements...")
-                subprocess.run([
-                    "uv", "pip", "install", "-r", str(requirements_file)
-                ], check=True)
-            
+                subprocess.run(["uv", "pip", "install", "-r", str(requirements_file)], check=True)
+
            # Run the export command
            print("Running wechat-exporter...")
-            result = subprocess.run([
-                sys.executable, str(self.wechat_exporter_dir / "main.py"), 
-                "export-all", str(export_path)
-            ], capture_output=True, text=True, check=True)
-            
+            result = subprocess.run(
+                [
+                    sys.executable,
+                    str(self.wechat_exporter_dir / "main.py"),
+                    "export-all",
+                    str(export_path),
+                ],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+
            print("Export command output:")
            print(result.stdout)
            if result.stderr:
                print("Export errors:")
                print(result.stderr)
-            
+
            # Check if export was successful
            if export_path.exists() and any(export_path.glob("*.json")):
                json_files = list(export_path.glob("*.json"))
-                print(f"Successfully exported {len(json_files)} chat history files to {export_path}")
+                print(
+                    f"Successfully exported {len(json_files)} chat history files to {export_path}"
+                )
                return export_path
            else:
                print("Export completed but no JSON files found")
                return None
-                
+
        except subprocess.CalledProcessError as e:
            print(f"Export command failed: {e}")
            print(f"Command output: {e.stdout}")
@@ -678,18 +728,18 @@ Message: {readable_text if readable_text else message_text}
            print("Please ensure WeChat is running and WeChatTweak is installed.")
            return None

-    def find_or_export_wechat_data(self, export_dir: str = "./wechat_export_direct") -> List[Path]:
+    def find_or_export_wechat_data(self, export_dir: str = "./wechat_export_direct") -> list[Path]:
        """
        Find existing WeChat exports or create new ones.
-        
+
        Args:
            export_dir: Directory to save exported chat history if needed
-            
+
        Returns:
            List of Path objects pointing to WeChat export directories
        """
        export_dirs = []
-        
+
        # Look for existing exports in common locations
        possible_export_dirs = [
            Path("./wechat_database_export"),
@@ -697,23 +747,25 @@ Message: {readable_text if readable_text else message_text}
            Path("./wechat_export"),
            Path("./wechat_export_direct"),
            Path("./wechat_chat_history"),
-            Path("./chat_export")
+            Path("./chat_export"),
        ]
-        
+
        for export_dir_path in possible_export_dirs:
            if export_dir_path.exists() and any(export_dir_path.glob("*.json")):
                export_dirs.append(export_dir_path)
                print(f"Found existing export: {export_dir_path}")
-        
+
        # If no existing exports, try to export automatically
        if not export_dirs:
            print("No existing WeChat exports found. Starting direct export...")
-            
+
            # Try to export using wechat-exporter
            exported_path = self.export_wechat_chat_history(export_dir)
            if exported_path:
                export_dirs = [exported_path]
            else:
-                print("Failed to export WeChat data. Please ensure WeChat is running and WeChatTweak is installed.")
-        
-        return export_dirs 
+                print(
+                    "Failed to export WeChat data. Please ensure WeChat is running and WeChatTweak is installed."
+                )
+
+        return export_dirs
--- a/examples/mail_reader_leann.py
+++ b/examples/mail_reader_leann.py
@@ -1,33 +1,42 @@
+import argparse
+import asyncio
 import os
 import sys
-import asyncio
-import dotenv
-import argparse
 from pathlib import Path
-from typing import List, Any
+
+import dotenv

 # Add the project root to Python path so we can import from examples
 project_root = Path(__file__).parent.parent
 sys.path.insert(0, str(project_root))

-from leann.api import LeannBuilder, LeannSearcher, LeannChat
+from leann.api import LeannBuilder, LeannChat
 from llama_index.core.node_parser import SentenceSplitter

 dotenv.load_dotenv()

+
 # Auto-detect user's mail path
 def get_mail_path():
    """Get the mail path for the current user"""
    home_dir = os.path.expanduser("~")
    return os.path.join(home_dir, "Library", "Mail")

+
 # Default mail path for macOS
 DEFAULT_MAIL_PATH = "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data"

-def create_leann_index_from_multiple_sources(messages_dirs: List[Path], index_path: str = "mail_index.leann", max_count: int = -1, include_html: bool = False, embedding_model: str = "facebook/contriever"):
+
+def create_leann_index_from_multiple_sources(
+    messages_dirs: list[Path],
+    index_path: str = "mail_index.leann",
+    max_count: int = -1,
+    include_html: bool = False,
+    embedding_model: str = "facebook/contriever",
+):
    """
    Create LEANN index from multiple mail data sources.
-    
+
    Args:
        messages_dirs: List of Path objects pointing to Messages directories
        index_path: Path to save the LEANN index
@@ -35,31 +44,32 @@ def create_leann_index_from_multiple_sources(messages_dirs: List[Path], index_pa
        include_html: Whether to include HTML content in email processing
    """
    print("Creating LEANN index from multiple mail data sources...")
-    
+
    # Load documents using EmlxReader from LEANN_email_reader
    from examples.email_data.LEANN_email_reader import EmlxReader
+
    reader = EmlxReader(include_html=include_html)
    # from email_data.email import EmlxMboxReader
    # from pathlib import Path
    # reader = EmlxMboxReader()
    INDEX_DIR = Path(index_path).parent
-    
+
    if not INDEX_DIR.exists():
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        all_documents = []
        total_processed = 0
-        
+
        # Process each Messages directory
        for i, messages_dir in enumerate(messages_dirs):
-            print(f"\nProcessing Messages directory {i+1}/{len(messages_dirs)}: {messages_dir}")
-            
+            print(f"\nProcessing Messages directory {i + 1}/{len(messages_dirs)}: {messages_dir}")
+
            try:
                documents = reader.load_data(messages_dir)
                if documents:
                    print(f"Loaded {len(documents)} email documents from {messages_dir}")
                    all_documents.extend(documents)
                    total_processed += len(documents)
-                    
+
                    # Check if we've reached the max count
                    if max_count > 0 and total_processed >= max_count:
                        print(f"Reached max count of {max_count} documents")
@@ -69,16 +79,18 @@ def create_leann_index_from_multiple_sources(messages_dirs: List[Path], index_pa
            except Exception as e:
                print(f"Error processing {messages_dir}: {e}")
                continue
-        
+
        if not all_documents:
            print("No documents loaded from any source. Exiting.")
            return None
-        
-        print(f"\nTotal loaded {len(all_documents)} email documents from {len(messages_dirs)} directories and starting to split them into chunks")
-        
+
+        print(
+            f"\nTotal loaded {len(all_documents)} email documents from {len(messages_dirs)} directories and starting to split them into chunks"
+        )
+
        # Create text splitter with 256 chunk size
        text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=25)
-        
+
        # Convert Documents to text strings and chunk them
        all_texts = []
        for doc in all_documents:
@@ -88,44 +100,53 @@ def create_leann_index_from_multiple_sources(messages_dirs: List[Path], index_pa
                text = node.get_content()
                # text = '[subject] ' + doc.metadata["subject"] + '\n' + text
                all_texts.append(text)
-        
-        print(f"Finished splitting {len(all_documents)} documents into {len(all_texts)} text chunks")
-        
+
+        print(
+            f"Finished splitting {len(all_documents)} documents into {len(all_texts)} text chunks"
+        )
+
        # Create LEANN index directory

-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        INDEX_DIR.mkdir(exist_ok=True)

-        print(f"--- Building new LEANN index ---")
-        
-        print(f"\n[PHASE 1] Building Leann index...")
+        print("--- Building new LEANN index ---")
+
+        print("\n[PHASE 1] Building Leann index...")

        # Use HNSW backend for better macOS compatibility
        builder = LeannBuilder(
            backend_name="hnsw",
            embedding_model=embedding_model,
-            graph_degree=32, 
+            graph_degree=32,
            complexity=64,
            is_compact=True,
            is_recompute=True,
-            num_threads=1  # Force single-threaded mode
+            num_threads=1,  # Force single-threaded mode
        )

        print(f"Adding {len(all_texts)} email chunks to index...")
        for chunk_text in all_texts:
            builder.add_text(chunk_text)
-            
+
        builder.build_index(index_path)
        print(f"\nLEANN index built at {index_path}!")
    else:
        print(f"--- Using existing index at {INDEX_DIR} ---")
-    
+
    return index_path

-def create_leann_index(mail_path: str, index_path: str = "mail_index.leann", max_count: int = 1000, include_html: bool = False, embedding_model: str = "facebook/contriever"):
+
+def create_leann_index(
+    mail_path: str,
+    index_path: str = "mail_index.leann",
+    max_count: int = 1000,
+    include_html: bool = False,
+    embedding_model: str = "facebook/contriever",
+):
    """
    Create LEANN index from mail data.
-    
+
    Args:
        mail_path: Path to the mail directory
        index_path: Path to save the LEANN index
@@ -134,32 +155,33 @@ def create_leann_index(mail_path: str, index_path: str = "mail_index.leann", max
    """
    print("Creating LEANN index from mail data...")
    INDEX_DIR = Path(index_path).parent
-    
+
    if not INDEX_DIR.exists():
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        INDEX_DIR.mkdir(exist_ok=True)

-        print(f"--- Building new LEANN index ---")
-        
-        print(f"\n[PHASE 1] Building Leann index...")
+        print("--- Building new LEANN index ---")
+
+        print("\n[PHASE 1] Building Leann index...")

        # Load documents using EmlxReader from LEANN_email_reader
        from examples.email_data.LEANN_email_reader import EmlxReader
+
        reader = EmlxReader(include_html=include_html)
        # from email_data.email import EmlxMboxReader
        # from pathlib import Path
        # reader = EmlxMboxReader()
        documents = reader.load_data(Path(mail_path))
-        
+
        if not documents:
            print("No documents loaded. Exiting.")
            return None
-        
+
        print(f"Loaded {len(documents)} email documents")
-        
+
        # Create text splitter with 256 chunk size
        text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=128)
-        
+
        # Convert Documents to text strings and chunk them
        all_texts = []
        for doc in documents:
@@ -167,111 +189,135 @@ def create_leann_index(mail_path: str, index_path: str = "mail_index.leann", max
            nodes = text_splitter.get_nodes_from_documents([doc])
            for node in nodes:
                all_texts.append(node.get_content())
-        
+
        print(f"Created {len(all_texts)} text chunks from {len(documents)} documents")
-        
+
        # Create LEANN index directory

-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        INDEX_DIR.mkdir(exist_ok=True)

-        print(f"--- Building new LEANN index ---")
-        
-        print(f"\n[PHASE 1] Building Leann index...")
+        print("--- Building new LEANN index ---")
+
+        print("\n[PHASE 1] Building Leann index...")

        # Use HNSW backend for better macOS compatibility
        builder = LeannBuilder(
            backend_name="hnsw",
            embedding_model=embedding_model,
-            graph_degree=32, 
+            graph_degree=32,
            complexity=64,
            is_compact=True,
            is_recompute=True,
-            num_threads=1  # Force single-threaded mode
+            num_threads=1,  # Force single-threaded mode
        )

        print(f"Adding {len(all_texts)} email chunks to index...")
        for chunk_text in all_texts:
            builder.add_text(chunk_text)
-            
+
        builder.build_index(index_path)
        print(f"\nLEANN index built at {index_path}!")
    else:
        print(f"--- Using existing index at {INDEX_DIR} ---")
-    
+
    return index_path

+
 async def query_leann_index(index_path: str, query: str):
    """
    Query the LEANN index.
-    
+
    Args:
        index_path: Path to the LEANN index
        query: The query string
    """
-    print(f"\n[PHASE 2] Starting Leann chat session...")
-    chat = LeannChat(index_path=index_path,
-                     llm_config={"type": "openai", "model": "gpt-4o"})
-    
+    print("\n[PHASE 2] Starting Leann chat session...")
+    chat = LeannChat(index_path=index_path, llm_config={"type": "openai", "model": "gpt-4o"})
+
    print(f"You: {query}")
    import time
-    start_time = time.time()
+
+    time.time()
    chat_response = chat.ask(
-        query, 
-        top_k=20, 
+        query,
+        top_k=20,
        recompute_beighbor_embeddings=True,
        complexity=32,
        beam_width=1,
    )
-    end_time = time.time()
+    time.time()
    # print(f"Time taken: {end_time - start_time} seconds")
    # highlight the answer
    print(f"Leann chat response: \033[36m{chat_response}\033[0m")

+
 async def main():
    # Parse command line arguments
-    parser = argparse.ArgumentParser(description='LEANN Mail Reader - Create and query email index')
+    parser = argparse.ArgumentParser(description="LEANN Mail Reader - Create and query email index")
    # Remove --mail-path argument and auto-detect all Messages directories
    # Remove DEFAULT_MAIL_PATH
-    parser.add_argument('--index-dir', type=str, default="./mail_index",
-                       help='Directory to store the LEANN index (default: ./mail_index_leann_raw_text_all_dicts)')
-    parser.add_argument('--max-emails', type=int, default=1000,
-                       help='Maximum number of emails to process (-1 means all)')
-    parser.add_argument('--query', type=str, default="Give me some funny advertisement about apple or other companies",
-                       help='Single query to run (default: runs example queries)')
-    parser.add_argument('--include-html', action='store_true', default=False,
-                       help='Include HTML content in email processing (default: False)')
-    parser.add_argument('--embedding-model', type=str, default="facebook/contriever",
-                       help='Embedding model to use (default: facebook/contriever)')
-    
+    parser.add_argument(
+        "--index-dir",
+        type=str,
+        default="./mail_index",
+        help="Directory to store the LEANN index (default: ./mail_index_leann_raw_text_all_dicts)",
+    )
+    parser.add_argument(
+        "--max-emails",
+        type=int,
+        default=1000,
+        help="Maximum number of emails to process (-1 means all)",
+    )
+    parser.add_argument(
+        "--query",
+        type=str,
+        default="Give me some funny advertisement about apple or other companies",
+        help="Single query to run (default: runs example queries)",
+    )
+    parser.add_argument(
+        "--include-html",
+        action="store_true",
+        default=False,
+        help="Include HTML content in email processing (default: False)",
+    )
+    parser.add_argument(
+        "--embedding-model",
+        type=str,
+        default="facebook/contriever",
+        help="Embedding model to use (default: facebook/contriever)",
+    )
+
    args = parser.parse_args()

    print(f"args: {args}")
-    
+
    # Automatically find all Messages directories under the current user's Mail directory
    from examples.email_data.LEANN_email_reader import find_all_messages_directories
+
    mail_path = get_mail_path()
    print(f"Searching for email data in: {mail_path}")
    messages_dirs = find_all_messages_directories(mail_path)
    # messages_dirs = find_all_messages_directories(DEFAULT_MAIL_PATH)
    # messages_dirs = [DEFAULT_MAIL_PATH]
    # messages_dirs = messages_dirs[:1]
-    
-    print('len(messages_dirs): ', len(messages_dirs))
-    
-    
+
+    print("len(messages_dirs): ", len(messages_dirs))
+
    if not messages_dirs:
        print("No Messages directories found. Exiting.")
        return
-    
+
    INDEX_DIR = Path(args.index_dir)
    INDEX_PATH = str(INDEX_DIR / "mail_documents.leann")
    print(f"Index directory: {INDEX_DIR}")
    print(f"Found {len(messages_dirs)} Messages directories.")
-    
+
    # Create or load the LEANN index from all sources
-    index_path = create_leann_index_from_multiple_sources(messages_dirs, INDEX_PATH, args.max_emails, args.include_html, args.embedding_model)
-    
+    index_path = create_leann_index_from_multiple_sources(
+        messages_dirs, INDEX_PATH, args.max_emails, args.include_html, args.embedding_model
+    )
+
    if index_path:
        if args.query:
            # Run single query
@@ -281,11 +327,12 @@ async def main():
            queries = [
                "Hows Berkeley Graduate Student Instructor",
                "how's the icloud related advertisement saying",
-                "Whats the number of class recommend to take per semester for incoming EECS students"
+                "Whats the number of class recommend to take per semester for incoming EECS students",
            ]
            for query in queries:
-                print("\n" + "="*60)
+                print("\n" + "=" * 60)
                await query_leann_index(index_path, query)

+
 if __name__ == "__main__":
-    asyncio.run(main()) 
+    asyncio.run(main())
--- a/examples/mail_reader_llamaindex.py
+++ b/examples/mail_reader_llamaindex.py
@@ -1,26 +1,30 @@
+import argparse
 import os
 import sys
-import argparse
 from pathlib import Path
-from typing import List, Any

 # Add the project root to Python path so we can import from examples
 project_root = Path(__file__).parent.parent
 sys.path.insert(0, str(project_root))

-from llama_index.core import VectorStoreIndex, StorageContext
+import torch
+from llama_index.core import StorageContext, VectorStoreIndex
 from llama_index.core.node_parser import SentenceSplitter

 # --- EMBEDDING MODEL ---
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-import torch

 # --- END EMBEDDING MODEL ---
-
 # Import EmlxReader from the new module
 from examples.email_data.LEANN_email_reader import EmlxReader

-def create_and_save_index(mail_path: str, save_dir: str = "mail_index_embedded", max_count: int = 1000, include_html: bool = False):
+
+def create_and_save_index(
+    mail_path: str,
+    save_dir: str = "mail_index_embedded",
+    max_count: int = 1000,
+    include_html: bool = False,
+):
    print("Creating index from mail data with embedded metadata...")
    documents = EmlxReader(include_html=include_html).load_data(mail_path, max_count=max_count)
    if not documents:
@@ -30,7 +34,7 @@ def create_and_save_index(mail_path: str, save_dir: str = "mail_index_embedded",
    # Use facebook/contriever as the embedder
    embed_model = HuggingFaceEmbedding(model_name="facebook/contriever")
    # set on device
-    import torch
+
    if torch.cuda.is_available():
        embed_model._model.to("cuda")
    # set mps
@@ -39,21 +43,19 @@ def create_and_save_index(mail_path: str, save_dir: str = "mail_index_embedded",
    else:
        embed_model._model.to("cpu")
    index = VectorStoreIndex.from_documents(
-        documents,
-        transformations=[text_splitter],
-        embed_model=embed_model
+        documents, transformations=[text_splitter], embed_model=embed_model
    )
    os.makedirs(save_dir, exist_ok=True)
    index.storage_context.persist(persist_dir=save_dir)
    print(f"Index saved to {save_dir}")
    return index

+
 def load_index(save_dir: str = "mail_index_embedded"):
    try:
        storage_context = StorageContext.from_defaults(persist_dir=save_dir)
        index = VectorStoreIndex.from_vector_store(
-            storage_context.vector_store,
-            storage_context=storage_context
+            storage_context.vector_store, storage_context=storage_context
        )
        print(f"Index loaded from {save_dir}")
        return index
@@ -61,6 +63,7 @@ def load_index(save_dir: str = "mail_index_embedded"):
        print(f"Error loading index: {e}")
        return None

+
 def query_index(index, query: str):
    if index is None:
        print("No index available for querying.")
@@ -70,39 +73,57 @@ def query_index(index, query: str):
    print(f"Query: {query}")
    print(f"Response: {response}")

+
 def main():
    # Parse command line arguments
-    parser = argparse.ArgumentParser(description='LlamaIndex Mail Reader - Create and query email index')
-    parser.add_argument('--mail-path', type=str, 
-                       default="/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages",
-                       help='Path to mail data directory')
-    parser.add_argument('--save-dir', type=str, default="mail_index_embedded",
-                       help='Directory to store the index (default: mail_index_embedded)')
-    parser.add_argument('--max-emails', type=int, default=10000,
-                       help='Maximum number of emails to process')
-    parser.add_argument('--include-html', action='store_true', default=False,
-                       help='Include HTML content in email processing (default: False)')
-    
+    parser = argparse.ArgumentParser(
+        description="LlamaIndex Mail Reader - Create and query email index"
+    )
+    parser.add_argument(
+        "--mail-path",
+        type=str,
+        default="/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages",
+        help="Path to mail data directory",
+    )
+    parser.add_argument(
+        "--save-dir",
+        type=str,
+        default="mail_index_embedded",
+        help="Directory to store the index (default: mail_index_embedded)",
+    )
+    parser.add_argument(
+        "--max-emails", type=int, default=10000, help="Maximum number of emails to process"
+    )
+    parser.add_argument(
+        "--include-html",
+        action="store_true",
+        default=False,
+        help="Include HTML content in email processing (default: False)",
+    )
+
    args = parser.parse_args()
-    
+
    mail_path = args.mail_path
    save_dir = args.save_dir
-    
+
    if os.path.exists(save_dir) and os.path.exists(os.path.join(save_dir, "vector_store.json")):
        print("Loading existing index...")
        index = load_index(save_dir)
    else:
        print("Creating new index...")
-        index = create_and_save_index(mail_path, save_dir, max_count=args.max_emails, include_html=args.include_html)
+        index = create_and_save_index(
+            mail_path, save_dir, max_count=args.max_emails, include_html=args.include_html
+        )
    if index:
        queries = [
            "Hows Berkeley Graduate Student Instructor",
            "how's the icloud related advertisement saying",
-            "Whats the number of class recommend to take per semester for incoming EECS students"
+            "Whats the number of class recommend to take per semester for incoming EECS students",
        ]
        for query in queries:
-            print("\n" + "="*50)
+            print("\n" + "=" * 50)
            query_index(index, query)

+
 if __name__ == "__main__":
-    main() 
+    main()
--- a/examples/main_cli_example.py
+++ b/examples/main_cli_example.py
@@ -1,10 +1,11 @@
 import argparse
-from llama_index.core import SimpleDirectoryReader
-from llama_index.core.node_parser import SentenceSplitter
 import asyncio
+from pathlib import Path
+
 import dotenv
 from leann.api import LeannBuilder, LeannChat
-from pathlib import Path
+from llama_index.core import SimpleDirectoryReader
+from llama_index.core.node_parser import SentenceSplitter

 dotenv.load_dotenv()

@@ -56,7 +57,7 @@ async def main(args):
    else:
        print(f"--- Using existing index at {INDEX_DIR} ---")

-    print(f"\n[PHASE 2] Starting Leann chat session...")
+    print("\n[PHASE 2] Starting Leann chat session...")

    llm_config = {"type": "hf", "model": "Qwen/Qwen3-4B"}
    llm_config = {"type": "ollama", "model": "qwen3:8b"}
@@ -64,7 +65,7 @@ async def main(args):

    chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
    # query = (
-    #     "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面，任务令一般在什么城市颁发"
+    #     "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面,任务令一般在什么城市颁发"
    # )
    query = args.query

@@ -74,9 +75,7 @@ async def main(args):


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Run Leann Chat with various LLM backends."
-    )
+    parser = argparse.ArgumentParser(description="Run Leann Chat with various LLM backends.")
    parser.add_argument(
        "--llm",
        type=str,
--- a/examples/multi_vector_aggregator.py
+++ b/examples/multi_vector_aggregator.py
@@ -14,48 +14,55 @@ Key features:
 - Document-level result consolidation
 """

-import numpy as np
-from typing import List, Dict, Any, Tuple, Optional
-from dataclasses import dataclass
 from collections import defaultdict
-import json
+from dataclasses import dataclass
+from typing import Any
+
+import numpy as np
+

@dataclass
 class PatchResult:
    """Represents a single patch search result."""
+
    patch_id: int
    image_name: str
    image_path: str
-    coordinates: Tuple[int, int, int, int]  # (x1, y1, x2, y2)
+    coordinates: tuple[int, int, int, int]  # (x1, y1, x2, y2)
    score: float
    attention_score: float
    scale: float
-    metadata: Dict[str, Any]
+    metadata: dict[str, Any]
+

@dataclass
 class AggregatedResult:
    """Represents an aggregated document-level result."""
+
    image_name: str
    image_path: str
    doc_score: float
    patch_count: int
    best_patch: PatchResult
-    all_patches: List[PatchResult]
+    all_patches: list[PatchResult]
    aggregation_method: str
-    spatial_clusters: Optional[List[List[PatchResult]]] = None
+    spatial_clusters: list[list[PatchResult]] | None = None
+

 class MultiVectorAggregator:
    """
    Aggregates multiple patch-level results into document-level results.
    """
-    
-    def __init__(self, 
-                 aggregation_method: str = "maxsim",
-                 spatial_clustering: bool = True,
-                 cluster_distance_threshold: float = 100.0):
+
+    def __init__(
+        self,
+        aggregation_method: str = "maxsim",
+        spatial_clustering: bool = True,
+        cluster_distance_threshold: float = 100.0,
+    ):
        """
        Initialize the aggregator.
-        
+
        Args:
            aggregation_method: "maxsim", "voting", "weighted", or "mean"
            spatial_clustering: Whether to cluster spatially close patches
@@ -64,23 +71,23 @@ class MultiVectorAggregator:
        self.aggregation_method = aggregation_method
        self.spatial_clustering = spatial_clustering
        self.cluster_distance_threshold = cluster_distance_threshold
-    
-    def aggregate_results(self, 
-                         search_results: List[Dict[str, Any]], 
-                         top_k: int = 10) -> List[AggregatedResult]:
+
+    def aggregate_results(
+        self, search_results: list[dict[str, Any]], top_k: int = 10
+    ) -> list[AggregatedResult]:
        """
        Aggregate patch-level search results into document-level results.
-        
+
        Args:
            search_results: List of search results from LeannSearcher
            top_k: Number of top documents to return
-            
+
        Returns:
            List of aggregated document results
        """
        # Group results by image
        image_groups = defaultdict(list)
-        
+
        for result in search_results:
            metadata = result.metadata
            if "image_name" in metadata and "patch_id" in metadata:
@@ -92,55 +99,57 @@ class MultiVectorAggregator:
                    score=result.score,
                    attention_score=metadata.get("attention_score", 0.0),
                    scale=metadata.get("scale", 1.0),
-                    metadata=metadata
+                    metadata=metadata,
                )
                image_groups[metadata["image_name"]].append(patch_result)
-        
+
        # Aggregate each image group
        aggregated_results = []
        for image_name, patches in image_groups.items():
            if len(patches) == 0:
                continue
-                
+
            agg_result = self._aggregate_image_patches(image_name, patches)
            aggregated_results.append(agg_result)
-        
+
        # Sort by aggregated score and return top-k
        aggregated_results.sort(key=lambda x: x.doc_score, reverse=True)
        return aggregated_results[:top_k]
-    
-    def _aggregate_image_patches(self, image_name: str, patches: List[PatchResult]) -> AggregatedResult:
+
+    def _aggregate_image_patches(
+        self, image_name: str, patches: list[PatchResult]
+    ) -> AggregatedResult:
        """Aggregate patches for a single image."""
-        
+
        if self.aggregation_method == "maxsim":
            doc_score = max(patch.score for patch in patches)
            best_patch = max(patches, key=lambda p: p.score)
-            
+
        elif self.aggregation_method == "voting":
            # Count patches above threshold
            threshold = np.percentile([p.score for p in patches], 75)
            doc_score = sum(1 for patch in patches if patch.score >= threshold)
            best_patch = max(patches, key=lambda p: p.score)
-            
+
        elif self.aggregation_method == "weighted":
            # Weight by attention scores
            total_weighted_score = sum(p.score * p.attention_score for p in patches)
            total_weights = sum(p.attention_score for p in patches)
            doc_score = total_weighted_score / max(total_weights, 1e-8)
            best_patch = max(patches, key=lambda p: p.score * p.attention_score)
-            
+
        elif self.aggregation_method == "mean":
            doc_score = np.mean([patch.score for patch in patches])
            best_patch = max(patches, key=lambda p: p.score)
-            
+
        else:
            raise ValueError(f"Unknown aggregation method: {self.aggregation_method}")
-        
+
        # Spatial clustering if enabled
        spatial_clusters = None
        if self.spatial_clustering:
            spatial_clusters = self._cluster_patches_spatially(patches)
-        
+
        return AggregatedResult(
            image_name=image_name,
            image_path=patches[0].image_path,
@@ -149,23 +158,23 @@ class MultiVectorAggregator:
            best_patch=best_patch,
            all_patches=sorted(patches, key=lambda p: p.score, reverse=True),
            aggregation_method=self.aggregation_method,
-            spatial_clusters=spatial_clusters
+            spatial_clusters=spatial_clusters,
        )
-    
-    def _cluster_patches_spatially(self, patches: List[PatchResult]) -> List[List[PatchResult]]:
+
+    def _cluster_patches_spatially(self, patches: list[PatchResult]) -> list[list[PatchResult]]:
        """Cluster patches that are spatially close to each other."""
        if len(patches) <= 1:
            return [patches]
-        
+
        clusters = []
        remaining_patches = patches.copy()
-        
+
        while remaining_patches:
            # Start new cluster with highest scoring remaining patch
            seed_patch = max(remaining_patches, key=lambda p: p.score)
            current_cluster = [seed_patch]
            remaining_patches.remove(seed_patch)
-            
+
            # Add nearby patches to cluster
            added_to_cluster = True
            while added_to_cluster:
@@ -175,145 +184,175 @@ class MultiVectorAggregator:
                        current_cluster.append(patch)
                        remaining_patches.remove(patch)
                        added_to_cluster = True
-            
+
            clusters.append(current_cluster)
-        
+
        return sorted(clusters, key=lambda cluster: max(p.score for p in cluster), reverse=True)
-    
-    def _is_patch_nearby(self, patch: PatchResult, cluster: List[PatchResult]) -> bool:
+
+    def _is_patch_nearby(self, patch: PatchResult, cluster: list[PatchResult]) -> bool:
        """Check if a patch is spatially close to any patch in the cluster."""
        patch_center = self._get_patch_center(patch.coordinates)
-        
+
        for cluster_patch in cluster:
            cluster_center = self._get_patch_center(cluster_patch.coordinates)
-            distance = np.sqrt((patch_center[0] - cluster_center[0])**2 + 
-                             (patch_center[1] - cluster_center[1])**2)
-            
+            distance = np.sqrt(
+                (patch_center[0] - cluster_center[0]) ** 2
+                + (patch_center[1] - cluster_center[1]) ** 2
+            )
+
            if distance <= self.cluster_distance_threshold:
                return True
-        
+
        return False
-    
-    def _get_patch_center(self, coordinates: Tuple[int, int, int, int]) -> Tuple[float, float]:
+
+    def _get_patch_center(self, coordinates: tuple[int, int, int, int]) -> tuple[float, float]:
        """Get center point of a patch."""
        x1, y1, x2, y2 = coordinates
        return ((x1 + x2) / 2, (y1 + y2) / 2)
-    
-    def print_aggregated_results(self, results: List[AggregatedResult], max_patches_per_doc: int = 3):
+
+    def print_aggregated_results(
+        self, results: list[AggregatedResult], max_patches_per_doc: int = 3
+    ):
        """Pretty print aggregated results."""
        print(f"\n🔍 Aggregated Results (method: {self.aggregation_method})")
        print("=" * 80)
-        
+
        for i, result in enumerate(results):
-            print(f"\n{i+1}. {result.image_name}")
+            print(f"\n{i + 1}. {result.image_name}")
            print(f"   Doc Score: {result.doc_score:.4f} | Patches: {result.patch_count}")
            print(f"   Path: {result.image_path}")
-            
+
            # Show best patch
            best = result.best_patch
-            print(f"   🌟 Best Patch: #{best.patch_id} at {best.coordinates} (score: {best.score:.4f})")
-            
+            print(
+                f"   🌟 Best Patch: #{best.patch_id} at {best.coordinates} (score: {best.score:.4f})"
+            )
+
            # Show top patches
-            print(f"   📍 Top Patches:")
+            print("   📍 Top Patches:")
            for j, patch in enumerate(result.all_patches[:max_patches_per_doc]):
-                print(f"      {j+1}. Patch #{patch.patch_id}: {patch.score:.4f} at {patch.coordinates}")
-            
+                print(
+                    f"      {j + 1}. Patch #{patch.patch_id}: {patch.score:.4f} at {patch.coordinates}"
+                )
+
            # Show spatial clusters if available
            if result.spatial_clusters and len(result.spatial_clusters) > 1:
                print(f"   🗂️ Spatial Clusters: {len(result.spatial_clusters)}")
                for j, cluster in enumerate(result.spatial_clusters[:2]):  # Show top 2 clusters
                    cluster_score = max(p.score for p in cluster)
-                    print(f"      Cluster {j+1}: {len(cluster)} patches (best: {cluster_score:.4f})")
+                    print(
+                        f"      Cluster {j + 1}: {len(cluster)} patches (best: {cluster_score:.4f})"
+                    )
+

 def demo_aggregation():
    """Demonstrate the multi-vector aggregation functionality."""
    print("=== Multi-Vector Aggregation Demo ===")
-    
+
    # Simulate some patch-level search results
    # In real usage, these would come from LeannSearcher.search()
-    
+
    class MockResult:
        def __init__(self, score, metadata):
            self.score = score
            self.metadata = metadata
-    
+
    # Simulate results for 2 images with multiple patches each
    mock_results = [
        # Image 1: cats_and_kitchen.jpg - 4 patches
-        MockResult(0.85, {
-            "image_name": "cats_and_kitchen.jpg",
-            "image_path": "/path/to/cats_and_kitchen.jpg",
-            "patch_id": 3,
-            "coordinates": [100, 50, 224, 174],  # Kitchen area
-            "attention_score": 0.92,
-            "scale": 1.0
-        }),
-        MockResult(0.78, {
-            "image_name": "cats_and_kitchen.jpg", 
-            "image_path": "/path/to/cats_and_kitchen.jpg",
-            "patch_id": 7,
-            "coordinates": [200, 300, 324, 424],  # Cat area
-            "attention_score": 0.88,
-            "scale": 1.0
-        }),
-        MockResult(0.72, {
-            "image_name": "cats_and_kitchen.jpg",
-            "image_path": "/path/to/cats_and_kitchen.jpg", 
-            "patch_id": 12,
-            "coordinates": [150, 100, 274, 224],  # Appliances
-            "attention_score": 0.75,
-            "scale": 1.0
-        }),
-        MockResult(0.65, {
-            "image_name": "cats_and_kitchen.jpg",
-            "image_path": "/path/to/cats_and_kitchen.jpg",
-            "patch_id": 15,
-            "coordinates": [50, 250, 174, 374],  # Furniture
-            "attention_score": 0.70,
-            "scale": 1.0
-        }),
-        
-        # Image 2: city_street.jpg - 3 patches  
-        MockResult(0.68, {
-            "image_name": "city_street.jpg",
-            "image_path": "/path/to/city_street.jpg",
-            "patch_id": 2,
-            "coordinates": [300, 100, 424, 224],  # Buildings
-            "attention_score": 0.80,
-            "scale": 1.0
-        }),
-        MockResult(0.62, {
-            "image_name": "city_street.jpg",
-            "image_path": "/path/to/city_street.jpg",
-            "patch_id": 8,
-            "coordinates": [100, 350, 224, 474],  # Street level
-            "attention_score": 0.75,
-            "scale": 1.0
-        }),
-        MockResult(0.55, {
-            "image_name": "city_street.jpg", 
-            "image_path": "/path/to/city_street.jpg",
-            "patch_id": 11,
-            "coordinates": [400, 200, 524, 324],  # Sky area
-            "attention_score": 0.60,
-            "scale": 1.0
-        }),
+        MockResult(
+            0.85,
+            {
+                "image_name": "cats_and_kitchen.jpg",
+                "image_path": "/path/to/cats_and_kitchen.jpg",
+                "patch_id": 3,
+                "coordinates": [100, 50, 224, 174],  # Kitchen area
+                "attention_score": 0.92,
+                "scale": 1.0,
+            },
+        ),
+        MockResult(
+            0.78,
+            {
+                "image_name": "cats_and_kitchen.jpg",
+                "image_path": "/path/to/cats_and_kitchen.jpg",
+                "patch_id": 7,
+                "coordinates": [200, 300, 324, 424],  # Cat area
+                "attention_score": 0.88,
+                "scale": 1.0,
+            },
+        ),
+        MockResult(
+            0.72,
+            {
+                "image_name": "cats_and_kitchen.jpg",
+                "image_path": "/path/to/cats_and_kitchen.jpg",
+                "patch_id": 12,
+                "coordinates": [150, 100, 274, 224],  # Appliances
+                "attention_score": 0.75,
+                "scale": 1.0,
+            },
+        ),
+        MockResult(
+            0.65,
+            {
+                "image_name": "cats_and_kitchen.jpg",
+                "image_path": "/path/to/cats_and_kitchen.jpg",
+                "patch_id": 15,
+                "coordinates": [50, 250, 174, 374],  # Furniture
+                "attention_score": 0.70,
+                "scale": 1.0,
+            },
+        ),
+        # Image 2: city_street.jpg - 3 patches
+        MockResult(
+            0.68,
+            {
+                "image_name": "city_street.jpg",
+                "image_path": "/path/to/city_street.jpg",
+                "patch_id": 2,
+                "coordinates": [300, 100, 424, 224],  # Buildings
+                "attention_score": 0.80,
+                "scale": 1.0,
+            },
+        ),
+        MockResult(
+            0.62,
+            {
+                "image_name": "city_street.jpg",
+                "image_path": "/path/to/city_street.jpg",
+                "patch_id": 8,
+                "coordinates": [100, 350, 224, 474],  # Street level
+                "attention_score": 0.75,
+                "scale": 1.0,
+            },
+        ),
+        MockResult(
+            0.55,
+            {
+                "image_name": "city_street.jpg",
+                "image_path": "/path/to/city_street.jpg",
+                "patch_id": 11,
+                "coordinates": [400, 200, 524, 324],  # Sky area
+                "attention_score": 0.60,
+                "scale": 1.0,
+            },
+        ),
    ]
-    
+
    # Test different aggregation methods
    methods = ["maxsim", "voting", "weighted", "mean"]
-    
+
    for method in methods:
-        print(f"\n{'='*20} {method.upper()} AGGREGATION {'='*20}")
-        
+        print(f"\n{'=' * 20} {method.upper()} AGGREGATION {'=' * 20}")
+
        aggregator = MultiVectorAggregator(
-            aggregation_method=method,
-            spatial_clustering=True,
-            cluster_distance_threshold=100.0
+            aggregation_method=method, spatial_clustering=True, cluster_distance_threshold=100.0
        )
-        
+
        aggregated = aggregator.aggregate_results(mock_results, top_k=5)
        aggregator.print_aggregated_results(aggregated)

+
 if __name__ == "__main__":
-    demo_aggregation()
+    demo_aggregation()
--- a/examples/openai_hnsw_example.py
+++ b/examples/openai_hnsw_example.py
@@ -6,22 +6,24 @@ Complete example showing how to build and search with OpenAI embeddings using HN
 """

 import os
-import dotenv
 from pathlib import Path
+
+import dotenv
 from leann.api import LeannBuilder, LeannSearcher

 # Load environment variables
 dotenv.load_dotenv()

+
 def main():
    # Check if OpenAI API key is available
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("ERROR: OPENAI_API_KEY environment variable not set")
        return False
-    
+
    print(f"✅ OpenAI API key found: {api_key[:10]}...")
-    
+
    # Sample texts
    sample_texts = [
        "Machine learning is a powerful technology that enables computers to learn from data.",
@@ -33,15 +35,15 @@ def main():
        "Artificial intelligence aims to create machines that can perform human-like tasks.",
        "Python is a popular programming language used extensively in data science and AI.",
        "Neural networks are inspired by the structure and function of the human brain.",
-        "Big data refers to extremely large datasets that require special tools to process."
+        "Big data refers to extremely large datasets that require special tools to process.",
    ]
-    
+
    INDEX_DIR = Path("./simple_openai_test_index")
    INDEX_PATH = str(INDEX_DIR / "simple_test.leann")
-    
-    print(f"\n=== Building Index with OpenAI Embeddings ===")
+
+    print("\n=== Building Index with OpenAI Embeddings ===")
    print(f"Index path: {INDEX_PATH}")
-    
+
    try:
        # Use proper configuration for OpenAI embeddings
        builder = LeannBuilder(
@@ -49,60 +51,63 @@ def main():
            embedding_model="text-embedding-3-small",
            embedding_mode="openai",
            # HNSW settings for OpenAI embeddings
-            M=16,                    # Smaller graph degree
-            efConstruction=64,       # Smaller construction complexity  
-            is_compact=True,         # Enable compact storage for recompute
-            is_recompute=True,       # MUST enable for OpenAI embeddings
+            M=16,  # Smaller graph degree
+            efConstruction=64,  # Smaller construction complexity
+            is_compact=True,  # Enable compact storage for recompute
+            is_recompute=True,  # MUST enable for OpenAI embeddings
            num_threads=1,
        )
-        
+
        print(f"Adding {len(sample_texts)} texts to the index...")
        for i, text in enumerate(sample_texts):
            metadata = {"id": f"doc_{i}", "topic": "AI"}
            builder.add_text(text, metadata)
-        
+
        print("Building index...")
        builder.build_index(INDEX_PATH)
-        print(f"✅ Index built successfully!")
-        
+        print("✅ Index built successfully!")
+
    except Exception as e:
        print(f"❌ Error building index: {e}")
        import traceback
+
        traceback.print_exc()
        return False
-    
-    print(f"\n=== Testing Search ===")
-    
+
+    print("\n=== Testing Search ===")
+
    try:
        searcher = LeannSearcher(INDEX_PATH)
-        
+
        test_queries = [
            "What is machine learning?",
            "How do neural networks work?",
-            "Programming languages for data science"
+            "Programming languages for data science",
        ]
-        
+
        for query in test_queries:
            print(f"\n🔍 Query: '{query}'")
            results = searcher.search(query, top_k=3)
-            
+
            print(f"   Found {len(results)} results:")
            for i, result in enumerate(results):
-                print(f"   {i+1}. Score: {result.score:.4f}")
+                print(f"   {i + 1}. Score: {result.score:.4f}")
                print(f"      Text: {result.text[:80]}...")
-        
-        print(f"\n✅ Search test completed successfully!")
+
+        print("\n✅ Search test completed successfully!")
        return True
-        
+
    except Exception as e:
        print(f"❌ Error during search: {e}")
        import traceback
+
        traceback.print_exc()
        return False

+
 if __name__ == "__main__":
    success = main()
    if success:
-        print(f"\n🎉 Simple OpenAI index test completed successfully!")
+        print("\n🎉 Simple OpenAI index test completed successfully!")
    else:
-        print(f"\n💥 Simple OpenAI index test failed!")
+        print("\n💥 Simple OpenAI index test failed!")
--- a/examples/resue_index.py
+++ b/examples/resue_index.py
@@ -1,18 +1,23 @@
 import asyncio
-from leann.api import LeannChat
 from pathlib import Path

+from leann.api import LeannChat
+
 INDEX_DIR = Path("./test_pdf_index_huawei")
 INDEX_PATH = str(INDEX_DIR / "pdf_documents.leann")

+
 async def main():
-    print(f"\n[PHASE 2] Starting Leann chat session...")
+    print("\n[PHASE 2] Starting Leann chat session...")
    chat = LeannChat(index_path=INDEX_PATH)
    query = "What is the main idea of RL and give me 5 exapmle of classic RL algorithms?"
    query = "Based on the paper, what are the main techniques LEANN explores to reduce the storage overhead and DLPM explore to achieve Fairness and Efiiciency trade-off?"
-    # query = "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面，任务令一般在什么城市颁发"
-    response = chat.ask(query,top_k=20,recompute_beighbor_embeddings=True,complexity=32,beam_width=1)
+    # query = "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面,任务令一般在什么城市颁发"
+    response = chat.ask(
+        query, top_k=20, recompute_beighbor_embeddings=True, complexity=32, beam_width=1
+    )
    print(f"\n[PHASE 2] Response: {response}")

+
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
--- a/examples/run_evaluation.py
+++ b/examples/run_evaluation.py
@@ -5,24 +5,21 @@ It correctly compares results by fetching the text content for both the new sear
 results and the golden standard results, making the comparison robust to ID changes.
 """

-import json
 import argparse
+import json
+import sys
 import time
 from pathlib import Path
-import sys
-import numpy as np
-from typing import List

-from leann.api import LeannSearcher, LeannBuilder
+import numpy as np
+from leann.api import LeannBuilder, LeannSearcher


 def download_data_if_needed(data_root: Path, download_embeddings: bool = False):
    """Checks if the data directory exists, and if not, downloads it from HF Hub."""
    if not data_root.exists():
        print(f"Data directory '{data_root}' not found.")
-        print(
-            "Downloading evaluation data from Hugging Face Hub... (this may take a moment)"
-        )
+        print("Downloading evaluation data from Hugging Face Hub... (this may take a moment)")
        try:
            from huggingface_hub import snapshot_download

@@ -63,7 +60,7 @@ def download_data_if_needed(data_root: Path, download_embeddings: bool = False):
            sys.exit(1)


-def download_embeddings_if_needed(data_root: Path, dataset_type: str = None):
+def download_embeddings_if_needed(data_root: Path, dataset_type: str | None = None):
    """Download embeddings files specifically."""
    embeddings_dir = data_root / "embeddings"

@@ -101,7 +98,7 @@ def download_embeddings_if_needed(data_root: Path, dataset_type: str = None):


 # --- Helper Function to get Golden Passages ---
-def get_golden_texts(searcher: LeannSearcher, golden_ids: List[int]) -> set:
+def get_golden_texts(searcher: LeannSearcher, golden_ids: list[int]) -> set:
    """
    Retrieves the text for golden passage IDs directly from the LeannSearcher's
    passage manager.
@@ -113,24 +110,20 @@ def get_golden_texts(searcher: LeannSearcher, golden_ids: List[int]) -> set:
            passage_data = searcher.passage_manager.get_passage(str(gid))
            golden_texts.add(passage_data["text"])
        except KeyError:
-            print(
-                f"Warning: Golden passage ID '{gid}' not found in the index's passage data."
-            )
+            print(f"Warning: Golden passage ID '{gid}' not found in the index's passage data.")
    return golden_texts


-def load_queries(file_path: Path) -> List[str]:
+def load_queries(file_path: Path) -> list[str]:
    queries = []
-    with open(file_path, "r", encoding="utf-8") as f:
+    with open(file_path, encoding="utf-8") as f:
        for line in f:
            data = json.loads(line)
            queries.append(data["query"])
    return queries


-def build_index_from_embeddings(
-    embeddings_file: str, output_path: str, backend: str = "hnsw"
-):
+def build_index_from_embeddings(embeddings_file: str, output_path: str, backend: str = "hnsw"):
    """
    Build a LEANN index from pre-computed embeddings.

@@ -173,9 +166,7 @@ def build_index_from_embeddings(


 def main():
-    parser = argparse.ArgumentParser(
-        description="Run recall evaluation on a LEANN index."
-    )
+    parser = argparse.ArgumentParser(description="Run recall evaluation on a LEANN index.")
    parser.add_argument(
        "index_path",
        type=str,
@@ -202,9 +193,7 @@ def main():
    parser.add_argument(
        "--num-queries", type=int, default=10, help="Number of queries to evaluate."
    )
-    parser.add_argument(
-        "--top-k", type=int, default=3, help="The 'k' value for recall@k."
-    )
+    parser.add_argument("--top-k", type=int, default=3, help="The 'k' value for recall@k.")
    parser.add_argument(
        "--ef-search", type=int, default=120, help="The 'efSearch' parameter for HNSW."
    )
@@ -219,9 +208,7 @@ def main():
    # Download data based on mode
    if args.mode == "build":
        # For building mode, we need embeddings
-        download_data_if_needed(
-            data_root, download_embeddings=False
-        )  # Basic data first
+        download_data_if_needed(data_root, download_embeddings=False)  # Basic data first

        # Auto-detect dataset type and download embeddings
        if args.embeddings_file:
@@ -262,9 +249,7 @@ def main():
        print(f"Index built successfully: {built_index_path}")

        # Ask if user wants to run evaluation
-        eval_response = (
-            input("Run evaluation on the built index? (y/n): ").strip().lower()
-        )
+        eval_response = input("Run evaluation on the built index? (y/n): ").strip().lower()
        if eval_response != "y":
            print("Index building complete. Exiting.")
            return
@@ -293,12 +278,8 @@ def main():
                        break

            if not args.index_path:
-                print(
-                    "No indices found. The data download should have included pre-built indices."
-                )
-                print(
-                    "Please check the data/indices/ directory or provide --index-path manually."
-                )
+                print("No indices found. The data download should have included pre-built indices.")
+                print("Please check the data/indices/ directory or provide --index-path manually.")
                sys.exit(1)

    # Detect dataset type from index path to select the correct ground truth
@@ -310,14 +291,10 @@ def main():
    else:
        # Fallback: try to infer from the index directory name
        dataset_type = Path(args.index_path).name
-        print(
-            f"WARNING: Could not detect dataset type from path, inferred '{dataset_type}'."
-        )
+        print(f"WARNING: Could not detect dataset type from path, inferred '{dataset_type}'.")

    queries_file = data_root / "queries" / "nq_open.jsonl"
-    golden_results_file = (
-        data_root / "ground_truth" / dataset_type / "flat_results_nq_k3.json"
-    )
+    golden_results_file = data_root / "ground_truth" / dataset_type / "flat_results_nq_k3.json"

    print(f"INFO: Detected dataset type: {dataset_type}")
    print(f"INFO: Using queries file: {queries_file}")
@@ -327,7 +304,7 @@ def main():
        searcher = LeannSearcher(args.index_path)
        queries = load_queries(queries_file)

-        with open(golden_results_file, "r") as f:
+        with open(golden_results_file) as f:
            golden_results_data = json.load(f)

        num_eval_queries = min(args.num_queries, len(queries))
@@ -339,9 +316,7 @@ def main():

        for i in range(num_eval_queries):
            start_time = time.time()
-            new_results = searcher.search(
-                queries[i], top_k=args.top_k, ef=args.ef_search
-            )
+            new_results = searcher.search(queries[i], top_k=args.top_k, ef=args.ef_search)
            search_times.append(time.time() - start_time)

            # Correct Recall Calculation: Based on TEXT content
--- a/examples/simple_demo.py
+++ b/examples/simple_demo.py
@@ -4,18 +4,25 @@ Run: uv run python examples/simple_demo.py
 """

 import argparse
-from leann import LeannBuilder, LeannSearcher, LeannChat
+
+from leann import LeannBuilder, LeannChat, LeannSearcher


 def main():
-    parser = argparse.ArgumentParser(description="Simple demo of Leann with selectable embedding models.")
-    parser.add_argument("--embedding_model", type=str, default="sentence-transformers/all-mpnet-base-v2",
-                        help="The embedding model to use, e.g., 'sentence-transformers/all-mpnet-base-v2' or 'text-embedding-ada-002'.")
+    parser = argparse.ArgumentParser(
+        description="Simple demo of Leann with selectable embedding models."
+    )
+    parser.add_argument(
+        "--embedding_model",
+        type=str,
+        default="sentence-transformers/all-mpnet-base-v2",
+        help="The embedding model to use, e.g., 'sentence-transformers/all-mpnet-base-v2' or 'text-embedding-ada-002'.",
+    )
    args = parser.parse_args()

    print(f"=== Leann Simple Demo with {args.embedding_model} ===")
    print()
-    
+
    # Sample knowledge base
    chunks = [
        "Machine learning is a subset of artificial intelligence that enables computers to learn without being explicitly programmed.",
@@ -27,7 +34,7 @@ def main():
        "Big data refers to extremely large datasets that require special tools and techniques to process.",
        "Cloud computing provides on-demand access to computing resources over the internet.",
    ]
-    
+
    print("1. Building index (no embeddings stored)...")
    builder = LeannBuilder(
        embedding_model=args.embedding_model,
@@ -37,45 +44,45 @@ def main():
        builder.add_text(chunk)
    builder.build_index("demo_knowledge.leann")
    print()
-    
+
    print("2. Searching with real-time embeddings...")
    searcher = LeannSearcher("demo_knowledge.leann")
-    
+
    queries = [
        "What is machine learning?",
-        "How does neural network work?", 
+        "How does neural network work?",
        "Tell me about data processing",
    ]
-    
+
    for query in queries:
        print(f"Query: {query}")
        results = searcher.search(query, top_k=2)
-        
+
        for i, result in enumerate(results, 1):
            print(f"  {i}. Score: {result.score:.3f}")
            print(f"     Text: {result.text[:100]}...")
        print()
-    
+
    print("3. Interactive chat demo:")
    print("   (Note: Requires OpenAI API key for real responses)")
-    
+
    chat = LeannChat("demo_knowledge.leann")
-    
+
    # Demo questions
    demo_questions: list[str] = [
        "What is the difference between machine learning and deep learning?",
        "How is data science related to big data?",
    ]
-    
+
    for question in demo_questions:
        print(f"   Q: {question}")
        response = chat.ask(question)
        print(f"   A: {response}")
        print()
-    
+
    print("Demo completed! Try running:")
    print("   uv run python examples/document_search.py")


 if __name__ == "__main__":
-    main()
+    main()
--- a/examples/wechat_history_reader_leann.py
+++ b/examples/wechat_history_reader_leann.py
@@ -1,13 +1,11 @@
-import os
-import asyncio
-import dotenv
 import argparse
+import asyncio
+import os
 from pathlib import Path
-from typing import List, Any, Optional
-from leann.api import LeannBuilder, LeannSearcher, LeannChat
+
+import dotenv
+from leann.api import LeannBuilder, LeannChat
 from llama_index.core.node_parser import SentenceSplitter
-import requests
-import time

 dotenv.load_dotenv()

@@ -16,7 +14,7 @@ DEFAULT_WECHAT_EXPORT_DIR = "./wechat_export_direct"


 def create_leann_index_from_multiple_wechat_exports(
-    export_dirs: List[Path],
+    export_dirs: list[Path],
    index_path: str = "wechat_history_index.leann",
    max_count: int = -1,
 ):
@@ -38,15 +36,13 @@ def create_leann_index_from_multiple_wechat_exports(
    INDEX_DIR = Path(index_path).parent

    if not INDEX_DIR.exists():
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        all_documents = []
        total_processed = 0

        # Process each WeChat export directory
        for i, export_dir in enumerate(export_dirs):
-            print(
-                f"\nProcessing WeChat export {i + 1}/{len(export_dirs)}: {export_dir}"
-            )
+            print(f"\nProcessing WeChat export {i + 1}/{len(export_dirs)}: {export_dir}")

            try:
                documents = reader.load_data(
@@ -86,7 +82,12 @@ def create_leann_index_from_multiple_wechat_exports(
            # Split the document into chunks
            nodes = text_splitter.get_nodes_from_documents([doc])
            for node in nodes:
-                text = '[Contact] means the message is from: ' + doc.metadata["contact_name"] + '\n' + node.get_content()
+                text = (
+                    "[Contact] means the message is from: "
+                    + doc.metadata["contact_name"]
+                    + "\n"
+                    + node.get_content()
+                )
                all_texts.append(text)

        print(
@@ -94,12 +95,12 @@ def create_leann_index_from_multiple_wechat_exports(
        )

        # Create LEANN index directory
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        INDEX_DIR.mkdir(exist_ok=True)

-        print(f"--- Building new LEANN index ---")
+        print("--- Building new LEANN index ---")

-        print(f"\n[PHASE 1] Building Leann index...")
+        print("\n[PHASE 1] Building Leann index...")

        # Use HNSW backend for better macOS compatibility
        builder = LeannBuilder(
@@ -125,7 +126,7 @@ def create_leann_index_from_multiple_wechat_exports(


 def create_leann_index(
-    export_dir: str = None,
+    export_dir: str | None = None,
    index_path: str = "wechat_history_index.leann",
    max_count: int = 1000,
 ):
@@ -141,12 +142,12 @@ def create_leann_index(
    INDEX_DIR = Path(index_path).parent

    if not INDEX_DIR.exists():
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        INDEX_DIR.mkdir(exist_ok=True)

-        print(f"--- Building new LEANN index ---")
+        print("--- Building new LEANN index ---")

-        print(f"\n[PHASE 1] Building Leann index...")
+        print("\n[PHASE 1] Building Leann index...")

        # Load documents using WeChatHistoryReader from history_data
        from history_data.wechat_history import WeChatHistoryReader
@@ -179,12 +180,12 @@ def create_leann_index(
        print(f"Created {len(all_texts)} text chunks from {len(documents)} documents")

        # Create LEANN index directory
-        print(f"--- Index directory not found, building new index ---")
+        print("--- Index directory not found, building new index ---")
        INDEX_DIR.mkdir(exist_ok=True)

-        print(f"--- Building new LEANN index ---")
+        print("--- Building new LEANN index ---")

-        print(f"\n[PHASE 1] Building Leann index...")
+        print("\n[PHASE 1] Building Leann index...")

        # Use HNSW backend for better macOS compatibility
        builder = LeannBuilder(
@@ -217,7 +218,7 @@ async def query_leann_index(index_path: str, query: str):
        index_path: Path to the LEANN index
        query: The query string
    """
-    print(f"\n[PHASE 2] Starting Leann chat session...")
+    print("\n[PHASE 2] Starting Leann chat session...")
    chat = LeannChat(index_path=index_path)

    print(f"You: {query}")
@@ -307,7 +308,7 @@ async def main():
        else:
            # Example queries
            queries = [
-                "我想买魔术师约翰逊的球衣，给我一些对应聊天记录?",
+                "我想买魔术师约翰逊的球衣,给我一些对应聊天记录?",
            ]

            for query in queries:
--- a/packages/leann-backend-diskann/init.py
+++ b/packages/leann-backend-diskann/init.py
@@ -1 +1 @@
-# This file makes the directory a Python package 
+# This file makes the directory a Python package
--- a/packages/leann-backend-diskann/leann_backend_diskann/init.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/init.py
@@ -1 +1 @@
-from . import diskann_backend
+from . import diskann_backend as diskann_backend
--- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py
@@ -1,20 +1,19 @@
-import numpy as np
+import contextlib
+import logging
 import os
 import struct
 import sys
 from pathlib import Path
-from typing import Dict, Any, List, Literal, Optional
-import contextlib
+from typing import Any, Literal

-import logging
-
-from leann.searcher_base import BaseSearcher
-from leann.registry import register_backend
+import numpy as np
 from leann.interface import (
-    LeannBackendFactoryInterface,
    LeannBackendBuilderInterface,
+    LeannBackendFactoryInterface,
    LeannBackendSearcherInterface,
 )
+from leann.registry import register_backend
+from leann.searcher_base import BaseSearcher

 logger = logging.getLogger(__name__)

@@ -100,7 +99,7 @@ class DiskannBuilder(LeannBackendBuilderInterface):
    def __init__(self, **kwargs):
        self.build_params = kwargs

-    def build(self, data: np.ndarray, ids: List[str], index_path: str, **kwargs):
+    def build(self, data: np.ndarray, ids: list[str], index_path: str, **kwargs):
        path = Path(index_path)
        index_dir = path.parent
        index_prefix = path.stem
@@ -186,11 +185,11 @@ class DiskannSearcher(BaseSearcher):
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = False,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        batch_recompute: bool = False,
        dedup_node_dis: bool = False,
        **kwargs,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
        """
        Search for nearest neighbors using DiskANN index.

@@ -216,14 +215,10 @@ class DiskannSearcher(BaseSearcher):
        # Handle zmq_port compatibility: DiskANN can now update port at runtime
        if recompute_embeddings:
            if zmq_port is None:
-                raise ValueError(
-                    "zmq_port must be provided if recompute_embeddings is True"
-                )
+                raise ValueError("zmq_port must be provided if recompute_embeddings is True")
            current_port = self._index.get_zmq_port()
            if zmq_port != current_port:
-                logger.debug(
-                    f"Updating DiskANN zmq_port from {current_port} to {zmq_port}"
-                )
+                logger.debug(f"Updating DiskANN zmq_port from {current_port} to {zmq_port}")
                self._index.set_zmq_port(zmq_port)

        # DiskANN doesn't support "proportional" strategy
@@ -259,8 +254,6 @@ class DiskannSearcher(BaseSearcher):
                use_global_pruning,
            )

-        string_labels = [
-            [str(int_label) for int_label in batch_labels] for batch_labels in labels
-        ]
+        string_labels = [[str(int_label) for int_label in batch_labels] for batch_labels in labels]

        return {"labels": string_labels, "distances": distances}
--- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py
@@ -3,16 +3,16 @@ DiskANN-specific embedding server
 """

 import argparse
+import json
+import logging
+import os
+import sys
 import threading
 import time
-import os
-import zmq
-import numpy as np
-import json
 from pathlib import Path
-from typing import Optional
-import sys
-import logging
+
+import numpy as np
+import zmq

 # Set up logging based on environment variable
 LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
@@ -32,7 +32,7 @@ if not logger.handlers:


 def create_diskann_embedding_server(
-    passages_file: Optional[str] = None,
+    passages_file: str | None = None,
    zmq_port: int = 5555,
    model_name: str = "sentence-transformers/all-mpnet-base-v2",
    embedding_mode: str = "sentence-transformers",
@@ -50,8 +50,8 @@ def create_diskann_embedding_server(
    sys.path.insert(0, str(leann_core_path))

    try:
-        from leann.embedding_compute import compute_embeddings
        from leann.api import PassageManager
+        from leann.embedding_compute import compute_embeddings

        logger.info("Successfully imported unified embedding computation module")
    except ImportError as e:
@@ -76,7 +76,7 @@ def create_diskann_embedding_server(
        raise ValueError("Only metadata files (.meta.json) are supported")

    # Load metadata to get passage sources
-    with open(passages_file, "r") as f:
+    with open(passages_file) as f:
        meta = json.load(f)

    passages = PassageManager(meta["passage_sources"])
@@ -150,9 +150,7 @@ def create_diskann_embedding_server(
                        ):
                            texts = request
                            is_text_request = True
-                            logger.info(
-                                f"✅ MSGPACK: Direct text request for {len(texts)} texts"
-                            )
+                            logger.info(f"✅ MSGPACK: Direct text request for {len(texts)} texts")
                        else:
                            raise ValueError("Not a valid msgpack text request")
                    except Exception as msgpack_error:
@@ -167,9 +165,7 @@ def create_diskann_embedding_server(
                            passage_data = passages.get_passage(str(nid))
                            txt = passage_data["text"]
                            if not txt:
-                                raise RuntimeError(
-                                    f"FATAL: Empty text for passage ID {nid}"
-                                )
+                                raise RuntimeError(f"FATAL: Empty text for passage ID {nid}")
                            texts.append(txt)
                        except KeyError as e:
                            logger.error(f"Passage ID {nid} not found: {e}")
@@ -180,9 +176,7 @@ def create_diskann_embedding_server(

                    # Debug logging
                    logger.debug(f"Processing {len(texts)} texts")
-                    logger.debug(
-                        f"Text lengths: {[len(t) for t in texts[:5]]}"
-                    )  # Show first 5
+                    logger.debug(f"Text lengths: {[len(t) for t in texts[:5]]}")  # Show first 5

                # Process embeddings using unified computation
                embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
@@ -199,9 +193,7 @@ def create_diskann_embedding_server(
                else:
                    # For DiskANN C++ compatibility: return protobuf format
                    resp_proto = embedding_pb2.NodeEmbeddingResponse()
-                    hidden_contiguous = np.ascontiguousarray(
-                        embeddings, dtype=np.float32
-                    )
+                    hidden_contiguous = np.ascontiguousarray(embeddings, dtype=np.float32)

                    # Serialize embeddings data
                    resp_proto.embeddings_data = hidden_contiguous.tobytes()
--- a/packages/leann-backend-diskann/leann_backend_diskann/embedding_pb2.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/embedding_pb2.py
@@ -1,27 +1,28 @@
-# -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
 # source: embedding.proto
+# ruff: noqa
 """Generated protocol buffer code."""
-from google.protobuf.internal import builder as _builder
+
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
 from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+
 # @@protoc_insertion_point(imports)

 _sym_db = _symbol_database.Default()


-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x65mbedding.proto\x12\x0eprotoembedding\"(\n\x14NodeEmbeddingRequest\x12\x10\n\x08node_ids\x18\x01 \x03(\r\"Y\n\x15NodeEmbeddingResponse\x12\x17\n\x0f\x65mbeddings_data\x18\x01 \x01(\x0c\x12\x12\n\ndimensions\x18\x02 \x03(\x05\x12\x13\n\x0bmissing_ids\x18\x03 \x03(\rb\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
+    b'\n\x0f\x65mbedding.proto\x12\x0eprotoembedding"(\n\x14NodeEmbeddingRequest\x12\x10\n\x08node_ids\x18\x01 \x03(\r"Y\n\x15NodeEmbeddingResponse\x12\x17\n\x0f\x65mbeddings_data\x18\x01 \x01(\x0c\x12\x12\n\ndimensions\x18\x02 \x03(\x05\x12\x13\n\x0bmissing_ids\x18\x03 \x03(\rb\x06proto3'
+)

 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'embedding_pb2', globals())
-if _descriptor._USE_C_DESCRIPTORS == False:
-
-  DESCRIPTOR._options = None
-  _NODEEMBEDDINGREQUEST._serialized_start=35
-  _NODEEMBEDDINGREQUEST._serialized_end=75
-  _NODEEMBEDDINGRESPONSE._serialized_start=77
-  _NODEEMBEDDINGRESPONSE._serialized_end=166
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "embedding_pb2", globals())
+if not _descriptor._USE_C_DESCRIPTORS:
+    DESCRIPTOR._options = None
+    _NODEEMBEDDINGREQUEST._serialized_start = 35
+    _NODEEMBEDDINGREQUEST._serialized_end = 75
+    _NODEEMBEDDINGRESPONSE._serialized_start = 77
+    _NODEEMBEDDINGRESPONSE._serialized_end = 166
 # @@protoc_insertion_point(module_scope)
--- a/packages/leann-backend-diskann/pyproject.toml
+++ b/packages/leann-backend-diskann/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-diskann"
-version = "0.1.12"
-dependencies = ["leann-core==0.1.12", "numpy", "protobuf>=3.19.0"]
+version = "0.1.14"
+dependencies = ["leann-core==0.1.14", "numpy", "protobuf>=3.19.0"]

 [tool.scikit-build]
 # Key: simplified CMake path
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/init.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/init.py
@@ -1 +1 @@
-from . import hnsw_backend
+from . import hnsw_backend as hnsw_backend
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py
@@ -1,87 +1,111 @@
+import argparse
+import gc  # Import garbage collector interface
+import os
 import struct
 import sys
-import numpy as np
-import os
-import argparse
-import gc # Import garbage collector interface
 import time
+
+import numpy as np
+
 # --- FourCCs (add more if needed) ---
-INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b'IHNf', 'little')
+INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b"IHNf", "little")
 # Add other HNSW fourccs if you expect different storage types inside HNSW
 # INDEX_HNSW_PQ_FOURCC = int.from_bytes(b'IHNp', 'little')
 # INDEX_HNSW_SQ_FOURCC = int.from_bytes(b'IHNs', 'little')
 # INDEX_HNSW_CAGRA_FOURCC = int.from_bytes(b'IHNc', 'little') # Example

-EXPECTED_HNSW_FOURCCS = {INDEX_HNSW_FLAT_FOURCC} # Modify if needed
-NULL_INDEX_FOURCC = int.from_bytes(b'null', 'little')
+EXPECTED_HNSW_FOURCCS = {INDEX_HNSW_FLAT_FOURCC}  # Modify if needed
+NULL_INDEX_FOURCC = int.from_bytes(b"null", "little")

 # --- Helper functions for reading/writing binary data ---

+
 def read_struct(f, fmt):
    """Reads data according to the struct format."""
    size = struct.calcsize(fmt)
    data = f.read(size)
    if len(data) != size:
-        raise EOFError(f"File ended unexpectedly reading struct fmt '{fmt}'. Expected {size} bytes, got {len(data)}.")
+        raise EOFError(
+            f"File ended unexpectedly reading struct fmt '{fmt}'. Expected {size} bytes, got {len(data)}."
+        )
    return struct.unpack(fmt, data)[0]

+
 def read_vector_raw(f, element_fmt_char):
    """Reads a vector (size followed by data), returns count and raw bytes."""
-    count = -1 # Initialize count
-    total_bytes = -1 # Initialize total_bytes
+    count = -1  # Initialize count
+    total_bytes = -1  # Initialize total_bytes
    try:
-        count = read_struct(f, '<Q') # size_t usually 64-bit unsigned
+        count = read_struct(f, "<Q")  # size_t usually 64-bit unsigned
        element_size = struct.calcsize(element_fmt_char)
        # --- FIX for MemoryError: Check for unreasonably large count ---
-        max_reasonable_count = 10 * (10**9) # ~10 billion elements limit
+        max_reasonable_count = 10 * (10**9)  # ~10 billion elements limit
        if count > max_reasonable_count or count < 0:
-            raise MemoryError(f"Vector count {count} seems unreasonably large, possibly due to file corruption or incorrect format read.")
+            raise MemoryError(
+                f"Vector count {count} seems unreasonably large, possibly due to file corruption or incorrect format read."
+            )

        total_bytes = count * element_size
        # --- FIX for MemoryError: Check for huge byte size before allocation ---
-        max_reasonable_bytes = 50 * (1024**3) # ~50 GB limit
-        if total_bytes > max_reasonable_bytes or total_bytes < 0: # Check for overflow
-             raise MemoryError(f"Attempting to read {total_bytes} bytes ({count} elements * {element_size} bytes/element), which exceeds the safety limit. File might be corrupted or format mismatch.")
+        max_reasonable_bytes = 50 * (1024**3)  # ~50 GB limit
+        if total_bytes > max_reasonable_bytes or total_bytes < 0:  # Check for overflow
+            raise MemoryError(
+                f"Attempting to read {total_bytes} bytes ({count} elements * {element_size} bytes/element), which exceeds the safety limit. File might be corrupted or format mismatch."
+            )

        data_bytes = f.read(total_bytes)

        if len(data_bytes) != total_bytes:
-             raise EOFError(f"File ended unexpectedly reading vector data. Expected {total_bytes} bytes, got {len(data_bytes)}.")
+            raise EOFError(
+                f"File ended unexpectedly reading vector data. Expected {total_bytes} bytes, got {len(data_bytes)}."
+            )
        return count, data_bytes
    except (MemoryError, OverflowError) as e:
-         # Add context to the error message
-         print(f"\nError during raw vector read (element_fmt='{element_fmt_char}', count={count}, total_bytes={total_bytes}): {e}", file=sys.stderr)
-         raise e # Re-raise the original error type
+        # Add context to the error message
+        print(
+            f"\nError during raw vector read (element_fmt='{element_fmt_char}', count={count}, total_bytes={total_bytes}): {e}",
+            file=sys.stderr,
+        )
+        raise e  # Re-raise the original error type
+

 def read_numpy_vector(f, np_dtype, struct_fmt_char):
    """Reads a vector into a NumPy array."""
-    count = -1 # Initialize count for robust error handling
-    print(f"  Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ", end='', flush=True)
+    count = -1  # Initialize count for robust error handling
+    print(f"  Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ", end="", flush=True)
    try:
        count, data_bytes = read_vector_raw(f, struct_fmt_char)
        print(f"Count={count}, Bytes={len(data_bytes)}")
        if count > 0 and len(data_bytes) > 0:
            arr = np.frombuffer(data_bytes, dtype=np_dtype)
            if arr.size != count:
-                raise ValueError(f"Inconsistent array size after reading. Expected {count}, got {arr.size}")
+                raise ValueError(
+                    f"Inconsistent array size after reading. Expected {count}, got {arr.size}"
+                )
            return arr
        elif count == 0:
-             return np.array([], dtype=np_dtype)
+            return np.array([], dtype=np_dtype)
        else:
-             raise ValueError("Read zero bytes but count > 0.")
+            raise ValueError("Read zero bytes but count > 0.")
    except MemoryError as e:
        # Now count should be defined (or -1 if error was in read_struct)
-        print(f"\nMemoryError creating NumPy array (dtype={np_dtype}, count={count}). {e}", file=sys.stderr)
+        print(
+            f"\nMemoryError creating NumPy array (dtype={np_dtype}, count={count}). {e}",
+            file=sys.stderr,
+        )
        raise e
-    except Exception as e: # Catch other potential errors like ValueError
-        print(f"\nError reading numpy vector (dtype={np_dtype}, fmt='{struct_fmt_char}', count={count}): {e}", file=sys.stderr)
+    except Exception as e:  # Catch other potential errors like ValueError
+        print(
+            f"\nError reading numpy vector (dtype={np_dtype}, fmt='{struct_fmt_char}', count={count}): {e}",
+            file=sys.stderr,
+        )
        raise e


 def write_numpy_vector(f, arr, struct_fmt_char):
    """Writes a NumPy array as a vector (size followed by data)."""
    count = arr.size
-    f.write(struct.pack('<Q', count))
+    f.write(struct.pack("<Q", count))
    try:
        expected_dtype = np.dtype(struct_fmt_char)
        if arr.dtype != expected_dtype:
@@ -89,23 +113,30 @@ def write_numpy_vector(f, arr, struct_fmt_char):
        else:
            data_to_write = arr.tobytes()
        f.write(data_to_write)
-        del data_to_write # Hint GC
+        del data_to_write  # Hint GC
    except MemoryError as e:
-         print(f"\nMemoryError converting NumPy array to bytes for writing (size={count}, dtype={arr.dtype}). {e}", file=sys.stderr)
-         raise e
+        print(
+            f"\nMemoryError converting NumPy array to bytes for writing (size={count}, dtype={arr.dtype}). {e}",
+            file=sys.stderr,
+        )
+        raise e
+

 def write_list_vector(f, lst, struct_fmt_char):
    """Writes a Python list as a vector iteratively."""
    count = len(lst)
-    f.write(struct.pack('<Q', count))
-    fmt = '<' + struct_fmt_char
+    f.write(struct.pack("<Q", count))
+    fmt = "<" + struct_fmt_char
    chunk_size = 1024 * 1024
    element_size = struct.calcsize(fmt)
    # Allocate buffer outside the loop if possible, or handle MemoryError during allocation
    try:
        buffer = bytearray(chunk_size * element_size)
    except MemoryError:
-        print(f"MemoryError: Cannot allocate buffer for writing list vector chunk (size {chunk_size * element_size} bytes).", file=sys.stderr)
+        print(
+            f"MemoryError: Cannot allocate buffer for writing list vector chunk (size {chunk_size * element_size} bytes).",
+            file=sys.stderr,
+        )
        raise
    buffer_count = 0

@@ -116,66 +147,80 @@ def write_list_vector(f, lst, struct_fmt_char):
            buffer_count += 1

            if buffer_count == chunk_size or i == count - 1:
-                f.write(buffer[:buffer_count * element_size])
+                f.write(buffer[: buffer_count * element_size])
                buffer_count = 0

        except struct.error as e:
-            print(f"\nStruct packing error for item {item} at index {i} with format '{fmt}'. {e}", file=sys.stderr)
+            print(
+                f"\nStruct packing error for item {item} at index {i} with format '{fmt}'. {e}",
+                file=sys.stderr,
+            )
            raise e


 def get_cum_neighbors(cum_nneighbor_per_level_np, level):
    """Helper to get cumulative neighbors count, matching C++ logic."""
-    if level < 0: return 0
+    if level < 0:
+        return 0
    if level < len(cum_nneighbor_per_level_np):
        return cum_nneighbor_per_level_np[level]
    else:
        return cum_nneighbor_per_level_np[-1] if len(cum_nneighbor_per_level_np) > 0 else 0

-def write_compact_format(f_out, original_hnsw_data, assign_probas_np, cum_nneighbor_per_level_np, 
-                        levels_np, compact_level_ptr, compact_node_offsets_np, 
-                        compact_neighbors_data, storage_fourcc, storage_data):
+
+def write_compact_format(
+    f_out,
+    original_hnsw_data,
+    assign_probas_np,
+    cum_nneighbor_per_level_np,
+    levels_np,
+    compact_level_ptr,
+    compact_node_offsets_np,
+    compact_neighbors_data,
+    storage_fourcc,
+    storage_data,
+):
    """Write HNSW data in compact format following C++ read order exactly."""
    # Write IndexHNSW Header
-    f_out.write(struct.pack('<I', original_hnsw_data['index_fourcc']))
-    f_out.write(struct.pack('<i', original_hnsw_data['d']))
-    f_out.write(struct.pack('<q', original_hnsw_data['ntotal']))
-    f_out.write(struct.pack('<q', original_hnsw_data['dummy1']))
-    f_out.write(struct.pack('<q', original_hnsw_data['dummy2']))
-    f_out.write(struct.pack('<?', original_hnsw_data['is_trained']))
-    f_out.write(struct.pack('<i', original_hnsw_data['metric_type']))
-    if original_hnsw_data['metric_type'] > 1:
-         f_out.write(struct.pack('<f', original_hnsw_data['metric_arg']))
+    f_out.write(struct.pack("<I", original_hnsw_data["index_fourcc"]))
+    f_out.write(struct.pack("<i", original_hnsw_data["d"]))
+    f_out.write(struct.pack("<q", original_hnsw_data["ntotal"]))
+    f_out.write(struct.pack("<q", original_hnsw_data["dummy1"]))
+    f_out.write(struct.pack("<q", original_hnsw_data["dummy2"]))
+    f_out.write(struct.pack("<?", original_hnsw_data["is_trained"]))
+    f_out.write(struct.pack("<i", original_hnsw_data["metric_type"]))
+    if original_hnsw_data["metric_type"] > 1:
+        f_out.write(struct.pack("<f", original_hnsw_data["metric_arg"]))

    # Write HNSW struct parts (standard order)
-    write_numpy_vector(f_out, assign_probas_np, 'd')
-    write_numpy_vector(f_out, cum_nneighbor_per_level_np, 'i')
-    write_numpy_vector(f_out, levels_np, 'i')
+    write_numpy_vector(f_out, assign_probas_np, "d")
+    write_numpy_vector(f_out, cum_nneighbor_per_level_np, "i")
+    write_numpy_vector(f_out, levels_np, "i")

    # Write compact format flag
-    f_out.write(struct.pack('<?', True)) # storage_is_compact = True
+    f_out.write(struct.pack("<?", True))  # storage_is_compact = True

    # Write compact data in CORRECT C++ read order: level_ptr, node_offsets FIRST
    if isinstance(compact_level_ptr, np.ndarray):
-        write_numpy_vector(f_out, compact_level_ptr, 'Q')
+        write_numpy_vector(f_out, compact_level_ptr, "Q")
    else:
-        write_list_vector(f_out, compact_level_ptr, 'Q')
-    
-    write_numpy_vector(f_out, compact_node_offsets_np, 'Q')
+        write_list_vector(f_out, compact_level_ptr, "Q")
+
+    write_numpy_vector(f_out, compact_node_offsets_np, "Q")

    # Write HNSW scalar parameters
-    f_out.write(struct.pack('<i', original_hnsw_data['entry_point']))
-    f_out.write(struct.pack('<i', original_hnsw_data['max_level']))
-    f_out.write(struct.pack('<i', original_hnsw_data['efConstruction']))
-    f_out.write(struct.pack('<i', original_hnsw_data['efSearch']))
-    f_out.write(struct.pack('<i', original_hnsw_data['dummy_upper_beam']))
+    f_out.write(struct.pack("<i", original_hnsw_data["entry_point"]))
+    f_out.write(struct.pack("<i", original_hnsw_data["max_level"]))
+    f_out.write(struct.pack("<i", original_hnsw_data["efConstruction"]))
+    f_out.write(struct.pack("<i", original_hnsw_data["efSearch"]))
+    f_out.write(struct.pack("<i", original_hnsw_data["dummy_upper_beam"]))

    # Write storage fourcc (this determines how to read what follows)
-    f_out.write(struct.pack('<I', storage_fourcc))
-    
+    f_out.write(struct.pack("<I", storage_fourcc))
+
    # Write compact neighbors data AFTER storage fourcc
-    write_list_vector(f_out, compact_neighbors_data, 'i')
-    
+    write_list_vector(f_out, compact_neighbors_data, "i")
+
    # Write storage data if not NULL (only after neighbors)
    if storage_fourcc != NULL_INDEX_FOURCC and storage_data:
        f_out.write(storage_data)
@@ -183,11 +228,12 @@ def write_compact_format(f_out, original_hnsw_data, assign_probas_np, cum_nneigh

 # --- Main Conversion Logic ---

+
 def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=True):
    """
    Converts an HNSW graph file to the CSR format.
    Supports both original and already-compact formats (backward compatibility).
-    
+
    Args:
        input_filename: Input HNSW index file
        output_filename: Output CSR index file
@@ -196,172 +242,228 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
    print(f"Starting conversion: {input_filename} -> {output_filename}")
    start_time = time.time()
    original_hnsw_data = {}
-    neighbors_np = None # Initialize to allow check in finally block
+    neighbors_np = None  # Initialize to allow check in finally block
    try:
-        with open(input_filename, 'rb') as f_in, open(output_filename, 'wb') as f_out:
-
+        with open(input_filename, "rb") as f_in, open(output_filename, "wb") as f_out:
            # --- Read IndexHNSW FourCC and Header ---
            print(f"[{time.time() - start_time:.2f}s] Reading Index HNSW header...")
            # ... (Keep the header reading logic as before) ...
-            hnsw_index_fourcc = read_struct(f_in, '<I')
+            hnsw_index_fourcc = read_struct(f_in, "<I")
            if hnsw_index_fourcc not in EXPECTED_HNSW_FOURCCS:
-                 print(f"Error: Expected HNSW Index FourCC ({list(EXPECTED_HNSW_FOURCCS)}), got {hnsw_index_fourcc:08x}.", file=sys.stderr)
-                 return False
-            original_hnsw_data['index_fourcc'] = hnsw_index_fourcc
-            original_hnsw_data['d'] = read_struct(f_in, '<i')
-            original_hnsw_data['ntotal'] = read_struct(f_in, '<q')
-            original_hnsw_data['dummy1'] = read_struct(f_in, '<q')
-            original_hnsw_data['dummy2'] = read_struct(f_in, '<q')
-            original_hnsw_data['is_trained'] = read_struct(f_in, '?')
-            original_hnsw_data['metric_type'] = read_struct(f_in, '<i')
-            original_hnsw_data['metric_arg'] = 0.0
-            if original_hnsw_data['metric_type'] > 1:
-                 original_hnsw_data['metric_arg'] = read_struct(f_in, '<f')
-            print(f"[{time.time() - start_time:.2f}s]   Header read: d={original_hnsw_data['d']}, ntotal={original_hnsw_data['ntotal']}")
-
+                print(
+                    f"Error: Expected HNSW Index FourCC ({list(EXPECTED_HNSW_FOURCCS)}), got {hnsw_index_fourcc:08x}.",
+                    file=sys.stderr,
+                )
+                return False
+            original_hnsw_data["index_fourcc"] = hnsw_index_fourcc
+            original_hnsw_data["d"] = read_struct(f_in, "<i")
+            original_hnsw_data["ntotal"] = read_struct(f_in, "<q")
+            original_hnsw_data["dummy1"] = read_struct(f_in, "<q")
+            original_hnsw_data["dummy2"] = read_struct(f_in, "<q")
+            original_hnsw_data["is_trained"] = read_struct(f_in, "?")
+            original_hnsw_data["metric_type"] = read_struct(f_in, "<i")
+            original_hnsw_data["metric_arg"] = 0.0
+            if original_hnsw_data["metric_type"] > 1:
+                original_hnsw_data["metric_arg"] = read_struct(f_in, "<f")
+            print(
+                f"[{time.time() - start_time:.2f}s]   Header read: d={original_hnsw_data['d']}, ntotal={original_hnsw_data['ntotal']}"
+            )

            # --- Read original HNSW struct data ---
            print(f"[{time.time() - start_time:.2f}s] Reading HNSW struct vectors...")
-            assign_probas_np = read_numpy_vector(f_in, np.float64, 'd')
-            print(f"[{time.time() - start_time:.2f}s]   Read assign_probas ({assign_probas_np.size})")
+            assign_probas_np = read_numpy_vector(f_in, np.float64, "d")
+            print(
+                f"[{time.time() - start_time:.2f}s]   Read assign_probas ({assign_probas_np.size})"
+            )
            gc.collect()

-            cum_nneighbor_per_level_np = read_numpy_vector(f_in, np.int32, 'i')
-            print(f"[{time.time() - start_time:.2f}s]   Read cum_nneighbor_per_level ({cum_nneighbor_per_level_np.size})")
+            cum_nneighbor_per_level_np = read_numpy_vector(f_in, np.int32, "i")
+            print(
+                f"[{time.time() - start_time:.2f}s]   Read cum_nneighbor_per_level ({cum_nneighbor_per_level_np.size})"
+            )
            gc.collect()

-            levels_np = read_numpy_vector(f_in, np.int32, 'i')
+            levels_np = read_numpy_vector(f_in, np.int32, "i")
            print(f"[{time.time() - start_time:.2f}s]   Read levels ({levels_np.size})")
            gc.collect()

            ntotal = len(levels_np)
-            if ntotal != original_hnsw_data['ntotal']:
-                 print(f"Warning: ntotal mismatch! Header says {original_hnsw_data['ntotal']}, levels vector size is {ntotal}. Using levels vector size.", file=sys.stderr)
-                 original_hnsw_data['ntotal'] = ntotal
+            if ntotal != original_hnsw_data["ntotal"]:
+                print(
+                    f"Warning: ntotal mismatch! Header says {original_hnsw_data['ntotal']}, levels vector size is {ntotal}. Using levels vector size.",
+                    file=sys.stderr,
+                )
+                original_hnsw_data["ntotal"] = ntotal

            # --- Check for compact format flag ---
            print(f"[{time.time() - start_time:.2f}s]   Probing for compact storage flag...")
            pos_before_compact = f_in.tell()
            try:
-                is_compact_flag = read_struct(f_in, '<?')
+                is_compact_flag = read_struct(f_in, "<?")
                print(f"[{time.time() - start_time:.2f}s]   Found compact flag: {is_compact_flag}")
-                
+
                if is_compact_flag:
                    # Input is already in compact format - read compact data
-                    print(f"[{time.time() - start_time:.2f}s]   Input is already in compact format, reading compact data...")
-                    
-                    compact_level_ptr = read_numpy_vector(f_in, np.uint64, 'Q')
-                    print(f"[{time.time() - start_time:.2f}s]   Read compact_level_ptr ({compact_level_ptr.size})")
-                    
-                    compact_node_offsets_np = read_numpy_vector(f_in, np.uint64, 'Q')
-                    print(f"[{time.time() - start_time:.2f}s]   Read compact_node_offsets ({compact_node_offsets_np.size})")
-                    
+                    print(
+                        f"[{time.time() - start_time:.2f}s]   Input is already in compact format, reading compact data..."
+                    )
+
+                    compact_level_ptr = read_numpy_vector(f_in, np.uint64, "Q")
+                    print(
+                        f"[{time.time() - start_time:.2f}s]   Read compact_level_ptr ({compact_level_ptr.size})"
+                    )
+
+                    compact_node_offsets_np = read_numpy_vector(f_in, np.uint64, "Q")
+                    print(
+                        f"[{time.time() - start_time:.2f}s]   Read compact_node_offsets ({compact_node_offsets_np.size})"
+                    )
+
                    # Read scalar parameters
-                    original_hnsw_data['entry_point'] = read_struct(f_in, '<i')
-                    original_hnsw_data['max_level'] = read_struct(f_in, '<i')
-                    original_hnsw_data['efConstruction'] = read_struct(f_in, '<i')
-                    original_hnsw_data['efSearch'] = read_struct(f_in, '<i')
-                    original_hnsw_data['dummy_upper_beam'] = read_struct(f_in, '<i')
-                    print(f"[{time.time() - start_time:.2f}s]   Read scalar params (ep={original_hnsw_data['entry_point']}, max_lvl={original_hnsw_data['max_level']})")
+                    original_hnsw_data["entry_point"] = read_struct(f_in, "<i")
+                    original_hnsw_data["max_level"] = read_struct(f_in, "<i")
+                    original_hnsw_data["efConstruction"] = read_struct(f_in, "<i")
+                    original_hnsw_data["efSearch"] = read_struct(f_in, "<i")
+                    original_hnsw_data["dummy_upper_beam"] = read_struct(f_in, "<i")
+                    print(
+                        f"[{time.time() - start_time:.2f}s]   Read scalar params (ep={original_hnsw_data['entry_point']}, max_lvl={original_hnsw_data['max_level']})"
+                    )

                    # Read storage fourcc
-                    storage_fourcc = read_struct(f_in, '<I')
-                    print(f"[{time.time() - start_time:.2f}s]   Found storage fourcc: {storage_fourcc:08x}")
-                    
+                    storage_fourcc = read_struct(f_in, "<I")
+                    print(
+                        f"[{time.time() - start_time:.2f}s]   Found storage fourcc: {storage_fourcc:08x}"
+                    )
+
                    if prune_embeddings and storage_fourcc != NULL_INDEX_FOURCC:
                        # Read compact neighbors data
-                        compact_neighbors_data_np = read_numpy_vector(f_in, np.int32, 'i')
-                        print(f"[{time.time() - start_time:.2f}s]   Read compact neighbors data ({compact_neighbors_data_np.size})")
+                        compact_neighbors_data_np = read_numpy_vector(f_in, np.int32, "i")
+                        print(
+                            f"[{time.time() - start_time:.2f}s]   Read compact neighbors data ({compact_neighbors_data_np.size})"
+                        )
                        compact_neighbors_data = compact_neighbors_data_np.tolist()
                        del compact_neighbors_data_np
-                        
+
                        # Skip storage data and write with NULL marker
-                        print(f"[{time.time() - start_time:.2f}s]   Pruning embeddings: Writing NULL storage marker.")
+                        print(
+                            f"[{time.time() - start_time:.2f}s]   Pruning embeddings: Writing NULL storage marker."
+                        )
                        storage_fourcc = NULL_INDEX_FOURCC
                    elif not prune_embeddings:
                        # Read and preserve compact neighbors and storage
-                        compact_neighbors_data_np = read_numpy_vector(f_in, np.int32, 'i')
+                        compact_neighbors_data_np = read_numpy_vector(f_in, np.int32, "i")
                        compact_neighbors_data = compact_neighbors_data_np.tolist()
                        del compact_neighbors_data_np
-                        
+
                        # Read remaining storage data
                        storage_data = f_in.read()
                    else:
                        # Already pruned (NULL storage)
-                        compact_neighbors_data_np = read_numpy_vector(f_in, np.int32, 'i')
+                        compact_neighbors_data_np = read_numpy_vector(f_in, np.int32, "i")
                        compact_neighbors_data = compact_neighbors_data_np.tolist()
                        del compact_neighbors_data_np
-                        storage_data = b''
-                    
+                        storage_data = b""
+
                    # Write the updated compact format
                    print(f"[{time.time() - start_time:.2f}s] Writing updated compact format...")
-                    write_compact_format(f_out, original_hnsw_data, assign_probas_np, cum_nneighbor_per_level_np, 
-                                       levels_np, compact_level_ptr, compact_node_offsets_np, 
-                                       compact_neighbors_data, storage_fourcc, storage_data if not prune_embeddings else b'')
-                    
+                    write_compact_format(
+                        f_out,
+                        original_hnsw_data,
+                        assign_probas_np,
+                        cum_nneighbor_per_level_np,
+                        levels_np,
+                        compact_level_ptr,
+                        compact_node_offsets_np,
+                        compact_neighbors_data,
+                        storage_fourcc,
+                        storage_data if not prune_embeddings else b"",
+                    )
+
                    print(f"[{time.time() - start_time:.2f}s] Conversion complete.")
                    return True
-                    
+
                else:
                    # is_compact=False, rewind and read original format
                    f_in.seek(pos_before_compact)
-                    print(f"[{time.time() - start_time:.2f}s]   Compact flag is False, reading original format...")
-                    
+                    print(
+                        f"[{time.time() - start_time:.2f}s]   Compact flag is False, reading original format..."
+                    )
+
            except EOFError:
                # No compact flag found, assume original format
                f_in.seek(pos_before_compact)
-                print(f"[{time.time() - start_time:.2f}s]   No compact flag found, assuming original format...")
+                print(
+                    f"[{time.time() - start_time:.2f}s]   No compact flag found, assuming original format..."
+                )

            # --- Handle potential extra byte in original format (like C++ code) ---
-            print(f"[{time.time() - start_time:.2f}s]   Probing for potential extra byte before non-compact offsets...")
+            print(
+                f"[{time.time() - start_time:.2f}s]   Probing for potential extra byte before non-compact offsets..."
+            )
            pos_before_probe = f_in.tell()
            try:
-                suspected_flag = read_struct(f_in, '<B')  # Read 1 byte
+                suspected_flag = read_struct(f_in, "<B")  # Read 1 byte
                if suspected_flag == 0x00:
-                    print(f"[{time.time() - start_time:.2f}s]   Found and consumed an unexpected 0x00 byte.")
+                    print(
+                        f"[{time.time() - start_time:.2f}s]   Found and consumed an unexpected 0x00 byte."
+                    )
                elif suspected_flag == 0x01:
-                    print(f"[{time.time() - start_time:.2f}s]   ERROR: Found 0x01 but is_compact should be False")
+                    print(
+                        f"[{time.time() - start_time:.2f}s]   ERROR: Found 0x01 but is_compact should be False"
+                    )
                    raise ValueError("Inconsistent compact flag state")
                else:
                    # Rewind - this byte is part of offsets data
                    f_in.seek(pos_before_probe)
-                    print(f"[{time.time() - start_time:.2f}s]   Rewound to original position (byte was 0x{suspected_flag:02x})")
+                    print(
+                        f"[{time.time() - start_time:.2f}s]   Rewound to original position (byte was 0x{suspected_flag:02x})"
+                    )
            except EOFError:
                f_in.seek(pos_before_probe)
-                print(f"[{time.time() - start_time:.2f}s]   No extra byte found (EOF), proceeding with offsets read")
+                print(
+                    f"[{time.time() - start_time:.2f}s]   No extra byte found (EOF), proceeding with offsets read"
+                )

            # --- Read original format data ---
-            offsets_np = read_numpy_vector(f_in, np.uint64, 'Q')
+            offsets_np = read_numpy_vector(f_in, np.uint64, "Q")
            print(f"[{time.time() - start_time:.2f}s]   Read offsets ({offsets_np.size})")
            if len(offsets_np) != ntotal + 1:
-                 raise ValueError(f"Inconsistent offsets size: len(levels)={ntotal} but len(offsets)={len(offsets_np)}")
+                raise ValueError(
+                    f"Inconsistent offsets size: len(levels)={ntotal} but len(offsets)={len(offsets_np)}"
+                )
            gc.collect()

            print(f"[{time.time() - start_time:.2f}s]   Attempting to read neighbors vector...")
-            neighbors_np = read_numpy_vector(f_in, np.int32, 'i')
+            neighbors_np = read_numpy_vector(f_in, np.int32, "i")
            print(f"[{time.time() - start_time:.2f}s]   Read neighbors ({neighbors_np.size})")
            expected_neighbors_size = offsets_np[-1] if ntotal > 0 else 0
            if neighbors_np.size != expected_neighbors_size:
-                 print(f"Warning: neighbors vector size mismatch. Expected {expected_neighbors_size} based on offsets, got {neighbors_np.size}.")
+                print(
+                    f"Warning: neighbors vector size mismatch. Expected {expected_neighbors_size} based on offsets, got {neighbors_np.size}."
+                )
            gc.collect()

-            original_hnsw_data['entry_point'] = read_struct(f_in, '<i')
-            original_hnsw_data['max_level'] = read_struct(f_in, '<i')
-            original_hnsw_data['efConstruction'] = read_struct(f_in, '<i')
-            original_hnsw_data['efSearch'] = read_struct(f_in, '<i')
-            original_hnsw_data['dummy_upper_beam'] = read_struct(f_in, '<i')
-            print(f"[{time.time() - start_time:.2f}s]   Read scalar params (ep={original_hnsw_data['entry_point']}, max_lvl={original_hnsw_data['max_level']})")
+            original_hnsw_data["entry_point"] = read_struct(f_in, "<i")
+            original_hnsw_data["max_level"] = read_struct(f_in, "<i")
+            original_hnsw_data["efConstruction"] = read_struct(f_in, "<i")
+            original_hnsw_data["efSearch"] = read_struct(f_in, "<i")
+            original_hnsw_data["dummy_upper_beam"] = read_struct(f_in, "<i")
+            print(
+                f"[{time.time() - start_time:.2f}s]   Read scalar params (ep={original_hnsw_data['entry_point']}, max_lvl={original_hnsw_data['max_level']})"
+            )

            print(f"[{time.time() - start_time:.2f}s] Checking for storage data...")
            storage_fourcc = None
            try:
-                storage_fourcc = read_struct(f_in, '<I')
-                print(f"[{time.time() - start_time:.2f}s]   Found storage fourcc: {storage_fourcc:08x}.")
+                storage_fourcc = read_struct(f_in, "<I")
+                print(
+                    f"[{time.time() - start_time:.2f}s]   Found storage fourcc: {storage_fourcc:08x}."
+                )
            except EOFError:
-                 print(f"[{time.time() - start_time:.2f}s]   No storage data found (EOF).")
+                print(f"[{time.time() - start_time:.2f}s]   No storage data found (EOF).")
            except Exception as e:
-                 print(f"[{time.time() - start_time:.2f}s]   Error reading potential storage data: {e}")
-
+                print(
+                    f"[{time.time() - start_time:.2f}s]   Error reading potential storage data: {e}"
+                )

            # --- Perform Conversion ---
            print(f"[{time.time() - start_time:.2f}s] Converting to CSR format...")
@@ -373,17 +475,21 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=

            current_level_ptr_idx = 0
            current_data_idx = 0
-            total_valid_neighbors_counted = 0 # For validation
+            total_valid_neighbors_counted = 0  # For validation

            # Optimize calculation by getting slices once per node if possible
            for i in range(ntotal):
-                if i > 0 and i % (ntotal // 100 or 1) == 0: # Log progress roughly every 1%
+                if i > 0 and i % (ntotal // 100 or 1) == 0:  # Log progress roughly every 1%
                    progress = (i / ntotal) * 100
                    elapsed = time.time() - start_time
-                    print(f"\r[{elapsed:.2f}s]   Converting node {i}/{ntotal} ({progress:.1f}%)...", end="")
+                    print(
+                        f"\r[{elapsed:.2f}s]   Converting node {i}/{ntotal} ({progress:.1f}%)...",
+                        end="",
+                    )

                node_max_level = levels_np[i] - 1
-                if node_max_level < -1: node_max_level = -1
+                if node_max_level < -1:
+                    node_max_level = -1

                node_ptr_start_index = current_level_ptr_idx
                compact_node_offsets_np[i] = node_ptr_start_index
@@ -394,13 +500,17 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
                for level in range(node_max_level + 1):
                    compact_level_ptr.append(current_data_idx)

-                    begin_orig_np = original_offset_start + get_cum_neighbors(cum_nneighbor_per_level_np, level)
-                    end_orig_np = original_offset_start + get_cum_neighbors(cum_nneighbor_per_level_np, level + 1)
+                    begin_orig_np = original_offset_start + get_cum_neighbors(
+                        cum_nneighbor_per_level_np, level
+                    )
+                    end_orig_np = original_offset_start + get_cum_neighbors(
+                        cum_nneighbor_per_level_np, level + 1
+                    )

                    begin_orig = int(begin_orig_np)
                    end_orig = int(end_orig_np)

-                    neighbors_len = len(neighbors_np) # Cache length
+                    neighbors_len = len(neighbors_np)  # Cache length
                    begin_orig = min(max(0, begin_orig), neighbors_len)
                    end_orig = min(max(begin_orig, end_orig), neighbors_len)

@@ -413,83 +523,117 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=

                        if num_valid > 0:
                            # Append valid neighbors
-                            compact_neighbors_data.extend(level_neighbors_slice[valid_neighbors_mask])
+                            compact_neighbors_data.extend(
+                                level_neighbors_slice[valid_neighbors_mask]
+                            )
                            current_data_idx += num_valid
                            total_valid_neighbors_counted += num_valid

-
                compact_level_ptr.append(current_data_idx)
                current_level_ptr_idx += num_pointers_expected

            compact_node_offsets_np[ntotal] = current_level_ptr_idx
-            print(f"\r[{time.time() - start_time:.2f}s]   Conversion loop finished.                        ") # Clear progress line
+            print(
+                f"\r[{time.time() - start_time:.2f}s]   Conversion loop finished.                        "
+            )  # Clear progress line

            # --- Validation Checks ---
            print(f"[{time.time() - start_time:.2f}s] Running validation checks...")
            valid_check_passed = True
            # Check 1: Total valid neighbors count
-            print(f"    Checking total valid neighbor count...")
+            print("    Checking total valid neighbor count...")
            expected_valid_count = np.sum(neighbors_np >= 0)
            if total_valid_neighbors_counted != len(compact_neighbors_data):
-                 print(f"Error: Mismatch between counted valid neighbors ({total_valid_neighbors_counted}) and final compact_data size ({len(compact_neighbors_data)})!", file=sys.stderr)
-                 valid_check_passed = False
+                print(
+                    f"Error: Mismatch between counted valid neighbors ({total_valid_neighbors_counted}) and final compact_data size ({len(compact_neighbors_data)})!",
+                    file=sys.stderr,
+                )
+                valid_check_passed = False
            if expected_valid_count != len(compact_neighbors_data):
-                 print(f"Error: Mismatch between NumPy count of valid neighbors ({expected_valid_count}) and final compact_data size ({len(compact_neighbors_data)})!", file=sys.stderr)
-                 valid_check_passed = False
+                print(
+                    f"Error: Mismatch between NumPy count of valid neighbors ({expected_valid_count}) and final compact_data size ({len(compact_neighbors_data)})!",
+                    file=sys.stderr,
+                )
+                valid_check_passed = False
            else:
-                 print(f"    OK: Total valid neighbors = {len(compact_neighbors_data)}")
+                print(f"    OK: Total valid neighbors = {len(compact_neighbors_data)}")

            # Check 2: Final pointer indices consistency
-            print(f"    Checking final pointer indices...")
+            print("    Checking final pointer indices...")
            if compact_node_offsets_np[ntotal] != len(compact_level_ptr):
-                 print(f"Error: Final node offset ({compact_node_offsets_np[ntotal]}) doesn't match level_ptr size ({len(compact_level_ptr)})!", file=sys.stderr)
-                 valid_check_passed = False
-            if (len(compact_level_ptr) > 0 and compact_level_ptr[-1] != len(compact_neighbors_data)) or \
-               (len(compact_level_ptr) == 0 and len(compact_neighbors_data) != 0):
-                 last_ptr = compact_level_ptr[-1] if len(compact_level_ptr) > 0 else -1
-                 print(f"Error: Last level pointer ({last_ptr}) doesn't match compact_data size ({len(compact_neighbors_data)})!", file=sys.stderr)
-                 valid_check_passed = False
+                print(
+                    f"Error: Final node offset ({compact_node_offsets_np[ntotal]}) doesn't match level_ptr size ({len(compact_level_ptr)})!",
+                    file=sys.stderr,
+                )
+                valid_check_passed = False
+            if (
+                len(compact_level_ptr) > 0 and compact_level_ptr[-1] != len(compact_neighbors_data)
+            ) or (len(compact_level_ptr) == 0 and len(compact_neighbors_data) != 0):
+                last_ptr = compact_level_ptr[-1] if len(compact_level_ptr) > 0 else -1
+                print(
+                    f"Error: Last level pointer ({last_ptr}) doesn't match compact_data size ({len(compact_neighbors_data)})!",
+                    file=sys.stderr,
+                )
+                valid_check_passed = False
            else:
-                 print(f"    OK: Final pointers match data size.")
+                print("    OK: Final pointers match data size.")

            if not valid_check_passed:
-                print("Error: Validation checks failed. Output file might be incorrect.", file=sys.stderr)
+                print(
+                    "Error: Validation checks failed. Output file might be incorrect.",
+                    file=sys.stderr,
+                )
                # Optional: Exit here if validation fails
                # return False

            # --- Explicitly delete large intermediate arrays ---
-            print(f"[{time.time() - start_time:.2f}s] Deleting original neighbors and offsets arrays...")
+            print(
+                f"[{time.time() - start_time:.2f}s] Deleting original neighbors and offsets arrays..."
+            )
            del neighbors_np
            del offsets_np
            gc.collect()

-            print(f"    CSR Stats: |data|={len(compact_neighbors_data)}, |level_ptr|={len(compact_level_ptr)}")
+            print(
+                f"    CSR Stats: |data|={len(compact_neighbors_data)}, |level_ptr|={len(compact_level_ptr)}"
+            )

            # --- Write CSR HNSW graph data using unified function ---
-            print(f"[{time.time() - start_time:.2f}s] Writing CSR HNSW graph data in FAISS-compatible order...")
-            
+            print(
+                f"[{time.time() - start_time:.2f}s] Writing CSR HNSW graph data in FAISS-compatible order..."
+            )
+
            # Determine storage fourcc and data based on prune_embeddings
            if prune_embeddings:
-                print(f"   Pruning embeddings: Writing NULL storage marker.")
+                print("   Pruning embeddings: Writing NULL storage marker.")
                output_storage_fourcc = NULL_INDEX_FOURCC
-                storage_data = b''
+                storage_data = b""
            else:
                # Keep embeddings - read and preserve original storage data
                if storage_fourcc and storage_fourcc != NULL_INDEX_FOURCC:
-                    print(f"   Preserving embeddings: Reading original storage data...")
+                    print("   Preserving embeddings: Reading original storage data...")
                    storage_data = f_in.read()  # Read remaining storage data
                    output_storage_fourcc = storage_fourcc
                    print(f"   Read {len(storage_data)} bytes of storage data")
                else:
-                    print(f"   No embeddings found in original file (NULL storage)")
+                    print("   No embeddings found in original file (NULL storage)")
                    output_storage_fourcc = NULL_INDEX_FOURCC
-                    storage_data = b''
-            
+                    storage_data = b""
+
            # Use the unified write function
-            write_compact_format(f_out, original_hnsw_data, assign_probas_np, cum_nneighbor_per_level_np, 
-                               levels_np, compact_level_ptr, compact_node_offsets_np, 
-                               compact_neighbors_data, output_storage_fourcc, storage_data)
-            
+            write_compact_format(
+                f_out,
+                original_hnsw_data,
+                assign_probas_np,
+                cum_nneighbor_per_level_np,
+                levels_np,
+                compact_level_ptr,
+                compact_node_offsets_np,
+                compact_neighbors_data,
+                output_storage_fourcc,
+                storage_data,
+            )
+
            # Clean up memory
            del assign_probas_np, cum_nneighbor_per_level_np, levels_np
            del compact_neighbors_data, compact_level_ptr, compact_node_offsets_np
@@ -503,40 +647,63 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
        print(f"Error: Input file not found: {input_filename}", file=sys.stderr)
        return False
    except MemoryError as e:
-         print(f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.", file=sys.stderr)
-         # Clean up potentially partially written output file?
-         try: os.remove(output_filename)
-         except OSError: pass
-         return False
+        print(f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.", file=sys.stderr)
+        # Clean up potentially partially written output file?
+        try:
+            os.remove(output_filename)
+        except OSError:
+            pass
+        return False
    except EOFError as e:
-        print(f"Error: Reached end of file unexpectedly reading {input_filename}. {e}", file=sys.stderr)
-        try: os.remove(output_filename)
-        except OSError: pass
+        print(
+            f"Error: Reached end of file unexpectedly reading {input_filename}. {e}",
+            file=sys.stderr,
+        )
+        try:
+            os.remove(output_filename)
+        except OSError:
+            pass
        return False
    except Exception as e:
        print(f"An unexpected error occurred during conversion: {e}", file=sys.stderr)
        import traceback
+
        traceback.print_exc()
        try:
            os.remove(output_filename)
-        except OSError: pass
+        except OSError:
+            pass
        return False
    # Ensure neighbors_np is deleted even if an error occurs after its allocation
    finally:
-        if 'neighbors_np' in locals() and neighbors_np is not None:
-            del neighbors_np
-            gc.collect()
+        try:
+            if "neighbors_np" in locals() and neighbors_np is not None:
+                del neighbors_np
+                gc.collect()
+        except NameError:
+            pass


 # --- Script Execution ---
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Convert a Faiss IndexHNSWFlat file to a CSR-based HNSW graph file.")
+    parser = argparse.ArgumentParser(
+        description="Convert a Faiss IndexHNSWFlat file to a CSR-based HNSW graph file."
+    )
    parser.add_argument("input_index_file", help="Path to the input IndexHNSWFlat file")
-    parser.add_argument("output_csr_graph_file", help="Path to write the output CSR HNSW graph file")
-    parser.add_argument("--prune-embeddings", action="store_true", default=True, 
-                       help="Prune embedding storage (write NULL storage marker)")
-    parser.add_argument("--keep-embeddings", action="store_true", 
-                       help="Keep embedding storage (overrides --prune-embeddings)")
+    parser.add_argument(
+        "output_csr_graph_file", help="Path to write the output CSR HNSW graph file"
+    )
+    parser.add_argument(
+        "--prune-embeddings",
+        action="store_true",
+        default=True,
+        help="Prune embedding storage (write NULL storage marker)",
+    )
+    parser.add_argument(
+        "--keep-embeddings",
+        action="store_true",
+        help="Keep embedding storage (overrides --prune-embeddings)",
+    )

    args = parser.parse_args()

@@ -545,10 +712,12 @@ if __name__ == "__main__":
        sys.exit(1)

    if os.path.abspath(args.input_index_file) == os.path.abspath(args.output_csr_graph_file):
-         print(f"Error: Input and output filenames cannot be the same.", file=sys.stderr)
-         sys.exit(1)
+        print("Error: Input and output filenames cannot be the same.", file=sys.stderr)
+        sys.exit(1)

    prune_embeddings = args.prune_embeddings and not args.keep_embeddings
-    success = convert_hnsw_graph_to_csr(args.input_index_file, args.output_csr_graph_file, prune_embeddings)
+    success = convert_hnsw_graph_to_csr(
+        args.input_index_file, args.output_csr_graph_file, prune_embeddings
+    )
    if not success:
-        sys.exit(1)
+        sys.exit(1)
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
@@ -1,19 +1,19 @@
-import numpy as np
-import os
-from pathlib import Path
-from typing import Dict, Any, List, Literal, Optional
-import shutil
 import logging
+import os
+import shutil
+from pathlib import Path
+from typing import Any, Literal

-from leann.searcher_base import BaseSearcher
-from .convert_to_csr import convert_hnsw_graph_to_csr
-
-from leann.registry import register_backend
+import numpy as np
 from leann.interface import (
-    LeannBackendFactoryInterface,
    LeannBackendBuilderInterface,
+    LeannBackendFactoryInterface,
    LeannBackendSearcherInterface,
 )
+from leann.registry import register_backend
+from leann.searcher_base import BaseSearcher
+
+from .convert_to_csr import convert_hnsw_graph_to_csr

 logger = logging.getLogger(__name__)

@@ -51,9 +51,11 @@ class HNSWBuilder(LeannBackendBuilderInterface):
        if not self.is_recompute:
            if self.is_compact:
                # TODO: support this case @andy
-                raise ValueError("is_recompute is False, but is_compact is True. This is not compatible now. change is compact to False and you can use the original HNSW index.")
+                raise ValueError(
+                    "is_recompute is False, but is_compact is True. This is not compatible now. change is compact to False and you can use the original HNSW index."
+                )

-    def build(self, data: np.ndarray, ids: List[str], index_path: str, **kwargs):
+    def build(self, data: np.ndarray, ids: list[str], index_path: str, **kwargs):
        from . import faiss  # type: ignore

        path = Path(index_path)
@@ -99,16 +101,12 @@ class HNSWBuilder(LeannBackendBuilderInterface):
            # index_file_old = index_file.with_suffix(".old")
            # shutil.move(str(index_file), str(index_file_old))
            shutil.move(str(csr_temp_file), str(index_file))
-            logger.info(
-                f"INFO: Replaced original index with {mode_str} version at '{index_file}'"
-            )
+            logger.info(f"INFO: Replaced original index with {mode_str} version at '{index_file}'")
        else:
            # Clean up and fail fast
            if csr_temp_file.exists():
                os.remove(csr_temp_file)
-            raise RuntimeError(
-                "CSR conversion failed - cannot proceed with compact format"
-            )
+            raise RuntimeError("CSR conversion failed - cannot proceed with compact format")


 class HNSWSearcher(BaseSearcher):
@@ -146,7 +144,7 @@ class HNSWSearcher(BaseSearcher):
        self,
        query: np.ndarray,
        top_k: int,
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        complexity: int = 64,
        beam_width: int = 1,
        prune_ratio: float = 0.0,
@@ -154,7 +152,7 @@ class HNSWSearcher(BaseSearcher):
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
        batch_size: int = 0,
        **kwargs,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
        """
        Search for nearest neighbors using HNSW index.

@@ -183,9 +181,7 @@ class HNSWSearcher(BaseSearcher):
                raise RuntimeError("Recompute is required for pruned index.")
        if recompute_embeddings:
            if zmq_port is None:
-                raise ValueError(
-                    "zmq_port must be provided if recompute_embeddings is True"
-                )
+                raise ValueError("zmq_port must be provided if recompute_embeddings is True")

        if query.dtype != np.float32:
            query = query.astype(np.float32)
@@ -194,9 +190,7 @@ class HNSWSearcher(BaseSearcher):

        params = faiss.SearchParametersHNSW()
        if zmq_port is not None:
-            params.zmq_port = (
-                zmq_port  # C++ code won't use this if recompute_embeddings is False
-            )
+            params.zmq_port = zmq_port  # C++ code won't use this if recompute_embeddings is False
        params.efSearch = complexity
        params.beam_size = beam_width

@@ -209,9 +203,7 @@ class HNSWSearcher(BaseSearcher):
            params.send_neigh_times_ratio = 0.0
        elif pruning_strategy == "proportional":
            params.local_prune = False
-            params.send_neigh_times_ratio = (
-                1.0  # Any value > 1e-6 triggers proportional mode
-            )
+            params.send_neigh_times_ratio = 1.0  # Any value > 1e-6 triggers proportional mode
        else:  # "global"
            params.local_prune = False
            params.send_neigh_times_ratio = 0.0
@@ -232,8 +224,6 @@ class HNSWSearcher(BaseSearcher):
            params,
        )

-        string_labels = [
-            [str(int_label) for int_label in batch_labels] for batch_labels in labels
-        ]
+        string_labels = [[str(int_label) for int_label in batch_labels] for batch_labels in labels]

        return {"labels": string_labels, "distances": distances}
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py
@@ -3,17 +3,17 @@ HNSW-specific embedding server
 """

 import argparse
+import json
+import logging
+import os
+import sys
 import threading
 import time
-import os
-import zmq
-import numpy as np
-import msgpack
-import json
 from pathlib import Path
-from typing import Optional
-import sys
-import logging
+
+import msgpack
+import numpy as np
+import zmq

 # Set up logging based on environment variable
 LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
@@ -33,7 +33,7 @@ if not logger.handlers:


 def create_hnsw_embedding_server(
-    passages_file: Optional[str] = None,
+    passages_file: str | None = None,
    zmq_port: int = 5555,
    model_name: str = "sentence-transformers/all-mpnet-base-v2",
    distance_metric: str = "mips",
@@ -52,8 +52,8 @@ def create_hnsw_embedding_server(
    sys.path.insert(0, str(leann_core_path))

    try:
-        from leann.embedding_compute import compute_embeddings
        from leann.api import PassageManager
+        from leann.embedding_compute import compute_embeddings

        logger.info("Successfully imported unified embedding computation module")
    except ImportError as e:
@@ -78,13 +78,11 @@ def create_hnsw_embedding_server(
        raise ValueError("Only metadata files (.meta.json) are supported")

    # Load metadata to get passage sources
-    with open(passages_file, "r") as f:
+    with open(passages_file) as f:
        meta = json.load(f)

    # Convert relative paths to absolute paths based on metadata file location
-    metadata_dir = Path(
-        passages_file
-    ).parent.parent  # Go up one level from the metadata file
+    metadata_dir = Path(passages_file).parent.parent  # Go up one level from the metadata file
    passage_sources = []
    for source in meta["passage_sources"]:
        source_copy = source.copy()
@@ -134,9 +132,7 @@ def create_hnsw_embedding_server(
                        response = embeddings.tolist()
                        socket.send(msgpack.packb(response))
                        e2e_end = time.time()
-                        logger.info(
-                            f"⏱️  Text embedding E2E time: {e2e_end - e2e_start:.6f}s"
-                        )
+                        logger.info(f"⏱️  Text embedding E2E time: {e2e_end - e2e_start:.6f}s")
                        continue

                # Handle distance calculation requests
@@ -162,17 +158,13 @@ def create_hnsw_embedding_server(
                            texts.append(txt)
                        except KeyError:
                            logger.error(f"Passage ID {nid} not found")
-                            raise RuntimeError(
-                                f"FATAL: Passage with ID {nid} not found"
-                            )
+                            raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
                        except Exception as e:
                            logger.error(f"Exception looking up passage ID {nid}: {e}")
                            raise

                    # Process embeddings
-                    embeddings = compute_embeddings(
-                        texts, model_name, mode=embedding_mode
-                    )
+                    embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
                    logger.info(
                        f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
                    )
@@ -186,18 +178,12 @@ def create_hnsw_embedding_server(
                        distances = -np.dot(embeddings, query_vector)

                    response_payload = distances.flatten().tolist()
-                    response_bytes = msgpack.packb(
-                        [response_payload], use_single_float=True
-                    )
-                    logger.debug(
-                        f"Sending distance response with {len(distances)} distances"
-                    )
+                    response_bytes = msgpack.packb([response_payload], use_single_float=True)
+                    logger.debug(f"Sending distance response with {len(distances)} distances")

                    socket.send(response_bytes)
                    e2e_end = time.time()
-                    logger.info(
-                        f"⏱️  Distance calculation E2E time: {e2e_end - e2e_start:.6f}s"
-                    )
+                    logger.info(f"⏱️  Distance calculation E2E time: {e2e_end - e2e_start:.6f}s")
                    continue

                # Standard embedding request (passage ID lookup)
@@ -222,9 +208,7 @@ def create_hnsw_embedding_server(
                        passage_data = passages.get_passage(str(nid))
                        txt = passage_data["text"]
                        if not txt:
-                            raise RuntimeError(
-                                f"FATAL: Empty text for passage ID {nid}"
-                            )
+                            raise RuntimeError(f"FATAL: Empty text for passage ID {nid}")
                        texts.append(txt)
                    except KeyError:
                        raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
@@ -243,11 +227,9 @@ def create_hnsw_embedding_server(
                    logger.error(
                        f"NaN or Inf detected in embeddings! Requested IDs: {node_ids[:5]}..."
                    )
-                    assert False
+                    raise AssertionError()

-                hidden_contiguous_f32 = np.ascontiguousarray(
-                    embeddings, dtype=np.float32
-                )
+                hidden_contiguous_f32 = np.ascontiguousarray(embeddings, dtype=np.float32)
                response_payload = [
                    list(hidden_contiguous_f32.shape),
                    hidden_contiguous_f32.flatten().tolist(),
--- a/packages/leann-backend-hnsw/pyproject.toml
+++ b/packages/leann-backend-hnsw/pyproject.toml
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-hnsw"
-version = "0.1.12"
+version = "0.1.14"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.1.12", 
+    "leann-core==0.1.14", 
    "numpy",
    "pyzmq>=23.0.0",
    "msgpack>=1.0.0",
--- a/packages/leann-core/pyproject.toml
+++ b/packages/leann-core/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann-core"
-version = "0.1.12"
+version = "0.1.14"
 description = "Core API and plugin system for LEANN"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -20,6 +20,8 @@ dependencies = [
    "torch>=2.0.0",
    "sentence-transformers>=2.2.0",
    "llama-index-core>=0.12.0",
+    "llama-index-readers-file>=0.4.0",  # Essential for document reading
+    "llama-index-embeddings-huggingface>=0.5.5",  # For embeddings
    "python-dotenv>=1.0.0",
    "openai>=1.0.0",
    "huggingface-hub>=0.20.0",
@@ -33,6 +35,13 @@ dependencies = [
    "mlx-lm>=0.26.0; sys_platform == 'darwin'",
 ]

+[project.optional-dependencies]
+colab = [
+    "torch>=2.0.0,<3.0.0",  # Limit torch version to avoid conflicts
+    "transformers>=4.30.0,<5.0.0",  # Limit transformers version
+    "accelerate>=0.20.0,<1.0.0",  # Limit accelerate version
+]
+
 [project.scripts]
 leann = "leann.cli:main"

--- a/packages/leann-core/src/leann/__init__.py
+++ b/packages/leann-core/src/leann/__init__.py
@@ -14,4 +14,4 @@ from .registry import BACKEND_REGISTRY, autodiscover_backends

 autodiscover_backends()

-__all__ = ["LeannBuilder", "LeannSearcher", "LeannChat", "BACKEND_REGISTRY"]
+__all__ = ["BACKEND_REGISTRY", "LeannBuilder", "LeannChat", "LeannSearcher"]
--- a/packages/leann-core/src/leann/api.py
+++ b/packages/leann-core/src/leann/api.py
@@ -4,27 +4,30 @@ with the correct, original embedding logic from the user's reference code.
 """

 import json
-import pickle
-from leann.interface import LeannBackendSearcherInterface
-import numpy as np
-import time
-from pathlib import Path
-from typing import List, Dict, Any, Optional, Literal
-from dataclasses import dataclass, field
-from .registry import BACKEND_REGISTRY
-from .interface import LeannBackendFactoryInterface
-from .chat import get_llm
 import logging
+import pickle
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Literal
+
+import numpy as np
+
+from leann.interface import LeannBackendSearcherInterface
+
+from .chat import get_llm
+from .interface import LeannBackendFactoryInterface
+from .registry import BACKEND_REGISTRY

 logger = logging.getLogger(__name__)


 def compute_embeddings(
-    chunks: List[str],
+    chunks: list[str],
    model_name: str,
    mode: str = "sentence-transformers",
    use_server: bool = True,
-    port: Optional[int] = None,
+    port: int | None = None,
    is_build=False,
 ) -> np.ndarray:
    """
@@ -61,9 +64,7 @@ def compute_embeddings(
        )


-def compute_embeddings_via_server(
-    chunks: List[str], model_name: str, port: int
-) -> np.ndarray:
+def compute_embeddings_via_server(chunks: list[str], model_name: str, port: int) -> np.ndarray:
    """Computes embeddings using sentence-transformers.

    Args:
@@ -73,9 +74,9 @@ def compute_embeddings_via_server(
    logger.info(
        f"Computing embeddings for {len(chunks)} chunks using SentenceTransformer model '{model_name}' (via embedding server)..."
    )
-    import zmq
    import msgpack
    import numpy as np
+    import zmq

    # Connect to embedding server
    context = zmq.Context()
@@ -104,11 +105,11 @@ class SearchResult:
    id: str
    score: float
    text: str
-    metadata: Dict[str, Any] = field(default_factory=dict)
+    metadata: dict[str, Any] = field(default_factory=dict)


 class PassageManager:
-    def __init__(self, passage_sources: List[Dict[str, Any]]):
+    def __init__(self, passage_sources: list[dict[str, Any]]):
        self.offset_maps = {}
        self.passage_files = {}
        self.global_offset_map = {}  # Combined map for fast lookup
@@ -117,8 +118,15 @@ class PassageManager:
            assert source["type"] == "jsonl", "only jsonl is supported"
            passage_file = source["path"]
            index_file = source["index_path"]  # .idx file
+
+            # Fix path resolution for Colab and other environments
+            if not Path(index_file).is_absolute():
+                # If relative path, try to resolve it properly
+                index_file = str(Path(index_file).resolve())
+
            if not Path(index_file).exists():
                raise FileNotFoundError(f"Passage index file not found: {index_file}")
+
            with open(index_file, "rb") as f:
                offset_map = pickle.load(f)
                self.offset_maps[passage_file] = offset_map
@@ -128,11 +136,11 @@ class PassageManager:
                for passage_id, offset in offset_map.items():
                    self.global_offset_map[passage_id] = (passage_file, offset)

-    def get_passage(self, passage_id: str) -> Dict[str, Any]:
+    def get_passage(self, passage_id: str) -> dict[str, Any]:
        if passage_id in self.global_offset_map:
            passage_file, offset = self.global_offset_map[passage_id]
            # Lazy file opening - only open when needed
-            with open(passage_file, "r", encoding="utf-8") as f:
+            with open(passage_file, encoding="utf-8") as f:
                f.seek(offset)
                return json.loads(f.readline())
        raise KeyError(f"Passage ID not found: {passage_id}")
@@ -143,14 +151,12 @@ class LeannBuilder:
        self,
        backend_name: str,
        embedding_model: str = "facebook/contriever",
-        dimensions: Optional[int] = None,
+        dimensions: int | None = None,
        embedding_mode: str = "sentence-transformers",
        **backend_kwargs,
    ):
        self.backend_name = backend_name
-        backend_factory: LeannBackendFactoryInterface | None = BACKEND_REGISTRY.get(
-            backend_name
-        )
+        backend_factory: LeannBackendFactoryInterface | None = BACKEND_REGISTRY.get(backend_name)
        if backend_factory is None:
            raise ValueError(f"Backend '{backend_name}' not found or not registered.")
        self.backend_factory = backend_factory
@@ -158,9 +164,9 @@ class LeannBuilder:
        self.dimensions = dimensions
        self.embedding_mode = embedding_mode
        self.backend_kwargs = backend_kwargs
-        self.chunks: List[Dict[str, Any]] = []
+        self.chunks: list[dict[str, Any]] = []

-    def add_text(self, text: str, metadata: Optional[Dict[str, Any]] = None):
+    def add_text(self, text: str, metadata: dict[str, Any] | None = None):
        if metadata is None:
            metadata = {}
        passage_id = metadata.get("id", str(len(self.chunks)))
@@ -190,9 +196,7 @@ class LeannBuilder:
            try:
                from tqdm import tqdm

-                chunk_iterator = tqdm(
-                    self.chunks, desc="Writing passages", unit="chunk"
-                )
+                chunk_iterator = tqdm(self.chunks, desc="Writing passages", unit="chunk")
            except ImportError:
                chunk_iterator = self.chunks

@@ -222,9 +226,7 @@ class LeannBuilder:
        string_ids = [chunk["id"] for chunk in self.chunks]
        current_backend_kwargs = {**self.backend_kwargs, "dimensions": self.dimensions}
        builder_instance = self.backend_factory.builder(**current_backend_kwargs)
-        builder_instance.build(
-            embeddings, string_ids, index_path, **current_backend_kwargs
-        )
+        builder_instance.build(embeddings, string_ids, index_path, **current_backend_kwargs)
        leann_meta_path = index_dir / f"{index_name}.meta.json"
        meta_data = {
            "version": "1.0",
@@ -273,9 +275,7 @@ class LeannBuilder:
        ids, embeddings = data

        if not isinstance(embeddings, np.ndarray):
-            raise ValueError(
-                f"Expected embeddings to be numpy array, got {type(embeddings)}"
-            )
+            raise ValueError(f"Expected embeddings to be numpy array, got {type(embeddings)}")

        if len(ids) != embeddings.shape[0]:
            raise ValueError(
@@ -287,9 +287,7 @@ class LeannBuilder:
        if self.dimensions is None:
            self.dimensions = embedding_dim
        elif self.dimensions != embedding_dim:
-            raise ValueError(
-                f"Dimension mismatch: expected {self.dimensions}, got {embedding_dim}"
-            )
+            raise ValueError(f"Dimension mismatch: expected {self.dimensions}, got {embedding_dim}")

        logger.info(
            f"Building index from precomputed embeddings: {len(ids)} items, {embedding_dim} dimensions"
@@ -374,26 +372,24 @@ class LeannBuilder:
        with open(leann_meta_path, "w", encoding="utf-8") as f:
            json.dump(meta_data, f, indent=2)

-        logger.info(
-            f"Index built successfully from precomputed embeddings: {index_path}"
-        )
+        logger.info(f"Index built successfully from precomputed embeddings: {index_path}")


 class LeannSearcher:
    def __init__(self, index_path: str, enable_warmup: bool = False, **backend_kwargs):
+        # Fix path resolution for Colab and other environments
+        if not Path(index_path).is_absolute():
+            index_path = str(Path(index_path).resolve())
+
        self.meta_path_str = f"{index_path}.meta.json"
        if not Path(self.meta_path_str).exists():
-            raise FileNotFoundError(
-                f"Leann metadata file not found at {self.meta_path_str}"
-            )
-        with open(self.meta_path_str, "r", encoding="utf-8") as f:
+            raise FileNotFoundError(f"Leann metadata file not found at {self.meta_path_str}")
+        with open(self.meta_path_str, encoding="utf-8") as f:
            self.meta_data = json.load(f)
        backend_name = self.meta_data["backend_name"]
        self.embedding_model = self.meta_data["embedding_model"]
        # Support both old and new format
-        self.embedding_mode = self.meta_data.get(
-            "embedding_mode", "sentence-transformers"
-        )
+        self.embedding_mode = self.meta_data.get("embedding_mode", "sentence-transformers")
        self.passage_manager = PassageManager(self.meta_data.get("passage_sources", []))
        backend_factory = BACKEND_REGISTRY.get(backend_name)
        if backend_factory is None:
@@ -415,7 +411,7 @@ class LeannSearcher:
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
        expected_zmq_port: int = 5557,
        **kwargs,
-    ) -> List[SearchResult]:
+    ) -> list[SearchResult]:
        logger.info("🔍 LeannSearcher.search() called:")
        logger.info(f"  Query: '{query}'")
        logger.info(f"  Top_k: {top_k}")
@@ -442,7 +438,7 @@ class LeannSearcher:
            zmq_port=zmq_port,
        )
        # logger.info(f"  Generated embedding shape: {query_embedding.shape}")
-        embedding_time = time.time() - start_time
+        time.time() - start_time
        # logger.info(f"  Embedding time: {embedding_time} seconds")

        start_time = time.time()
@@ -457,17 +453,15 @@ class LeannSearcher:
            zmq_port=zmq_port,
            **kwargs,
        )
-        search_time = time.time() - start_time
+        time.time() - start_time
        # logger.info(f"  Search time: {search_time} seconds")
-        logger.info(
-            f"  Backend returned: labels={len(results.get('labels', [[]])[0])} results"
-        )
+        logger.info(f"  Backend returned: labels={len(results.get('labels', [[]])[0])} results")

        enriched_results = []
        if "labels" in results and "distances" in results:
            logger.info(f"  Processing {len(results['labels'][0])} passage IDs:")
            for i, (string_id, dist) in enumerate(
-                zip(results["labels"][0], results["distances"][0])
+                zip(results["labels"][0], results["distances"][0], strict=False)
            ):
                try:
                    passage_data = self.passage_manager.get_passage(string_id)
@@ -479,15 +473,15 @@ class LeannSearcher:
                            metadata=passage_data.get("metadata", {}),
                        )
                    )
-                    
+
                    # Color codes for better logging
                    GREEN = "\033[92m"
                    BLUE = "\033[94m"
                    YELLOW = "\033[93m"
                    RESET = "\033[0m"
-                    
+
                    # Truncate text for display (first 100 chars)
-                    display_text = passage_data['text']
+                    display_text = passage_data["text"]
                    logger.info(
                        f"   {GREEN}✓{RESET} {BLUE}[{i + 1:2d}]{RESET} {YELLOW}ID:{RESET} '{string_id}' {YELLOW}Score:{RESET} {dist:.4f} {YELLOW}Text:{RESET} {display_text}"
                    )
@@ -505,7 +499,7 @@ class LeannChat:
    def __init__(
        self,
        index_path: str,
-        llm_config: Optional[Dict[str, Any]] = None,
+        llm_config: dict[str, Any] | None = None,
        enable_warmup: bool = False,
        **kwargs,
    ):
@@ -521,7 +515,7 @@ class LeannChat:
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = True,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        llm_kwargs: Optional[Dict[str, Any]] = None,
+        llm_kwargs: dict[str, Any] | None = None,
        expected_zmq_port: int = 5557,
        **search_kwargs,
    ):
--- a/packages/leann-core/src/leann/chat.py
+++ b/packages/leann-core/src/leann/chat.py
@@ -4,11 +4,12 @@ This file contains the chat generation logic for the LEANN project,
 supporting different backends like Ollama, Hugging Face Transformers, and a simulation mode.
 """

-from abc import ABC, abstractmethod
-from typing import Dict, Any, Optional, List
+import difflib
 import logging
 import os
-import difflib
+from abc import ABC, abstractmethod
+from typing import Any
+
 import torch

 # Configure logging
@@ -16,10 +17,11 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)


-def check_ollama_models() -> List[str]:
+def check_ollama_models() -> list[str]:
    """Check available Ollama models and return a list"""
    try:
        import requests
+
        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        if response.status_code == 200:
            data = response.json()
@@ -31,51 +33,52 @@ def check_ollama_models() -> List[str]:

 def check_ollama_model_exists_remotely(model_name: str) -> tuple[bool, list[str]]:
    """Check if a model exists in Ollama's remote library and return available tags
-    
+
    Returns:
        (model_exists, available_tags): bool and list of matching tags
    """
    try:
-        import requests
        import re
-        
+
+        import requests
+
        # Split model name and tag
-        if ':' in model_name:
-            base_model, requested_tag = model_name.split(':', 1)
+        if ":" in model_name:
+            base_model, requested_tag = model_name.split(":", 1)
        else:
            base_model, requested_tag = model_name, None
-        
+
        # First check if base model exists in library
        library_response = requests.get("https://ollama.com/library", timeout=8)
        if library_response.status_code != 200:
            return True, []  # Assume exists if can't check
-            
+
        # Extract model names from library page
        models_in_library = re.findall(r'href="/library/([^"]+)"', library_response.text)
-        
+
        if base_model not in models_in_library:
            return False, []  # Base model doesn't exist
-        
+
        # If base model exists, get available tags
        tags_response = requests.get(f"https://ollama.com/library/{base_model}/tags", timeout=8)
        if tags_response.status_code != 200:
            return True, []  # Base model exists but can't get tags
-            
+
        # Extract tags for this model - be more specific to avoid HTML artifacts
-        tag_pattern = rf'{re.escape(base_model)}:[a-zA-Z0-9\.\-_]+'
+        tag_pattern = rf"{re.escape(base_model)}:[a-zA-Z0-9\.\-_]+"
        raw_tags = re.findall(tag_pattern, tags_response.text)
-        
+
        # Clean up tags - remove HTML artifacts and duplicates
        available_tags = []
        seen = set()
        for tag in raw_tags:
            # Skip if it looks like HTML (contains < or >)
-            if '<' in tag or '>' in tag:
+            if "<" in tag or ">" in tag:
                continue
            if tag not in seen:
                seen.add(tag)
                available_tags.append(tag)
-        
+
        # Check if exact model exists
        if requested_tag is None:
            # User just requested base model, suggest tags
@@ -83,76 +86,80 @@ def check_ollama_model_exists_remotely(model_name: str) -> tuple[bool, list[str]
        else:
            exact_match = model_name in available_tags
            return exact_match, available_tags[:10]
-            
+
    except Exception:
        pass
-    
+
    # If scraping fails, assume model might exist (don't block user)
    return True, []


-def search_ollama_models_fuzzy(query: str, available_models: List[str]) -> List[str]:
+def search_ollama_models_fuzzy(query: str, available_models: list[str]) -> list[str]:
    """Use intelligent fuzzy search for Ollama models"""
    if not available_models:
        return []
-    
+
    query_lower = query.lower()
    suggestions = []
-    
+
    # 1. Exact matches first
    exact_matches = [m for m in available_models if query_lower == m.lower()]
    suggestions.extend(exact_matches)
-    
+
    # 2. Starts with query
-    starts_with = [m for m in available_models if m.lower().startswith(query_lower) and m not in suggestions]
+    starts_with = [
+        m for m in available_models if m.lower().startswith(query_lower) and m not in suggestions
+    ]
    suggestions.extend(starts_with)
-    
+
    # 3. Contains query
    contains = [m for m in available_models if query_lower in m.lower() and m not in suggestions]
    suggestions.extend(contains)
-    
+
    # 4. Base model name matching (remove version numbers)
    def get_base_name(model_name: str) -> str:
        """Extract base name without version (e.g., 'llama3:8b' -> 'llama3')"""
-        return model_name.split(':')[0].split('-')[0]
-    
+        return model_name.split(":")[0].split("-")[0]
+
    query_base = get_base_name(query_lower)
    base_matches = [
-        m for m in available_models 
+        m
+        for m in available_models
        if get_base_name(m.lower()) == query_base and m not in suggestions
    ]
    suggestions.extend(base_matches)
-    
+
    # 5. Family/variant matching
    model_families = {
-        'llama': ['llama2', 'llama3', 'alpaca', 'vicuna', 'codellama'],
-        'qwen': ['qwen', 'qwen2', 'qwen3'],
-        'gemma': ['gemma', 'gemma2'],
-        'phi': ['phi', 'phi2', 'phi3'],
-        'mistral': ['mistral', 'mixtral', 'openhermes'],
-        'dolphin': ['dolphin', 'openchat'],
-        'deepseek': ['deepseek', 'deepseek-coder']
+        "llama": ["llama2", "llama3", "alpaca", "vicuna", "codellama"],
+        "qwen": ["qwen", "qwen2", "qwen3"],
+        "gemma": ["gemma", "gemma2"],
+        "phi": ["phi", "phi2", "phi3"],
+        "mistral": ["mistral", "mixtral", "openhermes"],
+        "dolphin": ["dolphin", "openchat"],
+        "deepseek": ["deepseek", "deepseek-coder"],
    }
-    
+
    query_family = None
    for family, variants in model_families.items():
        if any(variant in query_lower for variant in variants):
            query_family = family
            break
-    
+
    if query_family:
        family_variants = model_families[query_family]
        family_matches = [
-            m for m in available_models
+            m
+            for m in available_models
            if any(variant in m.lower() for variant in family_variants) and m not in suggestions
        ]
        suggestions.extend(family_matches)
-    
+
    # 6. Use difflib for remaining fuzzy matches
    remaining_models = [m for m in available_models if m not in suggestions]
    difflib_matches = difflib.get_close_matches(query_lower, remaining_models, n=3, cutoff=0.4)
    suggestions.extend(difflib_matches)
-    
+
    return suggestions[:8]  # Return top 8 suggestions


@@ -162,15 +169,13 @@ def search_ollama_models_fuzzy(query: str, available_models: List[str]) -> List[
 # Remove this too - no need for fallback


-def suggest_similar_models(invalid_model: str, available_models: List[str]) -> List[str]:
+def suggest_similar_models(invalid_model: str, available_models: list[str]) -> list[str]:
    """Use difflib to find similar model names"""
    if not available_models:
        return []
-    
+
    # Get close matches using fuzzy matching
-    suggestions = difflib.get_close_matches(
-        invalid_model, available_models, n=3, cutoff=0.3
-    )
+    suggestions = difflib.get_close_matches(invalid_model, available_models, n=3, cutoff=0.3)
    return suggestions


@@ -178,49 +183,50 @@ def check_hf_model_exists(model_name: str) -> bool:
    """Quick check if HuggingFace model exists without downloading"""
    try:
        from huggingface_hub import model_info
+
        model_info(model_name)
        return True
    except Exception:
        return False


-def get_popular_hf_models() -> List[str]:
+def get_popular_hf_models() -> list[str]:
    """Return a list of popular HuggingFace models for suggestions"""
    try:
        from huggingface_hub import list_models
-        
+
        # Get popular text-generation models, sorted by downloads
        models = list_models(
            filter="text-generation",
            sort="downloads",
            direction=-1,
-            limit=20  # Get top 20 most downloaded
+            limit=20,  # Get top 20 most downloaded
        )
-        
+
        # Extract model names and filter for chat/conversation models
        model_names = []
-        chat_keywords = ['chat', 'instruct', 'dialog', 'conversation', 'assistant']
-        
+        chat_keywords = ["chat", "instruct", "dialog", "conversation", "assistant"]
+
        for model in models:
-            model_name = model.id if hasattr(model, 'id') else str(model)
+            model_name = model.id if hasattr(model, "id") else str(model)
            # Prioritize models with chat-related keywords
            if any(keyword in model_name.lower() for keyword in chat_keywords):
                model_names.append(model_name)
            elif len(model_names) < 10:  # Fill up with other popular models
                model_names.append(model_name)
-                
+
        return model_names[:10] if model_names else _get_fallback_hf_models()
-        
+
    except Exception:
        # Fallback to static list if API call fails
        return _get_fallback_hf_models()


-def _get_fallback_hf_models() -> List[str]:
+def _get_fallback_hf_models() -> list[str]:
    """Fallback list of popular HuggingFace models"""
    return [
        "microsoft/DialoGPT-medium",
-        "microsoft/DialoGPT-large", 
+        "microsoft/DialoGPT-large",
        "facebook/blenderbot-400M-distill",
        "microsoft/phi-2",
        "deepseek-ai/deepseek-llm-7b-chat",
@@ -228,44 +234,40 @@ def _get_fallback_hf_models() -> List[str]:
        "facebook/blenderbot_small-90M",
        "microsoft/phi-1_5",
        "facebook/opt-350m",
-        "EleutherAI/gpt-neo-1.3B"
+        "EleutherAI/gpt-neo-1.3B",
    ]


-def search_hf_models_fuzzy(query: str, limit: int = 10) -> List[str]:
+def search_hf_models_fuzzy(query: str, limit: int = 10) -> list[str]:
    """Use HuggingFace Hub's native fuzzy search for model suggestions"""
    try:
        from huggingface_hub import list_models
-        
+
        # HF Hub's search is already fuzzy! It handles typos and partial matches
        models = list_models(
-            search=query,
-            filter="text-generation",
-            sort="downloads", 
-            direction=-1,
-            limit=limit
+            search=query, filter="text-generation", sort="downloads", direction=-1, limit=limit
        )
-        
-        model_names = [model.id if hasattr(model, 'id') else str(model) for model in models]
-        
+
+        model_names = [model.id if hasattr(model, "id") else str(model) for model in models]
+
        # If direct search doesn't return enough results, try some variations
        if len(model_names) < 3:
            # Try searching for partial matches or common variations
            variations = []
-            
+
            # Extract base name (e.g., "gpt3" from "gpt-3.5")
-            base_query = query.lower().replace('-', '').replace('.', '').replace('_', '')
+            base_query = query.lower().replace("-", "").replace(".", "").replace("_", "")
            if base_query != query.lower():
                variations.append(base_query)
-            
+
            # Try common model name patterns
-            if 'gpt' in query.lower():
-                variations.extend(['gpt2', 'gpt-neo', 'gpt-j', 'dialoGPT'])
-            elif 'llama' in query.lower():
-                variations.extend(['llama2', 'alpaca', 'vicuna'])
-            elif 'bert' in query.lower():
-                variations.extend(['roberta', 'distilbert', 'albert'])
-            
+            if "gpt" in query.lower():
+                variations.extend(["gpt2", "gpt-neo", "gpt-j", "dialoGPT"])
+            elif "llama" in query.lower():
+                variations.extend(["llama2", "alpaca", "vicuna"])
+            elif "bert" in query.lower():
+                variations.extend(["roberta", "distilbert", "albert"])
+
            # Search with variations
            for var in variations[:2]:  # Limit to 2 variations to avoid too many API calls
                try:
@@ -274,13 +276,15 @@ def search_hf_models_fuzzy(query: str, limit: int = 10) -> List[str]:
                        filter="text-generation",
                        sort="downloads",
                        direction=-1,
-                        limit=3
+                        limit=3,
                    )
-                    var_names = [model.id if hasattr(model, 'id') else str(model) for model in var_models]
+                    var_names = [
+                        model.id if hasattr(model, "id") else str(model) for model in var_models
+                    ]
                    model_names.extend(var_names)
-                except:
+                except Exception:
                    continue
-        
+
        # Remove duplicates while preserving order
        seen = set()
        unique_models = []
@@ -288,65 +292,67 @@ def search_hf_models_fuzzy(query: str, limit: int = 10) -> List[str]:
            if model not in seen:
                seen.add(model)
                unique_models.append(model)
-        
+
        return unique_models[:limit]
-        
+
    except Exception:
        # If search fails, return empty list
        return []


-def search_hf_models(query: str, limit: int = 10) -> List[str]:
+def search_hf_models(query: str, limit: int = 10) -> list[str]:
    """Simple search for HuggingFace models based on query (kept for backward compatibility)"""
    return search_hf_models_fuzzy(query, limit)


-def validate_model_and_suggest(model_name: str, llm_type: str) -> Optional[str]:
+def validate_model_and_suggest(model_name: str, llm_type: str) -> str | None:
    """Validate model name and provide suggestions if invalid"""
    if llm_type == "ollama":
        available_models = check_ollama_models()
        if available_models and model_name not in available_models:
            error_msg = f"Model '{model_name}' not found in your local Ollama installation."
-            
+
            # Check if the model exists remotely and get available tags
            model_exists_remotely, available_tags = check_ollama_model_exists_remotely(model_name)
-            
+
            if model_exists_remotely and model_name in available_tags:
                # Exact model exists remotely - suggest pulling it
-                error_msg += f"\n\nTo install the requested model:\n"
+                error_msg += "\n\nTo install the requested model:\n"
                error_msg += f"  ollama pull {model_name}\n"
-                
+
                # Show local alternatives
                suggestions = search_ollama_models_fuzzy(model_name, available_models)
                if suggestions:
                    error_msg += "\nOr use one of these similar installed models:\n"
                    for i, suggestion in enumerate(suggestions, 1):
                        error_msg += f"  {i}. {suggestion}\n"
-                        
+
            elif model_exists_remotely and available_tags:
                # Base model exists but requested tag doesn't - suggest correct tags
-                base_model = model_name.split(':')[0]
-                requested_tag = model_name.split(':', 1)[1] if ':' in model_name else None
-                
-                error_msg += f"\n\nModel '{base_model}' exists, but tag '{requested_tag}' is not available."
+                base_model = model_name.split(":")[0]
+                requested_tag = model_name.split(":", 1)[1] if ":" in model_name else None
+
+                error_msg += (
+                    f"\n\nModel '{base_model}' exists, but tag '{requested_tag}' is not available."
+                )
                error_msg += f"\n\nAvailable {base_model} models you can install:\n"
                for i, tag in enumerate(available_tags[:8], 1):
                    error_msg += f"  {i}. ollama pull {tag}\n"
                if len(available_tags) > 8:
                    error_msg += f"  ... and {len(available_tags) - 8} more variants\n"
-                    
+
                # Also show local alternatives
                suggestions = search_ollama_models_fuzzy(model_name, available_models)
                if suggestions:
                    error_msg += "\nOr use one of these similar installed models:\n"
                    for i, suggestion in enumerate(suggestions, 1):
                        error_msg += f"  {i}. {suggestion}\n"
-                        
+
            else:
                # Model doesn't exist remotely - show fuzzy suggestions
                suggestions = search_ollama_models_fuzzy(model_name, available_models)
                error_msg += f"\n\nModel '{model_name}' was not found in Ollama's library."
-                
+
                if suggestions:
                    error_msg += "\n\nDid you mean one of these installed models?\n"
                    for i, suggestion in enumerate(suggestions, 1):
@@ -357,23 +363,25 @@ def validate_model_and_suggest(model_name: str, llm_type: str) -> Optional[str]:
                        error_msg += f"  {i}. {model}\n"
                    if len(available_models) > 8:
                        error_msg += f"  ... and {len(available_models) - 8} more\n"
-            
+
            error_msg += "\n\nCommands:"
            error_msg += "\n  ollama list                    # List installed models"
            if model_exists_remotely and available_tags:
                if model_name in available_tags:
                    error_msg += f"\n  ollama pull {model_name}          # Install requested model"
                else:
-                    error_msg += f"\n  ollama pull {available_tags[0]}    # Install recommended variant"
+                    error_msg += (
+                        f"\n  ollama pull {available_tags[0]}    # Install recommended variant"
+                    )
            error_msg += "\n  https://ollama.com/library     # Browse available models"
            return error_msg
-            
+
    elif llm_type == "hf":
        # For HF models, we can do a quick existence check
        if not check_hf_model_exists(model_name):
            # Use HF Hub's native fuzzy search directly
            search_suggestions = search_hf_models_fuzzy(model_name, limit=8)
-            
+
            error_msg = f"Model '{model_name}' not found on HuggingFace Hub."
            if search_suggestions:
                error_msg += "\n\nDid you mean one of these?\n"
@@ -385,10 +393,10 @@ def validate_model_and_suggest(model_name: str, llm_type: str) -> Optional[str]:
                error_msg += "\n\nPopular chat models:\n"
                for i, model in enumerate(popular_models[:5], 1):
                    error_msg += f"  {i}. {model}\n"
-            
+
            error_msg += f"\nSearch more: https://huggingface.co/models?search={model_name}&pipeline_tag=text-generation"
            return error_msg
-    
+
    return None  # Model is valid or we can't check


@@ -451,28 +459,27 @@ class OllamaChat(LLMInterface):
            # Check if the Ollama server is responsive
            if host:
                requests.get(host)
-                
+
            # Pre-check model availability with helpful suggestions
            model_error = validate_model_and_suggest(model, "ollama")
            if model_error:
                raise ValueError(model_error)
-                
+
        except ImportError:
            raise ImportError(
                "The 'requests' library is required for Ollama. Please install it with 'pip install requests'."
            )
        except requests.exceptions.ConnectionError:
-            logger.error(
-                f"Could not connect to Ollama at {host}. Please ensure Ollama is running."
-            )
+            logger.error(f"Could not connect to Ollama at {host}. Please ensure Ollama is running.")
            raise ConnectionError(
                f"Could not connect to Ollama at {host}. Please ensure Ollama is running."
            )

    def ask(self, prompt: str, **kwargs) -> str:
-        import requests
        import json

+        import requests
+
        full_url = f"{self.host}/api/generate"
        payload = {
            "model": self.model,
@@ -482,7 +489,7 @@ class OllamaChat(LLMInterface):
        }
        logger.debug(f"Sending request to Ollama: {payload}")
        try:
-            logger.info(f"Sending request to Ollama and waiting for response...")
+            logger.info("Sending request to Ollama and waiting for response...")
            response = requests.post(full_url, data=json.dumps(payload))
            response.raise_for_status()

@@ -506,15 +513,15 @@ class HFChat(LLMInterface):

    def __init__(self, model_name: str = "deepseek-ai/deepseek-llm-7b-chat"):
        logger.info(f"Initializing HFChat with model='{model_name}'")
-        
+
        # Pre-check model availability with helpful suggestions
        model_error = validate_model_and_suggest(model_name, "hf")
        if model_error:
            raise ValueError(model_error)
-            
+
        try:
-            from transformers import AutoTokenizer, AutoModelForCausalLM
            import torch
+            from transformers import AutoModelForCausalLM, AutoTokenizer
        except ImportError:
            raise ImportError(
                "The 'transformers' and 'torch' libraries are required for Hugging Face models. Please install them with 'pip install transformers torch'."
@@ -537,36 +544,34 @@ class HFChat(LLMInterface):
            model_name,
            torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
            device_map="auto" if self.device != "cpu" else None,
-            trust_remote_code=True
+            trust_remote_code=True,
        )
-        
+
        # Move model to device if not using device_map
        if self.device != "cpu" and "device_map" not in str(self.model):
            self.model = self.model.to(self.device)
-        
+
        # Set pad token if not present
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def ask(self, prompt: str, **kwargs) -> str:
-        print('kwargs in HF: ', kwargs)
+        print("kwargs in HF: ", kwargs)
        # Check if this is a Qwen model and add /no_think by default
        is_qwen_model = "qwen" in self.model.config._name_or_path.lower()
-        
+
        # For Qwen models, automatically add /no_think to the prompt
        if is_qwen_model and "/no_think" not in prompt and "/think" not in prompt:
            prompt = prompt + " /no_think"
-        
+
        # Prepare chat template
        messages = [{"role": "user", "content": prompt}]
-        
+
        # Apply chat template if available
        if hasattr(self.tokenizer, "apply_chat_template"):
            try:
                formatted_prompt = self.tokenizer.apply_chat_template(
-                    messages, 
-                    tokenize=False, 
-                    add_generation_prompt=True
+                    messages, tokenize=False, add_generation_prompt=True
                )
            except Exception as e:
                logger.warning(f"Chat template failed, using raw prompt: {e}")
@@ -577,13 +582,9 @@ class HFChat(LLMInterface):

        # Tokenize input
        inputs = self.tokenizer(
-            formatted_prompt, 
-            return_tensors="pt", 
-            padding=True,
-            truncation=True,
-            max_length=2048
+            formatted_prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048
        )
-        
+
        # Move inputs to device
        if self.device != "cpu":
            inputs = {k: v.to(self.device) for k, v in inputs.items()}
@@ -597,32 +598,29 @@ class HFChat(LLMInterface):
            "pad_token_id": self.tokenizer.eos_token_id,
            "eos_token_id": self.tokenizer.eos_token_id,
        }
-        
+
        # Handle temperature=0 for greedy decoding
        if generation_config["temperature"] == 0.0:
            generation_config["do_sample"] = False
            generation_config.pop("temperature")

        logger.info(f"Generating with HuggingFace model, config: {generation_config}")
-        
+
        # Generate
        with torch.no_grad():
-            outputs = self.model.generate(
-                **inputs,
-                **generation_config
-            )
+            outputs = self.model.generate(**inputs, **generation_config)

        # Decode response
-        generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
+        generated_tokens = outputs[0][inputs["input_ids"].shape[1] :]
        response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
-        
+
        return response.strip()


 class OpenAIChat(LLMInterface):
    """LLM interface for OpenAI models."""

-    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
+    def __init__(self, model: str = "gpt-4o", api_key: str | None = None):
        self.model = model
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")

@@ -649,11 +647,7 @@ class OpenAIChat(LLMInterface):
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": kwargs.get("max_tokens", 1000),
            "temperature": kwargs.get("temperature", 0.7),
-            **{
-                k: v
-                for k, v in kwargs.items()
-                if k not in ["max_tokens", "temperature"]
-            },
+            **{k: v for k, v in kwargs.items() if k not in ["max_tokens", "temperature"]},
        }

        logger.info(f"Sending request to OpenAI with model {self.model}")
@@ -675,7 +669,7 @@ class SimulatedChat(LLMInterface):
        return "This is a simulated answer from the LLM based on the retrieved context."


-def get_llm(llm_config: Optional[Dict[str, Any]] = None) -> LLMInterface:
+def get_llm(llm_config: dict[str, Any] | None = None) -> LLMInterface:
    """
    Factory function to get an LLM interface based on configuration.

--- a/packages/leann-core/src/leann/cli.py
+++ b/packages/leann-core/src/leann/cli.py
@@ -5,12 +5,14 @@ from pathlib import Path
 from llama_index.core import SimpleDirectoryReader
 from llama_index.core.node_parser import SentenceSplitter

-from .api import LeannBuilder, LeannSearcher, LeannChat
+from .api import LeannBuilder, LeannChat, LeannSearcher
+

 def extract_pdf_text_with_pymupdf(file_path: str) -> str:
    """Extract text from PDF using PyMuPDF for better quality."""
    try:
        import fitz  # PyMuPDF
+
        doc = fitz.open(file_path)
        text = ""
        for page in doc:
@@ -21,10 +23,12 @@ def extract_pdf_text_with_pymupdf(file_path: str) -> str:
        # Fallback to default reader
        return None

+
 def extract_pdf_text_with_pdfplumber(file_path: str) -> str:
    """Extract text from PDF using pdfplumber for better quality."""
    try:
        import pdfplumber
+
        text = ""
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
@@ -72,18 +76,12 @@ Examples:
        # Build command
        build_parser = subparsers.add_parser("build", help="Build document index")
        build_parser.add_argument("index_name", help="Index name")
-        build_parser.add_argument(
-            "--docs", type=str, required=True, help="Documents directory"
-        )
+        build_parser.add_argument("--docs", type=str, required=True, help="Documents directory")
        build_parser.add_argument(
            "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
        )
-        build_parser.add_argument(
-            "--embedding-model", type=str, default="facebook/contriever"
-        )
-        build_parser.add_argument(
-            "--force", "-f", action="store_true", help="Force rebuild"
-        )
+        build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
+        build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
        build_parser.add_argument("--graph-degree", type=int, default=32)
        build_parser.add_argument("--complexity", type=int, default=64)
        build_parser.add_argument("--num-threads", type=int, default=1)
@@ -129,7 +127,7 @@ Examples:
        )

        # List command
-        list_parser = subparsers.add_parser("list", help="List all indexes")
+        subparsers.add_parser("list", help="List all indexes")

        return parser

@@ -137,17 +135,13 @@ Examples:
        print("Stored LEANN indexes:")

        if not self.indexes_dir.exists():
-            print(
-                "No indexes found. Use 'leann build <name> --docs <dir>' to create one."
-            )
+            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
            return

        index_dirs = [d for d in self.indexes_dir.iterdir() if d.is_dir()]

        if not index_dirs:
-            print(
-                "No indexes found. Use 'leann build <name> --docs <dir>' to create one."
-            )
+            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
            return

        print(f"Found {len(index_dirs)} indexes:")
@@ -157,15 +151,15 @@ Examples:

            print(f"  {i}. {index_name} [{status}]")
            if self.index_exists(index_name):
-                meta_file = index_dir / "documents.leann.meta.json"
-                size_mb = sum(
-                    f.stat().st_size for f in index_dir.iterdir() if f.is_file()
-                ) / (1024 * 1024)
+                index_dir / "documents.leann.meta.json"
+                size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
+                    1024 * 1024
+                )
                print(f"     Size: {size_mb:.1f} MB")

        if index_dirs:
            example_name = index_dirs[0].name
-            print(f"\nUsage:")
+            print("\nUsage:")
            print(f'  leann search {example_name} "your query"')
            print(f"  leann ask {example_name} --interactive")

@@ -175,19 +169,20 @@ Examples:
        # Try to use better PDF parsers first
        documents = []
        docs_path = Path(docs_dir)
-        
+
        for file_path in docs_path.rglob("*.pdf"):
            print(f"Processing PDF: {file_path}")
-            
+
            # Try PyMuPDF first (best quality)
            text = extract_pdf_text_with_pymupdf(str(file_path))
            if text is None:
                # Try pdfplumber
                text = extract_pdf_text_with_pdfplumber(str(file_path))
-            
+
            if text:
                # Create a simple document structure
                from llama_index.core import Document
+
                doc = Document(text=text, metadata={"source": str(file_path)})
                documents.append(doc)
            else:
--- a/packages/leann-core/src/leann/embedding_compute.py
+++ b/packages/leann-core/src/leann/embedding_compute.py
@@ -4,11 +4,12 @@ Consolidates all embedding computation logic using SentenceTransformer
 Preserves all optimization parameters to ensure performance
 """

-import numpy as np
-import torch
-from typing import List, Dict, Any
 import logging
 import os
+from typing import Any
+
+import numpy as np
+import torch

 # Set up logger with proper level
 logger = logging.getLogger(__name__)
@@ -17,11 +18,11 @@ log_level = getattr(logging, LOG_LEVEL, logging.WARNING)
 logger.setLevel(log_level)

 # Global model cache to avoid repeated loading
-_model_cache: Dict[str, Any] = {}
+_model_cache: dict[str, Any] = {}


 def compute_embeddings(
-    texts: List[str],
+    texts: list[str],
    model_name: str,
    mode: str = "sentence-transformers",
    is_build: bool = False,
@@ -59,7 +60,7 @@ def compute_embeddings(


 def compute_embeddings_sentence_transformers(
-    texts: List[str],
+    texts: list[str],
    model_name: str,
    use_fp16: bool = True,
    device: str = "auto",
@@ -114,9 +115,7 @@ def compute_embeddings_sentence_transformers(
        logger.info(f"Using cached optimized model: {model_name}")
        model = _model_cache[cache_key]
    else:
-        logger.info(
-            f"Loading and caching optimized SentenceTransformer model: {model_name}"
-        )
+        logger.info(f"Loading and caching optimized SentenceTransformer model: {model_name}")
        from sentence_transformers import SentenceTransformer

        logger.info(f"Using device: {device}")
@@ -134,9 +133,7 @@ def compute_embeddings_sentence_transformers(
                if hasattr(torch.mps, "set_per_process_memory_fraction"):
                    torch.mps.set_per_process_memory_fraction(0.9)
            except AttributeError:
-                logger.warning(
-                    "Some MPS optimizations not available in this PyTorch version"
-                )
+                logger.warning("Some MPS optimizations not available in this PyTorch version")
        elif device == "cpu":
            # TODO: Haven't tested this yet
            torch.set_num_threads(min(8, os.cpu_count() or 4))
@@ -226,25 +223,22 @@ def compute_embeddings_sentence_transformers(
            device=device,
        )

-    logger.info(
-        f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}"
-    )
+    logger.info(f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}")

    # Validate results
    if np.isnan(embeddings).any() or np.isinf(embeddings).any():
-        raise RuntimeError(
-            f"Detected NaN or Inf values in embeddings, model: {model_name}"
-        )
+        raise RuntimeError(f"Detected NaN or Inf values in embeddings, model: {model_name}")

    return embeddings


-def compute_embeddings_openai(texts: List[str], model_name: str) -> np.ndarray:
+def compute_embeddings_openai(texts: list[str], model_name: str) -> np.ndarray:
    # TODO: @yichuan-w add progress bar only in build mode
    """Compute embeddings using OpenAI API"""
    try:
-        import openai
        import os
+
+        import openai
    except ImportError as e:
        raise ImportError(f"OpenAI package not installed: {e}")

@@ -294,16 +288,12 @@ def compute_embeddings_openai(texts: List[str], model_name: str) -> np.ndarray:
            raise

    embeddings = np.array(all_embeddings, dtype=np.float32)
-    logger.info(
-        f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}"
-    )
+    logger.info(f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}")
    print(f"len of embeddings: {len(embeddings)}")
    return embeddings


-def compute_embeddings_mlx(
-    chunks: List[str], model_name: str, batch_size: int = 16
-) -> np.ndarray:
+def compute_embeddings_mlx(chunks: list[str], model_name: str, batch_size: int = 16) -> np.ndarray:
    # TODO: @yichuan-w add progress bar only in build mode
    """Computes embeddings using an MLX model."""
    try:
--- a/packages/leann-core/src/leann/embedding_server_manager.py
+++ b/packages/leann-core/src/leann/embedding_server_manager.py
@@ -1,12 +1,12 @@
-import time
 import atexit
+import logging
+import os
 import socket
 import subprocess
 import sys
-import os
-import logging
+import time
 from pathlib import Path
-from typing import Optional
+
 import psutil

 # Set up logging based on environment variable
@@ -33,7 +33,7 @@ def _get_available_port(start_port: int = 5557) -> int:
                return port
        except OSError:
            port += 1
-    raise RuntimeError(f"No available ports found in range {start_port}-{start_port+100}")
+    raise RuntimeError(f"No available ports found in range {start_port}-{start_port + 100}")


 def _check_port(port: int) -> bool:
@@ -182,8 +182,8 @@ class EmbeddingServerManager:
                                       e.g., "leann_backend_diskann.embedding_server"
        """
        self.backend_module_name = backend_module_name
-        self.server_process: Optional[subprocess.Popen] = None
-        self.server_port: Optional[int] = None
+        self.server_process: subprocess.Popen | None = None
+        self.server_port: int | None = None
        self._atexit_registered = False

    def start_server(
@@ -234,10 +234,10 @@ class EmbeddingServerManager:
            return False, port

        logger.info(f"Starting server on port {actual_port} for Colab environment")
-        
+
        # Use a simpler startup strategy for Colab
        command = self._build_server_command(actual_port, model_name, embedding_mode, **kwargs)
-        
+
        try:
            # In Colab, we'll use a more direct approach
            self._launch_server_process_colab(command, actual_port)
@@ -246,26 +246,16 @@ class EmbeddingServerManager:
            logger.error(f"Failed to start embedding server in Colab: {e}")
            return False, actual_port

-    def _has_compatible_running_server(
-        self, model_name: str, passages_file: str
-    ) -> bool:
+    def _has_compatible_running_server(self, model_name: str, passages_file: str) -> bool:
        """Check if we have a compatible running server."""
-        if not (
-            self.server_process
-            and self.server_process.poll() is None
-            and self.server_port
-        ):
+        if not (self.server_process and self.server_process.poll() is None and self.server_port):
            return False

        if _check_process_matches_config(self.server_port, model_name, passages_file):
-            logger.info(
-                f"Existing server process (PID {self.server_process.pid}) is compatible"
-            )
+            logger.info(f"Existing server process (PID {self.server_process.pid}) is compatible")
            return True

-        logger.info(
-            "Existing server process is incompatible. Should start a new server."
-        )
+        logger.info("Existing server process is incompatible. Should start a new server.")
        return False

    def _start_new_server(
@@ -400,7 +390,7 @@ class EmbeddingServerManager:
    def _wait_for_server_ready_colab(self, port: int) -> tuple[bool, int]:
        """Wait for the server to be ready with Colab-specific timeout."""
        max_wait, wait_interval = 30, 0.5  # Shorter timeout for Colab
-        
+
        for _ in range(int(max_wait / wait_interval)):
            if _check_port(port):
                logger.info("Colab embedding server is ready!")
@@ -409,7 +399,7 @@ class EmbeddingServerManager:
            if self.server_process and self.server_process.poll() is not None:
                # Check for error output
                stdout, stderr = self.server_process.communicate()
-                logger.error(f"Colab server terminated during startup.")
+                logger.error("Colab server terminated during startup.")
                logger.error(f"stdout: {stdout}")
                logger.error(f"stderr: {stderr}")
                return False, port
--- a/packages/leann-core/src/leann/interface.py
+++ b/packages/leann-core/src/leann/interface.py
@@ -1,15 +1,14 @@
 from abc import ABC, abstractmethod
+from typing import Any, Literal
+
 import numpy as np
-from typing import Dict, Any, List, Literal, Optional


 class LeannBackendBuilderInterface(ABC):
    """Backend interface for building indexes"""

    @abstractmethod
-    def build(
-        self, data: np.ndarray, ids: List[str], index_path: str, **kwargs
-    ) -> None:
+    def build(self, data: np.ndarray, ids: list[str], index_path: str, **kwargs) -> None:
        """Build index

        Args:
@@ -35,9 +34,7 @@ class LeannBackendSearcherInterface(ABC):
        pass

    @abstractmethod
-    def _ensure_server_running(
-        self, passages_source_file: str, port: Optional[int], **kwargs
-    ) -> int:
+    def _ensure_server_running(self, passages_source_file: str, port: int | None, **kwargs) -> int:
        """Ensure server is running"""
        pass

@@ -51,9 +48,9 @@ class LeannBackendSearcherInterface(ABC):
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = False,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        **kwargs,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
        """Search for nearest neighbors

        Args:
@@ -77,7 +74,7 @@ class LeannBackendSearcherInterface(ABC):
        self,
        query: str,
        use_server_if_available: bool = True,
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
    ) -> np.ndarray:
        """Compute embedding for a query string

--- a/packages/leann-core/src/leann/registry.py
+++ b/packages/leann-core/src/leann/registry.py
@@ -1,13 +1,13 @@
 # packages/leann-core/src/leann/registry.py

-from typing import Dict, TYPE_CHECKING
 import importlib
 import importlib.metadata
+from typing import TYPE_CHECKING

 if TYPE_CHECKING:
    from leann.interface import LeannBackendFactoryInterface

-BACKEND_REGISTRY: Dict[str, "LeannBackendFactoryInterface"] = {}
+BACKEND_REGISTRY: dict[str, "LeannBackendFactoryInterface"] = {}


 def register_backend(name: str):
@@ -31,13 +31,11 @@ def autodiscover_backends():
            backend_module_name = dist_name.replace("-", "_")
            discovered_backends.append(backend_module_name)

-    for backend_module_name in sorted(
-        discovered_backends
-    ):  # sort for deterministic loading
+    for backend_module_name in sorted(discovered_backends):  # sort for deterministic loading
        try:
            importlib.import_module(backend_module_name)
            # Registration message is printed by the decorator
-        except ImportError as e:
+        except ImportError:
            # print(f"WARN: Could not import backend module '{backend_module_name}': {e}")
            pass
    # print("INFO: Backend auto-discovery finished.")
--- a/packages/leann-core/src/leann/searcher_base.py
+++ b/packages/leann-core/src/leann/searcher_base.py
@@ -1,7 +1,7 @@
 import json
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Dict, Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np

@@ -38,9 +38,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):

        self.embedding_model = self.meta.get("embedding_model")
        if not self.embedding_model:
-            print(
-                "WARNING: embedding_model not found in meta.json. Recompute will fail."
-            )
+            print("WARNING: embedding_model not found in meta.json. Recompute will fail.")

        self.embedding_mode = self.meta.get("embedding_mode", "sentence-transformers")

@@ -48,26 +46,22 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
            backend_module_name=backend_module_name,
        )

-    def _load_meta(self) -> Dict[str, Any]:
+    def _load_meta(self) -> dict[str, Any]:
        """Loads the metadata file associated with the index."""
        # This is the corrected logic for finding the meta file.
        meta_path = self.index_dir / f"{self.index_path.name}.meta.json"
        if not meta_path.exists():
            raise FileNotFoundError(f"Leann metadata file not found at {meta_path}")
-        with open(meta_path, "r", encoding="utf-8") as f:
+        with open(meta_path, encoding="utf-8") as f:
            return json.load(f)

-    def _ensure_server_running(
-        self, passages_source_file: str, port: int, **kwargs
-    ) -> int:
+    def _ensure_server_running(self, passages_source_file: str, port: int, **kwargs) -> int:
        """
        Ensures the embedding server is running if recompute is needed.
        This is a helper for subclasses.
        """
        if not self.embedding_model:
-            raise ValueError(
-                "Cannot use recompute mode without 'embedding_model' in meta.json."
-            )
+            raise ValueError("Cannot use recompute mode without 'embedding_model' in meta.json.")

        server_started, actual_port = self.embedding_server_manager.start_server(
            port=port,
@@ -78,9 +72,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
            enable_warmup=kwargs.get("enable_warmup", False),
        )
        if not server_started:
-            raise RuntimeError(
-                f"Failed to start embedding server on port {actual_port}"
-            )
+            raise RuntimeError(f"Failed to start embedding server on port {actual_port}")

        return actual_port

@@ -109,9 +101,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
                # on that port?

                # Ensure we have a server with passages_file for compatibility
-                passages_source_file = (
-                    self.index_dir / f"{self.index_path.name}.meta.json"
-                )
+                passages_source_file = self.index_dir / f"{self.index_path.name}.meta.json"
                # Convert to absolute path to ensure server can find it
                zmq_port = self._ensure_server_running(
                    str(passages_source_file.resolve()), zmq_port
@@ -132,8 +122,8 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):

    def _compute_embedding_via_server(self, chunks: list, zmq_port: int) -> np.ndarray:
        """Compute embeddings using the ZMQ embedding server."""
-        import zmq
        import msgpack
+        import zmq

        try:
            context = zmq.Context()
@@ -172,9 +162,9 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = False,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        **kwargs,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
        """
        Search for the top_k nearest neighbors of the query vector.

--- a/packages/leann/init.py
+++ b/packages/leann/init.py
@@ -7,6 +7,6 @@ A revolutionary vector database that democratizes personal AI.
 __version__ = "0.1.0"

 # Re-export main API from leann-core
-from leann_core import LeannBuilder, LeannSearcher, LeannChat
+from leann_core import LeannBuilder, LeannChat, LeannSearcher

-__all__ = ["LeannBuilder", "LeannSearcher", "LeannChat"]
+__all__ = ["LeannBuilder", "LeannChat", "LeannSearcher"]
--- a/packages/leann/pyproject.toml
+++ b/packages/leann/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann"
-version = "0.1.12"
+version = "0.1.14"
 description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
 readme = "README.md"
 requires-python = ">=3.9"
--- a/packages/wechat-exporter/main.py
+++ b/packages/wechat-exporter/main.py
@@ -1,22 +1,23 @@
 import json
-import typer
-from pathlib import Path
-import requests
-from tqdm import tqdm
-import xml.etree.ElementTree as ET
-from typing_extensions import Annotated
 import sqlite3
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Annotated
+
+import requests
+import typer
+from tqdm import tqdm

 app = typer.Typer()

+
 def get_safe_path(s: str) -> str:
    """
    Remove invalid characters to sanitize a path.
    :param s: str to sanitize
    :returns: sanitized str
    """
-    ban_chars = "\\  /  :  *  ?  \"  '  <  >  |  $  \r  \n".replace(
-        ' ', '')
+    ban_chars = "\\  /  :  *  ?  \"  '  <  >  |  $  \r  \n".replace(" ", "")
    for i in ban_chars:
        s = s.replace(i, "")
    return s
@@ -26,35 +27,38 @@ def process_history(history: str):
    if history.startswith("<?xml") or history.startswith("<msg>"):
        try:
            root = ET.fromstring(history)
-            title = root.find('.//title').text if root.find('.//title') is not None else None
-            quoted = root.find('.//refermsg/content').text if root.find('.//refermsg/content') is not None else None
+            title = root.find(".//title").text if root.find(".//title") is not None else None
+            quoted = (
+                root.find(".//refermsg/content").text
+                if root.find(".//refermsg/content") is not None
+                else None
+            )
            if title and quoted:
-                return {
-                    "title": title,
-                    "quoted": process_history(quoted)
-                }
+                return {"title": title, "quoted": process_history(quoted)}
            if title:
                return title
        except Exception:
            return history
    return history

+
 def get_message(history: dict | str):
    if isinstance(history, dict):
-        if 'title' in history:
-            return history['title']
+        if "title" in history:
+            return history["title"]
    else:
        return history

+
 def export_chathistory(user_id: str):
-    res = requests.get("http://localhost:48065/wechat/chatlog", params={
-        "userId": user_id,
-        "count": 100000
-    }).json()
-    for i in range(len(res['chatLogs'])):
-        res['chatLogs'][i]['content'] = process_history(res['chatLogs'][i]['content'])
-        res['chatLogs'][i]['message'] = get_message(res['chatLogs'][i]['content'])
-    return res['chatLogs']
+    res = requests.get(
+        "http://localhost:48065/wechat/chatlog", params={"userId": user_id, "count": 100000}
+    ).json()
+    for i in range(len(res["chatLogs"])):
+        res["chatLogs"][i]["content"] = process_history(res["chatLogs"][i]["content"])
+        res["chatLogs"][i]["message"] = get_message(res["chatLogs"][i]["content"])
+    return res["chatLogs"]
+

@app.command()
 def export_all(dest: Annotated[Path, typer.Argument(help="Destination path to export to.")]):
@@ -64,7 +68,7 @@ def export_all(dest: Annotated[Path, typer.Argument(help="Destination path to ex
    if not dest.is_dir():
        if not dest.exists():
            inp = typer.prompt("Destination path does not exist, create it? (y/n)")
-            if inp.lower() == 'y':
+            if inp.lower() == "y":
                dest.mkdir(parents=True)
            else:
                typer.echo("Aborted.", err=True)
@@ -77,12 +81,12 @@ def export_all(dest: Annotated[Path, typer.Argument(help="Destination path to ex
    exported_count = 0
    for user in tqdm(all_users):
        try:
-            usr_chatlog = export_chathistory(user['arg'])
-            
+            usr_chatlog = export_chathistory(user["arg"])
+
            # Only write file if there are messages
            if len(usr_chatlog) > 0:
-                out_path = dest/get_safe_path((user['title'] or "")+"-"+user['arg']+'.json')
-                with open(out_path, 'w', encoding='utf-8') as f:
+                out_path = dest / get_safe_path((user["title"] or "") + "-" + user["arg"] + ".json")
+                with open(out_path, "w", encoding="utf-8") as f:
                    json.dump(usr_chatlog, f, ensure_ascii=False, indent=2)
                exported_count += 1
        except Exception as e:
@@ -91,23 +95,42 @@ def export_all(dest: Annotated[Path, typer.Argument(help="Destination path to ex

    print(f"Exported {exported_count} users' chat history to {dest} in json.")

+
@app.command()
-def export_sqlite(dest: Annotated[Path, typer.Argument(help="Destination path to export to.")] = Path("chatlog.db")):
+def export_sqlite(
+    dest: Annotated[Path, typer.Argument(help="Destination path to export to.")] = Path(
+        "chatlog.db"
+    ),
+):
    """
    Export all users' chat history to a sqlite database.
    """
    connection = sqlite3.connect(dest)
    cursor = connection.cursor()
-    cursor.execute("CREATE TABLE IF NOT EXISTS chatlog (id INTEGER PRIMARY KEY AUTOINCREMENT, with_id TEXT, from_user TEXT, to_user TEXT, message TEXT, timest DATETIME, auxiliary TEXT)")
+    cursor.execute(
+        "CREATE TABLE IF NOT EXISTS chatlog (id INTEGER PRIMARY KEY AUTOINCREMENT, with_id TEXT, from_user TEXT, to_user TEXT, message TEXT, timest DATETIME, auxiliary TEXT)"
+    )
    cursor.execute("CREATE INDEX IF NOT EXISTS chatlog_with_id_index ON chatlog (with_id)")
    cursor.execute("CREATE TABLE iF NOT EXISTS users (id TEXT PRIMARY KEY, name TEXT)")

    all_users = requests.get("http://localhost:48065/wechat/allcontacts").json()
    for user in tqdm(all_users):
-        cursor.execute("INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)", (user['arg'], user['title']))
-        usr_chatlog = export_chathistory(user['arg'])
+        cursor.execute(
+            "INSERT OR IGNORE INTO users (id, name) VALUES (?, ?)", (user["arg"], user["title"])
+        )
+        usr_chatlog = export_chathistory(user["arg"])
        for msg in usr_chatlog:
-            cursor.execute("INSERT INTO chatlog (with_id, from_user, to_user, message, timest, auxiliary) VALUES (?, ?, ?, ?, ?, ?)", (user['arg'], msg['fromUser'], msg['toUser'], msg['message'], msg['createTime'], str(msg['content'])))
+            cursor.execute(
+                "INSERT INTO chatlog (with_id, from_user, to_user, message, timest, auxiliary) VALUES (?, ?, ?, ?, ?, ?)",
+                (
+                    user["arg"],
+                    msg["fromUser"],
+                    msg["toUser"],
+                    msg["message"],
+                    msg["createTime"],
+                    str(msg["content"]),
+                ),
+            )
    connection.commit()


--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,14 +25,21 @@ dependencies = [
    "requests>=2.25.0",
    "sentence-transformers>=2.2.0",
    "openai>=1.0.0",
+    # PDF parsing dependencies - essential for document processing
    "PyPDF2>=3.0.0",
+    "pdfplumber>=0.11.0",
+    "pymupdf>=1.26.0",
+    "pypdfium2>=4.30.0",
+    # LlamaIndex core and readers
    "llama-index>=0.12.44",
+    "llama-index-readers-file>=0.4.0",  # Essential for PDF parsing
    "llama-index-readers-docling",
    "llama-index-node-parser-docling",
-    "ipykernel==6.29.5",
-    "msgpack>=1.1.1",
    "llama-index-vector-stores-faiss>=0.4.0",
    "llama-index-embeddings-huggingface>=0.5.5",
+    # Other dependencies
+    "ipykernel==6.29.5",
+    "msgpack>=1.1.1",
    "mlx>=0.26.3; sys_platform == 'darwin'",
    "mlx-lm>=0.26.0; sys_platform == 'darwin'",
    "psutil>=5.8.0",
@@ -52,6 +59,14 @@ diskann = [
    "leann-backend-diskann",
 ]

+# Optional dependency group for document processing (HTML, Word, Excel)
+documents = [
+    "beautifulsoup4>=4.13.0",  # For HTML parsing
+    "python-docx>=0.8.11",     # For Word documents
+    "openpyxl>=3.1.0",         # For Excel files
+    "pandas>=2.2.0",           # For data processing
+]
+
 [tool.setuptools]
 py-modules = []

@@ -60,3 +75,50 @@ py-modules = []
 leann-core = { path = "packages/leann-core", editable = true }
 leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = true }
 leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }
+
+[tool.ruff]
+target-version = "py310"
+line-length = 100
+extend-exclude = [
+    "third_party",
+    "*.egg-info",
+    "__pycache__",
+    ".git",
+    ".venv",
+]
+
+[tool.ruff.lint]
+select = [
+    "E",      # pycodestyle errors
+    "W",      # pycodestyle warnings
+    "F",      # pyflakes
+    "I",      # isort
+    "B",      # flake8-bugbear
+    "C4",     # flake8-comprehensions
+    "UP",     # pyupgrade
+    "N",      # pep8-naming
+    "RUF",    # ruff-specific rules
+]
+ignore = [
+    "E501",   # line too long (handled by formatter)
+    "B008",   # do not perform function calls in argument defaults
+    "B904",   # raise without from
+    "N812",   # lowercase imported as non-lowercase
+    "N806",   # variable in function should be lowercase
+    "RUF012", # mutable class attributes should be annotated with typing.ClassVar
+]
+
+[tool.ruff.lint.per-file-ignores]
+"test/**/*.py" = ["E402"]      # module level import not at top of file (common in tests)
+"examples/**/*.py" = ["E402"]  # module level import not at top of file (common in examples)
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
+
+[dependency-groups]
+dev = [
+    "ruff>=0.12.4",
+]
--- a/test/build_mlx_index.py
+++ b/test/build_mlx_index.py
@@ -1,5 +1,6 @@
 import os
-from leann.api import LeannBuilder, LeannSearcher, LeannChat
+
+from leann.api import LeannBuilder, LeannChat

 # Define the path for our new MLX-based index
 INDEX_PATH = "./mlx_diskann_index/leann"
@@ -38,7 +39,5 @@ chat = LeannChat(index_path=INDEX_PATH)
 # add query
 query = "MLX is an array framework for machine learning on Apple silicon."
 print(f"Query: {query}")
-response = chat.ask(
-    query, top_k=3, recompute_beighbor_embeddings=True, complexity=3, beam_width=1
-)
+response = chat.ask(query, top_k=3, recompute_beighbor_embeddings=True, complexity=3, beam_width=1)
 print(f"Response: {response}")
--- a/test/mail_reader_llamaindex.py
+++ b/test/mail_reader_llamaindex.py
@@ -1,76 +1,84 @@
-import os
 import email
-from pathlib import Path
-from typing import List, Any
-from llama_index.core import VectorStoreIndex, Document
+import os
+from typing import Any
+
+from llama_index.core import Document, VectorStoreIndex
 from llama_index.core.readers.base import BaseReader

+
 class EmlxReader(BaseReader):
    """
    Apple Mail .emlx file reader.
-    
+
    Reads individual .emlx files from Apple Mail's storage format.
    """
-    
+
    def __init__(self) -> None:
        """Initialize."""
        pass
-    
-    def load_data(self, input_dir: str, **load_kwargs: Any) -> List[Document]:
+
+    def load_data(self, input_dir: str, **load_kwargs: Any) -> list[Document]:
        """
        Load data from the input directory containing .emlx files.
-        
+
        Args:
            input_dir: Directory containing .emlx files
            **load_kwargs:
                max_count (int): Maximum amount of messages to read.
        """
-        docs: List[Document] = []
-        max_count = load_kwargs.get('max_count', 1000)
+        docs: list[Document] = []
+        max_count = load_kwargs.get("max_count", 1000)
        count = 0
-        
+
        # Walk through the directory recursively
        for dirpath, dirnames, filenames in os.walk(input_dir):
            # Skip hidden directories
            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
-            
+
            for filename in filenames:
                if count >= max_count:
                    break
-                    
+
                if filename.endswith(".emlx"):
                    filepath = os.path.join(dirpath, filename)
                    try:
                        # Read the .emlx file
-                        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                        with open(filepath, encoding="utf-8", errors="ignore") as f:
                            content = f.read()
-                        
+
                        # .emlx files have a length prefix followed by the email content
                        # The first line contains the length, followed by the email
-                        lines = content.split('\n', 1)
+                        lines = content.split("\n", 1)
                        if len(lines) >= 2:
                            email_content = lines[1]
-                            
+
                            # Parse the email using Python's email module
                            try:
                                msg = email.message_from_string(email_content)
-                                
+
                                # Extract email metadata
-                                subject = msg.get('Subject', 'No Subject')
-                                from_addr = msg.get('From', 'Unknown')
-                                to_addr = msg.get('To', 'Unknown')
-                                date = msg.get('Date', 'Unknown')
-                                
+                                subject = msg.get("Subject", "No Subject")
+                                from_addr = msg.get("From", "Unknown")
+                                to_addr = msg.get("To", "Unknown")
+                                date = msg.get("Date", "Unknown")
+
                                # Extract email body
                                body = ""
                                if msg.is_multipart():
                                    for part in msg.walk():
-                                        if part.get_content_type() == "text/plain" or part.get_content_type() == "text/html":
-                                            body += part.get_payload(decode=True).decode('utf-8', errors='ignore')
+                                        if (
+                                            part.get_content_type() == "text/plain"
+                                            or part.get_content_type() == "text/html"
+                                        ):
+                                            body += part.get_payload(decode=True).decode(
+                                                "utf-8", errors="ignore"
+                                            )
                                            # break
                                else:
-                                    body = msg.get_payload(decode=True).decode('utf-8', errors='ignore')
-                                
+                                    body = msg.get_payload(decode=True).decode(
+                                        "utf-8", errors="ignore"
+                                    )
+
                                # Create document content
                                doc_content = f"""
 From: {from_addr}
@@ -80,32 +88,38 @@ Date: {date}

 {body}
 """
-                                
+
                                # Create metadata
                                metadata = {
-                                    'file_path': filepath,
-                                    'subject': subject,
-                                    'from': from_addr,
-                                    'to': to_addr,
-                                    'date': date,
-                                    'filename': filename
+                                    "file_path": filepath,
+                                    "subject": subject,
+                                    "from": from_addr,
+                                    "to": to_addr,
+                                    "date": date,
+                                    "filename": filename,
                                }
                                if count == 0:
                                    print("--------------------------------")
-                                    print('dir path', dirpath)
+                                    print("dir path", dirpath)
                                    print(metadata)
                                    print(doc_content)
                                    print("--------------------------------")
-                                    body=[]
+                                    body = []
                                    if msg.is_multipart():
                                        for part in msg.walk():
-                                            print("--------------------------------  get content type -------------------------------")
+                                            print(
+                                                "--------------------------------  get content type -------------------------------"
+                                            )
                                            print(part.get_content_type())
                                            print(part)
                                            # body.append(part.get_payload(decode=True).decode('utf-8', errors='ignore'))
-                                            print("--------------------------------  get content type -------------------------------")
+                                            print(
+                                                "--------------------------------  get content type -------------------------------"
+                                            )
                                    else:
-                                        body = msg.get_payload(decode=True).decode('utf-8', errors='ignore')
+                                        body = msg.get_payload(decode=True).decode(
+                                            "utf-8", errors="ignore"
+                                        )
                                        print(body)

                                    print(body)
@@ -113,22 +127,23 @@ Date: {date}
                                doc = Document(text=doc_content, metadata=metadata)
                                docs.append(doc)
                                count += 1
-                                
+
                            except Exception as e:
                                print(f"!!!!!!! Error parsing email from {filepath}: {e} !!!!!!!!")
                                continue
-                                
+
                    except Exception as e:
                        print(f"!!!!!!! Error reading file !!!!!!!! {filepath}: {e}")
                        continue
-        
+
        print(f"Loaded {len(docs)} email documents")
        return docs

+
 # Use the custom EmlxReader instead of MboxReader
 documents = EmlxReader().load_data(
-    "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages", 
-    max_count=1000
+    "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages",
+    max_count=1000,
 )  # Returns list of documents

 # Configure the index with larger chunk size to handle long metadata
@@ -138,10 +153,9 @@ from llama_index.core.node_parser import SentenceSplitter
 text_splitter = SentenceSplitter(chunk_size=2048, chunk_overlap=200)

 index = VectorStoreIndex.from_documents(
-    documents,
-    transformations=[text_splitter]
+    documents, transformations=[text_splitter]
 )  # Initialize index with documents

 query_engine = index.as_query_engine()
 res = query_engine.query("Hows Berkeley Graduate Student Instructor")
-print(res)
+print(res)
--- a/test/mail_reader_save_load.py
+++ b/test/mail_reader_save_load.py
@@ -1,77 +1,82 @@
-import os
 import email
-from pathlib import Path
-from typing import List, Any
-from llama_index.core import VectorStoreIndex, Document, StorageContext
-from llama_index.core.readers.base import BaseReader
+import os
+from typing import Any
+
+from llama_index.core import Document, StorageContext, VectorStoreIndex
 from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.readers.base import BaseReader
+

 class EmlxReader(BaseReader):
    """
    Apple Mail .emlx file reader.
-    
+
    Reads individual .emlx files from Apple Mail's storage format.
    """
-    
+
    def __init__(self) -> None:
        """Initialize."""
        pass
-    
-    def load_data(self, input_dir: str, **load_kwargs: Any) -> List[Document]:
+
+    def load_data(self, input_dir: str, **load_kwargs: Any) -> list[Document]:
        """
        Load data from the input directory containing .emlx files.
-        
+
        Args:
            input_dir: Directory containing .emlx files
            **load_kwargs:
                max_count (int): Maximum amount of messages to read.
        """
-        docs: List[Document] = []
-        max_count = load_kwargs.get('max_count', 1000)
+        docs: list[Document] = []
+        max_count = load_kwargs.get("max_count", 1000)
        count = 0
-        
+
        # Walk through the directory recursively
        for dirpath, dirnames, filenames in os.walk(input_dir):
            # Skip hidden directories
            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
-            
+
            for filename in filenames:
                if count >= max_count:
                    break
-                    
+
                if filename.endswith(".emlx"):
                    filepath = os.path.join(dirpath, filename)
                    try:
                        # Read the .emlx file
-                        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                        with open(filepath, encoding="utf-8", errors="ignore") as f:
                            content = f.read()
-                        
+
                        # .emlx files have a length prefix followed by the email content
                        # The first line contains the length, followed by the email
-                        lines = content.split('\n', 1)
+                        lines = content.split("\n", 1)
                        if len(lines) >= 2:
                            email_content = lines[1]
-                            
+
                            # Parse the email using Python's email module
                            try:
                                msg = email.message_from_string(email_content)
-                                
+
                                # Extract email metadata
-                                subject = msg.get('Subject', 'No Subject')
-                                from_addr = msg.get('From', 'Unknown')
-                                to_addr = msg.get('To', 'Unknown')
-                                date = msg.get('Date', 'Unknown')
-                                
+                                subject = msg.get("Subject", "No Subject")
+                                from_addr = msg.get("From", "Unknown")
+                                to_addr = msg.get("To", "Unknown")
+                                date = msg.get("Date", "Unknown")
+
                                # Extract email body
                                body = ""
                                if msg.is_multipart():
                                    for part in msg.walk():
                                        if part.get_content_type() == "text/plain":
-                                            body = part.get_payload(decode=True).decode('utf-8', errors='ignore')
+                                            body = part.get_payload(decode=True).decode(
+                                                "utf-8", errors="ignore"
+                                            )
                                            break
                                else:
-                                    body = msg.get_payload(decode=True).decode('utf-8', errors='ignore')
-                                
+                                    body = msg.get_payload(decode=True).decode(
+                                        "utf-8", errors="ignore"
+                                    )
+
                                # Create document content
                                doc_content = f"""
 From: {from_addr}
@@ -81,97 +86,96 @@ Date: {date}

 {body}
 """
-                                
+
                                # Create metadata
                                metadata = {
-                                    'file_path': filepath,
-                                    'subject': subject,
-                                    'from': from_addr,
-                                    'to': to_addr,
-                                    'date': date,
-                                    'filename': filename
+                                    "file_path": filepath,
+                                    "subject": subject,
+                                    "from": from_addr,
+                                    "to": to_addr,
+                                    "date": date,
+                                    "filename": filename,
                                }
-                                
+
                                doc = Document(text=doc_content, metadata=metadata)
                                docs.append(doc)
                                count += 1
-                                
+
                            except Exception as e:
                                print(f"Error parsing email from {filepath}: {e}")
                                continue
-                                
+
                    except Exception as e:
                        print(f"Error reading file {filepath}: {e}")
                        continue
-        
+
        print(f"Loaded {len(docs)} email documents")
        return docs

+
 def create_and_save_index(mail_path: str, save_dir: str = "mail_index", max_count: int = 1000):
    """
    Create the index from mail data and save it to disk.
-    
+
    Args:
        mail_path: Path to the mail directory
        save_dir: Directory to save the index
        max_count: Maximum number of emails to process
    """
    print("Creating index from mail data...")
-    
+
    # Load documents
    documents = EmlxReader().load_data(mail_path, max_count=max_count)
-    
+
    if not documents:
        print("No documents loaded. Exiting.")
        return None
-    
+
    # Create text splitter
    text_splitter = SentenceSplitter(chunk_size=256, chunk_overlap=0)
-    
+
    # Create index
-    index = VectorStoreIndex.from_documents(
-        documents,
-        transformations=[text_splitter]
-    )
-    
+    index = VectorStoreIndex.from_documents(documents, transformations=[text_splitter])
+
    # Save the index
    os.makedirs(save_dir, exist_ok=True)
    index.storage_context.persist(persist_dir=save_dir)
    print(f"Index saved to {save_dir}")
-    
+
    return index

+
 def load_index(save_dir: str = "mail_index"):
    """
    Load the saved index from disk.
-    
+
    Args:
        save_dir: Directory where the index is saved
-    
+
    Returns:
        Loaded index or None if loading fails
    """
    try:
        # Load storage context
        storage_context = StorageContext.from_defaults(persist_dir=save_dir)
-        
+
        # Load index
        index = VectorStoreIndex.from_vector_store(
-            storage_context.vector_store,
-            storage_context=storage_context
+            storage_context.vector_store, storage_context=storage_context
        )
-        
+
        print(f"Index loaded from {save_dir}")
        return index
-    
+
    except Exception as e:
        print(f"Error loading index: {e}")
        return None

+
 def query_index(index, query: str):
    """
    Query the loaded index.
-    
+
    Args:
        index: The loaded index
        query: The query string
@@ -179,16 +183,17 @@ def query_index(index, query: str):
    if index is None:
        print("No index available for querying.")
        return
-    
+
    query_engine = index.as_query_engine()
    response = query_engine.query(query)
    print(f"Query: {query}")
    print(f"Response: {response}")

+
 def main():
    mail_path = "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages"
    save_dir = "mail_index"
-    
+
    # Check if index already exists
    if os.path.exists(save_dir) and os.path.exists(os.path.join(save_dir, "vector_store.json")):
        print("Loading existing index...")
@@ -196,18 +201,19 @@ def main():
    else:
        print("Creating new index...")
        index = create_and_save_index(mail_path, save_dir, max_count=1000)
-    
+
    if index:
        # Example queries
        queries = [
            "Hows Berkeley Graduate Student Instructor",
            "What emails mention GSR appointments?",
-            "Find emails about deadlines"
+            "Find emails about deadlines",
        ]
-        
+
        for query in queries:
-            print("\n" + "="*50)
+            print("\n" + "=" * 50)
            query_index(index, query)

+
 if __name__ == "__main__":
-    main() 
+    main()
--- a/test/mail_reader_small_chunks.py
+++ b/test/mail_reader_small_chunks.py
@@ -1,77 +1,82 @@
-import os
 import email
-from pathlib import Path
-from typing import List, Any
-from llama_index.core import VectorStoreIndex, Document, StorageContext
-from llama_index.core.readers.base import BaseReader
+import os
+from typing import Any
+
+from llama_index.core import Document, StorageContext, VectorStoreIndex
 from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.readers.base import BaseReader
+

 class EmlxReader(BaseReader):
    """
    Apple Mail .emlx file reader with reduced metadata.
-    
+
    Reads individual .emlx files from Apple Mail's storage format.
    """
-    
+
    def __init__(self) -> None:
        """Initialize."""
        pass
-    
-    def load_data(self, input_dir: str, **load_kwargs: Any) -> List[Document]:
+
+    def load_data(self, input_dir: str, **load_kwargs: Any) -> list[Document]:
        """
        Load data from the input directory containing .emlx files.
-        
+
        Args:
            input_dir: Directory containing .emlx files
            **load_kwargs:
                max_count (int): Maximum amount of messages to read.
        """
-        docs: List[Document] = []
-        max_count = load_kwargs.get('max_count', 1000)
+        docs: list[Document] = []
+        max_count = load_kwargs.get("max_count", 1000)
        count = 0
-        
+
        # Walk through the directory recursively
        for dirpath, dirnames, filenames in os.walk(input_dir):
            # Skip hidden directories
            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
-            
+
            for filename in filenames:
                if count >= max_count:
                    break
-                    
+
                if filename.endswith(".emlx"):
                    filepath = os.path.join(dirpath, filename)
                    try:
                        # Read the .emlx file
-                        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                        with open(filepath, encoding="utf-8", errors="ignore") as f:
                            content = f.read()
-                        
+
                        # .emlx files have a length prefix followed by the email content
                        # The first line contains the length, followed by the email
-                        lines = content.split('\n', 1)
+                        lines = content.split("\n", 1)
                        if len(lines) >= 2:
                            email_content = lines[1]
-                            
+
                            # Parse the email using Python's email module
                            try:
                                msg = email.message_from_string(email_content)
-                                
+
                                # Extract email metadata
-                                subject = msg.get('Subject', 'No Subject')
-                                from_addr = msg.get('From', 'Unknown')
-                                to_addr = msg.get('To', 'Unknown')
-                                date = msg.get('Date', 'Unknown')
-                                
+                                subject = msg.get("Subject", "No Subject")
+                                from_addr = msg.get("From", "Unknown")
+                                to_addr = msg.get("To", "Unknown")
+                                date = msg.get("Date", "Unknown")
+
                                # Extract email body
                                body = ""
                                if msg.is_multipart():
                                    for part in msg.walk():
                                        if part.get_content_type() == "text/plain":
-                                            body = part.get_payload(decode=True).decode('utf-8', errors='ignore')
+                                            body = part.get_payload(decode=True).decode(
+                                                "utf-8", errors="ignore"
+                                            )
                                            break
                                else:
-                                    body = msg.get_payload(decode=True).decode('utf-8', errors='ignore')
-                                
+                                    body = msg.get_payload(decode=True).decode(
+                                        "utf-8", errors="ignore"
+                                    )
+
                                # Create document content with metadata embedded in text
                                doc_content = f"""
 From: {from_addr}
@@ -81,95 +86,96 @@ Date: {date}

 {body}
 """
-                                
+
                                # Create minimal metadata (only essential info)
                                metadata = {
-                                    'subject': subject[:50],  # Truncate subject
-                                    'from': from_addr[:30],   # Truncate from
-                                    'date': date[:20],        # Truncate date
-                                    'filename': filename      # Keep filename
+                                    "subject": subject[:50],  # Truncate subject
+                                    "from": from_addr[:30],  # Truncate from
+                                    "date": date[:20],  # Truncate date
+                                    "filename": filename,  # Keep filename
                                }
-                                
+
                                doc = Document(text=doc_content, metadata=metadata)
                                docs.append(doc)
                                count += 1
-                                
+
                            except Exception as e:
                                print(f"Error parsing email from {filepath}: {e}")
                                continue
-                                
+
                    except Exception as e:
                        print(f"Error reading file {filepath}: {e}")
                        continue
-        
+
        print(f"Loaded {len(docs)} email documents")
        return docs

-def create_and_save_index(mail_path: str, save_dir: str = "mail_index_small", max_count: int = 1000):
+
+def create_and_save_index(
+    mail_path: str, save_dir: str = "mail_index_small", max_count: int = 1000
+):
    """
    Create the index from mail data and save it to disk.
-    
+
    Args:
        mail_path: Path to the mail directory
        save_dir: Directory to save the index
        max_count: Maximum number of emails to process
    """
    print("Creating index from mail data with small chunks...")
-    
+
    # Load documents
    documents = EmlxReader().load_data(mail_path, max_count=max_count)
-    
+
    if not documents:
        print("No documents loaded. Exiting.")
        return None
-    
+
    # Create text splitter with small chunk size
    text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=50)
-    
+
    # Create index
-    index = VectorStoreIndex.from_documents(
-        documents,
-        transformations=[text_splitter]
-    )
-    
+    index = VectorStoreIndex.from_documents(documents, transformations=[text_splitter])
+
    # Save the index
    os.makedirs(save_dir, exist_ok=True)
    index.storage_context.persist(persist_dir=save_dir)
    print(f"Index saved to {save_dir}")
-    
+
    return index

+
 def load_index(save_dir: str = "mail_index_small"):
    """
    Load the saved index from disk.
-    
+
    Args:
        save_dir: Directory where the index is saved
-    
+
    Returns:
        Loaded index or None if loading fails
    """
    try:
        # Load storage context
        storage_context = StorageContext.from_defaults(persist_dir=save_dir)
-        
+
        # Load index
        index = VectorStoreIndex.from_vector_store(
-            storage_context.vector_store,
-            storage_context=storage_context
+            storage_context.vector_store, storage_context=storage_context
        )
-        
+
        print(f"Index loaded from {save_dir}")
        return index
-    
+
    except Exception as e:
        print(f"Error loading index: {e}")
        return None

+
 def query_index(index, query: str):
    """
    Query the loaded index.
-    
+
    Args:
        index: The loaded index
        query: The query string
@@ -177,16 +183,17 @@ def query_index(index, query: str):
    if index is None:
        print("No index available for querying.")
        return
-    
+
    query_engine = index.as_query_engine()
    response = query_engine.query(query)
    print(f"Query: {query}")
    print(f"Response: {response}")

+
 def main():
    mail_path = "/Users/yichuan/Library/Mail/V10/0FCA0879-FD8C-4B7E-83BF-FDDA930791C5/[Gmail].mbox/All Mail.mbox/78BA5BE1-8819-4F9A-9613-EB63772F1DD0/Data/9/Messages"
    save_dir = "mail_index_small"
-    
+
    # Check if index already exists
    if os.path.exists(save_dir) and os.path.exists(os.path.join(save_dir, "vector_store.json")):
        print("Loading existing index...")
@@ -194,18 +201,19 @@ def main():
    else:
        print("Creating new index...")
        index = create_and_save_index(mail_path, save_dir, max_count=1000)
-    
+
    if index:
        # Example queries
        queries = [
            "Hows Berkeley Graduate Student Instructor",
            "What emails mention GSR appointments?",
-            "Find emails about deadlines"
+            "Find emails about deadlines",
        ]
-        
+
        for query in queries:
-            print("\n" + "="*50)
+            print("\n" + "=" * 50)
            query_index(index, query)

+
 if __name__ == "__main__":
-    main() 
+    main()
--- a/test/mail_reader_test.py
+++ b/test/mail_reader_test.py
@@ -1,89 +1,94 @@
-import os
 import email
-from pathlib import Path
-from typing import List, Any
-from llama_index.core import VectorStoreIndex, Document
+import os
+from typing import Any
+
+from llama_index.core import Document, VectorStoreIndex
 from llama_index.core.readers.base import BaseReader

+
 class EmlxReader(BaseReader):
    """
    Apple Mail .emlx file reader.
-    
+
    Reads individual .emlx files from Apple Mail's storage format.
    """
-    
+
    def __init__(self) -> None:
        """Initialize."""
        pass
-    
-    def load_data(self, input_dir: str, **load_kwargs: Any) -> List[Document]:
+
+    def load_data(self, input_dir: str, **load_kwargs: Any) -> list[Document]:
        """
        Load data from the input directory containing .emlx files.
-        
+
        Args:
            input_dir: Directory containing .emlx files
            **load_kwargs:
                max_count (int): Maximum amount of messages to read.
        """
-        docs: List[Document] = []
-        max_count = load_kwargs.get('max_count', 1000)
+        docs: list[Document] = []
+        max_count = load_kwargs.get("max_count", 1000)
        count = 0
-        
+
        # Check if directory exists and is accessible
        if not os.path.exists(input_dir):
            print(f"Error: Directory '{input_dir}' does not exist")
            return docs
-        
+
        if not os.access(input_dir, os.R_OK):
            print(f"Error: Directory '{input_dir}' is not accessible (permission denied)")
            print("This is likely due to macOS security restrictions on Mail app data")
            return docs
-        
+
        print(f"Scanning directory: {input_dir}")
-        
+
        # Walk through the directory recursively
        for dirpath, dirnames, filenames in os.walk(input_dir):
            # Skip hidden directories
            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
-            
+
            for filename in filenames:
                if count >= max_count:
                    break
-                    
+
                if filename.endswith(".emlx"):
                    filepath = os.path.join(dirpath, filename)
                    print(f"Found .emlx file: {filepath}")
                    try:
                        # Read the .emlx file
-                        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                        with open(filepath, encoding="utf-8", errors="ignore") as f:
                            content = f.read()
-                        
+
                        # .emlx files have a length prefix followed by the email content
                        # The first line contains the length, followed by the email
-                        lines = content.split('\n', 1)
+                        lines = content.split("\n", 1)
                        if len(lines) >= 2:
                            email_content = lines[1]
-                            
+
                            # Parse the email using Python's email module
                            try:
                                msg = email.message_from_string(email_content)
-                                
+
                                # Extract email metadata
-                                subject = msg.get('Subject', 'No Subject')
-                                from_addr = msg.get('From', 'Unknown')
-                                to_addr = msg.get('To', 'Unknown')
-                                date = msg.get('Date', 'Unknown')
-                                
+                                subject = msg.get("Subject", "No Subject")
+                                from_addr = msg.get("From", "Unknown")
+                                to_addr = msg.get("To", "Unknown")
+                                date = msg.get("Date", "Unknown")
+
                                # Extract email body
                                body = ""
                                if msg.is_multipart():
                                    for part in msg.walk():
                                        if part.get_content_type() == "text/plain":
-                                            body = part.get_payload(decode=True).decode('utf-8', errors='ignore')
+                                            body = part.get_payload(decode=True).decode(
+                                                "utf-8", errors="ignore"
+                                            )
                                            break
                                else:
-                                    body = msg.get_payload(decode=True).decode('utf-8', errors='ignore')
-                                
+                                    body = msg.get_payload(decode=True).decode(
+                                        "utf-8", errors="ignore"
+                                    )
+
                                # Create document content
                                doc_content = f"""
 From: {from_addr}
@@ -93,55 +98,57 @@ Date: {date}

 {body}
 """
-                                
+
                                # Create metadata
                                metadata = {
-                                    'file_path': filepath,
-                                    'subject': subject,
-                                    'from': from_addr,
-                                    'to': to_addr,
-                                    'date': date,
-                                    'filename': filename
+                                    "file_path": filepath,
+                                    "subject": subject,
+                                    "from": from_addr,
+                                    "to": to_addr,
+                                    "date": date,
+                                    "filename": filename,
                                }
-                                
+
                                doc = Document(text=doc_content, metadata=metadata)
                                docs.append(doc)
                                count += 1
-                                
+
                            except Exception as e:
                                print(f"Error parsing email from {filepath}: {e}")
                                continue
-                                
+
                    except Exception as e:
                        print(f"Error reading file {filepath}: {e}")
                        continue
-        
+
        print(f"Loaded {len(docs)} email documents")
        return docs

+
 def main():
    # Use the current directory where the sample.emlx file is located
    current_dir = os.path.dirname(os.path.abspath(__file__))
-    
+
    print("Testing EmlxReader with sample .emlx file...")
    print(f"Scanning directory: {current_dir}")
-    
+
    # Use the custom EmlxReader
    documents = EmlxReader().load_data(current_dir, max_count=1000)
-    
+
    if not documents:
        print("No documents loaded. Make sure sample.emlx exists in the examples directory.")
        return
-    
+
    print(f"\nSuccessfully loaded {len(documents)} document(s)")
-    
+
    # Initialize index with documents
    index = VectorStoreIndex.from_documents(documents)
    query_engine = index.as_query_engine()
-    
+
    print("\nTesting query: 'Hows Berkeley Graduate Student Instructor'")
    res = query_engine.query("Hows Berkeley Graduate Student Instructor")
    print(f"Response: {res}")

+
 if __name__ == "__main__":
-    main() 
+    main()
--- a/test/micro_tpt.py
+++ b/test/micro_tpt.py
@@ -2,20 +2,20 @@

 import argparse
 import time
+from contextlib import contextmanager
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple

 import numpy as np
 import torch
 from torch import nn
-from transformers import AutoModel, BitsAndBytesConfig
 from tqdm import tqdm
-from contextlib import contextmanager
+from transformers import AutoModel, BitsAndBytesConfig
+

@dataclass
 class BenchmarkConfig:
    model_path: str
-    batch_sizes: List[int]
+    batch_sizes: list[int]
    seq_length: int
    num_runs: int
    use_fp16: bool = True
@@ -28,47 +28,44 @@ class BenchmarkConfig:

 class GraphContainer:
    """Container for managing graphs for different batch sizes (CUDA graphs on NVIDIA, regular on others)."""
-    
+
    def __init__(self, model: nn.Module, seq_length: int):
        self.model = model
        self.seq_length = seq_length
-        self.graphs: Dict[int, 'GraphWrapper'] = {}
-    
-    def get_or_create(self, batch_size: int) -> 'GraphWrapper':
+        self.graphs: dict[int, GraphWrapper] = {}
+
+    def get_or_create(self, batch_size: int) -> "GraphWrapper":
        if batch_size not in self.graphs:
-            self.graphs[batch_size] = GraphWrapper(
-                self.model, batch_size, self.seq_length
-            )
+            self.graphs[batch_size] = GraphWrapper(self.model, batch_size, self.seq_length)
        return self.graphs[batch_size]


 class GraphWrapper:
    """Wrapper for graph capture and replay (CUDA graphs on NVIDIA, regular on others)."""
-    
+
    def __init__(self, model: nn.Module, batch_size: int, seq_length: int):
        self.model = model
        self.device = self._get_device()
        self.static_input = self._create_random_batch(batch_size, seq_length)
        self.static_attention_mask = torch.ones_like(self.static_input)
-        
+
        # Warm up
        self._warmup()
-        
+
        # Only use CUDA graphs on NVIDIA GPUs
-        if torch.cuda.is_available() and hasattr(torch.cuda, 'CUDAGraph'):
+        if torch.cuda.is_available() and hasattr(torch.cuda, "CUDAGraph"):
            # Capture graph
            self.graph = torch.cuda.CUDAGraph()
            with torch.cuda.graph(self.graph):
                self.static_output = self.model(
-                    input_ids=self.static_input,
-                    attention_mask=self.static_attention_mask
+                    input_ids=self.static_input, attention_mask=self.static_attention_mask
                )
            self.use_cuda_graph = True
        else:
            # For MPS or CPU, just store the model
            self.use_cuda_graph = False
            self.static_output = None
-    
+
    def _get_device(self) -> str:
        if torch.cuda.is_available():
            return "cuda"
@@ -76,22 +73,17 @@ class GraphWrapper:
            return "mps"
        else:
            return "cpu"
-    
+
    def _create_random_batch(self, batch_size: int, seq_length: int) -> torch.Tensor:
        return torch.randint(
-            0, 1000, (batch_size, seq_length), 
-            device=self.device, 
-            dtype=torch.long
+            0, 1000, (batch_size, seq_length), device=self.device, dtype=torch.long
        )
-    
+
    def _warmup(self, num_warmup: int = 3):
        with torch.no_grad():
            for _ in range(num_warmup):
-                self.model(
-                    input_ids=self.static_input,
-                    attention_mask=self.static_attention_mask
-                )
-    
+                self.model(input_ids=self.static_input, attention_mask=self.static_attention_mask)
+
    def __call__(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        if self.use_cuda_graph:
            self.static_input.copy_(input_ids)
@@ -105,14 +97,14 @@ class GraphWrapper:

 class ModelOptimizer:
    """Applies various optimizations to the model."""
-    
+
    @staticmethod
    def optimize(model: nn.Module, config: BenchmarkConfig) -> nn.Module:
        print("\nApplying model optimizations:")
-        
+
        if model is None:
            raise ValueError("Cannot optimize None model")
-        
+
        # Move to GPU
        if torch.cuda.is_available():
            model = model.cuda()
@@ -124,53 +116,59 @@ class ModelOptimizer:
            model = model.cpu()
            device = "cpu"
        print(f"- Model moved to {device}")
-        
+
        # FP16
        if config.use_fp16 and not config.use_int4:
            model = model.half()
            # use torch compile
            model = torch.compile(model)
            print("- Using FP16 precision")
-        
+
        # Check if using SDPA (only on CUDA)
-        if torch.cuda.is_available() and torch.version.cuda and float(torch.version.cuda[:3]) >= 11.6:
-            if hasattr(torch.nn.functional, 'scaled_dot_product_attention'):
+        if (
+            torch.cuda.is_available()
+            and torch.version.cuda
+            and float(torch.version.cuda[:3]) >= 11.6
+        ):
+            if hasattr(torch.nn.functional, "scaled_dot_product_attention"):
                print("- Using PyTorch SDPA (scaled_dot_product_attention)")
            else:
                print("- PyTorch SDPA not available")
-        
+
        # Flash Attention (only on CUDA)
        if config.use_flash_attention and torch.cuda.is_available():
            try:
-                from flash_attn.flash_attention import FlashAttention
+                from flash_attn.flash_attention import FlashAttention  # noqa: F401
+
                print("- Flash Attention 2 available")
                if hasattr(model.config, "attention_mode"):
                    model.config.attention_mode = "flash_attention_2"
                    print("  - Enabled Flash Attention 2 mode")
            except ImportError:
                print("- Flash Attention not available")
-        
+
        # Memory efficient attention (only on CUDA)
        if torch.cuda.is_available():
            try:
-                from xformers.ops import memory_efficient_attention
-                if hasattr(model, 'enable_xformers_memory_efficient_attention'):
+                from xformers.ops import memory_efficient_attention  # noqa: F401
+
+                if hasattr(model, "enable_xformers_memory_efficient_attention"):
                    model.enable_xformers_memory_efficient_attention()
                    print("- Enabled xformers memory efficient attention")
                else:
                    print("- Model doesn't support xformers")
            except (ImportError, AttributeError):
                print("- Xformers not available")
-        
+
        model.eval()
        print("- Model set to eval mode")
-        
+
        return model


 class Timer:
    """Handles accurate GPU timing using GPU events or CPU timing."""
-    
+
    def __init__(self):
        if torch.cuda.is_available():
            self.start_event = torch.cuda.Event(enable_timing=True)
@@ -182,7 +180,7 @@ class Timer:
        else:
            # CPU timing
            self.use_gpu_timing = False
-    
+
    @contextmanager
    def timing(self):
        if self.use_gpu_timing:
@@ -195,7 +193,7 @@ class Timer:
            start_time = time.time()
            yield
            self.cpu_elapsed = time.time() - start_time
-    
+
    def elapsed_time(self) -> float:
        if self.use_gpu_timing:
            return self.start_event.elapsed_time(self.end_event) / 1000  # ms to seconds
@@ -205,14 +203,14 @@ class Timer:

 class Benchmark:
    """Main benchmark runner."""
-    
+
    def __init__(self, config: BenchmarkConfig):
        self.config = config
        try:
            self.model = self._load_model()
            if self.model is None:
                raise ValueError("Model initialization failed - model is None")
-            
+
            # Only use CUDA graphs on NVIDIA GPUs
            if config.use_cuda_graphs and torch.cuda.is_available():
                self.graphs = GraphContainer(self.model, config.seq_length)
@@ -220,25 +218,27 @@ class Benchmark:
                self.graphs = None
            self.timer = Timer()
        except Exception as e:
-            print(f"ERROR in benchmark initialization: {str(e)}")
+            print(f"ERROR in benchmark initialization: {e!s}")
            raise
-    
+
    def _load_model(self) -> nn.Module:
        print(f"Loading model from {self.config.model_path}...")
-        
+
        try:
            # Int4 quantization using HuggingFace integration
            if self.config.use_int4:
                import bitsandbytes as bnb
+
                print(f"- bitsandbytes version: {bnb.__version__}")
-                
-                # 检查是否使用自定义的8bit量化
-                if hasattr(self.config, 'use_linear8bitlt') and self.config.use_linear8bitlt:
+
+                # Check if using custom 8bit quantization
+                if hasattr(self.config, "use_linear8bitlt") and self.config.use_linear8bitlt:
                    print("- Using custom Linear8bitLt replacement for all linear layers")
-                    
-                    # 加载原始模型（不使用量化配置）
+
+                    # Load original model (without quantization config)
                    import bitsandbytes as bnb
                    import torch
+
                    # set default to half
                    torch.set_default_dtype(torch.float16)
                    compute_dtype = torch.float16 if self.config.use_fp16 else torch.float32
@@ -246,112 +246,120 @@ class Benchmark:
                        self.config.model_path,
                        torch_dtype=compute_dtype,
                    )
-                    
-                    # 定义替换函数
+
+                    # Define replacement function
                    def replace_linear_with_linear8bitlt(model):
-                        """递归地将模型中的所有nn.Linear层替换为Linear8bitLt"""
+                        """Recursively replace all nn.Linear layers with Linear8bitLt"""
                        for name, module in list(model.named_children()):
                            if isinstance(module, nn.Linear):
-                                # 获取原始线性层的参数
+                                # Get original linear layer parameters
                                in_features = module.in_features
                                out_features = module.out_features
                                bias = module.bias is not None
-                                
-                                # 创建8bit线性层
+
+                                # Create 8bit linear layer
                                # print size
                                print(f"in_features: {in_features}, out_features: {out_features}")
                                new_module = bnb.nn.Linear8bitLt(
-                                    in_features, 
-                                    out_features, 
-                                    bias=bias, 
-                                    has_fp16_weights=False
+                                    in_features, out_features, bias=bias, has_fp16_weights=False
                                )
-                                
-                                # 复制权重和偏置
+
+                                # Copy weights and bias
                                new_module.weight.data = module.weight.data
                                if bias:
                                    new_module.bias.data = module.bias.data
-                                    
-                                # 替换模块
+
+                                # Replace module
                                setattr(model, name, new_module)
                            else:
-                                # 递归处理子模块
+                                # Process child modules recursively
                                replace_linear_with_linear8bitlt(module)
-                        
+
                        return model
-                    
-                    # 替换所有线性层
+
+                    # Replace all linear layers
                    model = replace_linear_with_linear8bitlt(model)
                    # add torch compile
                    model = torch.compile(model)
-                    
-                    # 将模型移到GPU（量化发生在这里）
-                    device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+
+                    # Move model to GPU (quantization happens here)
+                    device = (
+                        "cuda"
+                        if torch.cuda.is_available()
+                        else "mps"
+                        if torch.backends.mps.is_available()
+                        else "cpu"
+                    )
                    model = model.to(device)
-                    
+
                    print("- All linear layers replaced with Linear8bitLt")
-                    
+
                else:
-                    # 使用原来的Int4量化方法
+                    # Use original Int4 quantization method
                    print("- Using bitsandbytes for Int4 quantization")
-                    
+
                    # Create quantization config
-                    
+
                    compute_dtype = torch.float16 if self.config.use_fp16 else torch.float32
                    quantization_config = BitsAndBytesConfig(
                        load_in_4bit=True,
                        bnb_4bit_compute_dtype=compute_dtype,
                        bnb_4bit_use_double_quant=True,
-                        bnb_4bit_quant_type="nf4"
+                        bnb_4bit_quant_type="nf4",
                    )
-                    
+
                    print("- Quantization config:", quantization_config)
-                    
+
                    # Load model directly with quantization config
                    model = AutoModel.from_pretrained(
                        self.config.model_path,
                        quantization_config=quantization_config,
                        torch_dtype=compute_dtype,
-                        device_map="auto"  # Let HF decide on device mapping
+                        device_map="auto",  # Let HF decide on device mapping
                    )
-                
+
                # Check if model loaded successfully
                if model is None:
                    raise ValueError("Model loading returned None")
-                    
+
                print(f"- Model type: {type(model)}")
-                
+
                # Apply optimizations directly here
                print("\nApplying model optimizations:")
-                
-                if hasattr(self.config, 'use_linear8bitlt') and self.config.use_linear8bitlt:
+
+                if hasattr(self.config, "use_linear8bitlt") and self.config.use_linear8bitlt:
                    print("- Model moved to GPU with Linear8bitLt quantization")
                else:
                    # Skip moving to GPU since device_map="auto" already did that
                    print("- Model already on GPU due to device_map='auto'")
-                
+
                # Skip FP16 conversion since we specified compute_dtype
                print(f"- Using {compute_dtype} for compute dtype")
-                
+
                # Check CUDA and SDPA
-                if torch.cuda.is_available() and torch.version.cuda and float(torch.version.cuda[:3]) >= 11.6:
-                    if hasattr(torch.nn.functional, 'scaled_dot_product_attention'):
+                if (
+                    torch.cuda.is_available()
+                    and torch.version.cuda
+                    and float(torch.version.cuda[:3]) >= 11.6
+                ):
+                    if hasattr(torch.nn.functional, "scaled_dot_product_attention"):
                        print("- Using PyTorch SDPA (scaled_dot_product_attention)")
                    else:
                        print("- PyTorch SDPA not available")
-                
+
                # Try xformers if available (only on CUDA)
                if torch.cuda.is_available():
                    try:
-                        from xformers.ops import memory_efficient_attention
-                        if hasattr(model, 'enable_xformers_memory_efficient_attention'):
+                        from xformers.ops import memory_efficient_attention  # noqa: F401
+
+                        if hasattr(model, "enable_xformers_memory_efficient_attention"):
                            model.enable_xformers_memory_efficient_attention()
                            print("- Enabled xformers memory efficient attention")
                        else:
                            print("- Model doesn't support xformers")
                    except (ImportError, AttributeError):
                        print("- Xformers not available")
-                
+
                # Set to eval mode
                model.eval()
                print("- Model set to eval mode")
@@ -365,76 +373,79 @@ class Benchmark:
                    llm_int8_threshold=6.0,
                    llm_int8_has_fp16_weight=False,
                )
-                
+
                model = AutoModel.from_pretrained(
                    self.config.model_path,
                    quantization_config=quantization_config,
                    torch_dtype=compute_dtype,
-                    device_map="auto"
+                    device_map="auto",
                )
-                
+
                if model is None:
                    raise ValueError("Model loading returned None")
-                    
+
                print(f"- Model type: {type(model)}")
                model.eval()
                print("- Model set to eval mode")
-                
+
            else:
                # Standard loading for FP16/FP32
                model = AutoModel.from_pretrained(self.config.model_path)
                print("- Model loaded in standard precision")
                print(f"- Model type: {type(model)}")
-                
+
                # Apply standard optimizations
                # set default to half
                import torch
+
                torch.set_default_dtype(torch.bfloat16)
                model = ModelOptimizer.optimize(model, self.config)
                model = model.half()
                # add torch compile
                model = torch.compile(model)
-            
+
            # Final check to ensure model is not None
            if model is None:
                raise ValueError("Model is None after optimization")
-                
+
            print(f"- Final model type: {type(model)}")
            return model
-        
+
        except Exception as e:
-            print(f"ERROR loading model: {str(e)}")
+            print(f"ERROR loading model: {e!s}")
            import traceback
+
            traceback.print_exc()
            raise
-    
+
    def _create_random_batch(self, batch_size: int) -> torch.Tensor:
-        device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
-        return torch.randint(
-            0, 1000,
-            (batch_size, self.config.seq_length),
-            device=device,
-            dtype=torch.long
+        device = (
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if torch.backends.mps.is_available()
+            else "cpu"
        )
-    
+        return torch.randint(
+            0, 1000, (batch_size, self.config.seq_length), device=device, dtype=torch.long
+        )
+
    def _run_inference(
-        self,
-        input_ids: torch.Tensor,
-        graph_wrapper: Optional[GraphWrapper] = None
-    ) -> Tuple[float, torch.Tensor]:
+        self, input_ids: torch.Tensor, graph_wrapper: GraphWrapper | None = None
+    ) -> tuple[float, torch.Tensor]:
        attention_mask = torch.ones_like(input_ids)
-        
+
        with torch.no_grad(), self.timer.timing():
            if graph_wrapper is not None:
                output = graph_wrapper(input_ids, attention_mask)
            else:
                output = self.model(input_ids=input_ids, attention_mask=attention_mask)
-        
+
        return self.timer.elapsed_time(), output
-    
-    def run(self) -> Dict[int, Dict[str, float]]:
+
+    def run(self) -> dict[int, dict[str, float]]:
        results = {}
-        
+
        # Reset peak memory stats
        if torch.cuda.is_available():
            torch.cuda.reset_peak_memory_stats()
@@ -443,22 +454,20 @@ class Benchmark:
            pass
        else:
            print("- No GPU memory stats available")
-        
+
        for batch_size in self.config.batch_sizes:
            print(f"\nTesting batch size: {batch_size}")
            times = []
-            
+
            # Get or create graph for this batch size
            graph_wrapper = (
-                self.graphs.get_or_create(batch_size)
-                if self.graphs is not None
-                else None
+                self.graphs.get_or_create(batch_size) if self.graphs is not None else None
            )
-            
+
            # Pre-allocate input tensor
            input_ids = self._create_random_batch(batch_size)
            print(f"Input shape: {input_ids.shape}")
-            
+
            # Run benchmark
            for i in tqdm(range(self.config.num_runs), desc=f"Batch size {batch_size}"):
                try:
@@ -469,44 +478,44 @@ class Benchmark:
                except Exception as e:
                    print(f"Error during inference: {e}")
                    break
-            
+
            if not times:
                print(f"No successful runs for batch size {batch_size}, skipping")
                continue
-                
+
            # Calculate statistics
            avg_time = np.mean(times)
            std_time = np.std(times)
            throughput = batch_size / avg_time
-            
+
            results[batch_size] = {
                "avg_time": avg_time,
                "std_time": std_time,
                "throughput": throughput,
            }
-            
+
            print(f"Avg Time: {avg_time:.4f}s ± {std_time:.4f}s")
            print(f"Throughput: {throughput:.2f} sequences/second")
-        
+
        # Log memory usage
        if torch.cuda.is_available():
-            peak_memory_gb = torch.cuda.max_memory_allocated() / (1024 ** 3)
+            peak_memory_gb = torch.cuda.max_memory_allocated() / (1024**3)
        elif torch.backends.mps.is_available():
            # MPS doesn't have max_memory_allocated, use 0
            peak_memory_gb = 0.0
        else:
            peak_memory_gb = 0.0
            print("- No GPU memory usage available")
-        
+
        if peak_memory_gb > 0:
            print(f"\nPeak GPU memory usage: {peak_memory_gb:.2f} GB")
        else:
            print("\n- GPU memory usage not available")
-        
+
        # Add memory info to results
        for batch_size in results:
            results[batch_size]["peak_memory_gb"] = peak_memory_gb
-        
+
        return results


@@ -566,14 +575,14 @@ def main():
        action="store_true",
        help="Enable Linear8bitLt quantization for all linear layers",
    )
-    
+
    args = parser.parse_args()
-    
+
    # Print arguments for debugging
    print("\nCommand line arguments:")
    for arg, value in vars(args).items():
        print(f"- {arg}: {value}")
-    
+
    config = BenchmarkConfig(
        model_path=args.model_path,
        batch_sizes=[int(bs) for bs in args.batch_sizes.split(",")],
@@ -586,45 +595,56 @@ def main():
        use_flash_attention=args.use_flash_attention,
        use_linear8bitlt=args.use_linear8bitlt,
    )
-        
+
    # Print configuration for debugging
    print("\nBenchmark configuration:")
    for field, value in vars(config).items():
        print(f"- {field}: {value}")
-    
+
    try:
        benchmark = Benchmark(config)
        results = benchmark.run()
-        
+
        # Save results to file
        import json
        import os
-        
+
        # Create results directory if it doesn't exist
        os.makedirs("results", exist_ok=True)
-        
+
        # Generate filename based on configuration
-        precision_type = "int4" if config.use_int4 else "int8" if config.use_int8 else "fp16" if config.use_fp16 else "fp32"
+        precision_type = (
+            "int4"
+            if config.use_int4
+            else "int8"
+            if config.use_int8
+            else "fp16"
+            if config.use_fp16
+            else "fp32"
+        )
        model_name = os.path.basename(config.model_path)
        output_file = f"results/benchmark_{model_name}_{precision_type}.json"
-        
+
        # Save results
        with open(output_file, "w") as f:
            json.dump(
                {
-                    "config": {k: str(v) if isinstance(v, list) else v for k, v in vars(config).items()},
-                    "results": {str(k): v for k, v in results.items()}
-                }, 
-                f, 
-                indent=2
+                    "config": {
+                        k: str(v) if isinstance(v, list) else v for k, v in vars(config).items()
+                    },
+                    "results": {str(k): v for k, v in results.items()},
+                },
+                f,
+                indent=2,
            )
        print(f"Results saved to {output_file}")
-        
+
    except Exception as e:
        print(f"Benchmark failed: {e}")
        import traceback
+
        traceback.print_exc()


 if __name__ == "__main__":
-    main()
+    main()
--- a/test/query_saved_index.py
+++ b/test/query_saved_index.py
@@ -1,37 +1,39 @@
 import os
-from llama_index.core import VectorStoreIndex, StorageContext
+
+from llama_index.core import StorageContext, VectorStoreIndex
+

 def load_index(save_dir: str = "mail_index"):
    """
    Load the saved index from disk.
-    
+
    Args:
        save_dir: Directory where the index is saved
-    
+
    Returns:
        Loaded index or None if loading fails
    """
    try:
        # Load storage context
        storage_context = StorageContext.from_defaults(persist_dir=save_dir)
-        
+
        # Load index
        index = VectorStoreIndex.from_vector_store(
-            storage_context.vector_store,
-            storage_context=storage_context
+            storage_context.vector_store, storage_context=storage_context
        )
-        
+
        print(f"Index loaded from {save_dir}")
        return index
-    
+
    except Exception as e:
        print(f"Error loading index: {e}")
        return None

+
 def query_index(index, query: str):
    """
    Query the loaded index.
-    
+
    Args:
        index: The loaded index
        query: The query string
@@ -39,44 +41,47 @@ def query_index(index, query: str):
    if index is None:
        print("No index available for querying.")
        return
-    
+
    query_engine = index.as_query_engine()
    response = query_engine.query(query)
    print(f"\nQuery: {query}")
    print(f"Response: {response}")

+
 def main():
    save_dir = "mail_index"
-    
+
    # Check if index exists
-    if not os.path.exists(save_dir) or not os.path.exists(os.path.join(save_dir, "vector_store.json")):
+    if not os.path.exists(save_dir) or not os.path.exists(
+        os.path.join(save_dir, "vector_store.json")
+    ):
        print(f"Index not found in {save_dir}")
        print("Please run mail_reader_save_load.py first to create the index.")
        return
-    
+
    # Load the index
    index = load_index(save_dir)
-    
+
    if not index:
        print("Failed to load index.")
        return
-    
-    print("\n" + "="*60)
+
+    print("\n" + "=" * 60)
    print("Email Query Interface")
-    print("="*60)
+    print("=" * 60)
    print("Type 'quit' to exit")
    print("Type 'help' for example queries")
-    print("="*60)
-    
+    print("=" * 60)
+
    # Interactive query loop
    while True:
        try:
            query = input("\nEnter your query: ").strip()
-            
-            if query.lower() == 'quit':
+
+            if query.lower() == "quit":
                print("Goodbye!")
                break
-            elif query.lower() == 'help':
+            elif query.lower() == "help":
                print("\nExample queries:")
                print("- Hows Berkeley Graduate Student Instructor")
                print("- What emails mention GSR appointments?")
@@ -86,14 +91,15 @@ def main():
                continue
            elif not query:
                continue
-            
+
            query_index(index, query)
-            
+
        except KeyboardInterrupt:
            print("\nGoodbye!")
            break
        except Exception as e:
            print(f"Error processing query: {e}")

+
 if __name__ == "__main__":
-    main() 
+    main()
--- a/test/sanity_checks/benchmark_embeddings.py
+++ b/test/sanity_checks/benchmark_embeddings.py
@@ -1,43 +1,46 @@
 import time
-import numpy as np
+
 import matplotlib.pyplot as plt
-import torch
-from sentence_transformers import SentenceTransformer
 import mlx.core as mx
+import numpy as np
+import torch
 from mlx_lm import load
+from sentence_transformers import SentenceTransformer

 # --- Configuration ---
 MODEL_NAME_TORCH = "Qwen/Qwen3-Embedding-0.6B"
 MODEL_NAME_MLX = "mlx-community/Qwen3-Embedding-0.6B-4bit-DWQ"
 BATCH_SIZES = [1, 8, 16, 32, 64, 128]
 NUM_RUNS = 10  # Number of runs to average for each batch size
-WARMUP_RUNS = 2 # Number of warm-up runs
+WARMUP_RUNS = 2  # Number of warm-up runs

 # --- Generate Dummy Data ---
 DUMMY_SENTENCES = ["This is a test sentence for benchmarking." * 5] * max(BATCH_SIZES)

 # --- Benchmark Functions ---b

+
 def benchmark_torch(model, sentences):
    start_time = time.time()
    model.encode(sentences, convert_to_numpy=True)
    end_time = time.time()
    return (end_time - start_time) * 1000  # Return time in ms

+
 def benchmark_mlx(model, tokenizer, sentences):
    start_time = time.time()
-    
+
    # Tokenize sentences using MLX tokenizer
    tokens = []
    for sentence in sentences:
        token_ids = tokenizer.encode(sentence)
        tokens.append(token_ids)
-    
+
    # Pad sequences to the same length
    max_len = max(len(t) for t in tokens)
    input_ids = []
    attention_mask = []
-    
+
    for token_seq in tokens:
        # Pad sequence
        padded = token_seq + [tokenizer.eos_token_id] * (max_len - len(token_seq))
@@ -45,24 +48,25 @@ def benchmark_mlx(model, tokenizer, sentences):
        # Create attention mask (1 for real tokens, 0 for padding)
        mask = [1] * len(token_seq) + [0] * (max_len - len(token_seq))
        attention_mask.append(mask)
-    
+
    # Convert to MLX arrays
    input_ids = mx.array(input_ids)
    attention_mask = mx.array(attention_mask)
-    
+
    # Get embeddings
    embeddings = model(input_ids)
-    
+
    # Mean pooling
    mask = mx.expand_dims(attention_mask, -1)
    sum_embeddings = (embeddings * mask).sum(axis=1)
    sum_mask = mask.sum(axis=1)
    _ = sum_embeddings / sum_mask
-    
+
    mx.eval()  # Ensure computation is finished
    end_time = time.time()
    return (end_time - start_time) * 1000  # Return time in ms

+
 # --- Main Execution ---
 def main():
    print("--- Initializing Models ---")
@@ -92,13 +96,15 @@ def main():
    for batch_size in BATCH_SIZES:
        print(f"Benchmarking batch size: {batch_size}")
        sentences_batch = DUMMY_SENTENCES[:batch_size]
-        
+
        # Benchmark PyTorch
        torch_times = [benchmark_torch(model_torch, sentences_batch) for _ in range(NUM_RUNS)]
        results_torch.append(np.mean(torch_times))
-        
+
        # Benchmark MLX
-        mlx_times = [benchmark_mlx(model_mlx, tokenizer_mlx, sentences_batch) for _ in range(NUM_RUNS)]
+        mlx_times = [
+            benchmark_mlx(model_mlx, tokenizer_mlx, sentences_batch) for _ in range(NUM_RUNS)
+        ]
        results_mlx.append(np.mean(mlx_times))

    print("\n--- Benchmark Results (Average time per batch in ms) ---")
@@ -109,20 +115,21 @@ def main():
    # --- Plotting ---
    print("\n--- Generating Plot ---")
    plt.figure(figsize=(10, 6))
-    plt.plot(BATCH_SIZES, results_torch, marker='o', linestyle='-', label=f'PyTorch ({device})')
-    plt.plot(BATCH_SIZES, results_mlx, marker='s', linestyle='-', label='MLX')
+    plt.plot(BATCH_SIZES, results_torch, marker="o", linestyle="-", label=f"PyTorch ({device})")
+    plt.plot(BATCH_SIZES, results_mlx, marker="s", linestyle="-", label="MLX")

-    plt.title(f'Embedding Performance: MLX vs PyTorch\nModel: {MODEL_NAME_TORCH}')
+    plt.title(f"Embedding Performance: MLX vs PyTorch\nModel: {MODEL_NAME_TORCH}")
    plt.xlabel("Batch Size")
    plt.ylabel("Average Time per Batch (ms)")
    plt.xticks(BATCH_SIZES)
    plt.grid(True)
    plt.legend()
-    
+
    # Save the plot
    output_filename = "embedding_benchmark.png"
    plt.savefig(output_filename)
    print(f"Plot saved to {output_filename}")

+
 if __name__ == "__main__":
    main()
--- a/test/sanity_checks/debug_zmq_issue.py
+++ b/test/sanity_checks/debug_zmq_issue.py
@@ -3,49 +3,52 @@
 Debug script to test ZMQ communication with the exact same setup as main_cli_example.py
 """

-import zmq
-import time
-import threading
 import sys
-sys.path.append('packages/leann-backend-diskann')
+import time
+
+import zmq
+
+sys.path.append("packages/leann-backend-diskann")
 from leann_backend_diskann import embedding_pb2

+
 def test_zmq_with_same_model():
    print("=== Testing ZMQ with same model as main_cli_example.py ===")
-    
+
    # Test the exact same model that main_cli_example.py uses
    model_name = "sentence-transformers/all-mpnet-base-v2"
-    
+
    # Start server with the same model
    import subprocess
+
    server_cmd = [
-        sys.executable, "-m", 
+        sys.executable,
+        "-m",
        "packages.leann-backend-diskann.leann_backend_diskann.embedding_server",
-        "--zmq-port", "5556",  # Use different port to avoid conflicts
-        "--model-name", model_name
+        "--zmq-port",
+        "5556",  # Use different port to avoid conflicts
+        "--model-name",
+        model_name,
    ]
-    
+
    print(f"Starting server with command: {' '.join(server_cmd)}")
    server_process = subprocess.Popen(
-        server_cmd,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True
+        server_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
-    
+
    # Wait for server to start
    print("Waiting for server to start...")
    time.sleep(10)
-    
+
    # Check if server is running
    if server_process.poll() is not None:
        stdout, stderr = server_process.communicate()
        print(f"Server failed to start. stdout: {stdout}")
        print(f"Server failed to start. stderr: {stderr}")
        return False
-    
+
    print(f"Server started with PID: {server_process.pid}")
-    
+
    try:
        # Test client
        context = zmq.Context()
@@ -53,39 +56,39 @@ def test_zmq_with_same_model():
        socket.connect("tcp://127.0.0.1:5556")
        socket.setsockopt(zmq.RCVTIMEO, 30000)  # 30 second timeout like C++
        socket.setsockopt(zmq.SNDTIMEO, 30000)
-        
+
        # Create request with same format as C++
        request = embedding_pb2.NodeEmbeddingRequest()
        request.node_ids.extend([0, 1, 2, 3, 4])  # Test with some node IDs
-        
+
        print(f"Sending request with {len(request.node_ids)} node IDs...")
        start_time = time.time()
-        
+
        # Send request
        socket.send(request.SerializeToString())
-        
+
        # Receive response
        response_data = socket.recv()
        end_time = time.time()
-        
+
        print(f"Received response in {end_time - start_time:.3f} seconds")
        print(f"Response size: {len(response_data)} bytes")
-        
+
        # Parse response
        response = embedding_pb2.NodeEmbeddingResponse()
        response.ParseFromString(response_data)
-        
+
        print(f"Response dimensions: {list(response.dimensions)}")
        print(f"Embeddings data size: {len(response.embeddings_data)} bytes")
        print(f"Missing IDs: {list(response.missing_ids)}")
-        
+
        # Calculate expected size
        if len(response.dimensions) == 2:
            batch_size = response.dimensions[0]
            embedding_dim = response.dimensions[1]
            expected_bytes = batch_size * embedding_dim * 4  # 4 bytes per float
            print(f"Expected bytes: {expected_bytes}, Actual: {len(response.embeddings_data)}")
-            
+
            if len(response.embeddings_data) == expected_bytes:
                print("✅ Response format is correct!")
                return True
@@ -95,7 +98,7 @@ def test_zmq_with_same_model():
        else:
            print("❌ Invalid response dimensions!")
            return False
-            
+
    except Exception as e:
        print(f"❌ Error during ZMQ test: {e}")
        return False
@@ -105,9 +108,10 @@ def test_zmq_with_same_model():
        server_process.wait()
        print("Server terminated")

+
 if __name__ == "__main__":
    success = test_zmq_with_same_model()
    if success:
        print("\n✅ ZMQ communication test passed!")
    else:
-        print("\n❌ ZMQ communication test failed!") 
+        print("\n❌ ZMQ communication test failed!")
--- a/test/simple_mac_tpt_test.py
+++ b/test/simple_mac_tpt_test.py
@@ -1,26 +1,27 @@
 import time
 from dataclasses import dataclass
-from typing import Dict, List

 import numpy as np
 import torch
 from torch import nn
-from transformers import AutoModel, BitsAndBytesConfig
 from tqdm import tqdm
+from transformers import AutoModel

 # Add MLX imports
 try:
    import mlx.core as mx
    from mlx_lm.utils import load
+
    MLX_AVAILABLE = True
-except ImportError as e:
+except ImportError:
    print("MLX not available. Install with: uv pip install mlx mlx-lm")
    MLX_AVAILABLE = False

+
@dataclass
 class BenchmarkConfig:
    model_path: str = "facebook/contriever"
-    batch_sizes: List[int] = None
+    batch_sizes: list[int] = None
    seq_length: int = 256
    num_runs: int = 5
    use_fp16: bool = True
@@ -30,18 +31,19 @@ class BenchmarkConfig:
    use_flash_attention: bool = False
    use_linear8bitlt: bool = False
    use_mlx: bool = False  # New flag for MLX testing
-    
+
    def __post_init__(self):
        if self.batch_sizes is None:
            self.batch_sizes = [1, 2, 4, 8, 16, 32, 64]

+
 class MLXBenchmark:
    """MLX-specific benchmark for embedding models"""
-    
+
    def __init__(self, config: BenchmarkConfig):
        self.config = config
        self.model, self.tokenizer = self._load_model()
-    
+
    def _load_model(self):
        """Load MLX model and tokenizer following the API pattern"""
        print(f"Loading MLX model from {self.config.model_path}...")
@@ -52,55 +54,51 @@ class MLXBenchmark:
        except Exception as e:
            print(f"Error loading MLX model: {e}")
            raise
-    
+
    def _create_random_batch(self, batch_size: int):
        """Create random input batches for MLX testing - same as PyTorch"""
-        return torch.randint(
-            0, 1000,
-            (batch_size, self.config.seq_length),
-            dtype=torch.long
-        )
-    
+        return torch.randint(0, 1000, (batch_size, self.config.seq_length), dtype=torch.long)
+
    def _run_inference(self, input_ids: torch.Tensor) -> float:
        """Run MLX inference with same input as PyTorch"""
        start_time = time.time()
        try:
            # Convert PyTorch tensor to MLX array
            input_ids_mlx = mx.array(input_ids.numpy())
-            
+
            # Get embeddings
            embeddings = self.model(input_ids_mlx)
-            
+
            # Mean pooling (following the API pattern)
            pooled = embeddings.mean(axis=1)
-            
+
            # Convert to numpy (following the API pattern)
            pooled_numpy = np.array(pooled.tolist(), dtype=np.float32)
-            
+
            # Force computation
            _ = pooled_numpy.shape
-            
+
        except Exception as e:
            print(f"MLX inference error: {e}")
-            return float('inf')
+            return float("inf")
        end_time = time.time()
-        
+
        return end_time - start_time
-    
-    def run(self) -> Dict[int, Dict[str, float]]:
+
+    def run(self) -> dict[int, dict[str, float]]:
        """Run the MLX benchmark across all batch sizes"""
        results = {}
-        
+
        print(f"Starting MLX benchmark with model: {self.config.model_path}")
        print(f"Testing batch sizes: {self.config.batch_sizes}")
-        
+
        for batch_size in self.config.batch_sizes:
            print(f"\n=== Testing MLX batch size: {batch_size} ===")
            times = []
-            
+
            # Create input batch (same as PyTorch)
            input_ids = self._create_random_batch(batch_size)
-            
+
            # Warm up
            print("Warming up...")
            for _ in range(3):
@@ -109,26 +107,26 @@ class MLXBenchmark:
                except Exception as e:
                    print(f"Warmup error: {e}")
                    break
-            
+
            # Run benchmark
-            for i in tqdm(range(self.config.num_runs), desc=f"MLX Batch size {batch_size}"):
+            for _i in tqdm(range(self.config.num_runs), desc=f"MLX Batch size {batch_size}"):
                try:
                    elapsed_time = self._run_inference(input_ids)
-                    if elapsed_time != float('inf'):
+                    if elapsed_time != float("inf"):
                        times.append(elapsed_time)
                except Exception as e:
                    print(f"Error during MLX inference: {e}")
                    break
-            
+
            if not times:
                print(f"Skipping batch size {batch_size} due to errors")
                continue
-                
+
            # Calculate statistics
            avg_time = np.mean(times)
            std_time = np.std(times)
            throughput = batch_size / avg_time
-            
+
            results[batch_size] = {
                "avg_time": avg_time,
                "std_time": std_time,
@@ -136,179 +134,166 @@ class MLXBenchmark:
                "min_time": np.min(times),
                "max_time": np.max(times),
            }
-            
+
            print(f"MLX Results for batch size {batch_size}:")
            print(f"  Avg Time: {avg_time:.4f}s ± {std_time:.4f}s")
            print(f"  Min Time: {np.min(times):.4f}s")
            print(f"  Max Time: {np.max(times):.4f}s")
            print(f"  Throughput: {throughput:.2f} sequences/second")
-        
+
        return results

+
 class Benchmark:
    def __init__(self, config: BenchmarkConfig):
        self.config = config
-        self.device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+        self.device = (
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if torch.backends.mps.is_available()
+            else "cpu"
+        )
        self.model = self._load_model()
-    
+
    def _load_model(self) -> nn.Module:
        print(f"Loading model from {self.config.model_path}...")
-        
-        
+
        model = AutoModel.from_pretrained(self.config.model_path)
        if self.config.use_fp16:
            model = model.half()
        model = torch.compile(model)
        model = model.to(self.device)
-        
+
        model.eval()
        return model
-    
+
    def _create_random_batch(self, batch_size: int) -> torch.Tensor:
        return torch.randint(
-            0, 1000,
-            (batch_size, self.config.seq_length),
-            device=self.device,
-            dtype=torch.long
+            0, 1000, (batch_size, self.config.seq_length), device=self.device, dtype=torch.long
        )
-    
+
    def _run_inference(self, input_ids: torch.Tensor) -> float:
        attention_mask = torch.ones_like(input_ids)
-        
+
        start_time = time.time()
        with torch.no_grad():
-            output = self.model(input_ids=input_ids, attention_mask=attention_mask)
+            self.model(input_ids=input_ids, attention_mask=attention_mask)
        end_time = time.time()
-        
+
        return end_time - start_time
-    
-    def run(self) -> Dict[int, Dict[str, float]]:
+
+    def run(self) -> dict[int, dict[str, float]]:
        results = {}
-        
+
        if torch.cuda.is_available():
            torch.cuda.reset_peak_memory_stats()
-        
+
        for batch_size in self.config.batch_sizes:
            print(f"\nTesting batch size: {batch_size}")
            times = []
-            
+
            input_ids = self._create_random_batch(batch_size)
-            
-            for i in tqdm(range(self.config.num_runs), desc=f"Batch size {batch_size}"):
+
+            for _i in tqdm(range(self.config.num_runs), desc=f"Batch size {batch_size}"):
                try:
                    elapsed_time = self._run_inference(input_ids)
                    times.append(elapsed_time)
                except Exception as e:
                    print(f"Error during inference: {e}")
                    break
-            
+
            if not times:
                continue
-                
+
            avg_time = np.mean(times)
            std_time = np.std(times)
            throughput = batch_size / avg_time
-            
+
            results[batch_size] = {
                "avg_time": avg_time,
                "std_time": std_time,
                "throughput": throughput,
            }
-            
+
            print(f"Avg Time: {avg_time:.4f}s ± {std_time:.4f}s")
            print(f"Throughput: {throughput:.2f} sequences/second")
-        
+
        if torch.cuda.is_available():
-            peak_memory_gb = torch.cuda.max_memory_allocated() / (1024 ** 3)
+            peak_memory_gb = torch.cuda.max_memory_allocated() / (1024**3)
        else:
            peak_memory_gb = 0.0
-        
+
        for batch_size in results:
            results[batch_size]["peak_memory_gb"] = peak_memory_gb
-        
+
        return results

+
 def run_benchmark():
    """Main function to run the benchmark with optimized parameters."""
    config = BenchmarkConfig()
-    
+
    try:
        benchmark = Benchmark(config)
        results = benchmark.run()
-        
+
        max_throughput = max(results[batch_size]["throughput"] for batch_size in results)
        avg_throughput = np.mean([results[batch_size]["throughput"] for batch_size in results])
-        
+
        return {
            "max_throughput": max_throughput,
            "avg_throughput": avg_throughput,
-            "results": results
+            "results": results,
        }
-        
+
    except Exception as e:
        print(f"Benchmark failed: {e}")
-        return {
-            "max_throughput": 0.0,
-            "avg_throughput": 0.0,
-            "error": str(e)
-        }
+        return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": str(e)}
+

 def run_mlx_benchmark():
    """Run MLX-specific benchmark"""
    if not MLX_AVAILABLE:
        print("MLX not available, skipping MLX benchmark")
-        return {
-            "max_throughput": 0.0,
-            "avg_throughput": 0.0,
-            "error": "MLX not available"
-        }
-    
-    config = BenchmarkConfig(
-        model_path="mlx-community/all-MiniLM-L6-v2-4bit",
-        use_mlx=True
-    )
-    
+        return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": "MLX not available"}
+
+    config = BenchmarkConfig(model_path="mlx-community/all-MiniLM-L6-v2-4bit", use_mlx=True)
+
    try:
        benchmark = MLXBenchmark(config)
        results = benchmark.run()
-        
+
        if not results:
-            return {
-                "max_throughput": 0.0,
-                "avg_throughput": 0.0,
-                "error": "No valid results"
-            }
-        
+            return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": "No valid results"}
+
        max_throughput = max(results[batch_size]["throughput"] for batch_size in results)
        avg_throughput = np.mean([results[batch_size]["throughput"] for batch_size in results])
-        
+
        return {
            "max_throughput": max_throughput,
            "avg_throughput": avg_throughput,
-            "results": results
+            "results": results,
        }
-        
+
    except Exception as e:
        print(f"MLX benchmark failed: {e}")
-        return {
-            "max_throughput": 0.0,
-            "avg_throughput": 0.0,
-            "error": str(e)
-        }
+        return {"max_throughput": 0.0, "avg_throughput": 0.0, "error": str(e)}
+

 if __name__ == "__main__":
    print("=== PyTorch Benchmark ===")
    pytorch_result = run_benchmark()
    print(f"PyTorch Max throughput: {pytorch_result['max_throughput']:.2f} sequences/second")
    print(f"PyTorch Average throughput: {pytorch_result['avg_throughput']:.2f} sequences/second")
-    
+
    print("\n=== MLX Benchmark ===")
    mlx_result = run_mlx_benchmark()
    print(f"MLX Max throughput: {mlx_result['max_throughput']:.2f} sequences/second")
    print(f"MLX Average throughput: {mlx_result['avg_throughput']:.2f} sequences/second")
-    
+
    # Compare results
-    if pytorch_result['max_throughput'] > 0 and mlx_result['max_throughput'] > 0:
-        speedup = mlx_result['max_throughput'] / pytorch_result['max_throughput']
-        print(f"\n=== Comparison ===")
-        print(f"MLX is {speedup:.2f}x {'faster' if speedup > 1 else 'slower'} than PyTorch") 
+    if pytorch_result["max_throughput"] > 0 and mlx_result["max_throughput"] > 0:
+        speedup = mlx_result["max_throughput"] / pytorch_result["max_throughput"]
+        print("\n=== Comparison ===")
+        print(f"MLX is {speedup:.2f}x {'faster' if speedup > 1 else 'slower'} than PyTorch")
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
GitHub Actions	5d09586853	chore: release v0.1.14	2025-07-27 08:50:56 +00:00
Andy Lee	a7cba078dd	chore: consolidate essential fixes and add pre-commit hooks - Add pre-commit configuration with ruff and black - Fix lint CI job to use uv tool install instead of sync - Add essential LlamaIndex dependencies to leann-core Co-Authored-By: Yichuan Wang <73766326+yichuan-w@users.noreply.github.com>	2025-07-27 01:24:24 -07:00
Andy Lee	b3e9ee96fa	fix: resolve all ruff linting errors and add lint CI check - Fix ambiguous fullwidth characters (commas, parentheses) in strings and comments - Replace Chinese comments with English equivalents - Fix unused imports with proper noqa annotations for intentional imports - Fix bare except clauses with specific exception types - Fix redefined variables and undefined names - Add ruff noqa annotations for generated protobuf files - Add lint and format check to GitHub Actions CI pipeline	2025-07-26 22:38:13 -07:00
yichuan520030910320	8537a6b17e	default args change	2025-07-26 21:51:14 -07:00
yichuan520030910320	7c8d7dc5c2	tones down	2025-07-26 21:47:55 -07:00
yichuan520030910320	8e23d663e6	Merge branch 'main' of https://github.com/yichuan-w/LEANN	2025-07-26 21:46:02 -07:00
yichuan520030910320	8a3994bf80	update colab now it works perfect	2025-07-26 21:45:56 -07:00
GitHub Actions	8375f601ba	chore: release v0.1.13	2025-07-27 01:08:17 +00:00
yichuan520030910320	c87c0fe662	update colab install & fix colab path	2025-07-26 18:07:31 -07:00
yichuan520030910320	73927b68ef	Merge branch 'main' of https://github.com/yichuan-w/LEANN	2025-07-26 17:09:55 -07:00
yichuan520030910320	cc1a62e5aa	update pytoml version again	2025-07-26 17:09:45 -07:00