Merge remote-tracking branch 'origin/main' into feature/claude-code-research

feat: Add Claude Code integration with MCP server
feat: Claude Code integration ready - LEANN CLI works out of the box
2025-08-05 23:02:00 -07:00 · 2025-08-05 14:03:36 -07:00 · 2025-08-05 12:27:58 -07:00 · 2025-08-04 20:10:14 -07:00 · 2025-08-04 20:01:23 -07:00 · 2025-08-04 19:29:17 -07:00
31 changed files with 3964 additions and 5223 deletions
@@ -54,36 +54,16 @@ jobs:
            python: '3.12'
          - os: ubuntu-22.04
            python: '3.13'
-          - os: macos-14
+          - os: macos-latest
            python: '3.9'
-          - os: macos-14
+          - os: macos-latest
            python: '3.10'
-          - os: macos-14
+          - os: macos-latest
            python: '3.11'
-          - os: macos-14
+          - os: macos-latest
            python: '3.12'
-          - os: macos-14
+          - os: macos-latest
            python: '3.13'
-          - os: macos-15
-            python: '3.9'
-          - os: macos-15
-            python: '3.10'
-          - os: macos-15
-            python: '3.11'
-          - os: macos-15
-            python: '3.12'
-          - os: macos-15
-            python: '3.13'
-          - os: macos-13
-            python: '3.9'
-          - os: macos-13
-            python: '3.10'
-          - os: macos-13
-            python: '3.11'
-          - os: macos-13
-            python: '3.12'
-          # Note: macos-13 + Python 3.13 excluded due to PyTorch compatibility
-          # (PyTorch 2.5+ supports Python 3.13 but not Intel Mac x86_64)
    runs-on: ${{ matrix.os }}

    steps:
@@ -129,73 +109,48 @@ jobs:
            uv pip install --system delocate
          fi

-      - name: Set macOS environment variables
-        if: runner.os == 'macOS'
-        run: |
-          # Use brew --prefix to automatically detect Homebrew installation path
-          HOMEBREW_PREFIX=$(brew --prefix)
-          echo "HOMEBREW_PREFIX=${HOMEBREW_PREFIX}" >> $GITHUB_ENV
-          echo "OpenMP_ROOT=${HOMEBREW_PREFIX}/opt/libomp" >> $GITHUB_ENV
-
-          # Set CMAKE_PREFIX_PATH to let CMake find all packages automatically
-          echo "CMAKE_PREFIX_PATH=${HOMEBREW_PREFIX}" >> $GITHUB_ENV
-
-          # Set compiler flags for OpenMP (required for both backends)
-          echo "LDFLAGS=-L${HOMEBREW_PREFIX}/opt/libomp/lib" >> $GITHUB_ENV
-          echo "CPPFLAGS=-I${HOMEBREW_PREFIX}/opt/libomp/include" >> $GITHUB_ENV
-
      - name: Build packages
        run: |
          # Build core (platform independent)
-          cd packages/leann-core
-          uv build
-          cd ../..
+          if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
+            cd packages/leann-core
+            uv build
+            cd ../..
+          fi

          # Build HNSW backend
          cd packages/leann-backend-hnsw
-          if [[ "${{ matrix.os }}" == macos-* ]]; then
-            # Use system clang for better compatibility
+          if [ "${{ matrix.os }}" == "macos-latest" ]; then
+            # Use system clang instead of Homebrew LLVM for better compatibility
            export CC=clang
            export CXX=clang++
-            # Homebrew libraries on each macOS version require matching minimum version
-            if [[ "${{ matrix.os }}" == "macos-13" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=13.0
-            elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=14.0
-            elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=15.0
-            fi
-            uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
+            export MACOSX_DEPLOYMENT_TARGET=11.0
+            uv build --wheel --python python
          else
-            uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
+            uv build --wheel --python python
          fi
          cd ../..

          # Build DiskANN backend
          cd packages/leann-backend-diskann
-          if [[ "${{ matrix.os }}" == macos-* ]]; then
-            # Use system clang for better compatibility
+          if [ "${{ matrix.os }}" == "macos-latest" ]; then
+            # Use system clang instead of Homebrew LLVM for better compatibility
            export CC=clang
            export CXX=clang++
            # DiskANN requires macOS 13.3+ for sgesdd_ LAPACK function
-            # But Homebrew libraries on each macOS version require matching minimum version
-            if [[ "${{ matrix.os }}" == "macos-13" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=13.3
-            elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=14.0
-            elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=15.0
-            fi
-            uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
+            export MACOSX_DEPLOYMENT_TARGET=13.3
+            uv build --wheel --python python
          else
-            uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
+            uv build --wheel --python python
          fi
          cd ../..

          # Build meta package (platform independent)
-          cd packages/leann
-          uv build
-          cd ../..
+          if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
+            cd packages/leann
+            uv build
+            cd ../..
+          fi

      - name: Repair wheels (Linux)
        if: runner.os == 'Linux'
@@ -221,24 +176,10 @@ jobs:
      - name: Repair wheels (macOS)
        if: runner.os == 'macOS'
        run: |
-          # Determine deployment target based on runner OS
-          # Must match the Homebrew libraries for each macOS version
-          if [[ "${{ matrix.os }}" == "macos-13" ]]; then
-            HNSW_TARGET="13.0"
-            DISKANN_TARGET="13.3"
-          elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
-            HNSW_TARGET="14.0"
-            DISKANN_TARGET="14.0"
-          elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
-            HNSW_TARGET="15.0"
-            DISKANN_TARGET="15.0"
-          fi
-
          # Repair HNSW wheel
          cd packages/leann-backend-hnsw
          if [ -d dist ]; then
-            export MACOSX_DEPLOYMENT_TARGET=$HNSW_TARGET
-            delocate-wheel -w dist_repaired -v --require-target-macos-version $HNSW_TARGET dist/*.whl
+            delocate-wheel -w dist_repaired -v dist/*.whl
            rm -rf dist
            mv dist_repaired dist
          fi
@@ -247,8 +188,7 @@ jobs:
          # Repair DiskANN wheel
          cd packages/leann-backend-diskann
          if [ -d dist ]; then
-            export MACOSX_DEPLOYMENT_TARGET=$DISKANN_TARGET
-            delocate-wheel -w dist_repaired -v --require-target-macos-version $DISKANN_TARGET dist/*.whl
+            delocate-wheel -w dist_repaired -v dist/*.whl
            rm -rf dist
            mv dist_repaired dist
          fi
@@ -259,18 +199,20 @@ jobs:
          echo "📦 Built packages:"
          find packages/*/dist -name "*.whl" -o -name "*.tar.gz" | sort

-
      - name: Install built packages for testing
        run: |
-          # Create a virtual environment with the correct Python version
-          uv venv --python ${{ matrix.python }}
+          # Create a virtual environment
+          uv venv
          source .venv/bin/activate || source .venv/Scripts/activate

-          # Install packages using --find-links to prioritize local builds
-          uv pip install --find-links packages/leann-core/dist --find-links packages/leann-backend-hnsw/dist --find-links packages/leann-backend-diskann/dist packages/leann-core/dist/*.whl || uv pip install --find-links packages/leann-core/dist packages/leann-core/dist/*.tar.gz
-          uv pip install --find-links packages/leann-core/dist packages/leann-backend-hnsw/dist/*.whl
-          uv pip install --find-links packages/leann-core/dist packages/leann-backend-diskann/dist/*.whl
-          uv pip install packages/leann/dist/*.whl || uv pip install packages/leann/dist/*.tar.gz
+          # Install the built wheels
+          # Use --find-links to let uv choose the correct wheel for the platform
+          if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
+            uv pip install leann-core --find-links packages/leann-core/dist
+            uv pip install leann --find-links packages/leann/dist
+          fi
+          uv pip install leann-backend-hnsw --find-links packages/leann-backend-hnsw/dist
+          uv pip install leann-backend-diskann --find-links packages/leann-backend-diskann/dist

          # Install test dependencies using extras
          uv pip install -e ".[test]"
@@ -288,8 +230,8 @@ jobs:
          # Activate virtual environment
          source .venv/bin/activate || source .venv/Scripts/activate

-          # Run tests
-          pytest -v tests/
+          # Run all tests
+          pytest tests/

      - name: Run sanity checks (optional)
        run: |
@@ -3,11 +3,9 @@
 </p>

 <p align="center">
-  <img src="https://img.shields.io/badge/Python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue.svg" alt="Python Versions">
-  <img src="https://github.com/yichuan-w/LEANN/actions/workflows/build-and-publish.yml/badge.svg" alt="CI Status">
-  <img src="https://img.shields.io/badge/Platform-Ubuntu%20%7C%20macOS%20(ARM64%2FIntel)-lightgrey" alt="Platform">
+  <img src="https://img.shields.io/badge/Python-3.9%2B-blue.svg" alt="Python 3.9+">
  <img src="https://img.shields.io/badge/License-MIT-green.svg" alt="MIT License">
-  <img src="https://img.shields.io/badge/MCP-Native%20Integration-blue" alt="MCP Integration">
+  <img src="https://img.shields.io/badge/Platform-Linux%20%7C%20macOS-lightgrey" alt="Platform">
 </p>

 <h2 align="center" tabindex="-1" class="heading-element" dir="auto">
@@ -18,10 +16,9 @@ LEANN is an innovative vector database that democratizes personal AI. Transform

 LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. [Illustration Fig →](#️-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276)

-**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can semantic search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, **[codebase](#-claude-code-integration-transform-your-development-workflow)**\* , or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.
+**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (e.g., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.

-
-\* Claude Code only supports basic `grep`-style keyword search. **LEANN** is a drop-in **semantic search MCP service fully compatible with Claude Code**, unlocking intelligent retrieval without changing your workflow. 🔥 Check out [the easy setup →](packages/leann-mcp/README.md)
+> **🚀 NEW: Claude Code Integration!** LEANN now provides native MCP integration for Claude Code users. Index your codebase and get intelligent code assistance directly in Claude Code. [Setup Guide →](packages/leann-mcp/README.md)



@@ -31,7 +28,7 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg
  <img src="assets/effects.png" alt="LEANN vs Traditional Vector DB Storage Comparison" width="70%">
 </p>

-> **The numbers speak for themselves:** Index 60 million text chunks in just 6GB instead of 201GB. From emails to browser history, everything fits on your laptop. [See detailed benchmarks for different applications below ↓](#storage-comparison)
+> **The numbers speak for themselves:** Index 60 million Wikipedia chunks in just 6GB instead of 201GB. From emails to browser history, everything fits on your laptop. [See detailed benchmarks for different applications below ↓](#storage-comparison)


 🔒 **Privacy:** Your data never leaves your laptop. No OpenAI, no cloud, no "terms of service".
@@ -71,8 +68,6 @@ source .venv/bin/activate
 uv pip install leann
 ```

-> Low-resource? See “Low-resource setups” in the [Configuration Guide](docs/configuration-guide.md#low-resource-setups).
-
 <details>
 <summary>
 <strong>🔧 Build from Source (Recommended for development)</strong>
@@ -192,8 +187,8 @@ All RAG examples share these common parameters. **Interactive mode** is availabl
 --force-rebuild         # Force rebuild index even if it exists

 # Embedding Parameters
--embedding-model MODEL  # e.g., facebook/contriever, text-embedding-3-small, nomic-embed-text,mlx-community/Qwen3-Embedding-0.6B-8bit or nomic-embed-text
--embedding-mode MODE    # sentence-transformers, openai, mlx, or ollama
+--embedding-model MODEL  # e.g., facebook/contriever, text-embedding-3-small, or mlx-community/multilingual-e5-base-mlx
+--embedding-mode MODE    # sentence-transformers, openai, or mlx

 # LLM Parameters (Text generation models)
 --llm TYPE              # LLM backend: openai, ollama, or hf (default: openai)
@@ -226,7 +221,7 @@ Ask questions directly about your personal PDFs, documents, and any directory co
  <img src="videos/paper_clear.gif" alt="LEANN Document Search Demo" width="600">
 </p>

-The example below asks a question about summarizing our paper (uses default data in `data/`, which is a directory with diverse data sources: two papers, Pride and Prejudice, and a Technical report about LLM in Huawei in Chinese), and this is the **easiest example** to run here:
+The example below asks for a summary of our paper. It uses the default data in `data/`, a directory with diverse data sources (two papers, Pride and Prejudice, and a README in Chinese), and it is the **easiest example** to run here:

 ```bash
 source .venv/bin/activate # Don't forget to activate the virtual environment
@@ -421,26 +416,7 @@ Once the index is built, you can ask questions like:

 </details>

-### 🚀 Claude Code Integration: Transform Your Development Workflow!

-**The future of code assistance is here.** Transform your development workflow with LEANN's native MCP integration for Claude Code. Index your entire codebase and get intelligent code assistance directly in your IDE.
-
-**Key features:**
- 🔍 **Semantic code search** across your entire project
- 📚 **Context-aware assistance** for debugging and development
- 🚀 **Zero-config setup** with automatic language detection
-
-```bash
-# Install LEANN globally for MCP integration
-uv tool install leann-core
-
-# Setup is automatic - just start using Claude Code!
-```
-Try our fully agentic pipeline with auto query rewriting, semantic search planning, and more:
-
-![LEANN MCP Integration](assets/mcp_leann.png)
-
-**Ready to supercharge your coding?** [Complete Setup Guide →](packages/leann-mcp/README.md)

 ## 🖥️ Command Line Interface

@@ -470,8 +446,11 @@ leann --help
 ### Usage Examples

 ```bash
-# build from a specific directory, and my_docs is the index name(Here you can also build from multiple dict or multiple files)
-leann build my-docs --docs ./your_documents
+# Build an index from current directory (default)
+leann build my-docs
+
+# Or from specific directory
+leann build my-docs --docs ./documents

 # Search your documents
 leann search my-docs "machine learning concepts"
@@ -609,9 +588,8 @@ We welcome more contributors! Feel free to open issues or submit PRs.

 This work is done at [**Berkeley Sky Computing Lab**](https://sky.cs.berkeley.edu/).

-## Star History
+---

-[![Star History Chart](https://api.star-history.com/svg?repos=yichuan-w/LEANN&type=Date)](https://www.star-history.com/#yichuan-w/LEANN&Date)
 <p align="center">
  <strong>⭐ Star us on GitHub if Leann is useful for your research or applications!</strong>
 </p>
@@ -75,7 +75,7 @@ class BaseRAGExample(ABC):
            "--embedding-mode",
            type=str,
            default="sentence-transformers",
-            choices=["sentence-transformers", "openai", "mlx", "ollama"],
+            choices=["sentence-transformers", "openai", "mlx"],
            help="Embedding backend mode (default: sentence-transformers)",
        )

@@ -85,7 +85,7 @@ class BaseRAGExample(ABC):
            "--llm",
            type=str,
            default="openai",
-            choices=["openai", "ollama", "hf", "simulated"],
+            choices=["openai", "ollama", "hf"],
            help="LLM backend to use (default: openai)",
        )
        llm_group.add_argument(
@@ -1,82 +0,0 @@
-# 盘古之殇：华为诺亚盘古大模型研发历程的心酸与黑暗
-
-各位好，
-
-我是一名盘古大模型团队，华为诺亚方舟实验室的员工。
-
-首先为自证身份，列举一些细节：
-
-1. 现诺亚主任，前算法应用部部长，后改名为小模型实验室的主任王云鹤。前诺亚主任：姚骏（大家称姚老师）。几个实验室主任：唐睿明（明哥，明队，已离职），尚利峰，张维（维哥），郝建业（郝老师），刘武龙（称呼为武龙所）等。其他骨干成员和专家陆续有很多人离职。
-2. 我们隶属于“四野”这个组织。四野下属有许多纵队，基础语言大模型是四纵。王云鹤的小模型是十六纵队。我们参加过苏州的集结，有各种月份的时间节点。在苏州攻关会颁发任务令，需要在节点前达成目标。苏州集结会把各地的人员都集中在苏州研究所，平常住宾馆，比如在甪直的酒店，与家人孩子天各一方。
-3. 在苏州集结的时候周六默认上班，非常辛苦，不过周六有下午茶，有一次还有小龙虾。在苏州研究所的工位搬迁过一次，从一栋楼换到了另一栋。苏州研究所楼栋都是欧式装修，门口有大坡，里面景色很不错。去苏州集结一般至少要去一周，甚至更久，多的人甚至一两个月都回不了家。
-4. 诺亚曾经传说是研究型的，但是来了之后因为在四野做大模型项目，项目成员完全变成了交付型的，且充满了例会，评审，汇报。很多时候做实验都要申请。团队需要对接终端小艺，华为云，ICT等诸多业务线，交付压力不小。
-5. 诺亚研发的盘古模型早期内部代号叫做“盘古智子”，一开始只有内部需要申请试用的网页版，到后续迫于压力在welink上接入和公测开放。
-
-这些天发生关于质疑盘古大模型抄袭千问的事情闹的沸沸扬扬。作为一个盘古团队的成员，我最近夜夜辗转反侧，难以入眠。盘古的品牌受到如此大的影响，一方面，我自私的为我的职业发展担忧，也为自己过去的努力工作感到不值。另一方面，由于有人开始揭露这些事情我内心又感到大快人心。在多少个日日夜夜，我们对内部某些人一次次靠着造假而又获得了无数利益的行为咬牙切齿而又无能为力。这种压抑和羞辱也逐渐消磨了我对华为的感情，让我在这里的时日逐渐浑浑噩噩，迷茫无措，时常怀疑自己的人生和自我价值。
-
-我承认我是一个懦弱的人，作为一个小小的打工人，我不仅不敢和王云鹤等内部手眼通天的人做对，更不敢和华为这样的庞然大物做对。我很怕失去我的工作，毕竟我也有家人和孩子，所以我打心眼里很佩服揭露者。但是，看到内部还在试图洗地掩盖事实，蒙蔽公众的时候，我实在不能容忍了。我也希望勇敢一次，顺从自己本心。就算自损八百，我也希望能伤敌一千。我决定把我在这里的所见所闻（部分来自于同事口述）公布出来，关于盘古大模型的“传奇故事”：
-
-华为确实主要在昇腾卡上训练大模型（小模型实验室有不少英伟达的卡，他们之前也会用来训练，后面转移到昇腾）。曾经我被华为“打造世界第二选择”的决心而折服，我本身也曾经对华为有深厚的感情。我们陪着昇腾一步步摸爬滚打，从充满bug到现在能训出模型，付出了巨大的心血和代价。
-
-最初我们的算力非常有限，在910A上训练模型。那会只支持fp16，训练的稳定性远不如bf16。盘古的moe开始很早，23年就主要是训练38Bmoe模型和后续的71B dense模型。71B的dense模型通过扩增变成了第一代的135Bdense模型，后面主力模型也逐渐在910B上训练。
-
-71B和135B模型都有一个巨大的硬伤就是tokenizer。当时使用的tokenizer编码效率极低，每个单个的符号，数字，空格，乃至汉字都会占用一个token。可想而知这会非常浪费算力，且使得模型的效果很差。这时候小模型实验室正好有个自己训的词表。姚老师当时怀疑是不是模型的tokenizer不好（虽然事后来看，他的怀疑是无疑正确的），于是就决定，让71B和135B换tokenizer，因为小模型实验室曾经尝试过。团队缝合了两个tokenizer，开始了tokenizer的更换。71B模型的更换失败了，而135B因为采用了更精细的embedding初始化策略，续训了至少1T的数据后词表总算更换成功，但可想而知，效果并不会变好。
-
-于此同期，阿里和智谱等国内其他公司在GPU上训练，且已经摸索出了正确的方法，盘古和竞品的差距越来越大。内部一个230B从头训练的dense模型又因为各种原因训练失败，导致项目的状况几乎陷入绝境。面临几个节点的压力以及内部对盘古的强烈质疑时，团队的士气低迷到了极点。团队在算力极其有限的时候，做出了很多努力和挣扎。比如，团队偶然发现当时的38B moe并没有预期moe的效果。于是去掉了moe参数，还原为了13B的dense模型。由于38B的moe源自很早的pangu alpha 13B，架构相对落后，团队进行了一系列的操作，比如切换绝对位置编码到rope，去掉bias，切换为rmsnorm。同时鉴于tokenizer的一些失败和换词表的经验，这个模型的词表也更换为了王云鹤的小模型实验室7B模型所使用的词表。后面这个13B模型进行了扩增续训，变成了第二代38B dense模型（在几个月内这个模型都是主要的盘古中档位模型），曾经具有一定的竞争力。但是，由于更大的135B模型架构落后，且更换词表模型损伤巨大（后续分析发现当时更换的缝合词表有更严重的bug），续训后也与千问等当时国内领先模型存在很大差距。这时由于内部的质疑声和领导的压力也越来越大。团队的状态几乎陷入了绝境。
-
-在这种情况下，王云鹤和他的小模型实验室出手了。他们声称是从旧的135B参数继承改造而来，通过训练短短的几百B数据，各项指标平均提升了十个点左右。实际上，这就是他们套壳应用到大模型的第一次杰作。华为的外行领导内行，使得领导完全对于这种扯淡的事情没有概念，他们只会觉得肯定是有什么算法创新。经过内部的分析，他们实际上是使用Qwen 1.5 110B续训而来，通过加层，扩增ffn维度，添加盘古pi论文的一些机制得来，凑够了大概135B的参数。实际上，旧的135B有107层，而这个模型只有82层，各种配置也都不一样。新的来路不明的135B训练完很多参数的分布也和Qwen 110B几乎一模一样。连模型代码的类名当时都是Qwen，甚至懒得改名。后续这个模型就是所谓的135B V2。而这个模型当时也提供给了很多下游，甚至包括外部客户。
-
-这件事对于我们这些认真诚实做事的同事们带来了巨大的冲击，内部很多人其实都知道这件事，甚至包括终端和华为云。我们都戏称以后别叫盘古模型了，叫千古吧。当时团队成员就想向bcg举报了，毕竟这已经是重大的业务造假了。但是后面据说被领导拦了下来，因为更高级别的领导（比如姚老师，以及可能熊总和查老）其实后面也知道了，但是并不管，因为通过套壳拿出好的结果，对他们也是有利的。这件事使得当时团队几位最强的同事开始心灰意冷，离职跑路也逐渐成为挂在嘴边的事。
-
-此时，盘古似乎迎来了转机。由于前面所述的这些盘古模型基本都是续训和改造而来，当时诺亚完全没有掌握从头训练的技术，何况还是在昇腾的NPU上进行训练。在当时团队的核心成员的极力争取下，盘古开始了第三代模型的训练，付出了巨大的努力后，在数据架构和训练算法方面都与业界逐渐接轨，而这其中的艰辛和小模型实验室的人一点关系都没有。
-
-一开始团队成员毫无信心，只从一个13B的模型开始训练，但是后面发现效果还不错，于是这个模型后续再次进行了一次参数扩增，变成了第三代的38B，代号38B V3。想必很多产品线的兄弟都对这个模型很熟悉。当时这个模型的tokenizer是基于llama的词表进行扩展的（也是业界常见的做法）。而当时王云鹤的实验室做出来了另一个词表（也就是后续pangu系列的词表）。当时两个词表还被迫进行了一次赛马，最终没有明显的好坏结论。于是，领导当即决定，应该统一词表，使用王云鹤他们的。于是，在后续从头训练的135B V3（也就是对外的Pangu Ultra），便是采用了这个tokenizer。这也解释了很多使用我们模型的兄弟的疑惑，为什么当时同为V3代的两个不同档位的模型，会使用不同的tokenizer。
-
-
-我们打心眼里觉得，135B V3是我们四纵团队当时的骄傲。这是第一个真正意义上的，华为全栈自研，正经从头训练的千亿级别的模型，且效果与24年同期竞品可比的。写到这里我已经热泪盈眶，太不容易了。当时为了稳定训练，团队做了大量实验对比，并且多次在模型梯度出现异常的时候进行及时回退重启。这个模型真正做到了后面技术报告所说的训练全程没有一个loss spike。我们克服了不知道多少困难，我们做到了，我们愿用生命和荣誉保证这个模型训练的真实性。多少个凌晨，我们为了它的训练而不眠。在被内部心声骂的一文不值的时候，我们有多么不甘，有多少的委屈，我们挺住了。
-
-我们这帮人是真的在为打磨国产算力底座燃烧自己的青春啊……客居他乡，我们放弃了家庭，放弃了假期，放弃了健康，放弃了娱乐，抛头颅洒热血，其中的艰辛与困苦，寥寥数笔不足以概括其万一。在各种动员大会上，当时口号中喊出的盘古必胜，华为必胜，我们心里是真的深深被感动。
-
-然而，我们的所有辛苦的成果，经常被小模型实验室轻飘飘的拿走了。数据，直接要走。代码，直接要走，还要求我们配合适配到能一键运行。我们当时戏称小模型实验室为点鼠标实验室。我们付出辛苦，他们取得荣耀。果然应了那句话，你在负重前行是因为有人替你岁月静好。在这种情况下，越来越多的战友再也坚持不下去了，选择了离开。看到身边那些优秀的同事一个个离职，我的内心又感叹又难过。在这种作战一样的环境下，我们比起同事来说更像是战友。他们在技术上也有无数值得我学习的地方，堪称良师。看到他们去了诸如字节Seed，Deepseek，月之暗面，腾讯和快手等等很多出色的团队，我打心眼里为他们高兴和祝福，脱离了这个辛苦却肮脏的地方。我至今还对一位离职同事的话记忆犹新，ta说：“来这里是我技术生涯中的耻辱，在这里再呆每一天都是浪费生命”。话虽难听却让我无言以对。我担心我自己技术方面的积累不足，以及没法适应互联网公司高淘汰的环境，让我多次想离职的心始终没有迈出这一步。
-
-盘古除了dense模型，后续也启动了moe的探索。一开始训练的是一个224B的moe模型。而与之平行的，小模型实验室也开启了第二次主要的套壳行动（次要的插曲可能还包括一些别的模型，比如math模型），即这次流传甚广的pangu pro moe 72B。这个模型内部自称是从小模型实验室的7B扩增上来的（就算如此，这也与技术报告不符，何况是套壳qwen 2.5的14b续训）。还记得他们训了没几天，内部的评测就立刻追上了当时的38B V3。AI系统实验室很多兄弟因为需要适配模型，都知道他们的套壳行动，只是迫于各种原因，无法伸张正义。实际上，对于后续训了很久很久的这个模型，Honestagi能够分析出这个量级的相似性我已经很诧异了，因为这个模型为了续训洗参数，所付出的算力甚至早就足够从头训一个同档位的模型了。听同事说他们为了洗掉千问的水印，采取了不少办法，甚至包括故意训了脏数据。这也为学术界研究模型血缘提供了一个前所未有的特殊模范吧。以后新的血缘方法提出可以拿出来溜溜。
-
-24年底和25年初，在Deepseek v3和r1发布之后，由于其惊艳的技术水平，团队受到了巨大的冲击，也受到了更大的质疑。于是为了紧跟潮流，盘古模仿Deepseek的模型尺寸，开启了718B moe的训练。这个时候，小模型实验室再次出手了。他们选择了套壳Deepseekv3续训。他们通过冻住Deepseek加载的参数，进行训练。连任务加载ckpt的目录都是deepseekv3，改都不改，何其嚣张？与之相反，一些有真正技术信仰的同事，在从头训练另一个718B的moe。但其中出现了各种各样的问题。但是很显然，这个模型怎么可能比直接套壳的好呢？如果不是团队leader坚持，早就被叫停了。
-
-华为的流程管理之繁重，严重拖累了大模型的研发节奏，例如版本管理，模型血缘，各种流程化，各种可追溯。讽刺的是，小模型实验室的模型似乎从来不受这些流程的约束，想套壳就套壳，想续训就续训，算力源源不断的伸手拿走。这种强烈到近乎魔幻的对比，说明了当前流程管理的情况：只许州官放火，不许百姓点灯。何其可笑？何其可悲？何其可恶？何其可耻！
-
-HonestAGI的事情出来后，内部让大家不停的研讨分析，如何公关和“回应”。诚然，这个原文的分析也许不够有力，给了王云鹤与小模型实验室他们狡辩和颠倒黑白的机会。为此，这两天我内心感到作呕，时时怀疑自己的人生意义以及苍天无眼。我不奉陪了，我要离职了，同时我也在申请从盘古部分技术报告的作者名单中移除。曾经在这些技术报告上署名是我一生都无法抹除的污点。当时我没想到，他们竟然猖狂到敢开源。我没想到，他们敢如此愚弄世人，大肆宣发。当时，我也许是存了侥幸心理，没有拒绝署名。我相信很多扎实做事的战友，也只是被迫上了贼船，或者不知情。但这件事已经无法挽回，我希望我的余生能够坚持扎实做真正有意义的事，为我当时的软弱和不坚定赎罪。
-
-深夜写到这里，我已经泪流满面，泣不成声。还记得一些出色的同事离职时，我苦笑问他们要不要发个长长的心声惯例帖，揭露一下现状。对方说：不了，浪费时间，而且我也怕揭露出来你们过的更糟。我当时一下黯然神伤，因为曾经共同为了理想奋斗过的战友已经彻底对华为彻底灰心了。当时大家调侃，我们用着当年共产党的小米加步枪，组织却有着堪比当年国民党的作风。
-
-曾几何时，我为我们用着小米加步枪打败洋枪洋炮而自豪。
-
-现在，我累了，我想投降。
-
-其实时至今日，我还是真心希望华为能认真吸取教训，能做好盘古，把盘古做到世界一流，把昇腾变成英伟达的水平。内部的劣币驱逐良币，使得诺亚乃至华为在短时间内急剧流失了大量出色的大模型人才。相信他们也正在如Deepseek等各个团队闪耀着，施展着他们的抱负才华，为中美在AI的激烈竞赛中奉献力量。我时常感叹，华为不是没有人才，而是根本不知道怎么留住人才。如果给这些人合适的环境，合适的资源，更少的枷锁，更少的政治斗争，盘古何愁不成？
-
-最后：我以生命，人格和荣誉发誓，我写的以上所有内容均为真实（至少在我有限的认知范围内）。我没有那么高的技术水平以及机会去做详尽扎实的分析，也不敢直接用内部记录举证，怕因为信息安全抓到。但是我相信我很多曾经的战友，会为我作证。在华为内部的兄弟，包括我们曾经服务过的产品线兄弟们，相信本文的无数细节能和你们的印象对照，印证我的说法。你们可能也曾经被蒙骗，但这些残酷的真相不会被尘封。我们奋战过的痕迹，也不应该被扭曲和埋葬。
-
-写了这么多，某些人肯定想把我找出来，抹杀掉。公司搞不好也想让我噤声乃至追责。如果真的这样，我，乃至我的家人的人身乃至生命安全可能都会受到威胁。为了自我保护，我近期每天会跟大家报平安。
-
-如果我消失了，就当是我为了真理和理想，为了华为乃至中国能够更好地发展算力和AI而牺牲了吧，我愿埋葬于那片曾经奋斗过的地方。
-
-诺亚，再见
-
-2025年7月6日凌晨      写于深圳
-
---
-
-各位好，
-
-感谢大家的关心与祝福。我目前暂时安全，但公司应该在进行排查与某些名单收集，后续情况未知。
-
-我补充一些细节，以免某些人继续颠倒黑白。
-
-关于135B V2，小模型实验室在迅速地完成套壳并拿完所有套壳带来的好处后（比如任务令表彰和及时激励），因为不想继续支撑下游应用和模型迭代，又把这个烫手山芋甩给了四纵。确实技高一筹，直接把四纵的兄弟们拉下水。同事提供过去一个老旧的模型，最终拿回了一个当时一个魔改的先进的千问。做大模型的人，自己做的模型就像自己孩子一样熟悉，不要把别人都当傻子。就像自家儿子出门一趟，回来个别人家孩子。
-
-盘古report的署名是不符合学术规范的。例如，135B V3有不少有技术贡献的人，因为作者名额数量限制，劳动成果没有得到应有的回报，团队内曾经有不小的意见。这个模型当时是大家智慧和汗水的结晶，甚至是团队当时的精神支柱，支撑着不少兄弟们继续留在诺亚。所谓的名额限制，以及挂名了一些毫无技术贡献的人（如一些小模型实验室的人），让兄弟们何其心寒。
-
---
-
-暂时平安。另外，支持我勇于说出真相的战友们 https://github.com/HW-whistleblower/True-Story-of-Pangu/issues/317
@@ -0,0 +1,150 @@
+# Claude Code x LEANN 集成指南
+
+## ✅ 现状：已经可以工作！
+
+好消息：LEANN CLI已经完全可以在Claude Code中使用，无需任何修改！
+
+## 🚀 立即开始
+
+### 1. 激活环境
+```bash
+# 在LEANN项目目录下
+source .venv/bin/activate.fish  # fish shell
+# 或
+source .venv/bin/activate       # bash shell
+```
+
+### 2. 基本命令
+
+#### 查看现有索引
+```bash
+leann list
+```
+
+#### 搜索文档
+```bash
+leann search my-docs "machine learning" --recompute-embeddings
+```
+
+#### 问答对话
+```bash
+echo "What is machine learning?" | leann ask my-docs --llm ollama --model qwen3:8b --recompute-embeddings
+```
+
+#### 构建新索引
+```bash
+leann build project-docs --docs ./src --recompute-embeddings
+```
+
+## 💡 Claude Code 使用技巧
+
+### 在Claude Code中直接使用
+
+1. **激活环境**：
+   ```bash
+   cd /path/to/LEANN
+   source .venv/bin/activate.fish
+   ```
+
+2. **搜索代码库**：
+   ```bash
+   leann search my-docs "authentication patterns" --recompute-embeddings --top-k 10
+   ```
+
+3. **智能问答**：
+   ```bash
+   echo "How does the authentication system work?" | leann ask my-docs --llm ollama --model qwen3:8b --recompute-embeddings
+   ```
+
+### 批量操作示例
+
+```bash
+# 构建项目文档索引
+leann build project-docs --docs ./docs --force
+
+# 搜索多个关键词
+leann search project-docs "API authentication" --recompute-embeddings
+leann search project-docs "database schema" --recompute-embeddings
+leann search project-docs "deployment guide" --recompute-embeddings
+
+# 问答模式
+echo "What are the API endpoints?" | leann ask project-docs --recompute-embeddings
+```
+
+## 🎯 Claude 可以立即执行的工作流
+
+### 代码分析工作流
+```bash
+# 1. 构建代码库索引
+leann build codebase --docs ./src --backend hnsw --recompute-embeddings
+
+# 2. 分析架构
+echo "What is the overall architecture?" | leann ask codebase --recompute-embeddings
+
+# 3. 查找特定功能
+leann search codebase "user authentication" --recompute-embeddings --top-k 5
+
+# 4. 理解实现细节
+echo "How is user authentication implemented?" | leann ask codebase --recompute-embeddings
+```
+
+### 文档理解工作流
+```bash
+# 1. 索引项目文档
+leann build docs --docs ./docs --recompute-embeddings
+
+# 2. 快速查找信息
+leann search docs "installation requirements" --recompute-embeddings
+
+# 3. 获取详细说明
+echo "What are the system requirements?" | leann ask docs --recompute-embeddings
+```
+
+## ⚠️ 重要提示
+
+1. **必须使用 `--recompute-embeddings`** - 这是关键参数，不加会报错
+2. **需要先激活虚拟环境** - 确保有LEANN的Python环境
+3. **Ollama需要预先安装** - ask功能需要本地LLM
+
+## 🔥 立即可用的Claude提示词
+
+```
+Help me analyze this codebase using LEANN:
+
+1. First, activate the environment:
+   cd /path/to/LEANN && source .venv/bin/activate.fish
+
+2. Build an index of the source code:
+   leann build codebase --docs ./src --recompute-embeddings
+
+3. Search for authentication patterns:
+   leann search codebase "authentication middleware" --recompute-embeddings --top-k 10
+
+4. Ask about the authentication system:
+   echo "How does user authentication work in this codebase?" | leann ask codebase --recompute-embeddings
+
+Please execute these commands and help me understand the code structure.
+```
+
+## 📈 下一步改进计划
+
+虽然现在已经可以用，但还可以进一步优化：
+
+1. **简化命令** - 默认启用recompute-embeddings
+2. **配置文件** - 避免重复输入参数
+3. **状态管理** - 自动检测环境和索引
+4. **输出格式** - 更适合Claude解析的格式
+
+但这些都是锦上添花，现在就能用起来！
+
+## 🎉 总结
+
+**LEANN现在就可以在Claude Code中完美工作！**
+
+- ✅ 搜索功能正常
+- ✅ RAG问答功能正常
+- ✅ 索引构建功能正常
+- ✅ 支持多种数据源
+- ✅ 支持本地LLM
+
+只需要记住加上 `--recompute-embeddings` 参数就行！
@@ -49,25 +49,14 @@ Based on our experience developing LEANN, embedding models fall into three categ
 - **Cons**: Slower inference, longer index build times
 - **Use when**: Quality is paramount and you have sufficient compute resources. **Highly recommended** for production use

-### Quick Start: Cloud and Local Embedding Options
+### Quick Start: OpenAI Embeddings (Fastest Setup)

-**OpenAI Embeddings (Fastest Setup)**
 For immediate testing without local model downloads:
 ```bash
 # Set OpenAI embeddings (requires OPENAI_API_KEY)
 --embedding-mode openai --embedding-model text-embedding-3-small
 ```

-**Ollama Embeddings (Privacy-Focused)**
-For local embeddings with complete privacy:
-```bash
-# First, pull an embedding model
-ollama pull nomic-embed-text
-
-# Use Ollama embeddings
--embedding-mode ollama --embedding-model nomic-embed-text
-```
-
 <details>
 <summary><strong>Cloud vs Local Trade-offs</strong></summary>

@@ -222,15 +211,9 @@ python apps/document_rag.py --query "What are the main techniques LEANN explores

 3. **Use MLX on Apple Silicon** (optional optimization):
   ```bash
-   --embedding-mode mlx --embedding-model mlx-community/Qwen3-Embedding-0.6B-8bit
+   --embedding-mode mlx --embedding-model mlx-community/multilingual-e5-base-mlx
   ```
-    MLX might not be the best choice, as we tested and found that it only offers 1.3x acceleration compared to HF, so maybe using ollama is a better choice for embedding generation

-4. **Use Ollama**
-   ```bash
-   --embedding-mode ollama --embedding-model nomic-embed-text
-   ```
-   To discover additional embedding models in ollama, check out https://ollama.com/search?c=embedding or read more about embedding models at https://ollama.com/blog/embedding-models, please do check the model size that works best for you
 ### If Search Quality is Poor

 1. **Increase retrieval count**:
@@ -259,80 +242,24 @@ Every configuration choice involves trade-offs:

 The key is finding the right balance for your specific use case. Start small and simple, measure performance, then scale up only where needed.

-## Low-resource setups
+## Deep Dive: Critical Configuration Decisions

-If you don’t have a local GPU or builds/searches are too slow, use one or more of the options below.
+### When to Disable Recomputation

-### 1) Use OpenAI embeddings (no local compute)
-
-Fastest path with zero local GPU requirements. Set your API key and use OpenAI embeddings during build and search:
+LEANN's recomputation feature provides exact distance calculations but can be disabled for extreme QPS requirements:

 ```bash
-export OPENAI_API_KEY=sk-...
-
-# Build with OpenAI embeddings
-leann build my-index \
-  --embedding-mode openai \
-  --embedding-model text-embedding-3-small
-
-# Search with OpenAI embeddings (recompute at query time)
-leann search my-index "your query" \
-  --recompute-embeddings
+--no-recompute  # Disable selective recomputation
 ```

-### 2) Run remote builds with SkyPilot (cloud GPU)
+**Trade-offs**:
+- **With recomputation** (default): Exact distances, best quality, higher latency, minimal storage (only stores metadata, recomputes embeddings on-demand)
+- **Without recomputation**: Must store full embeddings, significantly higher memory and storage usage (10-100x more), but faster search

-Offload embedding generation and index building to a GPU VM using SkyPilot. A template is provided at `sky/leann-build.yaml`.
-
-```bash
-# One-time: install and configure SkyPilot
-pip install skypilot
-sky launch -c leann-gpu sky/leann-build.yaml
-
-# Build remotely (template installs uv + leann CLI)
-sky exec leann-gpu -- "leann build my-index --docs ~/leann-data --backend hnsw --complexity 64 --graph-degree 32"
-```
-
-Details: see “Running Builds on SkyPilot (Optional)” below.
-
-### 3) Disable recomputation to trade storage for speed
-
-If you need lower latency and have more storage/memory, disable recomputation. This stores full embeddings and avoids recomputing at search time.
-
-```bash
-# Build without recomputation (HNSW requires non-compact in this mode)
-leann build my-index --no-recompute --no-compact
-
-# Search without recomputation
-leann search my-index "your query" --no-recompute
-```
-
-Trade-offs: lower query-time latency, but significantly higher storage usage.
-
-## Running Builds on SkyPilot (Optional)
-
-You can offload embedding generation and index building to a cloud GPU VM using SkyPilot, without changing any LEANN code. This is useful when your local machine lacks a GPU or you want faster throughput.
-
-### Quick Start
-
-1) Install SkyPilot by following their docs (`pip install skypilot`), then configure cloud credentials.
-
-2) Use the provided SkyPilot template:
-
-```bash
-sky launch -c leann-gpu sky/leann-build.yaml
-```
-
-3) On the remote, either put your data under the mounted path or adjust `file_mounts` in `sky/leann-build.yaml`. Then run the LEANN build:
-
-```bash
-sky exec leann-gpu -- "leann build my-index --docs ~/leann-data --backend hnsw --complexity 64 --graph-degree 32"
-```
-
-Notes:
- The template installs `uv` and the `leann` CLI globally on the remote instance.
- Change the `accelerators` and `cloud` settings in `sky/leann-build.yaml` to match your budget/availability (e.g., `A10G:1`, `A100:1`, or CPU-only if you prefer).
- You can also build with `diskann` by switching `--backend diskann`.
+**Disable when**:
+- You have abundant storage and memory
+- You need extremely low latency (< 100ms)
+- You run a read-heavy workload where storage cost is acceptable

 ## Further Reading

@@ -0,0 +1,8 @@
+# packages/leann-backend-diskann/CMakeLists.txt (simplified version)
+
+cmake_minimum_required(VERSION 3.20)
+project(leann_backend_diskann_wrapper)
+
+# Descend directly into the DiskANN submodule and process its own CMakeLists.txt.
+# DiskANN's build handles everything itself, including compiling the Python bindings.
+add_subdirectory(src/third_party/DiskANN)
@@ -4,7 +4,7 @@ import os
 import struct
 import sys
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np
 import psutil
@@ -259,7 +259,7 @@ class DiskannSearcher(BaseSearcher):
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = False,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        batch_recompute: bool = False,
        dedup_node_dis: bool = False,
        **kwargs,
@@ -10,7 +10,6 @@ import sys
 import threading
 import time
 from pathlib import Path
-from typing import Optional

 import numpy as np
 import zmq
@@ -33,7 +32,7 @@ if not logger.handlers:


 def create_diskann_embedding_server(
-    passages_file: Optional[str] = None,
+    passages_file: str | None = None,
    zmq_port: int = 5555,
    model_name: str = "sentence-transformers/all-mpnet-base-v2",
    embedding_mode: str = "sentence-transformers",
@@ -262,7 +261,7 @@ if __name__ == "__main__":
        "--embedding-mode",
        type=str,
        default="sentence-transformers",
-        choices=["sentence-transformers", "openai", "mlx", "ollama"],
+        choices=["sentence-transformers", "openai", "mlx"],
        help="Embedding backend mode",
    )
    parser.add_argument(
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-diskann"
-version = "0.2.9"
-dependencies = ["leann-core==0.2.9", "numpy", "protobuf>=3.19.0"]
+version = "0.2.1"
+dependencies = ["leann-core==0.2.1", "numpy", "protobuf>=3.19.0"]

 [tool.scikit-build]
 # Key: simplified CMake path
@@ -17,5 +17,3 @@ editable.mode = "redirect"
 cmake.build-type = "Release"
 build.verbose = true
 build.tool-args = ["-j8"]
-# Let CMake find packages via Homebrew prefix
-cmake.define = {CMAKE_PREFIX_PATH = {env = "CMAKE_PREFIX_PATH"}, OpenMP_ROOT = {env = "OpenMP_ROOT"}}
@@ -5,20 +5,11 @@ set(CMAKE_CXX_COMPILER_WORKS 1)

 # Set OpenMP path for macOS
 if(APPLE)
-    # Detect Homebrew installation path (Apple Silicon vs Intel)
-    if(EXISTS "/opt/homebrew/opt/libomp")
-        set(HOMEBREW_PREFIX "/opt/homebrew")
-    elseif(EXISTS "/usr/local/opt/libomp")
-        set(HOMEBREW_PREFIX "/usr/local")
-    else()
-        message(FATAL_ERROR "Could not find libomp installation. Please install with: brew install libomp")
-    endif()
-
-    set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include")
-    set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include")
+    set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I/opt/homebrew/opt/libomp/include")
+    set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I/opt/homebrew/opt/libomp/include")
    set(OpenMP_C_LIB_NAMES "omp")
    set(OpenMP_CXX_LIB_NAMES "omp")
-    set(OpenMP_omp_LIBRARY "${HOMEBREW_PREFIX}/opt/libomp/lib/libomp.dylib")
+    set(OpenMP_omp_LIBRARY "/opt/homebrew/opt/libomp/lib/libomp.dylib")

    # Force use of system libc++ to avoid version mismatch
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
@@ -2,7 +2,7 @@ import logging
 import os
 import shutil
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np
 from leann.interface import (
@@ -152,7 +152,7 @@ class HNSWSearcher(BaseSearcher):
        self,
        query: np.ndarray,
        top_k: int,
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        complexity: int = 64,
        beam_width: int = 1,
        prune_ratio: float = 0.0,
@@ -10,7 +10,6 @@ import sys
 import threading
 import time
 from pathlib import Path
-from typing import Union

 import msgpack
 import numpy as np
@@ -34,7 +33,7 @@ if not logger.handlers:


 def create_hnsw_embedding_server(
-    passages_file: Union[str, None] = None,
+    passages_file: str | None = None,
    zmq_port: int = 5555,
    model_name: str = "sentence-transformers/all-mpnet-base-v2",
    distance_metric: str = "mips",
@@ -95,8 +94,6 @@ def create_hnsw_embedding_server(
        passage_sources.append(source_copy)

    passages = PassageManager(passage_sources)
-    # Use index dimensions from metadata for shaping fallback responses
-    embedding_dim: int = int(meta.get("dimensions", 0))
    logger.info(
        f"Loaded PassageManager with {len(passages.global_offset_map)} passages from metadata"
    )
@@ -111,9 +108,6 @@ def create_hnsw_embedding_server(
        socket.setsockopt(zmq.RCVTIMEO, 300000)
        socket.setsockopt(zmq.SNDTIMEO, 300000)

-        # Track last request type for safe fallback responses on exceptions
-        last_request_type = "unknown"  # one of: 'text', 'distance', 'embedding', 'unknown'
-        last_request_length = 0
        while True:
            try:
                message_bytes = socket.recv()
@@ -126,8 +120,6 @@ def create_hnsw_embedding_server(
                if isinstance(request_payload, list) and len(request_payload) > 0:
                    # Check if this is a direct text request (list of strings)
                    if all(isinstance(item, str) for item in request_payload):
-                        last_request_type = "text"
-                        last_request_length = len(request_payload)
                        logger.info(
                            f"Processing direct text embedding request for {len(request_payload)} texts in {embedding_mode} mode"
                        )
@@ -152,66 +144,43 @@ def create_hnsw_embedding_server(
                ):
                    node_ids = request_payload[0]
                    query_vector = np.array(request_payload[1], dtype=np.float32)
-                    last_request_type = "distance"
-                    last_request_length = len(node_ids)

                    logger.debug("Distance calculation request received")
                    logger.debug(f"    Node IDs: {node_ids}")
                    logger.debug(f"    Query vector dim: {len(query_vector)}")

-                    # Get embeddings for node IDs, tolerate missing IDs
-                    texts: list[str] = []
-                    found_indices: list[int] = []
-                    for idx, nid in enumerate(node_ids):
+                    # Get embeddings for node IDs
+                    texts = []
+                    for nid in node_ids:
                        try:
                            passage_data = passages.get_passage(str(nid))
-                            txt = passage_data.get("text", "")
-                            if isinstance(txt, str) and len(txt) > 0:
-                                texts.append(txt)
-                                found_indices.append(idx)
-                            else:
-                                logger.error(f"Empty text for passage ID {nid}")
+                            txt = passage_data["text"]
+                            texts.append(txt)
                        except KeyError:
                            logger.error(f"Passage ID {nid} not found")
+                            raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
                        except Exception as e:
                            logger.error(f"Exception looking up passage ID {nid}: {e}")
+                            raise

-                    # Prepare full-length response distances with safe fallbacks
-                    large_distance = 1e9
-                    response_distances = [large_distance] * len(node_ids)
-
-                    if texts:
-                        try:
-                            # Process embeddings only for found indices
-                            embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
-                            logger.info(
-                                f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
-                            )
-
-                            # Calculate distances for found embeddings only
-                            if distance_metric == "l2":
-                                partial_distances = np.sum(
-                                    np.square(embeddings - query_vector.reshape(1, -1)), axis=1
-                                )
-                            else:  # mips or cosine
-                                partial_distances = -np.dot(embeddings, query_vector)
-
-                            # Place computed distances back into the full response array
-                            for pos, dval in zip(
-                                found_indices, partial_distances.flatten().tolist()
-                            ):
-                                response_distances[pos] = float(dval)
-                        except Exception as e:
-                            logger.error(
-                                f"Distance computation error, falling back to large distances: {e}"
-                            )
-
-                    # Always reply with exactly len(node_ids) distances
-                    response_bytes = msgpack.packb([response_distances], use_single_float=True)
-                    logger.debug(
-                        f"Sending distance response with {len(response_distances)} distances (found={len(found_indices)})"
+                    # Process embeddings
+                    embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
+                    logger.info(
+                        f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
                    )

+                    # Calculate distances
+                    if distance_metric == "l2":
+                        distances = np.sum(
+                            np.square(embeddings - query_vector.reshape(1, -1)), axis=1
+                        )
+                    else:  # mips or cosine
+                        distances = -np.dot(embeddings, query_vector)
+
+                    response_payload = distances.flatten().tolist()
+                    response_bytes = msgpack.packb([response_payload], use_single_float=True)
+                    logger.debug(f"Sending distance response with {len(distances)} distances")
+
                    socket.send(response_bytes)
                    e2e_end = time.time()
                    logger.info(f"⏱️  Distance calculation E2E time: {e2e_end - e2e_start:.6f}s")
@@ -231,61 +200,40 @@ def create_hnsw_embedding_server(

                node_ids = request_payload[0]
                logger.debug(f"Request for {len(node_ids)} node embeddings")
-                last_request_type = "embedding"
-                last_request_length = len(node_ids)

-                # Allocate output buffer (B, D) and fill with zeros for robustness
-                if embedding_dim <= 0:
-                    logger.error("Embedding dimension unknown; cannot serve embedding request")
-                    dims = [0, 0]
-                    data = []
-                else:
-                    dims = [len(node_ids), embedding_dim]
-                    data = [0.0] * (dims[0] * dims[1])
-
-                # Look up texts by node IDs; compute embeddings where available
-                texts: list[str] = []
-                found_indices: list[int] = []
-                for idx, nid in enumerate(node_ids):
+                # Look up texts by node IDs
+                texts = []
+                for nid in node_ids:
                    try:
                        passage_data = passages.get_passage(str(nid))
-                        txt = passage_data.get("text", "")
-                        if isinstance(txt, str) and len(txt) > 0:
-                            texts.append(txt)
-                            found_indices.append(idx)
-                        else:
-                            logger.error(f"Empty text for passage ID {nid}")
+                        txt = passage_data["text"]
+                        if not txt:
+                            raise RuntimeError(f"FATAL: Empty text for passage ID {nid}")
+                        texts.append(txt)
                    except KeyError:
-                        logger.error(f"Passage with ID {nid} not found")
+                        raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
                    except Exception as e:
                        logger.error(f"Exception looking up passage ID {nid}: {e}")
+                        raise

-                if texts:
-                    try:
-                        # Process embeddings for found texts only
-                        embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
-                        logger.info(
-                            f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
-                        )
+                # Process embeddings
+                embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
+                logger.info(
+                    f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
+                )

-                        if np.isnan(embeddings).any() or np.isinf(embeddings).any():
-                            logger.error(
-                                f"NaN or Inf detected in embeddings! Requested IDs: {node_ids[:5]}..."
-                            )
-                            dims = [0, embedding_dim]
-                            data = []
-                        else:
-                            # Copy computed embeddings into the correct positions
-                            emb_f32 = np.ascontiguousarray(embeddings, dtype=np.float32)
-                            flat = emb_f32.flatten().tolist()
-                            for j, pos in enumerate(found_indices):
-                                start = pos * embedding_dim
-                                end = start + embedding_dim
-                                data[start:end] = flat[j * embedding_dim : (j + 1) * embedding_dim]
-                    except Exception as e:
-                        logger.error(f"Embedding computation error, returning zeros: {e}")
+                # Serialization and response
+                if np.isnan(embeddings).any() or np.isinf(embeddings).any():
+                    logger.error(
+                        f"NaN or Inf detected in embeddings! Requested IDs: {node_ids[:5]}..."
+                    )
+                    raise AssertionError()

-                response_payload = [dims, data]
+                hidden_contiguous_f32 = np.ascontiguousarray(embeddings, dtype=np.float32)
+                response_payload = [
+                    list(hidden_contiguous_f32.shape),
+                    hidden_contiguous_f32.flatten().tolist(),
+                ]
                response_bytes = msgpack.packb(response_payload, use_single_float=True)

                socket.send(response_bytes)
@@ -300,22 +248,7 @@ def create_hnsw_embedding_server(
                import traceback

                traceback.print_exc()
-                # Fallback to a safe, minimal-structure response to avoid client crashes
-                if last_request_type == "distance":
-                    # Return a vector of large distances with the expected length
-                    fallback_len = max(0, int(last_request_length))
-                    large_distance = 1e9
-                    safe_response = [[large_distance] * fallback_len]
-                elif last_request_type == "embedding":
-                    # Return an empty embedding block with known dimension if available
-                    if embedding_dim > 0:
-                        safe_response = [[0, embedding_dim], []]
-                    else:
-                        safe_response = [[0, 0], []]
-                else:
-                    # Unknown request type: default to empty embedding structure
-                    safe_response = [[0, int(embedding_dim) if embedding_dim > 0 else 0], []]
-                socket.send(msgpack.packb(safe_response, use_single_float=True))
+                socket.send(msgpack.packb([[], []]))

    zmq_thread = threading.Thread(target=zmq_server_thread, daemon=True)
    zmq_thread.start()
@@ -362,7 +295,7 @@ if __name__ == "__main__":
        "--embedding-mode",
        type=str,
        default="sentence-transformers",
-        choices=["sentence-transformers", "openai", "mlx", "ollama"],
+        choices=["sentence-transformers", "openai", "mlx"],
        help="Embedding backend mode",
    )

@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-hnsw"
-version = "0.2.9"
+version = "0.2.1"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.2.9",
+    "leann-core==0.2.1",
    "numpy",
    "pyzmq>=23.0.0",
    "msgpack>=1.0.0",
@@ -22,8 +22,6 @@ cmake.build-type = "Release"
 build.verbose = true
 build.tool-args = ["-j8"]

-# CMake definitions to optimize compilation and find Homebrew packages
+# CMake definitions to optimize compilation
 [tool.scikit-build.cmake.define]
 CMAKE_BUILD_PARALLEL_LEVEL = "8"
-CMAKE_PREFIX_PATH = {env = "CMAKE_PREFIX_PATH"}
-OpenMP_ROOT = {env = "OpenMP_ROOT"}
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann-core"
-version = "0.2.9"
+version = "0.2.1"
 description = "Core API and plugin system for LEANN"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -31,10 +31,8 @@ dependencies = [
    "PyPDF2>=3.0.0",
    "pymupdf>=1.23.0",
    "pdfplumber>=0.10.0",
-    "nbconvert>=7.0.0",  # For .ipynb file support
-    "gitignore-parser>=0.1.12",  # For proper .gitignore handling
-    "mlx>=0.26.3; sys_platform == 'darwin' and platform_machine == 'arm64'",
-    "mlx-lm>=0.26.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
+    "mlx>=0.26.3; sys_platform == 'darwin'",
+    "mlx-lm>=0.26.0; sys_platform == 'darwin'",
 ]

 [project.optional-dependencies]
@@ -10,7 +10,7 @@ import time
 import warnings
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np

@@ -33,7 +33,7 @@ def compute_embeddings(
    model_name: str,
    mode: str = "sentence-transformers",
    use_server: bool = True,
-    port: Optional[int] = None,
+    port: int | None = None,
    is_build=False,
 ) -> np.ndarray:
    """
@@ -157,12 +157,12 @@ class LeannBuilder:
        self,
        backend_name: str,
        embedding_model: str = "facebook/contriever",
-        dimensions: Optional[int] = None,
+        dimensions: int | None = None,
        embedding_mode: str = "sentence-transformers",
        **backend_kwargs,
    ):
        self.backend_name = backend_name
-        backend_factory: Optional[LeannBackendFactoryInterface] = BACKEND_REGISTRY.get(backend_name)
+        backend_factory: LeannBackendFactoryInterface | None = BACKEND_REGISTRY.get(backend_name)
        if backend_factory is None:
            raise ValueError(f"Backend '{backend_name}' not found or not registered.")
        self.backend_factory = backend_factory
@@ -242,7 +242,7 @@ class LeannBuilder:
        self.backend_kwargs = backend_kwargs
        self.chunks: list[dict[str, Any]] = []

-    def add_text(self, text: str, metadata: Optional[dict[str, Any]] = None):
+    def add_text(self, text: str, metadata: dict[str, Any] | None = None):
        if metadata is None:
            metadata = {}
        passage_id = metadata.get("id", str(len(self.chunks)))
@@ -554,7 +554,7 @@ class LeannSearcher:
        if "labels" in results and "distances" in results:
            logger.info(f"  Processing {len(results['labels'][0])} passage IDs:")
            for i, (string_id, dist) in enumerate(
-                zip(results["labels"][0], results["distances"][0])
+                zip(results["labels"][0], results["distances"][0], strict=False)
            ):
                try:
                    passage_data = self.passage_manager.get_passage(string_id)
@@ -592,7 +592,7 @@ class LeannChat:
    def __init__(
        self,
        index_path: str,
-        llm_config: Optional[dict[str, Any]] = None,
+        llm_config: dict[str, Any] | None = None,
        enable_warmup: bool = False,
        **kwargs,
    ):
@@ -608,7 +608,7 @@ class LeannChat:
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = True,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        llm_kwargs: Optional[dict[str, Any]] = None,
+        llm_kwargs: dict[str, Any] | None = None,
        expected_zmq_port: int = 5557,
        **search_kwargs,
    ):
@@ -8,7 +8,7 @@ import difflib
 import logging
 import os
 from abc import ABC, abstractmethod
-from typing import Any, Optional
+from typing import Any

 import torch

@@ -17,12 +17,12 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)


-def check_ollama_models(host: str) -> list[str]:
+def check_ollama_models() -> list[str]:
    """Check available Ollama models and return a list"""
    try:
        import requests

-        response = requests.get(f"{host}/api/tags", timeout=5)
+        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        if response.status_code == 200:
            data = response.json()
            return [model["name"] for model in data.get("models", [])]
@@ -309,12 +309,10 @@ def search_hf_models(query: str, limit: int = 10) -> list[str]:
    return search_hf_models_fuzzy(query, limit)


-def validate_model_and_suggest(
-    model_name: str, llm_type: str, host: str = "http://localhost:11434"
-) -> Optional[str]:
+def validate_model_and_suggest(model_name: str, llm_type: str) -> str | None:
    """Validate model name and provide suggestions if invalid"""
    if llm_type == "ollama":
-        available_models = check_ollama_models(host)
+        available_models = check_ollama_models()
        if available_models and model_name not in available_models:
            error_msg = f"Model '{model_name}' not found in your local Ollama installation."

@@ -471,7 +469,7 @@ class OllamaChat(LLMInterface):
                requests.get(host)

            # Pre-check model availability with helpful suggestions
-            model_error = validate_model_and_suggest(model, "ollama", host)
+            model_error = validate_model_and_suggest(model, "ollama")
            if model_error:
                raise ValueError(model_error)

@@ -685,7 +683,7 @@ class HFChat(LLMInterface):
 class OpenAIChat(LLMInterface):
    """LLM interface for OpenAI models."""

-    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
+    def __init__(self, model: str = "gpt-4o", api_key: str | None = None):
        self.model = model
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")

@@ -761,7 +759,7 @@ class SimulatedChat(LLMInterface):
        return "This is a simulated answer from the LLM based on the retrieved context."


-def get_llm(llm_config: Optional[dict[str, Any]] = None) -> LLMInterface:
+def get_llm(llm_config: dict[str, Any] | None = None) -> LLMInterface:
    """
    Factory function to get an LLM interface based on configuration.

@@ -1,11 +1,9 @@
 import argparse
 import asyncio
 from pathlib import Path
-from typing import Union

 from llama_index.core import SimpleDirectoryReader
 from llama_index.core.node_parser import SentenceSplitter
-from tqdm import tqdm

 from .api import LeannBuilder, LeannChat, LeannSearcher

@@ -76,14 +74,10 @@ class LeannCLI:
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
 Examples:
-  leann build my-docs --docs ./documents                                  # Build index from directory
-  leann build my-code --docs ./src ./tests ./config                      # Build index from multiple directories
-  leann build my-files --docs ./file1.py ./file2.txt ./docs/             # Build index from files and directories
-  leann build my-mixed --docs ./readme.md ./src/ ./config.json           # Build index from mixed files/dirs
-  leann build my-ppts --docs ./ --file-types .pptx,.pdf                  # Index only PowerPoint and PDF files
-  leann search my-docs "query"                                           # Search in my-docs index
-  leann ask my-docs "question"                                           # Ask my-docs index
-  leann list                                                             # List all stored indexes
+  leann build my-docs --docs ./documents    # Build index named my-docs
+  leann search my-docs "query"             # Search in my-docs index
+  leann ask my-docs "question"             # Ask my-docs index
+  leann list                              # List all stored indexes
            """,
        )

@@ -91,50 +85,20 @@ Examples:

        # Build command
        build_parser = subparsers.add_parser("build", help="Build document index")
+        build_parser.add_argument("index_name", help="Index name")
        build_parser.add_argument(
-            "index_name", nargs="?", help="Index name (default: current directory name)"
-        )
-        build_parser.add_argument(
-            "--docs",
-            type=str,
-            nargs="+",
-            default=["."],
-            help="Documents directories and/or files (default: current directory)",
+            "--docs", type=str, default=".", help="Documents directory (default: current directory)"
        )
        build_parser.add_argument(
            "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
        )
        build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
-        build_parser.add_argument(
-            "--embedding-mode",
-            type=str,
-            default="sentence-transformers",
-            choices=["sentence-transformers", "openai", "mlx", "ollama"],
-            help="Embedding backend mode (default: sentence-transformers)",
-        )
        build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
        build_parser.add_argument("--graph-degree", type=int, default=32)
        build_parser.add_argument("--complexity", type=int, default=64)
        build_parser.add_argument("--num-threads", type=int, default=1)
        build_parser.add_argument("--compact", action="store_true", default=True)
-        build_parser.add_argument(
-            "--no-compact",
-            dest="compact",
-            action="store_false",
-            help="Disable compact index storage (store full embeddings; higher storage)",
-        )
        build_parser.add_argument("--recompute", action="store_true", default=True)
-        build_parser.add_argument(
-            "--no-recompute",
-            dest="recompute",
-            action="store_false",
-            help="Disable embedding recomputation (store full embeddings; lower query latency)",
-        )
-        build_parser.add_argument(
-            "--file-types",
-            type=str,
-            help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). If not specified, uses default supported types.",
-        )

        # Search command
        search_parser = subparsers.add_parser("search", help="Search documents")
@@ -144,24 +108,7 @@ Examples:
        search_parser.add_argument("--complexity", type=int, default=64)
        search_parser.add_argument("--beam-width", type=int, default=1)
        search_parser.add_argument("--prune-ratio", type=float, default=0.0)
-        search_parser.add_argument(
-            "--recompute-embeddings",
-            action="store_true",
-            default=True,
-            help="Recompute embeddings (default: True)",
-        )
-        search_parser.add_argument(
-            "--no-recompute-embeddings",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Disable embedding recomputation during search",
-        )
-        search_parser.add_argument(
-            "--no-recompute",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Alias for --no-recompute-embeddings",
-        )
+        search_parser.add_argument("--recompute-embeddings", action="store_true")
        search_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
@@ -184,24 +131,7 @@ Examples:
        ask_parser.add_argument("--complexity", type=int, default=32)
        ask_parser.add_argument("--beam-width", type=int, default=1)
        ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
-        ask_parser.add_argument(
-            "--recompute-embeddings",
-            action="store_true",
-            default=True,
-            help="Recompute embeddings (default: True)",
-        )
-        ask_parser.add_argument(
-            "--no-recompute-embeddings",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Disable embedding recomputation during ask",
-        )
-        ask_parser.add_argument(
-            "--no-recompute",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Alias for --no-recompute-embeddings",
-        )
+        ask_parser.add_argument("--recompute-embeddings", action="store_true")
        ask_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
@@ -248,63 +178,6 @@ Examples:
        with open(global_registry, "w") as f:
            json.dump(projects, f, indent=2)

-    def _build_gitignore_parser(self, docs_dir: str):
-        """Build gitignore parser using gitignore-parser library."""
-        from gitignore_parser import parse_gitignore
-
-        # Try to parse the root .gitignore
-        gitignore_path = Path(docs_dir) / ".gitignore"
-
-        if gitignore_path.exists():
-            try:
-                # gitignore-parser automatically handles all subdirectory .gitignore files!
-                matches = parse_gitignore(str(gitignore_path))
-                print(f"📋 Loaded .gitignore from {docs_dir} (includes all subdirectories)")
-                return matches
-            except Exception as e:
-                print(f"Warning: Could not parse .gitignore: {e}")
-        else:
-            print("📋 No .gitignore found")
-
-        # Fallback: basic pattern matching for essential files
-        essential_patterns = {".git", ".DS_Store", "__pycache__", "node_modules", ".venv", "venv"}
-
-        def basic_matches(file_path):
-            path_parts = Path(file_path).parts
-            return any(part in essential_patterns for part in path_parts)
-
-        return basic_matches
-
-    def _should_exclude_file(self, relative_path: Path, gitignore_matches) -> bool:
-        """Check if a file should be excluded using gitignore parser."""
-        return gitignore_matches(str(relative_path))
-
-    def _is_git_submodule(self, path: Path) -> bool:
-        """Check if a path is a git submodule."""
-        try:
-            # Find the git repo root
-            current_dir = Path.cwd()
-            while current_dir != current_dir.parent:
-                if (current_dir / ".git").exists():
-                    gitmodules_path = current_dir / ".gitmodules"
-                    if gitmodules_path.exists():
-                        # Read .gitmodules to check if this path is a submodule
-                        gitmodules_content = gitmodules_path.read_text()
-                        # Convert path to relative to git root
-                        try:
-                            relative_path = path.resolve().relative_to(current_dir)
-                            # Check if this path appears in .gitmodules
-                            return f"path = {relative_path}" in gitmodules_content
-                        except ValueError:
-                            # Path is not under git root
-                            return False
-                    break
-                current_dir = current_dir.parent
-            return False
-        except Exception:
-            # If anything goes wrong, assume it's not a submodule
-            return False
-
    def list_indexes(self):
        print("Stored LEANN indexes:")

@@ -334,9 +207,7 @@ Examples:
            valid_projects.append(current_path)

        if not valid_projects:
-            print(
-                "No indexes found. Use 'leann build <name> --docs <dir> [<dir2> ...]' to create one."
-            )
+            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
            return

        total_indexes = 0
@@ -383,249 +254,99 @@ Examples:
                    print(f'  leann search {example_name} "your query"')
                    print(f"  leann ask {example_name} --interactive")

-    def load_documents(
-        self, docs_paths: Union[str, list], custom_file_types: Union[str, None] = None
-    ):
-        # Handle both single path (string) and multiple paths (list) for backward compatibility
-        if isinstance(docs_paths, str):
-            docs_paths = [docs_paths]
+    def load_documents(self, docs_dir: str):
+        print(f"Loading documents from {docs_dir}...")

-        # Separate files and directories
-        files = []
-        directories = []
-        for path in docs_paths:
-            path_obj = Path(path)
-            if path_obj.is_file():
-                files.append(str(path_obj))
-            elif path_obj.is_dir():
-                # Check if this is a git submodule - if so, skip it
-                if self._is_git_submodule(path_obj):
-                    print(f"⚠️  Skipping git submodule: {path}")
-                    continue
-                directories.append(str(path_obj))
+        # Try to use better PDF parsers first
+        documents = []
+        docs_path = Path(docs_dir)
+
+        for file_path in docs_path.rglob("*.pdf"):
+            print(f"Processing PDF: {file_path}")
+
+            # Try PyMuPDF first (best quality)
+            text = extract_pdf_text_with_pymupdf(str(file_path))
+            if text is None:
+                # Try pdfplumber
+                text = extract_pdf_text_with_pdfplumber(str(file_path))
+
+            if text:
+                # Create a simple document structure
+                from llama_index.core import Document
+
+                doc = Document(text=text, metadata={"source": str(file_path)})
+                documents.append(doc)
            else:
-                print(f"⚠️  Warning: Path '{path}' does not exist, skipping...")
-                continue
-
-        # Print summary of what we're processing
-        total_items = len(files) + len(directories)
-        items_desc = []
-        if files:
-            items_desc.append(f"{len(files)} file{'s' if len(files) > 1 else ''}")
-        if directories:
-            items_desc.append(
-                f"{len(directories)} director{'ies' if len(directories) > 1 else 'y'}"
-            )
-
-        print(f"Loading documents from {' and '.join(items_desc)} ({total_items} total):")
-        if files:
-            print(f"  📄 Files: {', '.join([Path(f).name for f in files])}")
-        if directories:
-            print(f"  📁 Directories: {', '.join(directories)}")
-
-        if custom_file_types:
-            print(f"Using custom file types: {custom_file_types}")
-
-        all_documents = []
-
-        # First, process individual files if any
-        if files:
-            print(f"\n🔄 Processing {len(files)} individual file{'s' if len(files) > 1 else ''}...")
-
-            # Load individual files using SimpleDirectoryReader with input_files
-            # Note: We skip gitignore filtering for explicitly specified files
-            try:
-                # Group files by their parent directory for efficient loading
-                from collections import defaultdict
-
-                files_by_dir = defaultdict(list)
-                for file_path in files:
-                    parent_dir = str(Path(file_path).parent)
-                    files_by_dir[parent_dir].append(file_path)
-
-                # Load files from each parent directory
-                for parent_dir, file_list in files_by_dir.items():
-                    print(
-                        f"  Loading {len(file_list)} file{'s' if len(file_list) > 1 else ''} from {parent_dir}"
-                    )
-                    try:
-                        file_docs = SimpleDirectoryReader(
-                            parent_dir,
-                            input_files=file_list,
-                            filename_as_id=True,
-                        ).load_data()
-                        all_documents.extend(file_docs)
-                        print(
-                            f"    ✅ Loaded {len(file_docs)} document{'s' if len(file_docs) > 1 else ''}"
-                        )
-                    except Exception as e:
-                        print(f"    ❌ Warning: Could not load files from {parent_dir}: {e}")
-
-            except Exception as e:
-                print(f"❌ Error processing individual files: {e}")
-
-        # Define file extensions to process
-        if custom_file_types:
-            # Parse custom file types from comma-separated string
-            code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
-            # Ensure extensions start with a dot
-            code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions]
-        else:
-            # Use default supported file types
-            code_extensions = [
-                # Original document types
-                ".txt",
-                ".md",
-                ".docx",
-                ".pptx",
-                # Code files for Claude Code integration
-                ".py",
-                ".js",
-                ".ts",
-                ".jsx",
-                ".tsx",
-                ".java",
-                ".cpp",
-                ".c",
-                ".h",
-                ".hpp",
-                ".cs",
-                ".go",
-                ".rs",
-                ".rb",
-                ".php",
-                ".swift",
-                ".kt",
-                ".scala",
-                ".r",
-                ".sql",
-                ".sh",
-                ".bash",
-                ".zsh",
-                ".fish",
-                ".ps1",
-                ".bat",
-                # Config and markup files
-                ".json",
-                ".yaml",
-                ".yml",
-                ".xml",
-                ".toml",
-                ".ini",
-                ".cfg",
-                ".conf",
-                ".html",
-                ".css",
-                ".scss",
-                ".less",
-                ".vue",
-                ".svelte",
-                # Data science
-                ".ipynb",
-                ".R",
-                ".py",
-                ".jl",
-            ]
-
-        # Process each directory
-        if directories:
-            print(
-                f"\n🔄 Processing {len(directories)} director{'ies' if len(directories) > 1 else 'y'}..."
-            )
-
-        for docs_dir in directories:
-            print(f"Processing directory: {docs_dir}")
-            # Build gitignore parser for each directory
-            gitignore_matches = self._build_gitignore_parser(docs_dir)
-
-            # Try to use better PDF parsers first, but only if PDFs are requested
-            documents = []
-            docs_path = Path(docs_dir)
-
-            # Check if we should process PDFs
-            should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types
-
-            if should_process_pdfs:
-                for file_path in docs_path.rglob("*.pdf"):
-                    # Check if file matches any exclude pattern
-                    try:
-                        relative_path = file_path.relative_to(docs_path)
-                        if self._should_exclude_file(relative_path, gitignore_matches):
-                            continue
-                    except ValueError:
-                        # Skip files that can't be made relative to docs_path
-                        print(f"⚠️  Skipping file outside directory scope: {file_path}")
-                        continue
-
-                    print(f"Processing PDF: {file_path}")
-
-                    # Try PyMuPDF first (best quality)
-                    text = extract_pdf_text_with_pymupdf(str(file_path))
-                    if text is None:
-                        # Try pdfplumber
-                        text = extract_pdf_text_with_pdfplumber(str(file_path))
-
-                    if text:
-                        # Create a simple document structure
-                        from llama_index.core import Document
-
-                        doc = Document(text=text, metadata={"source": str(file_path)})
-                        documents.append(doc)
-                    else:
-                        # Fallback to default reader
-                        print(f"Using default reader for {file_path}")
-                        try:
-                            default_docs = SimpleDirectoryReader(
-                                str(file_path.parent),
-                                filename_as_id=True,
-                                required_exts=[file_path.suffix],
-                            ).load_data()
-                            documents.extend(default_docs)
-                        except Exception as e:
-                            print(f"Warning: Could not process {file_path}: {e}")
-
-            # Load other file types with default reader
-            try:
-                # Create a custom file filter function using our PathSpec
-                def file_filter(
-                    file_path: str, docs_dir=docs_dir, gitignore_matches=gitignore_matches
-                ) -> bool:
-                    """Return True if file should be included (not excluded)"""
-                    try:
-                        docs_path_obj = Path(docs_dir)
-                        file_path_obj = Path(file_path)
-                        relative_path = file_path_obj.relative_to(docs_path_obj)
-                        return not self._should_exclude_file(relative_path, gitignore_matches)
-                    except (ValueError, OSError):
-                        return True  # Include files that can't be processed
-
-                other_docs = SimpleDirectoryReader(
-                    docs_dir,
-                    recursive=True,
-                    encoding="utf-8",
-                    required_exts=code_extensions,
-                    file_extractor={},  # Use default extractors
+                # Fallback to default reader
+                print(f"Using default reader for {file_path}")
+                default_docs = SimpleDirectoryReader(
+                    str(file_path.parent),
                    filename_as_id=True,
-                ).load_data(show_progress=True)
+                    required_exts=[file_path.suffix],
+                ).load_data()
+                documents.extend(default_docs)

-                # Filter documents after loading based on gitignore rules
-                filtered_docs = []
-                for doc in other_docs:
-                    file_path = doc.metadata.get("file_path", "")
-                    if file_filter(file_path):
-                        filtered_docs.append(doc)
-
-                documents.extend(filtered_docs)
-            except ValueError as e:
-                if "No files found" in str(e):
-                    print(f"No additional files found for other supported types in {docs_dir}.")
-                else:
-                    raise e
-
-            all_documents.extend(documents)
-            print(f"Loaded {len(documents)} documents from {docs_dir}")
-
-        documents = all_documents
+        # Load other file types with default reader
+        code_extensions = [
+            # Original document types
+            ".txt",
+            ".md",
+            ".docx",
+            # Code files for Claude Code integration
+            ".py",
+            ".js",
+            ".ts",
+            ".jsx",
+            ".tsx",
+            ".java",
+            ".cpp",
+            ".c",
+            ".h",
+            ".hpp",
+            ".cs",
+            ".go",
+            ".rs",
+            ".rb",
+            ".php",
+            ".swift",
+            ".kt",
+            ".scala",
+            ".r",
+            ".sql",
+            ".sh",
+            ".bash",
+            ".zsh",
+            ".fish",
+            ".ps1",
+            ".bat",
+            # Config and markup files
+            ".json",
+            ".yaml",
+            ".yml",
+            ".xml",
+            ".toml",
+            ".ini",
+            ".cfg",
+            ".conf",
+            ".html",
+            ".css",
+            ".scss",
+            ".less",
+            ".vue",
+            ".svelte",
+            # Data science
+            ".ipynb",
+            ".R",
+            ".py",
+            ".jl",
+        ]
+        other_docs = SimpleDirectoryReader(
+            docs_dir,
+            recursive=True,
+            encoding="utf-8",
+            required_exts=code_extensions,
+        ).load_data(show_progress=True)
+        documents.extend(other_docs)

        all_texts = []

@@ -676,9 +397,7 @@ Examples:
            ".jl",
        }

-        print("start chunking documents")
-        # Add progress bar for document chunking
-        for doc in tqdm(documents, desc="Chunking documents", unit="doc"):
+        for doc in documents:
            # Check if this is a code file based on source path
            source_path = doc.metadata.get("source", "")
            is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)
@@ -694,36 +413,18 @@ Examples:
        return all_texts

    async def build_index(self, args):
-        docs_paths = args.docs
-        # Use current directory name if index_name not provided
-        if args.index_name:
-            index_name = args.index_name
-        else:
-            index_name = Path.cwd().name
-            print(f"Using current directory name as index: '{index_name}'")
-
+        docs_dir = args.docs
+        index_name = args.index_name
        index_dir = self.indexes_dir / index_name
        index_path = self.get_index_path(index_name)

-        # Display all paths being indexed with file/directory distinction
-        files = [p for p in docs_paths if Path(p).is_file()]
-        directories = [p for p in docs_paths if Path(p).is_dir()]
-
-        print(f"📂 Indexing {len(docs_paths)} path{'s' if len(docs_paths) > 1 else ''}:")
-        if files:
-            print(f"  📄 Files ({len(files)}):")
-            for i, file_path in enumerate(files, 1):
-                print(f"    {i}. {Path(file_path).resolve()}")
-        if directories:
-            print(f"  📁 Directories ({len(directories)}):")
-            for i, dir_path in enumerate(directories, 1):
-                print(f"    {i}. {Path(dir_path).resolve()}")
+        print(f"📂 Indexing: {Path(docs_dir).resolve()}")

        if index_dir.exists() and not args.force:
            print(f"Index '{index_name}' already exists. Use --force to rebuild.")
            return

-        all_texts = self.load_documents(docs_paths, args.file_types)
+        all_texts = self.load_documents(docs_dir)
        if not all_texts:
            print("No documents found")
            return
@@ -735,7 +436,6 @@ Examples:
        builder = LeannBuilder(
            backend_name=args.backend,
            embedding_model=args.embedding_model,
-            embedding_mode=args.embedding_mode,
            graph_degree=args.graph_degree,
            complexity=args.complexity,
            is_compact=args.compact,
@@ -759,7 +459,7 @@ Examples:

        if not self.index_exists(index_name):
            print(
-                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
+                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
            )
            return

@@ -786,7 +486,7 @@ Examples:

        if not self.index_exists(index_name):
            print(
-                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
+                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
            )
            return

@@ -35,7 +35,7 @@ def compute_embeddings(
    Args:
        texts: List of texts to compute embeddings for
        model_name: Model name
-        mode: Computation mode ('sentence-transformers', 'openai', 'mlx', 'ollama')
+        mode: Computation mode ('sentence-transformers', 'openai', 'mlx')
        is_build: Whether this is a build operation (shows progress bar)
        batch_size: Batch size for processing
        adaptive_optimization: Whether to use adaptive optimization based on batch size
@@ -55,8 +55,6 @@ def compute_embeddings(
        return compute_embeddings_openai(texts, model_name)
    elif mode == "mlx":
        return compute_embeddings_mlx(texts, model_name)
-    elif mode == "ollama":
-        return compute_embeddings_ollama(texts, model_name, is_build=is_build)
    else:
        raise ValueError(f"Unsupported embedding mode: {mode}")

@@ -367,286 +365,3 @@ def compute_embeddings_mlx(chunks: list[str], model_name: str, batch_size: int =

    # Stack numpy arrays
    return np.stack(all_embeddings)
-
-
-def compute_embeddings_ollama(
-    texts: list[str], model_name: str, is_build: bool = False, host: str = "http://localhost:11434"
-) -> np.ndarray:
-    """
-    Compute embeddings using Ollama API with simplified batch processing.
-
-    Uses batch size of 32 for MPS/CPU and 128 for CUDA to optimize performance.
-
-    Args:
-        texts: List of texts to compute embeddings for
-        model_name: Ollama model name (e.g., "nomic-embed-text", "mxbai-embed-large")
-        is_build: Whether this is a build operation (shows progress bar)
-        host: Ollama host URL (default: http://localhost:11434)
-
-    Returns:
-        Normalized embeddings array, shape: (len(texts), embedding_dim)
-    """
-    try:
-        import requests
-    except ImportError:
-        raise ImportError(
-            "The 'requests' library is required for Ollama embeddings. Install with: uv pip install requests"
-        )
-
-    if not texts:
-        raise ValueError("Cannot compute embeddings for empty text list")
-
-    logger.info(
-        f"Computing embeddings for {len(texts)} texts using Ollama API, model: '{model_name}'"
-    )
-
-    # Check if Ollama is running
-    try:
-        response = requests.get(f"{host}/api/version", timeout=5)
-        response.raise_for_status()
-    except requests.exceptions.ConnectionError:
-        error_msg = (
-            f"❌ Could not connect to Ollama at {host}.\n\n"
-            "Please ensure Ollama is running:\n"
-            "  • macOS/Linux: ollama serve\n"
-            "  • Windows: Make sure Ollama is running in the system tray\n\n"
-            "Installation: https://ollama.com/download"
-        )
-        raise RuntimeError(error_msg)
-    except Exception as e:
-        raise RuntimeError(f"Unexpected error connecting to Ollama: {e}")
-
-    # Check if model exists and provide helpful suggestions
-    try:
-        response = requests.get(f"{host}/api/tags", timeout=5)
-        response.raise_for_status()
-        models = response.json()
-        model_names = [model["name"] for model in models.get("models", [])]
-
-        # Filter for embedding models (models that support embeddings)
-        embedding_models = []
-        suggested_embedding_models = [
-            "nomic-embed-text",
-            "mxbai-embed-large",
-            "bge-m3",
-            "all-minilm",
-            "snowflake-arctic-embed",
-        ]
-
-        for model in model_names:
-            # Check if it's an embedding model (by name patterns or known models)
-            base_name = model.split(":")[0]
-            if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5"]):
-                embedding_models.append(model)
-
-        # Check if model exists (handle versioned names) and resolve to full name
-        resolved_model_name = None
-        for name in model_names:
-            # Exact match
-            if model_name == name:
-                resolved_model_name = name
-                break
-            # Match without version tag (use the versioned name)
-            elif model_name == name.split(":")[0]:
-                resolved_model_name = name
-                break
-
-        if not resolved_model_name:
-            error_msg = f"❌ Model '{model_name}' not found in local Ollama.\n\n"
-
-            # Suggest pulling the model
-            error_msg += "📦 To install this embedding model:\n"
-            error_msg += f"   ollama pull {model_name}\n\n"
-
-            # Show available embedding models
-            if embedding_models:
-                error_msg += "✅ Available embedding models:\n"
-                for model in embedding_models[:5]:
-                    error_msg += f"   • {model}\n"
-                if len(embedding_models) > 5:
-                    error_msg += f"   ... and {len(embedding_models) - 5} more\n"
-            else:
-                error_msg += "💡 Popular embedding models to install:\n"
-                for model in suggested_embedding_models[:3]:
-                    error_msg += f"   • ollama pull {model}\n"
-
-            error_msg += "\n📚 Browse more: https://ollama.com/library"
-            raise ValueError(error_msg)
-
-        # Use the resolved model name for all subsequent operations
-        if resolved_model_name != model_name:
-            logger.info(f"Resolved model name '{model_name}' to '{resolved_model_name}'")
-        model_name = resolved_model_name
-
-        # Verify the model supports embeddings by testing it
-        try:
-            test_response = requests.post(
-                f"{host}/api/embeddings", json={"model": model_name, "prompt": "test"}, timeout=10
-            )
-            if test_response.status_code != 200:
-                error_msg = (
-                    f"⚠️ Model '{model_name}' exists but may not support embeddings.\n\n"
-                    f"Please use an embedding model like:\n"
-                )
-                for model in suggested_embedding_models[:3]:
-                    error_msg += f"   • {model}\n"
-                raise ValueError(error_msg)
-        except requests.exceptions.RequestException:
-            # If test fails, continue anyway - model might still work
-            pass
-
-    except requests.exceptions.RequestException as e:
-        logger.warning(f"Could not verify model existence: {e}")
-
-    # Determine batch size based on device availability
-    # Check for CUDA/MPS availability using torch if available
-    batch_size = 32  # Default for MPS/CPU
-    try:
-        import torch
-
-        if torch.cuda.is_available():
-            batch_size = 128  # CUDA gets larger batch size
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            batch_size = 32  # MPS gets smaller batch size
-    except ImportError:
-        # If torch is not available, use conservative batch size
-        batch_size = 32
-
-    logger.info(f"Using batch size: {batch_size}")
-
-    def get_batch_embeddings(batch_texts):
-        """Get embeddings for a batch of texts."""
-        all_embeddings = []
-        failed_indices = []
-
-        for i, text in enumerate(batch_texts):
-            max_retries = 3
-            retry_count = 0
-
-            # Truncate very long texts to avoid API issues
-            truncated_text = text[:8000] if len(text) > 8000 else text
-            while retry_count < max_retries:
-                try:
-                    response = requests.post(
-                        f"{host}/api/embeddings",
-                        json={"model": model_name, "prompt": truncated_text},
-                        timeout=30,
-                    )
-                    response.raise_for_status()
-
-                    result = response.json()
-                    embedding = result.get("embedding")
-
-                    if embedding is None:
-                        raise ValueError(f"No embedding returned for text {i}")
-
-                    if not isinstance(embedding, list) or len(embedding) == 0:
-                        raise ValueError(f"Invalid embedding format for text {i}")
-
-                    all_embeddings.append(embedding)
-                    break
-
-                except requests.exceptions.Timeout:
-                    retry_count += 1
-                    if retry_count >= max_retries:
-                        logger.warning(f"Timeout for text {i} after {max_retries} retries")
-                        failed_indices.append(i)
-                        all_embeddings.append(None)
-                        break
-
-                except Exception as e:
-                    retry_count += 1
-                    if retry_count >= max_retries:
-                        logger.error(f"Failed to get embedding for text {i}: {e}")
-                        failed_indices.append(i)
-                        all_embeddings.append(None)
-                        break
-        return all_embeddings, failed_indices
-
-    # Process texts in batches
-    all_embeddings = []
-    all_failed_indices = []
-
-    # Setup progress bar if needed
-    show_progress = is_build or len(texts) > 10
-    try:
-        if show_progress:
-            from tqdm import tqdm
-    except ImportError:
-        show_progress = False
-
-    # Process batches
-    num_batches = (len(texts) + batch_size - 1) // batch_size
-
-    if show_progress:
-        batch_iterator = tqdm(range(num_batches), desc="Computing Ollama embeddings")
-    else:
-        batch_iterator = range(num_batches)
-
-    for batch_idx in batch_iterator:
-        start_idx = batch_idx * batch_size
-        end_idx = min(start_idx + batch_size, len(texts))
-        batch_texts = texts[start_idx:end_idx]
-
-        batch_embeddings, batch_failed = get_batch_embeddings(batch_texts)
-
-        # Adjust failed indices to global indices
-        global_failed = [start_idx + idx for idx in batch_failed]
-        all_failed_indices.extend(global_failed)
-        all_embeddings.extend(batch_embeddings)
-
-    # Handle failed embeddings
-    if all_failed_indices:
-        if len(all_failed_indices) == len(texts):
-            raise RuntimeError("Failed to compute any embeddings")
-
-        logger.warning(
-            f"Failed to compute embeddings for {len(all_failed_indices)}/{len(texts)} texts"
-        )
-
-        # Use zero embeddings as fallback for failed ones
-        valid_embedding = next((e for e in all_embeddings if e is not None), None)
-        if valid_embedding:
-            embedding_dim = len(valid_embedding)
-            for i, embedding in enumerate(all_embeddings):
-                if embedding is None:
-                    all_embeddings[i] = [0.0] * embedding_dim
-
-    # Remove None values
-    all_embeddings = [e for e in all_embeddings if e is not None]
-
-    if not all_embeddings:
-        raise RuntimeError("No valid embeddings were computed")
-
-    # Validate embedding dimensions
-    expected_dim = len(all_embeddings[0])
-    inconsistent_dims = []
-    for i, embedding in enumerate(all_embeddings):
-        if len(embedding) != expected_dim:
-            inconsistent_dims.append((i, len(embedding)))
-
-    if inconsistent_dims:
-        error_msg = f"Ollama returned inconsistent embedding dimensions. Expected {expected_dim}, but got:\n"
-        for idx, dim in inconsistent_dims[:10]:  # Show first 10 inconsistent ones
-            error_msg += f"  - Text {idx}: {dim} dimensions\n"
-        if len(inconsistent_dims) > 10:
-            error_msg += f"  ... and {len(inconsistent_dims) - 10} more\n"
-        error_msg += f"\nThis is likely an Ollama API bug with model '{model_name}'. Please try:\n"
-        error_msg += "1. Restart Ollama service: 'ollama serve'\n"
-        error_msg += f"2. Re-pull the model: 'ollama pull {model_name}'\n"
-        error_msg += (
-            "3. Use sentence-transformers instead: --embedding-mode sentence-transformers\n"
-        )
-        error_msg += "4. Report this issue to Ollama: https://github.com/ollama/ollama/issues"
-        raise ValueError(error_msg)
-
-    # Convert to numpy array and normalize
-    embeddings = np.array(all_embeddings, dtype=np.float32)
-
-    # Normalize embeddings (L2 normalization)
-    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
-    embeddings = embeddings / (norms + 1e-8)  # Add small epsilon to avoid division by zero
-
-    logger.info(f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}")
-
-    return embeddings
@@ -6,7 +6,6 @@ import subprocess
 import sys
 import time
 from pathlib import Path
-from typing import Optional

 import psutil

@@ -183,8 +182,8 @@ class EmbeddingServerManager:
                                       e.g., "leann_backend_diskann.embedding_server"
        """
        self.backend_module_name = backend_module_name
-        self.server_process: Optional[subprocess.Popen] = None
-        self.server_port: Optional[int] = None
+        self.server_process: subprocess.Popen | None = None
+        self.server_port: int | None = None
        self._atexit_registered = False

    def start_server(
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Literal, Union
+from typing import Any, Literal

 import numpy as np

@@ -34,9 +34,7 @@ class LeannBackendSearcherInterface(ABC):
        pass

    @abstractmethod
-    def _ensure_server_running(
-        self, passages_source_file: str, port: Union[int, None], **kwargs
-    ) -> int:
+    def _ensure_server_running(self, passages_source_file: str, port: int | None, **kwargs) -> int:
        """Ensure server is running"""
        pass

@@ -50,7 +48,7 @@ class LeannBackendSearcherInterface(ABC):
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = False,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        zmq_port: Union[int, None] = None,
+        zmq_port: int | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """Search for nearest neighbors
@@ -76,7 +74,7 @@ class LeannBackendSearcherInterface(ABC):
        self,
        query: str,
        use_server_if_available: bool = True,
-        zmq_port: Union[int, None] = None,
+        zmq_port: int | None = None,
    ) -> np.ndarray:
        """Compute embedding for a query string

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3

 import json
+import os
 import subprocess
 import sys

@@ -25,61 +26,32 @@ def handle_request(request):
                "tools": [
                    {
                        "name": "leann_search",
-                        "description": """🔍 Search code using natural language - like having a coding assistant who knows your entire codebase!
-
-🎯 **Perfect for**:
-- "How does authentication work?" → finds auth-related code
-- "Error handling patterns" → locates try-catch blocks and error logic
-- "Database connection setup" → finds DB initialization code
-- "API endpoint definitions" → locates route handlers
-- "Configuration management" → finds config files and usage
-
-💡 **Pro tip**: Use this before making any changes to understand existing patterns and conventions.""",
+                        "description": "Search LEANN index",
                        "inputSchema": {
                            "type": "object",
                            "properties": {
-                                "index_name": {
-                                    "type": "string",
-                                    "description": "Name of the LEANN index to search. Use 'leann_list' first to see available indexes.",
-                                },
-                                "query": {
-                                    "type": "string",
-                                    "description": "Search query - can be natural language (e.g., 'how to handle errors') or technical terms (e.g., 'async function definition')",
-                                },
-                                "top_k": {
-                                    "type": "integer",
-                                    "default": 5,
-                                    "minimum": 1,
-                                    "maximum": 20,
-                                    "description": "Number of search results to return. Use 5-10 for focused results, 15-20 for comprehensive exploration.",
-                                },
-                                "complexity": {
-                                    "type": "integer",
-                                    "default": 32,
-                                    "minimum": 16,
-                                    "maximum": 128,
-                                    "description": "Search complexity level. Use 16-32 for fast searches (recommended), 64+ for higher precision when needed.",
-                                },
+                                "index_name": {"type": "string"},
+                                "query": {"type": "string"},
+                                "top_k": {"type": "integer", "default": 5},
                            },
                            "required": ["index_name", "query"],
                        },
                    },
                    {
-                        "name": "leann_status",
-                        "description": "📊 Check the health and stats of your code indexes - like a medical checkup for your codebase knowledge!",
+                        "name": "leann_ask",
+                        "description": "Ask question using LEANN RAG",
                        "inputSchema": {
                            "type": "object",
                            "properties": {
-                                "index_name": {
-                                    "type": "string",
-                                    "description": "Optional: Name of specific index to check. If not provided, shows status of all indexes.",
-                                }
+                                "index_name": {"type": "string"},
+                                "question": {"type": "string"},
                            },
+                            "required": ["index_name", "question"],
                        },
                    },
                    {
                        "name": "leann_list",
-                        "description": "📋 Show all your indexed codebases - your personal code library! Use this to see what's available for search.",
+                        "description": "List all LEANN indexes",
                        "inputSchema": {"type": "object", "properties": {}},
                    },
                ]
@@ -90,46 +62,32 @@ def handle_request(request):
        tool_name = request["params"]["name"]
        args = request["params"].get("arguments", {})

+        # Set working directory and environment
+        env = os.environ.copy()
+        cwd = "/Users/andyl/Projects/LEANN-RAG"
+
        try:
            if tool_name == "leann_search":
-                # Validate required parameters
-                if not args.get("index_name") or not args.get("query"):
-                    return {
-                        "jsonrpc": "2.0",
-                        "id": request.get("id"),
-                        "result": {
-                            "content": [
-                                {
-                                    "type": "text",
-                                    "text": "Error: Both index_name and query are required",
-                                }
-                            ]
-                        },
-                    }
-
-                # Build simplified command
                cmd = [
                    "leann",
                    "search",
                    args["index_name"],
                    args["query"],
+                    "--recompute-embeddings",
                    f"--top-k={args.get('top_k', 5)}",
-                    f"--complexity={args.get('complexity', 32)}",
                ]
+                result = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd, env=env)

-                result = subprocess.run(cmd, capture_output=True, text=True)
-
-            elif tool_name == "leann_status":
-                if args.get("index_name"):
-                    # Check specific index status - for now, we'll use leann list and filter
-                    result = subprocess.run(["leann", "list"], capture_output=True, text=True)
-                    # We could enhance this to show more detailed status per index
-                else:
-                    # Show all indexes status
-                    result = subprocess.run(["leann", "list"], capture_output=True, text=True)
+            elif tool_name == "leann_ask":
+                cmd = f'echo "{args["question"]}" | leann ask {args["index_name"]} --recompute-embeddings --llm ollama --model qwen3:8b'
+                result = subprocess.run(
+                    cmd, shell=True, capture_output=True, text=True, cwd=cwd, env=env
+                )

            elif tool_name == "leann_list":
-                result = subprocess.run(["leann", "list"], capture_output=True, text=True)
+                result = subprocess.run(
+                    ["leann", "list"], capture_output=True, text=True, cwd=cwd, env=env
+                )

            return {
                "jsonrpc": "2.0",
@@ -1,7 +1,7 @@
 import json
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Any, Literal, Optional
+from typing import Any, Literal

 import numpy as np

@@ -169,7 +169,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
        prune_ratio: float = 0.0,
        recompute_embeddings: bool = False,
        pruning_strategy: Literal["global", "local", "proportional"] = "global",
-        zmq_port: Optional[int] = None,
+        zmq_port: int | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """
@@ -1,17 +1,18 @@
-# 🔥 LEANN Claude Code Integration
+# LEANN Claude Code Integration

-Transform your development workflow with intelligent code assistance using LEANN's semantic search directly in Claude Code.
+Intelligent code assistance using LEANN's vector search directly in Claude Code.

 ## Prerequisites

-Install LEANN globally for MCP integration (with default backend):
+First, install LEANN CLI globally:

 ```bash
-uv tool install leann-core --with leann
+uv tool install leann
 ```
-This installs the `leann` CLI into an isolated tool environment and includes both backends so `leann build` works out-of-the-box.

-## 🚀 Quick Setup
+This makes the `leann` command available system-wide, which `leann_mcp` requires.
+
+## Quick Setup

 Add the LEANN MCP server to Claude Code:

@@ -19,61 +20,23 @@ Add the LEANN MCP server to Claude Code:
 claude mcp add leann-server -- leann_mcp
 ```

-## 🛠️ Available Tools
+## Available Tools

-Once connected, you'll have access to these powerful semantic search tools in Claude Code:
+- **`leann_list`** - List available indexes across all projects
+- **`leann_search`** - Search code and documents with semantic queries
+- **`leann_ask`** - Ask questions and get AI-powered answers from your codebase

-- **`leann_list`** - List all available indexes across your projects
-- **`leann_search`** - Perform semantic searches across code and documents
-- **`leann_ask`** - Ask natural language questions and get AI-powered answers from your codebase
-
-## 🎯 Quick Start Example
+## Quick Start

 ```bash
-# Build an index for your project (change to your actual path)
-leann build my-project --docs ./
+# Build an index for your project
+leann build my-project

 # Start Claude Code
 claude
 ```

-## 🚀 Advanced Usage Examples
-
-### Index Entire Git Repository
-```bash
-# Index all tracked files in your git repository, note right now we will skip submodules, but we can add it back easily if you want
-leann build my-repo --docs $(git ls-files) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Index only specific file types from git
-leann build my-python-code --docs $(git ls-files "*.py") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-```
-
-### Multiple Directories and Files
-```bash
-# Index multiple directories
-leann build my-codebase --docs ./src ./tests ./docs ./config --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Mix files and directories
-leann build my-project --docs ./README.md ./src/ ./package.json ./docs/ --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Specific files only
-leann build my-configs --docs ./tsconfig.json ./package.json ./webpack.config.js --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-```
-
-### Advanced Git Integration
-```bash
-# Index recently modified files
-leann build recent-changes --docs $(git diff --name-only HEAD~10..HEAD) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Index files matching pattern
-leann build frontend --docs $(git ls-files "*.tsx" "*.ts" "*.jsx" "*.js") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Index documentation and config files
-leann build docs-and-configs --docs $(git ls-files "*.md" "*.yml" "*.yaml" "*.json" "*.toml") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-```
-
-
-**Try this in Claude Code:**
+Then in Claude Code:
 ```
 Help me understand this codebase. List available indexes and search for authentication patterns.
 ```
@@ -83,37 +46,24 @@ Help me understand this codebase. List available indexes and search for authenti
 </p>


-## 🧠 How It Works
+## How It Works

-The integration consists of three key components working seamlessly together:
-
-- **`leann`** - Core CLI tool for indexing and searching (installed globally via `uv tool install`)
+- **`leann`** - Core CLI tool for indexing and searching (installed globally)
 - **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration
-- **Claude Code** - Calls `leann_mcp`, which executes `leann` commands and returns intelligent results
+- Claude Code calls `leann_mcp`, which executes `leann` commands and returns results

-## 📁 File Support
+## File Support

-LEANN understands **30+ file types** including:
-- **Programming**: Python, JavaScript, TypeScript, Java, Go, Rust, C++, C#
-- **Data**: SQL, YAML, JSON, CSV, XML
-- **Documentation**: Markdown, TXT, PDF
-- **And many more!**
+Python, JavaScript, TypeScript, Java, Go, Rust, SQL, YAML, JSON, and 30+ more file types.

-## 💾 Storage & Organization
+## Storage

-- **Project indexes**: Stored in `.leann/` directory (just like `.git`)
-- **Global registry**: Project tracking at `~/.leann/projects.json`
-- **Multi-project support**: Switch between different codebases seamlessly
-- **Portable**: Transfer indexes between machines with minimal overhead
+- Project indexes in `.leann/` directory (like `.git`)
+- Global project registry at `~/.leann/projects.json`
+- Multi-project support built-in

-## 🗑️ Uninstalling
-
-To remove the LEANN MCP server from Claude Code:
+## Removing

 ```bash
 claude mcp remove leann-server
 ```
-To remove LEANN
-```
-uv pip uninstall leann leann-backend-hnsw leann-core
-```
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann"
-version = "0.2.9"
+version = "0.2.1"
 description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -32,7 +32,7 @@ dependencies = [
    "pypdfium2>=4.30.0",
    # LlamaIndex core and readers - updated versions
    "llama-index>=0.12.44",
-    "llama-index-readers-file>=0.4.0", # Essential for PDF parsing
+    "llama-index-readers-file>=0.4.0",  # Essential for PDF parsing
    # "llama-index-readers-docling",  # Requires Python >= 3.10
    # "llama-index-node-parser-docling",  # Requires Python >= 3.10
    "llama-index-vector-stores-faiss>=0.4.0",
@@ -40,12 +40,9 @@ dependencies = [
    # Other dependencies
    "ipykernel==6.29.5",
    "msgpack>=1.1.1",
-    "mlx>=0.26.3; sys_platform == 'darwin' and platform_machine == 'arm64'",
-    "mlx-lm>=0.26.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
+    "mlx>=0.26.3; sys_platform == 'darwin'",
+    "mlx-lm>=0.26.0; sys_platform == 'darwin'",
    "psutil>=5.8.0",
-    "pathspec>=0.12.1",
-    "nbconvert>=7.16.6",
-    "gitignore-parser>=0.1.12",
 ]

 [project.optional-dependencies]
@@ -91,7 +88,7 @@ leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = tr
 leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }

 [tool.ruff]
-target-version = "py39"
+target-version = "py310"
 line-length = 100
 extend-exclude = [
    "third_party",
@@ -1,62 +0,0 @@
-name: leann-build
-
-resources:
-  # Choose a GPU for fast embeddings (examples: L4, A10G, A100). CPU also works but is slower.
-  accelerators: L4:1
-  # Optionally pin a cloud, otherwise SkyPilot will auto-select
-  # cloud: aws
-  disk_size: 100
-
-env:
-  # Build parameters (override with: sky launch -c leann-gpu sky/leann-build.yaml -e key=value)
-  index_name: my-index
-  docs: ./data
-  backend: hnsw               # hnsw | diskann
-  complexity: 64
-  graph_degree: 32
-  num_threads: 8
-  # Embedding selection
-  embedding_mode: sentence-transformers   # sentence-transformers | openai | mlx | ollama
-  embedding_model: facebook/contriever
-  # Storage/latency knobs
-  recompute: true             # true => selective recomputation; false => store full embeddings
-  compact: true               # for HNSW only: false when recompute=false
-  # Optional pass-through
-  extra_args: ""
-
-# Sync local paths to the remote VM. Adjust as needed.
-file_mounts:
-  # Example: mount your local data directory used for building
-  ~/leann-data: ${docs}
-
-setup: |
-  set -e
-  # Install uv (package manager)
-  curl -LsSf https://astral.sh/uv/install.sh | sh
-  export PATH="$HOME/.local/bin:$PATH"
-
-  # Install the LEANN CLI globally on the remote machine
-  uv tool install leann
-
-run: |
-  export PATH="$HOME/.local/bin:$PATH"
-  # Derive flags from env
-  recompute_flag=""
-  if [ "${recompute}" = "false" ] || [ "${recompute}" = "0" ]; then
-    recompute_flag="--no-recompute"
-  fi
-  compact_flag=""
-  if [ "${compact}" = "false" ] || [ "${compact}" = "0" ]; then
-    compact_flag="--no-compact"
-  fi
-
-  # Build command
-  leann build ${index_name} \
-    --docs ~/leann-data \
-    --backend ${backend} \
-    --complexity ${complexity} \
-    --graph-degree ${graph_degree} \
-    --num-threads ${num_threads} \
-    --embedding-mode ${embedding_mode} \
-    --embedding-model ${embedding_model} \
-    ${recompute_flag} ${compact_flag} ${extra_args}
Author	SHA1	Message	Date
Andy Lee	b55eeeae5f	Merge remote-tracking branch 'origin/main' into feature/claude-code-research	2025-08-05 23:02:00 -07:00
Andy Lee	e890b2311f	feat: Add Claude Code integration with MCP server	2025-08-05 14:03:36 -07:00
Andy Lee	f3d99fd118	feat: Claude Code integration ready - LEANN CLI works out of the box ✅ Verified LEANN CLI works perfectly with Claude Code ✅ Added integration guide with working examples ✅ Documented simple workflow for immediate use Key findings: - No code changes needed - Just need --recompute-embeddings flag - Search, ask, and build all work - Ready for Claude Code agents and workflows	2025-08-05 12:27:58 -07:00
Andy Lee	8eee90bf80	docs: add a link	2025-08-04 20:10:14 -07:00
Andy Lee	649d4ad03e	docs: Address all configuration guide feedback - Fix grammar: 'If time is not a constraint' instead of 'time expense is not large' - Highlight Qwen3-Embedding-0.6B performance (nearly OpenAI API level) - Add OpenAI quick start section with configuration example - Fold Cloud vs Local trade-offs into collapsible section - Update HNSW as 'default and recommended for extreme low storage' - Add DiskANN beta warning and explain PQ+rerank architecture - Expand Ollama models: add qwen3:0.6b, 4b, 7b variants - Note OpenAI as current default but recommend Ollama switch - Add 'need to install extra software' warning for Ollama - Remove incorrect latency numbers from search-complexity recommendations	2025-08-04 20:01:23 -07:00
Andy Lee	d9b6f195c5	docs: Improve configuration guide based on feedback - List specific files in default data/ directory (2 AI papers, literature, tech report) - Update examples to use English and better RAG-suitable queries - Change full dataset reference to use --max-items -1 - Adjust small model guidance about upgrading to larger models when time allows - Update top-k defaults to reflect actual default of 20 - Ensure consistent use of full model name Qwen/Qwen3-Embedding-0.6B - Reorder optimization steps, move MLX to third position - Remove incorrect chunk size tuning guidance - Change README from 'Having trouble' to 'Need best practices'	2025-08-04 19:29:17 -07:00
Andy Lee	00f506c0bd	docs: Adjust DiskANN positioning in features and roadmap - features.md: Put HNSW/FAISS first as default, DiskANN as optional - roadmap.md: Reorder to show HNSW integration before DiskANN - Consistent with positioning DiskANN as advanced option for large-scale use	2025-08-04 17:53:27 -07:00
Andy Lee	e872dd1d23	docs: Weaken DiskANN emphasis in README - Change backend description to emphasize HNSW as default - DiskANN positioned as optional for billion-scale datasets - Simplify evaluation commands to be more generic	2025-08-04 17:51:21 -07:00
Andy Lee	063c687ff7	chore: move evaluation data .gitattributes to correct location	2025-08-04 17:46:17 -07:00
Andy Lee	bb8ecd54d7	feat: add comprehensive configuration guide and update README - Create docs/configuration-guide.md with detailed guidance on: - Embedding model selection (small/medium/large) - Index selection (HNSW vs DiskANN) - LLM engine and model comparison - Parameter tuning (build/search complexity, top-k) - Performance optimization tips - Deep dive into LEANN's recomputation feature - Update README.md to link to the configuration guide - Include latest 2025 model recommendations (Qwen3, DeepSeek-R1, O3-mini)	2025-08-04 17:41:27 -07:00
Andy Lee	716217ae24	docs: config guidance	2025-08-04 16:21:13 -07:00