fix: detect and report Ollama embedding dimension inconsistency

- Add validation for embedding dimension consistency in Ollama mode
- Provide a clear error message with troubleshooting steps when dimensions mismatch
- Fail fast instead of silently falling back, to prevent data corruption

Fixes #31
2025-08-11 17:36:44 -07:00
13 changed files with 305 additions and 739 deletions
@@ -64,16 +64,6 @@ jobs:
            python: '3.12'
          - os: macos-14
            python: '3.13'
-          - os: macos-15
-            python: '3.9'
-          - os: macos-15
-            python: '3.10'
-          - os: macos-15
-            python: '3.11'
-          - os: macos-15
-            python: '3.12'
-          - os: macos-15
-            python: '3.13'
          - os: macos-13
            python: '3.9'
          - os: macos-13
@@ -157,14 +147,7 @@ jobs:
            # Use system clang for better compatibility
            export CC=clang
            export CXX=clang++
-            # Homebrew libraries on each macOS version require matching minimum version
-            if [[ "${{ matrix.os }}" == "macos-13" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=13.0
-            elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=14.0
-            elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=15.0
-            fi
+            export MACOSX_DEPLOYMENT_TARGET=11.0
            uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
          else
            uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
@@ -178,14 +161,7 @@ jobs:
            export CC=clang
            export CXX=clang++
            # DiskANN requires macOS 13.3+ for sgesdd_ LAPACK function
-            # But Homebrew libraries on each macOS version require matching minimum version
-            if [[ "${{ matrix.os }}" == "macos-13" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=13.3
-            elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=14.0
-            elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
-              export MACOSX_DEPLOYMENT_TARGET=15.0
-            fi
+            export MACOSX_DEPLOYMENT_TARGET=13.3
            uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
          else
            uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
@@ -221,24 +197,10 @@ jobs:
      - name: Repair wheels (macOS)
        if: runner.os == 'macOS'
        run: |
-          # Determine deployment target based on runner OS
-          # Must match the Homebrew libraries for each macOS version
-          if [[ "${{ matrix.os }}" == "macos-13" ]]; then
-            HNSW_TARGET="13.0"
-            DISKANN_TARGET="13.3"
-          elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
-            HNSW_TARGET="14.0"
-            DISKANN_TARGET="14.0"
-          elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
-            HNSW_TARGET="15.0"
-            DISKANN_TARGET="15.0"
-          fi
-
          # Repair HNSW wheel
          cd packages/leann-backend-hnsw
          if [ -d dist ]; then
-            export MACOSX_DEPLOYMENT_TARGET=$HNSW_TARGET
-            delocate-wheel -w dist_repaired -v --require-target-macos-version $HNSW_TARGET dist/*.whl
+            delocate-wheel -w dist_repaired -v dist/*.whl
            rm -rf dist
            mv dist_repaired dist
          fi
@@ -247,8 +209,7 @@ jobs:
          # Repair DiskANN wheel
          cd packages/leann-backend-diskann
          if [ -d dist ]; then
-            export MACOSX_DEPLOYMENT_TARGET=$DISKANN_TARGET
-            delocate-wheel -w dist_repaired -v --require-target-macos-version $DISKANN_TARGET dist/*.whl
+            delocate-wheel -w dist_repaired -v dist/*.whl
            rm -rf dist
            mv dist_repaired dist
          fi
@@ -288,8 +249,8 @@ jobs:
          # Activate virtual environment
          source .venv/bin/activate || source .venv/Scripts/activate

-          # Run tests
-          pytest -v tests/
+          # Run all tests
+          pytest tests/

      - name: Run sanity checks (optional)
        run: |
@@ -71,8 +71,6 @@ source .venv/bin/activate
 uv pip install leann
 ```

-> Low-resource? See “Low-resource setups” in the [Configuration Guide](docs/configuration-guide.md#low-resource-setups).
-
 <details>
 <summary>
 <strong>🔧 Build from Source (Recommended for development)</strong>
@@ -470,7 +468,7 @@ leann --help
 ### Usage Examples

 ```bash
-# build from a specific directory, and my_docs is the index name(Here you can also build from multiple dict or multiple files)
+# build from a specific directory, and my_docs is the index name
 leann build my-docs --docs ./your_documents

 # Search your documents
@@ -259,80 +259,24 @@ Every configuration choice involves trade-offs:

 The key is finding the right balance for your specific use case. Start small and simple, measure performance, then scale up only where needed.

-## Low-resource setups
+## Deep Dive: Critical Configuration Decisions

-If you don’t have a local GPU or builds/searches are too slow, use one or more of the options below.
+### When to Disable Recomputation

-### 1) Use OpenAI embeddings (no local compute)
-
-Fastest path with zero local GPU requirements. Set your API key and use OpenAI embeddings during build and search:
+LEANN's recomputation feature provides exact distance calculations but can be disabled for extreme QPS requirements:

 ```bash
-export OPENAI_API_KEY=sk-...
-
-# Build with OpenAI embeddings
-leann build my-index \
-  --embedding-mode openai \
-  --embedding-model text-embedding-3-small
-
-# Search with OpenAI embeddings (recompute at query time)
-leann search my-index "your query" \
-  --recompute-embeddings
+--no-recompute  # Disable selective recomputation
 ```

-### 2) Run remote builds with SkyPilot (cloud GPU)
+**Trade-offs**:
+- **With recomputation** (default): Exact distances, best quality, higher latency, minimal storage (only stores metadata, recomputes embeddings on-demand)
+- **Without recomputation**: Must store full embeddings, significantly higher memory and storage usage (10-100x more), but faster search

-Offload embedding generation and index building to a GPU VM using SkyPilot. A template is provided at `sky/leann-build.yaml`.
-
-```bash
-# One-time: install and configure SkyPilot
-pip install skypilot
-sky launch -c leann-gpu sky/leann-build.yaml
-
-# Build remotely (template installs uv + leann CLI)
-sky exec leann-gpu -- "leann build my-index --docs ~/leann-data --backend hnsw --complexity 64 --graph-degree 32"
-```
-
-Details: see “Running Builds on SkyPilot (Optional)” below.
-
-### 3) Disable recomputation to trade storage for speed
-
-If you need lower latency and have more storage/memory, disable recomputation. This stores full embeddings and avoids recomputing at search time.
-
-```bash
-# Build without recomputation (HNSW requires non-compact in this mode)
-leann build my-index --no-recompute --no-compact
-
-# Search without recomputation
-leann search my-index "your query" --no-recompute
-```
-
-Trade-offs: lower query-time latency, but significantly higher storage usage.
-
-## Running Builds on SkyPilot (Optional)
-
-You can offload embedding generation and index building to a cloud GPU VM using SkyPilot, without changing any LEANN code. This is useful when your local machine lacks a GPU or you want faster throughput.
-
-### Quick Start
-
-1) Install SkyPilot by following their docs (`pip install skypilot`), then configure cloud credentials.
-
-2) Use the provided SkyPilot template:
-
-```bash
-sky launch -c leann-gpu sky/leann-build.yaml
-```
-
-3) On the remote, either put your data under the mounted path or adjust `file_mounts` in `sky/leann-build.yaml`. Then run the LEANN build:
-
-```bash
-sky exec leann-gpu -- "leann build my-index --docs ~/leann-data --backend hnsw --complexity 64 --graph-degree 32"
-```
-
-Notes:
- The template installs `uv` and the `leann` CLI globally on the remote instance.
- Change the `accelerators` and `cloud` settings in `sky/leann-build.yaml` to match your budget/availability (e.g., `A10G:1`, `A100:1`, or CPU-only if you prefer).
- You can also build with `diskann` by switching `--backend diskann`.
+**Disable when**:
+- You have abundant storage and memory
+- You need extremely low latency (< 100ms)
+- You are running a read-heavy workload where storage cost is acceptable

 ## Further Reading

@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-diskann"
-version = "0.2.9"
-dependencies = ["leann-core==0.2.9", "numpy", "protobuf>=3.19.0"]
+version = "0.2.7"
+dependencies = ["leann-core==0.2.7", "numpy", "protobuf>=3.19.0"]

 [tool.scikit-build]
 # Key: simplified CMake path
@@ -13,7 +13,7 @@ if(APPLE)
    else()
        message(FATAL_ERROR "Could not find libomp installation. Please install with: brew install libomp")
    endif()
-
+    
    set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include")
    set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include")
    set(OpenMP_C_LIB_NAMES "omp")
@@ -95,8 +95,6 @@ def create_hnsw_embedding_server(
        passage_sources.append(source_copy)

    passages = PassageManager(passage_sources)
-    # Use index dimensions from metadata for shaping fallback responses
-    embedding_dim: int = int(meta.get("dimensions", 0))
    logger.info(
        f"Loaded PassageManager with {len(passages.global_offset_map)} passages from metadata"
    )
@@ -111,9 +109,6 @@ def create_hnsw_embedding_server(
        socket.setsockopt(zmq.RCVTIMEO, 300000)
        socket.setsockopt(zmq.SNDTIMEO, 300000)

-        # Track last request type for safe fallback responses on exceptions
-        last_request_type = "unknown"  # one of: 'text', 'distance', 'embedding', 'unknown'
-        last_request_length = 0
        while True:
            try:
                message_bytes = socket.recv()
@@ -126,8 +121,6 @@ def create_hnsw_embedding_server(
                if isinstance(request_payload, list) and len(request_payload) > 0:
                    # Check if this is a direct text request (list of strings)
                    if all(isinstance(item, str) for item in request_payload):
-                        last_request_type = "text"
-                        last_request_length = len(request_payload)
                        logger.info(
                            f"Processing direct text embedding request for {len(request_payload)} texts in {embedding_mode} mode"
                        )
@@ -152,66 +145,43 @@ def create_hnsw_embedding_server(
                ):
                    node_ids = request_payload[0]
                    query_vector = np.array(request_payload[1], dtype=np.float32)
-                    last_request_type = "distance"
-                    last_request_length = len(node_ids)

                    logger.debug("Distance calculation request received")
                    logger.debug(f"    Node IDs: {node_ids}")
                    logger.debug(f"    Query vector dim: {len(query_vector)}")

-                    # Get embeddings for node IDs, tolerate missing IDs
-                    texts: list[str] = []
-                    found_indices: list[int] = []
-                    for idx, nid in enumerate(node_ids):
+                    # Get embeddings for node IDs
+                    texts = []
+                    for nid in node_ids:
                        try:
                            passage_data = passages.get_passage(str(nid))
-                            txt = passage_data.get("text", "")
-                            if isinstance(txt, str) and len(txt) > 0:
-                                texts.append(txt)
-                                found_indices.append(idx)
-                            else:
-                                logger.error(f"Empty text for passage ID {nid}")
+                            txt = passage_data["text"]
+                            texts.append(txt)
                        except KeyError:
                            logger.error(f"Passage ID {nid} not found")
+                            raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
                        except Exception as e:
                            logger.error(f"Exception looking up passage ID {nid}: {e}")
+                            raise

-                    # Prepare full-length response distances with safe fallbacks
-                    large_distance = 1e9
-                    response_distances = [large_distance] * len(node_ids)
-
-                    if texts:
-                        try:
-                            # Process embeddings only for found indices
-                            embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
-                            logger.info(
-                                f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
-                            )
-
-                            # Calculate distances for found embeddings only
-                            if distance_metric == "l2":
-                                partial_distances = np.sum(
-                                    np.square(embeddings - query_vector.reshape(1, -1)), axis=1
-                                )
-                            else:  # mips or cosine
-                                partial_distances = -np.dot(embeddings, query_vector)
-
-                            # Place computed distances back into the full response array
-                            for pos, dval in zip(
-                                found_indices, partial_distances.flatten().tolist()
-                            ):
-                                response_distances[pos] = float(dval)
-                        except Exception as e:
-                            logger.error(
-                                f"Distance computation error, falling back to large distances: {e}"
-                            )
-
-                    # Always reply with exactly len(node_ids) distances
-                    response_bytes = msgpack.packb([response_distances], use_single_float=True)
-                    logger.debug(
-                        f"Sending distance response with {len(response_distances)} distances (found={len(found_indices)})"
+                    # Process embeddings
+                    embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
+                    logger.info(
+                        f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
                    )

+                    # Calculate distances
+                    if distance_metric == "l2":
+                        distances = np.sum(
+                            np.square(embeddings - query_vector.reshape(1, -1)), axis=1
+                        )
+                    else:  # mips or cosine
+                        distances = -np.dot(embeddings, query_vector)
+
+                    response_payload = distances.flatten().tolist()
+                    response_bytes = msgpack.packb([response_payload], use_single_float=True)
+                    logger.debug(f"Sending distance response with {len(distances)} distances")
+
                    socket.send(response_bytes)
                    e2e_end = time.time()
                    logger.info(f"⏱️  Distance calculation E2E time: {e2e_end - e2e_start:.6f}s")
@@ -231,61 +201,40 @@ def create_hnsw_embedding_server(

                node_ids = request_payload[0]
                logger.debug(f"Request for {len(node_ids)} node embeddings")
-                last_request_type = "embedding"
-                last_request_length = len(node_ids)

-                # Allocate output buffer (B, D) and fill with zeros for robustness
-                if embedding_dim <= 0:
-                    logger.error("Embedding dimension unknown; cannot serve embedding request")
-                    dims = [0, 0]
-                    data = []
-                else:
-                    dims = [len(node_ids), embedding_dim]
-                    data = [0.0] * (dims[0] * dims[1])
-
-                # Look up texts by node IDs; compute embeddings where available
-                texts: list[str] = []
-                found_indices: list[int] = []
-                for idx, nid in enumerate(node_ids):
+                # Look up texts by node IDs
+                texts = []
+                for nid in node_ids:
                    try:
                        passage_data = passages.get_passage(str(nid))
-                        txt = passage_data.get("text", "")
-                        if isinstance(txt, str) and len(txt) > 0:
-                            texts.append(txt)
-                            found_indices.append(idx)
-                        else:
-                            logger.error(f"Empty text for passage ID {nid}")
+                        txt = passage_data["text"]
+                        if not txt:
+                            raise RuntimeError(f"FATAL: Empty text for passage ID {nid}")
+                        texts.append(txt)
                    except KeyError:
-                        logger.error(f"Passage with ID {nid} not found")
+                        raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
                    except Exception as e:
                        logger.error(f"Exception looking up passage ID {nid}: {e}")
+                        raise

-                if texts:
-                    try:
-                        # Process embeddings for found texts only
-                        embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
-                        logger.info(
-                            f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
-                        )
+                # Process embeddings
+                embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
+                logger.info(
+                    f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
+                )

-                        if np.isnan(embeddings).any() or np.isinf(embeddings).any():
-                            logger.error(
-                                f"NaN or Inf detected in embeddings! Requested IDs: {node_ids[:5]}..."
-                            )
-                            dims = [0, embedding_dim]
-                            data = []
-                        else:
-                            # Copy computed embeddings into the correct positions
-                            emb_f32 = np.ascontiguousarray(embeddings, dtype=np.float32)
-                            flat = emb_f32.flatten().tolist()
-                            for j, pos in enumerate(found_indices):
-                                start = pos * embedding_dim
-                                end = start + embedding_dim
-                                data[start:end] = flat[j * embedding_dim : (j + 1) * embedding_dim]
-                    except Exception as e:
-                        logger.error(f"Embedding computation error, returning zeros: {e}")
+                # Serialization and response
+                if np.isnan(embeddings).any() or np.isinf(embeddings).any():
+                    logger.error(
+                        f"NaN or Inf detected in embeddings! Requested IDs: {node_ids[:5]}..."
+                    )
+                    raise AssertionError()

-                response_payload = [dims, data]
+                hidden_contiguous_f32 = np.ascontiguousarray(embeddings, dtype=np.float32)
+                response_payload = [
+                    list(hidden_contiguous_f32.shape),
+                    hidden_contiguous_f32.flatten().tolist(),
+                ]
                response_bytes = msgpack.packb(response_payload, use_single_float=True)

                socket.send(response_bytes)
@@ -300,22 +249,7 @@ def create_hnsw_embedding_server(
                import traceback

                traceback.print_exc()
-                # Fallback to a safe, minimal-structure response to avoid client crashes
-                if last_request_type == "distance":
-                    # Return a vector of large distances with the expected length
-                    fallback_len = max(0, int(last_request_length))
-                    large_distance = 1e9
-                    safe_response = [[large_distance] * fallback_len]
-                elif last_request_type == "embedding":
-                    # Return an empty embedding block with known dimension if available
-                    if embedding_dim > 0:
-                        safe_response = [[0, embedding_dim], []]
-                    else:
-                        safe_response = [[0, 0], []]
-                else:
-                    # Unknown request type: default to empty embedding structure
-                    safe_response = [[0, int(embedding_dim) if embedding_dim > 0 else 0], []]
-                socket.send(msgpack.packb(safe_response, use_single_float=True))
+                socket.send(msgpack.packb([[], []]))

    zmq_thread = threading.Thread(target=zmq_server_thread, daemon=True)
    zmq_thread.start()
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-hnsw"
-version = "0.2.9"
+version = "0.2.7"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.2.9",
+    "leann-core==0.2.7",
    "numpy",
    "pyzmq>=23.0.0",
    "msgpack>=1.0.0",
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann-core"
-version = "0.2.9"
+version = "0.2.7"
 description = "Core API and plugin system for LEANN"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -5,7 +5,6 @@ from typing import Union

 from llama_index.core import SimpleDirectoryReader
 from llama_index.core.node_parser import SentenceSplitter
-from tqdm import tqdm

 from .api import LeannBuilder, LeannChat, LeannSearcher

@@ -76,14 +75,11 @@ class LeannCLI:
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
 Examples:
-  leann build my-docs --docs ./documents                                  # Build index from directory
-  leann build my-code --docs ./src ./tests ./config                      # Build index from multiple directories
-  leann build my-files --docs ./file1.py ./file2.txt ./docs/             # Build index from files and directories
-  leann build my-mixed --docs ./readme.md ./src/ ./config.json           # Build index from mixed files/dirs
-  leann build my-ppts --docs ./ --file-types .pptx,.pdf                  # Index only PowerPoint and PDF files
-  leann search my-docs "query"                                           # Search in my-docs index
-  leann ask my-docs "question"                                           # Ask my-docs index
-  leann list                                                             # List all stored indexes
+  leann build my-docs --docs ./documents                    # Build index named my-docs
+  leann build my-ppts --docs ./ --file-types .pptx,.pdf    # Index only PowerPoint and PDF files
+  leann search my-docs "query"                             # Search in my-docs index
+  leann ask my-docs "question"                             # Ask my-docs index
+  leann list                                              # List all stored indexes
            """,
        )

@@ -95,11 +91,7 @@ Examples:
            "index_name", nargs="?", help="Index name (default: current directory name)"
        )
        build_parser.add_argument(
-            "--docs",
-            type=str,
-            nargs="+",
-            default=["."],
-            help="Documents directories and/or files (default: current directory)",
+            "--docs", type=str, default=".", help="Documents directory (default: current directory)"
        )
        build_parser.add_argument(
            "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
@@ -117,19 +109,7 @@ Examples:
        build_parser.add_argument("--complexity", type=int, default=64)
        build_parser.add_argument("--num-threads", type=int, default=1)
        build_parser.add_argument("--compact", action="store_true", default=True)
-        build_parser.add_argument(
-            "--no-compact",
-            dest="compact",
-            action="store_false",
-            help="Disable compact index storage (store full embeddings; higher storage)",
-        )
        build_parser.add_argument("--recompute", action="store_true", default=True)
-        build_parser.add_argument(
-            "--no-recompute",
-            dest="recompute",
-            action="store_false",
-            help="Disable embedding recomputation (store full embeddings; lower query latency)",
-        )
        build_parser.add_argument(
            "--file-types",
            type=str,
@@ -150,18 +130,6 @@ Examples:
            default=True,
            help="Recompute embeddings (default: True)",
        )
-        search_parser.add_argument(
-            "--no-recompute-embeddings",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Disable embedding recomputation during search",
-        )
-        search_parser.add_argument(
-            "--no-recompute",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Alias for --no-recompute-embeddings",
-        )
        search_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
@@ -190,18 +158,6 @@ Examples:
            default=True,
            help="Recompute embeddings (default: True)",
        )
-        ask_parser.add_argument(
-            "--no-recompute-embeddings",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Disable embedding recomputation during ask",
-        )
-        ask_parser.add_argument(
-            "--no-recompute",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Alias for --no-recompute-embeddings",
-        )
        ask_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
@@ -279,32 +235,6 @@ Examples:
        """Check if a file should be excluded using gitignore parser."""
        return gitignore_matches(str(relative_path))

-    def _is_git_submodule(self, path: Path) -> bool:
-        """Check if a path is a git submodule."""
-        try:
-            # Find the git repo root
-            current_dir = Path.cwd()
-            while current_dir != current_dir.parent:
-                if (current_dir / ".git").exists():
-                    gitmodules_path = current_dir / ".gitmodules"
-                    if gitmodules_path.exists():
-                        # Read .gitmodules to check if this path is a submodule
-                        gitmodules_content = gitmodules_path.read_text()
-                        # Convert path to relative to git root
-                        try:
-                            relative_path = path.resolve().relative_to(current_dir)
-                            # Check if this path appears in .gitmodules
-                            return f"path = {relative_path}" in gitmodules_content
-                        except ValueError:
-                            # Path is not under git root
-                            return False
-                    break
-                current_dir = current_dir.parent
-            return False
-        except Exception:
-            # If anything goes wrong, assume it's not a submodule
-            return False
-
    def list_indexes(self):
        print("Stored LEANN indexes:")

@@ -334,9 +264,7 @@ Examples:
            valid_projects.append(current_path)

        if not valid_projects:
-            print(
-                "No indexes found. Use 'leann build <name> --docs <dir> [<dir2> ...]' to create one."
-            )
+            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
            return

        total_indexes = 0
@@ -383,88 +311,56 @@ Examples:
                    print(f'  leann search {example_name} "your query"')
                    print(f"  leann ask {example_name} --interactive")

-    def load_documents(
-        self, docs_paths: Union[str, list], custom_file_types: Union[str, None] = None
-    ):
-        # Handle both single path (string) and multiple paths (list) for backward compatibility
-        if isinstance(docs_paths, str):
-            docs_paths = [docs_paths]
-
-        # Separate files and directories
-        files = []
-        directories = []
-        for path in docs_paths:
-            path_obj = Path(path)
-            if path_obj.is_file():
-                files.append(str(path_obj))
-            elif path_obj.is_dir():
-                # Check if this is a git submodule - if so, skip it
-                if self._is_git_submodule(path_obj):
-                    print(f"⚠️  Skipping git submodule: {path}")
-                    continue
-                directories.append(str(path_obj))
-            else:
-                print(f"⚠️  Warning: Path '{path}' does not exist, skipping...")
-                continue
-
-        # Print summary of what we're processing
-        total_items = len(files) + len(directories)
-        items_desc = []
-        if files:
-            items_desc.append(f"{len(files)} file{'s' if len(files) > 1 else ''}")
-        if directories:
-            items_desc.append(
-                f"{len(directories)} director{'ies' if len(directories) > 1 else 'y'}"
-            )
-
-        print(f"Loading documents from {' and '.join(items_desc)} ({total_items} total):")
-        if files:
-            print(f"  📄 Files: {', '.join([Path(f).name for f in files])}")
-        if directories:
-            print(f"  📁 Directories: {', '.join(directories)}")
-
+    def load_documents(self, docs_dir: str, custom_file_types: Union[str, None] = None):
+        print(f"Loading documents from {docs_dir}...")
        if custom_file_types:
            print(f"Using custom file types: {custom_file_types}")

-        all_documents = []
+        # Build gitignore parser
+        gitignore_matches = self._build_gitignore_parser(docs_dir)

-        # First, process individual files if any
-        if files:
-            print(f"\n🔄 Processing {len(files)} individual file{'s' if len(files) > 1 else ''}...")
+        # Try to use better PDF parsers first, but only if PDFs are requested
+        documents = []
+        docs_path = Path(docs_dir)

-            # Load individual files using SimpleDirectoryReader with input_files
-            # Note: We skip gitignore filtering for explicitly specified files
-            try:
-                # Group files by their parent directory for efficient loading
-                from collections import defaultdict
+        # Check if we should process PDFs
+        should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types

-                files_by_dir = defaultdict(list)
-                for file_path in files:
-                    parent_dir = str(Path(file_path).parent)
-                    files_by_dir[parent_dir].append(file_path)
+        if should_process_pdfs:
+            for file_path in docs_path.rglob("*.pdf"):
+                # Check if file matches any exclude pattern
+                relative_path = file_path.relative_to(docs_path)
+                if self._should_exclude_file(relative_path, gitignore_matches):
+                    continue

-                # Load files from each parent directory
-                for parent_dir, file_list in files_by_dir.items():
-                    print(
-                        f"  Loading {len(file_list)} file{'s' if len(file_list) > 1 else ''} from {parent_dir}"
-                    )
+                print(f"Processing PDF: {file_path}")
+
+                # Try PyMuPDF first (best quality)
+                text = extract_pdf_text_with_pymupdf(str(file_path))
+                if text is None:
+                    # Try pdfplumber
+                    text = extract_pdf_text_with_pdfplumber(str(file_path))
+
+                if text:
+                    # Create a simple document structure
+                    from llama_index.core import Document
+
+                    doc = Document(text=text, metadata={"source": str(file_path)})
+                    documents.append(doc)
+                else:
+                    # Fallback to default reader
+                    print(f"Using default reader for {file_path}")
                    try:
-                        file_docs = SimpleDirectoryReader(
-                            parent_dir,
-                            input_files=file_list,
+                        default_docs = SimpleDirectoryReader(
+                            str(file_path.parent),
                            filename_as_id=True,
+                            required_exts=[file_path.suffix],
                        ).load_data()
-                        all_documents.extend(file_docs)
-                        print(
-                            f"    ✅ Loaded {len(file_docs)} document{'s' if len(file_docs) > 1 else ''}"
-                        )
+                        documents.extend(default_docs)
                    except Exception as e:
-                        print(f"    ❌ Warning: Could not load files from {parent_dir}: {e}")
+                        print(f"Warning: Could not process {file_path}: {e}")

-            except Exception as e:
-                print(f"❌ Error processing individual files: {e}")
-
-        # Define file extensions to process
+        # Load other file types with default reader
        if custom_file_types:
            # Parse custom file types from comma-separated string
            code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
@@ -526,106 +422,41 @@ Examples:
                ".py",
                ".jl",
            ]
+        # Try to load other file types, but don't fail if none are found
+        try:
+            # Create a custom file filter function using our PathSpec
+            def file_filter(file_path: str) -> bool:
+                """Decide whether a loaded file survives the exclusion rules.
+
+                The path is made relative to ``docs_dir`` and checked with
+                ``self._should_exclude_file`` against ``gitignore_matches``.
+
+                Returns:
+                    True to keep the file, False when it is excluded.
+                """
+                try:
+                    rel_path = Path(file_path).relative_to(Path(docs_dir))
+                    is_excluded = self._should_exclude_file(rel_path, gitignore_matches)
+                except (ValueError, OSError):
+                    # Paths that cannot be made relative to docs_dir (or that
+                    # fail during the check) are kept rather than dropped.
+                    return True  # Include files that can't be processed
+                return not is_excluded

-        # Process each directory
-        if directories:
-            print(
-                f"\n🔄 Processing {len(directories)} director{'ies' if len(directories) > 1 else 'y'}..."
-            )
+            other_docs = SimpleDirectoryReader(
+                docs_dir,
+                recursive=True,
+                encoding="utf-8",
+                required_exts=code_extensions,
+                file_extractor={},  # Use default extractors
+                filename_as_id=True,
+            ).load_data(show_progress=True)

-        for docs_dir in directories:
-            print(f"Processing directory: {docs_dir}")
-            # Build gitignore parser for each directory
-            gitignore_matches = self._build_gitignore_parser(docs_dir)
+            # Filter documents after loading based on gitignore rules
+            filtered_docs = []
+            for doc in other_docs:
+                file_path = doc.metadata.get("file_path", "")
+                if file_filter(file_path):
+                    filtered_docs.append(doc)

-            # Try to use better PDF parsers first, but only if PDFs are requested
-            documents = []
-            docs_path = Path(docs_dir)
-
-            # Check if we should process PDFs
-            should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types
-
-            if should_process_pdfs:
-                for file_path in docs_path.rglob("*.pdf"):
-                    # Check if file matches any exclude pattern
-                    try:
-                        relative_path = file_path.relative_to(docs_path)
-                        if self._should_exclude_file(relative_path, gitignore_matches):
-                            continue
-                    except ValueError:
-                        # Skip files that can't be made relative to docs_path
-                        print(f"⚠️  Skipping file outside directory scope: {file_path}")
-                        continue
-
-                    print(f"Processing PDF: {file_path}")
-
-                    # Try PyMuPDF first (best quality)
-                    text = extract_pdf_text_with_pymupdf(str(file_path))
-                    if text is None:
-                        # Try pdfplumber
-                        text = extract_pdf_text_with_pdfplumber(str(file_path))
-
-                    if text:
-                        # Create a simple document structure
-                        from llama_index.core import Document
-
-                        doc = Document(text=text, metadata={"source": str(file_path)})
-                        documents.append(doc)
-                    else:
-                        # Fallback to default reader
-                        print(f"Using default reader for {file_path}")
-                        try:
-                            default_docs = SimpleDirectoryReader(
-                                str(file_path.parent),
-                                filename_as_id=True,
-                                required_exts=[file_path.suffix],
-                            ).load_data()
-                            documents.extend(default_docs)
-                        except Exception as e:
-                            print(f"Warning: Could not process {file_path}: {e}")
-
-            # Load other file types with default reader
-            try:
-                # Create a custom file filter function using our PathSpec
-                def file_filter(
-                    file_path: str, docs_dir=docs_dir, gitignore_matches=gitignore_matches
-                ) -> bool:
-                    """Return True if file should be included (not excluded)"""
-                    try:
-                        docs_path_obj = Path(docs_dir)
-                        file_path_obj = Path(file_path)
-                        relative_path = file_path_obj.relative_to(docs_path_obj)
-                        return not self._should_exclude_file(relative_path, gitignore_matches)
-                    except (ValueError, OSError):
-                        return True  # Include files that can't be processed
-
-                other_docs = SimpleDirectoryReader(
-                    docs_dir,
-                    recursive=True,
-                    encoding="utf-8",
-                    required_exts=code_extensions,
-                    file_extractor={},  # Use default extractors
-                    filename_as_id=True,
-                ).load_data(show_progress=True)
-
-                # Filter documents after loading based on gitignore rules
-                filtered_docs = []
-                for doc in other_docs:
-                    file_path = doc.metadata.get("file_path", "")
-                    if file_filter(file_path):
-                        filtered_docs.append(doc)
-
-                documents.extend(filtered_docs)
-            except ValueError as e:
-                if "No files found" in str(e):
-                    print(f"No additional files found for other supported types in {docs_dir}.")
-                else:
-                    raise e
-
-            all_documents.extend(documents)
-            print(f"Loaded {len(documents)} documents from {docs_dir}")
-
-        documents = all_documents
+            documents.extend(filtered_docs)
+        except ValueError as e:
+            if "No files found" in str(e):
+                print("No additional files found for other supported types.")
+            else:
+                raise e

        all_texts = []

@@ -676,9 +507,7 @@ Examples:
            ".jl",
        }

-        print("start chunking documents")
-        # Add progress bar for document chunking
-        for doc in tqdm(documents, desc="Chunking documents", unit="doc"):
+        for doc in documents:
            # Check if this is a code file based on source path
            source_path = doc.metadata.get("source", "")
            is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)
@@ -694,7 +523,7 @@ Examples:
        return all_texts

    async def build_index(self, args):
-        docs_paths = args.docs
+        docs_dir = args.docs
        # Use current directory name if index_name not provided
        if args.index_name:
            index_name = args.index_name
@@ -705,25 +534,13 @@ Examples:
        index_dir = self.indexes_dir / index_name
        index_path = self.get_index_path(index_name)

-        # Display all paths being indexed with file/directory distinction
-        files = [p for p in docs_paths if Path(p).is_file()]
-        directories = [p for p in docs_paths if Path(p).is_dir()]
-
-        print(f"📂 Indexing {len(docs_paths)} path{'s' if len(docs_paths) > 1 else ''}:")
-        if files:
-            print(f"  📄 Files ({len(files)}):")
-            for i, file_path in enumerate(files, 1):
-                print(f"    {i}. {Path(file_path).resolve()}")
-        if directories:
-            print(f"  📁 Directories ({len(directories)}):")
-            for i, dir_path in enumerate(directories, 1):
-                print(f"    {i}. {Path(dir_path).resolve()}")
+        print(f"📂 Indexing: {Path(docs_dir).resolve()}")

        if index_dir.exists() and not args.force:
            print(f"Index '{index_name}' already exists. Use --force to rebuild.")
            return

-        all_texts = self.load_documents(docs_paths, args.file_types)
+        all_texts = self.load_documents(docs_dir, args.file_types)
        if not all_texts:
            print("No documents found")
            return
@@ -759,7 +576,7 @@ Examples:

        if not self.index_exists(index_name):
            print(
-                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
+                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
            )
            return

@@ -786,7 +603,7 @@ Examples:

        if not self.index_exists(index_name):
            print(
-                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
+                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
            )
            return

@@ -6,6 +6,7 @@ Preserves all optimization parameters to ensure performance

 import logging
 import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any

 import numpy as np
@@ -373,9 +374,7 @@ def compute_embeddings_ollama(
    texts: list[str], model_name: str, is_build: bool = False, host: str = "http://localhost:11434"
 ) -> np.ndarray:
    """
-    Compute embeddings using Ollama API with simplified batch processing.
-
-    Uses batch size of 32 for MPS/CPU and 128 for CUDA to optimize performance.
+    Compute embeddings using Ollama API.

    Args:
        texts: List of texts to compute embeddings for
@@ -439,19 +438,12 @@ def compute_embeddings_ollama(
            if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5"]):
                embedding_models.append(model)

-        # Check if model exists (handle versioned names) and resolve to full name
-        resolved_model_name = None
-        for name in model_names:
-            # Exact match
-            if model_name == name:
-                resolved_model_name = name
-                break
-            # Match without version tag (use the versioned name)
-            elif model_name == name.split(":")[0]:
-                resolved_model_name = name
-                break
+        # Check if model exists (handle versioned names)
+        model_found = any(
+            model_name == name.split(":")[0] or model_name == name for name in model_names
+        )

-        if not resolved_model_name:
+        if not model_found:
            error_msg = f"❌ Model '{model_name}' not found in local Ollama.\n\n"

            # Suggest pulling the model
@@ -473,11 +465,6 @@ def compute_embeddings_ollama(
            error_msg += "\n📚 Browse more: https://ollama.com/library"
            raise ValueError(error_msg)

-        # Use the resolved model name for all subsequent operations
-        if resolved_model_name != model_name:
-            logger.info(f"Resolved model name '{model_name}' to '{resolved_model_name}'")
-        model_name = resolved_model_name
-
        # Verify the model supports embeddings by testing it
        try:
            test_response = requests.post(
@@ -498,147 +485,162 @@ def compute_embeddings_ollama(
    except requests.exceptions.RequestException as e:
        logger.warning(f"Could not verify model existence: {e}")

-    # Determine batch size based on device availability
-    # Check for CUDA/MPS availability using torch if available
-    batch_size = 32  # Default for MPS/CPU
-    try:
-        import torch
+    # Process embeddings with optimized concurrent processing
+    import requests

-        if torch.cuda.is_available():
-            batch_size = 128  # CUDA gets larger batch size
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            batch_size = 32  # MPS gets smaller batch size
-    except ImportError:
-        # If torch is not available, use conservative batch size
-        batch_size = 32
+    def get_single_embedding(text_idx_tuple):
+        """Request the embedding for one text from Ollama, retrying on failure.
+
+        Args:
+            text_idx_tuple: ``(text, idx)`` pair, where ``idx`` is the text's
+                position in the caller's list. It is returned unchanged so the
+                caller can store results in input order.
+
+        Returns:
+            ``(idx, embedding)`` on success, or ``(idx, None)`` once
+            ``max_retries`` attempts have failed.
+        """
+        text, idx = text_idx_tuple
+        max_retries = 3
+        retry_count = 0

-    logger.info(f"Using batch size: {batch_size}")
+        # Truncate very long texts to avoid API issues (slicing is a no-op
+        # for texts that are already within the limit)
+        truncated_text = text[:8000]

-    def get_batch_embeddings(batch_texts):
-        """Get embeddings for a batch of texts."""
-        all_embeddings = []
-        failed_indices = []
+        while retry_count < max_retries:
+            try:
+                response = requests.post(
+                    f"{host}/api/embeddings",
+                    json={"model": model_name, "prompt": truncated_text},
+                    timeout=30,
+                )
+                response.raise_for_status()

-        for i, text in enumerate(batch_texts):
-            max_retries = 3
-            retry_count = 0
+                result = response.json()
+                embedding = result.get("embedding")

-            # Truncate very long texts to avoid API issues
-            truncated_text = text[:8000] if len(text) > 8000 else text
-            while retry_count < max_retries:
-                try:
-                    response = requests.post(
-                        f"{host}/api/embeddings",
-                        json={"model": model_name, "prompt": truncated_text},
-                        timeout=30,
+                if embedding is None:
+                    raise ValueError(f"No embedding returned for text {idx}")
+
+                # Treat an empty or non-list payload as a failed attempt so it
+                # is retried instead of being stored as a valid embedding.
+                if not isinstance(embedding, list) or len(embedding) == 0:
+                    raise ValueError(f"Invalid embedding format for text {idx}")
+
+                return idx, embedding
+
+            except requests.exceptions.Timeout:
+                retry_count += 1
+                if retry_count >= max_retries:
+                    logger.warning(f"Timeout for text {idx} after {max_retries} retries")
+                    return idx, None
+
+            except Exception as e:
+                # Any other failure (HTTP error, malformed JSON, invalid
+                # payload) consumes one attempt, same as a timeout.
+                retry_count += 1
+                if retry_count >= max_retries:
+                    logger.error(f"Failed to get embedding for text {idx}: {e}")
+                    return idx, None
+
+        # Defensive fallback: both handlers above already return once the
+        # retries are exhausted, so this is not expected to be reached.
+        return idx, None
+
+    # Determine if we should use concurrent processing
+    use_concurrent = (
+        len(texts) > 5 and not is_build
+    )  # Don't use concurrent in build mode to avoid overwhelming
+    max_workers = min(4, len(texts))  # Limit concurrent requests to avoid overwhelming Ollama
+
+    all_embeddings = [None] * len(texts)  # Pre-allocate list to maintain order
+    failed_indices = []
+
+    if use_concurrent:
+        logger.info(
+            f"Using concurrent processing with {max_workers} workers for {len(texts)} texts"
+        )
+
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all tasks
+            future_to_idx = {
+                executor.submit(get_single_embedding, (text, idx)): idx
+                for idx, text in enumerate(texts)
+            }
+
+            # Add progress bar for concurrent processing
+            try:
+                if is_build or len(texts) > 10:
+                    from tqdm import tqdm
+
+                    futures_iterator = tqdm(
+                        as_completed(future_to_idx),
+                        total=len(texts),
+                        desc="Computing Ollama embeddings",
                    )
-                    response.raise_for_status()
-
-                    result = response.json()
-                    embedding = result.get("embedding")
-
-                    if embedding is None:
-                        raise ValueError(f"No embedding returned for text {i}")
-
-                    if not isinstance(embedding, list) or len(embedding) == 0:
-                        raise ValueError(f"Invalid embedding format for text {i}")
-
-                    all_embeddings.append(embedding)
-                    break
-
-                except requests.exceptions.Timeout:
-                    retry_count += 1
-                    if retry_count >= max_retries:
-                        logger.warning(f"Timeout for text {i} after {max_retries} retries")
-                        failed_indices.append(i)
-                        all_embeddings.append(None)
-                        break
+                else:
+                    futures_iterator = as_completed(future_to_idx)
+            except ImportError:
+                futures_iterator = as_completed(future_to_idx)

+            # Collect results as they complete
+            for future in futures_iterator:
+                try:
+                    idx, embedding = future.result()
+                    if embedding is not None:
+                        all_embeddings[idx] = embedding
+                    else:
+                        failed_indices.append(idx)
                except Exception as e:
-                    retry_count += 1
-                    if retry_count >= max_retries:
-                        logger.error(f"Failed to get embedding for text {i}: {e}")
-                        failed_indices.append(i)
-                        all_embeddings.append(None)
-                        break
-        return all_embeddings, failed_indices
+                    idx = future_to_idx[future]
+                    logger.error(f"Exception for text {idx}: {e}")
+                    failed_indices.append(idx)

-    # Process texts in batches
-    all_embeddings = []
-    all_failed_indices = []
-
-    # Setup progress bar if needed
-    show_progress = is_build or len(texts) > 10
-    try:
-        if show_progress:
-            from tqdm import tqdm
-    except ImportError:
-        show_progress = False
-
-    # Process batches
-    num_batches = (len(texts) + batch_size - 1) // batch_size
-
-    if show_progress:
-        batch_iterator = tqdm(range(num_batches), desc="Computing Ollama embeddings")
    else:
-        batch_iterator = range(num_batches)
+        # Sequential processing with progress bar
+        show_progress = is_build or len(texts) > 10

-    for batch_idx in batch_iterator:
-        start_idx = batch_idx * batch_size
-        end_idx = min(start_idx + batch_size, len(texts))
-        batch_texts = texts[start_idx:end_idx]
+        try:
+            if show_progress:
+                from tqdm import tqdm

-        batch_embeddings, batch_failed = get_batch_embeddings(batch_texts)
+                iterator = tqdm(
+                    enumerate(texts), total=len(texts), desc="Computing Ollama embeddings"
+                )
+            else:
+                iterator = enumerate(texts)
+        except ImportError:
+            iterator = enumerate(texts)

-        # Adjust failed indices to global indices
-        global_failed = [start_idx + idx for idx in batch_failed]
-        all_failed_indices.extend(global_failed)
-        all_embeddings.extend(batch_embeddings)
+        for idx, text in iterator:
+            result_idx, embedding = get_single_embedding((text, idx))
+            if embedding is not None:
+                all_embeddings[idx] = embedding
+            else:
+                failed_indices.append(idx)

    # Handle failed embeddings
-    if all_failed_indices:
-        if len(all_failed_indices) == len(texts):
+    if failed_indices:
+        if len(failed_indices) == len(texts):
            raise RuntimeError("Failed to compute any embeddings")

-        logger.warning(
-            f"Failed to compute embeddings for {len(all_failed_indices)}/{len(texts)} texts"
-        )
+        logger.warning(f"Failed to compute embeddings for {len(failed_indices)}/{len(texts)} texts")

        # Use zero embeddings as fallback for failed ones
        valid_embedding = next((e for e in all_embeddings if e is not None), None)
        if valid_embedding:
            embedding_dim = len(valid_embedding)
-            for i, embedding in enumerate(all_embeddings):
-                if embedding is None:
-                    all_embeddings[i] = [0.0] * embedding_dim
+            for idx in failed_indices:
+                all_embeddings[idx] = [0.0] * embedding_dim

-    # Remove None values
+    # Remove None values and convert to numpy array
    all_embeddings = [e for e in all_embeddings if e is not None]

-    if not all_embeddings:
-        raise RuntimeError("No valid embeddings were computed")
+    # Validate embedding dimensions before creating numpy array
+    if all_embeddings:
+        expected_dim = len(all_embeddings[0])
+        inconsistent_dims = []
+        for i, embedding in enumerate(all_embeddings):
+            if len(embedding) != expected_dim:
+                inconsistent_dims.append((i, len(embedding)))

-    # Validate embedding dimensions
-    expected_dim = len(all_embeddings[0])
-    inconsistent_dims = []
-    for i, embedding in enumerate(all_embeddings):
-        if len(embedding) != expected_dim:
-            inconsistent_dims.append((i, len(embedding)))
-
-    if inconsistent_dims:
-        error_msg = f"Ollama returned inconsistent embedding dimensions. Expected {expected_dim}, but got:\n"
-        for idx, dim in inconsistent_dims[:10]:  # Show first 10 inconsistent ones
-            error_msg += f"  - Text {idx}: {dim} dimensions\n"
-        if len(inconsistent_dims) > 10:
-            error_msg += f"  ... and {len(inconsistent_dims) - 10} more\n"
-        error_msg += f"\nThis is likely an Ollama API bug with model '{model_name}'. Please try:\n"
-        error_msg += "1. Restart Ollama service: 'ollama serve'\n"
-        error_msg += f"2. Re-pull the model: 'ollama pull {model_name}'\n"
-        error_msg += (
-            "3. Use sentence-transformers instead: --embedding-mode sentence-transformers\n"
-        )
-        error_msg += "4. Report this issue to Ollama: https://github.com/ollama/ollama/issues"
-        raise ValueError(error_msg)
+        if inconsistent_dims:
+            error_msg = f"Ollama returned inconsistent embedding dimensions. Expected {expected_dim}, but got:\n"
+            for idx, dim in inconsistent_dims[:10]:  # Show first 10 inconsistent ones
+                error_msg += f"  - Text {idx}: {dim} dimensions\n"
+            if len(inconsistent_dims) > 10:
+                error_msg += f"  ... and {len(inconsistent_dims) - 10} more\n"
+            error_msg += (
+                f"\nThis is likely an Ollama API bug with model '{model_name}'. Please try:\n"
+            )
+            error_msg += "1. Restart Ollama service: 'ollama serve'\n"
+            error_msg += f"2. Re-pull the model: 'ollama pull {model_name}'\n"
+            error_msg += (
+                "3. Use sentence-transformers instead: --embedding-mode sentence-transformers\n"
+            )
+            error_msg += "4. Report this issue to Ollama: https://github.com/ollama/ollama/issues"
+            raise ValueError(error_msg)

    # Convert to numpy array and normalize
    embeddings = np.array(all_embeddings, dtype=np.float32)
@@ -4,12 +4,20 @@ Transform your development workflow with intelligent code assistance using LEANN

 ## Prerequisites

-Install LEANN globally for MCP integration (with default backend):
+**Step 1:** Complete the basic LEANN installation by following the [📦 Installation guide](../../README.md#installation) in the root README:

 ```bash
-uv tool install leann-core --with leann
+uv venv
+source .venv/bin/activate
+uv pip install leann
 ```
-This installs the `leann` CLI into an isolated tool environment and includes both backends so `leann build` works out-of-the-box.
+
+**Step 2:** Install LEANN globally for MCP integration:
+```bash
+uv tool install leann-core
+```
+
+This makes the `leann` command available system-wide, which `leann_mcp` requires.

 ## 🚀 Quick Setup

@@ -37,42 +45,6 @@ leann build my-project --docs ./
 claude
 ```

-## 🚀 Advanced Usage Examples
-
-### Index Entire Git Repository
-```bash
-# Index all tracked files in your git repository, note right now we will skip submodules, but we can add it back easily if you want
-leann build my-repo --docs $(git ls-files) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Index only specific file types from git
-leann build my-python-code --docs $(git ls-files "*.py") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-```
-
-### Multiple Directories and Files
-```bash
-# Index multiple directories
-leann build my-codebase --docs ./src ./tests ./docs ./config --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Mix files and directories
-leann build my-project --docs ./README.md ./src/ ./package.json ./docs/ --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Specific files only
-leann build my-configs --docs ./tsconfig.json ./package.json ./webpack.config.js --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-```
-
-### Advanced Git Integration
-```bash
-# Index recently modified files
-leann build recent-changes --docs $(git diff --name-only HEAD~10..HEAD) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Index files matching pattern
-leann build frontend --docs $(git ls-files "*.tsx" "*.ts" "*.jsx" "*.js") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Index documentation and config files
-leann build docs-and-configs --docs $(git ls-files "*.md" "*.yml" "*.yaml" "*.json" "*.toml") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-```
-
-
 **Try this in Claude Code:**
 ```
 Help me understand this codebase. List available indexes and search for authentication patterns.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann"
-version = "0.2.9"
+version = "0.2.7"
 description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -1,62 +0,0 @@
-name: leann-build
-
-resources:
-  # Choose a GPU for fast embeddings (examples: L4, A10G, A100). CPU also works but is slower.
-  accelerators: L4:1
-  # Optionally pin a cloud, otherwise SkyPilot will auto-select
-  # cloud: aws
-  disk_size: 100
-
-env:
-  # Build parameters (override with: sky launch -c leann-gpu sky/leann-build.yaml -e key=value)
-  index_name: my-index
-  docs: ./data
-  backend: hnsw               # hnsw | diskann
-  complexity: 64
-  graph_degree: 32
-  num_threads: 8
-  # Embedding selection
-  embedding_mode: sentence-transformers   # sentence-transformers | openai | mlx | ollama
-  embedding_model: facebook/contriever
-  # Storage/latency knobs
-  recompute: true             # true => selective recomputation; false => store full embeddings
-  compact: true               # for HNSW only: false when recompute=false
-  # Optional pass-through
-  extra_args: ""
-
-# Sync local paths to the remote VM. Adjust as needed.
-file_mounts:
-  # Example: mount your local data directory used for building
-  ~/leann-data: ${docs}
-
-setup: |
-  set -e
-  # Install uv (package manager)
-  curl -LsSf https://astral.sh/uv/install.sh | sh
-  export PATH="$HOME/.local/bin:$PATH"
-
-  # Install the LEANN CLI globally on the remote machine
-  uv tool install leann
-
-run: |
-  export PATH="$HOME/.local/bin:$PATH"
-  # Derive flags from env
-  recompute_flag=""
-  if [ "${recompute}" = "false" ] || [ "${recompute}" = "0" ]; then
-    recompute_flag="--no-recompute"
-  fi
-  compact_flag=""
-  if [ "${compact}" = "false" ] || [ "${compact}" = "0" ]; then
-    compact_flag="--no-compact"
-  fi
-
-  # Build command
-  leann build ${index_name} \
-    --docs ~/leann-data \
-    --backend ${backend} \
-    --complexity ${complexity} \
-    --graph-degree ${graph_degree} \
-    --num-threads ${num_threads} \
-    --embedding-mode ${embedding_mode} \
-    --embedding-model ${embedding_model} \
-    ${recompute_flag} ${compact_flag} ${extra_args}