From 9f2e82a838bc73d43bba1b0279e7b73b059c6250 Mon Sep 17 00:00:00 2001 From: joshuashaffer Date: Fri, 8 Aug 2025 18:31:15 -0400 Subject: [PATCH 1/4] Propagate hosts argument for ollama through chat.py (#21) * Propagate hosts argument for ollama through chat.py * Apply suggestions from code review Good AI slop suggestions. Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/leann-core/src/leann/chat.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py index 2d69bec..3a5acb1 100644 --- a/packages/leann-core/src/leann/chat.py +++ b/packages/leann-core/src/leann/chat.py @@ -17,12 +17,12 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -def check_ollama_models() -> list[str]: +def check_ollama_models(host: str) -> list[str]: """Check available Ollama models and return a list""" try: import requests - response = requests.get("http://localhost:11434/api/tags", timeout=5) + response = requests.get(f"{host}/api/tags", timeout=5) if response.status_code == 200: data = response.json() return [model["name"] for model in data.get("models", [])] @@ -309,10 +309,12 @@ def search_hf_models(query: str, limit: int = 10) -> list[str]: return search_hf_models_fuzzy(query, limit) -def validate_model_and_suggest(model_name: str, llm_type: str) -> str | None: +def validate_model_and_suggest( + model_name: str, llm_type: str, host: str = "http://localhost:11434" +) -> str | None: """Validate model name and provide suggestions if invalid""" if llm_type == "ollama": - available_models = check_ollama_models(host) + available_models = check_ollama_models(host) if available_models and model_name not in available_models: error_msg = f"Model '{model_name}' not found in your local Ollama installation."
@@ -469,7 +471,7 @@ class OllamaChat(LLMInterface): requests.get(host) # Pre-check model availability with helpful suggestions - model_error = validate_model_and_suggest(model, "ollama") + model_error = validate_model_and_suggest(model, "ollama", host) if model_error: raise ValueError(model_error) From b6ab6f19938fec60b72961c7f5ed4397cecced71 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 8 Aug 2025 22:32:27 +0000 Subject: [PATCH 2/4] chore: release v0.2.5 --- packages/leann-backend-diskann/pyproject.toml | 4 ++-- packages/leann-backend-hnsw/pyproject.toml | 4 ++-- packages/leann-core/pyproject.toml | 2 +- packages/leann/pyproject.toml | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/leann-backend-diskann/pyproject.toml b/packages/leann-backend-diskann/pyproject.toml index 955f1fd..5519ac2 100644 --- a/packages/leann-backend-diskann/pyproject.toml +++ b/packages/leann-backend-diskann/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build" [project] name = "leann-backend-diskann" -version = "0.2.4" -dependencies = ["leann-core==0.2.4", "numpy", "protobuf>=3.19.0"] +version = "0.2.5" +dependencies = ["leann-core==0.2.5", "numpy", "protobuf>=3.19.0"] [tool.scikit-build] # Key: simplified CMake path diff --git a/packages/leann-backend-hnsw/pyproject.toml b/packages/leann-backend-hnsw/pyproject.toml index 5c5ce27..89e63eb 100644 --- a/packages/leann-backend-hnsw/pyproject.toml +++ b/packages/leann-backend-hnsw/pyproject.toml @@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build" [project] name = "leann-backend-hnsw" -version = "0.2.4" +version = "0.2.5" description = "Custom-built HNSW (Faiss) backend for the Leann toolkit." dependencies = [ - "leann-core==0.2.4", + "leann-core==0.2.5", "numpy", "pyzmq>=23.0.0", "msgpack>=1.0.0", diff --git a/packages/leann-core/pyproject.toml b/packages/leann-core/pyproject.toml index 8be8484..7e564f4 100644 --- a/packages/leann-core/pyproject.toml +++ b/packages/leann-core/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "leann-core" -version = "0.2.4" +version = "0.2.5" description = "Core API and plugin system for LEANN" readme = "README.md" requires-python = ">=3.9" diff --git a/packages/leann/pyproject.toml b/packages/leann/pyproject.toml index ae634de..17b50d8 100644 --- a/packages/leann/pyproject.toml +++ b/packages/leann/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "leann" -version = "0.2.4" +version = "0.2.5" description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!" 
readme = "README.md" requires-python = ">=3.9" From 67fef60466746b86ffcf7933cad920f4e65a657d Mon Sep 17 00:00:00 2001 From: yichuan520030910320 Date: Fri, 8 Aug 2025 16:03:18 -0700 Subject: [PATCH 3/4] [Readme]More about claude code --- README.md | 2 +- packages/leann-mcp/README.md | 66 ++++++++++++++++++++++++------------ 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 614d60c..5b08fac 100755 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg **Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can semantic search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, **[codebase](#-claude-code-integration-transform-your-development-workflow)**\* , or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy. -\* Claude Code only supports basic `grep`-style keyword search. **LEANN** is a drop-in **semantic search MCP service fully compatible with Claude Code**, unlocking intelligent retrieval without changing your workflow. +\* Claude Code only supports basic `grep`-style keyword search. **LEANN** is a drop-in **semantic search MCP service fully compatible with Claude Code**, unlocking intelligent retrieval without changing your workflow. šŸ”„ Check out [the easy setup →](packages/leann-mcp/README.md) diff --git a/packages/leann-mcp/README.md b/packages/leann-mcp/README.md index d5fa99f..b762ae9 100644 --- a/packages/leann-mcp/README.md +++ b/packages/leann-mcp/README.md @@ -1,18 +1,25 @@ -# LEANN Claude Code Integration +# šŸ”„ LEANN Claude Code Integration -Intelligent code assistance using LEANN's vector search directly in Claude Code. +Transform your development workflow with intelligent code assistance using LEANN's semantic search directly in Claude Code. ## Prerequisites -First, install LEANN CLI globally: +**Step 1:** First, complete the basic LEANN installation following the [šŸ“¦ Installation guide](../../README.md#installation) in the root README: +```bash +uv venv +source .venv/bin/activate +uv pip install leann +``` + +**Step 2:** Install LEANN globally for MCP integration: ```bash uv tool install leann-core ``` This makes the `leann` command available system-wide, which `leann_mcp` requires. 
-## Quick Setup +## šŸš€ Quick Setup Add the LEANN MCP server to Claude Code: @@ -20,23 +27,25 @@ Add the LEANN MCP server to Claude Code: claude mcp add leann-server -- leann_mcp ``` -## Available Tools +## šŸ› ļø Available Tools -- **`leann_list`** - List available indexes across all projects -- **`leann_search`** - Search code and documents with semantic queries -- **`leann_ask`** - Ask questions and get AI-powered answers from your codebase +Once connected, you'll have access to these powerful semantic search tools in Claude Code: -## Quick Start +- **`leann_list`** - List all available indexes across your projects +- **`leann_search`** - Perform semantic searches across code and documents +- **`leann_ask`** - Ask natural language questions and get AI-powered answers from your codebase + +## šŸŽÆ Quick Start Example ```bash -# Build an index for your project -leann build my-project --docs ./ #change to your doc PATH +# Build an index for your project (change to your actual path) +leann build my-project --docs ./ # Start Claude Code claude ``` -Then in Claude Code: +**Try this in Claude Code:** ``` Help me understand this codebase. List available indexes and search for authentication patterns. ``` @@ -46,24 +55,37 @@ Help me understand this codebase. List available indexes and search for authenti

-## How It Works +## 🧠 How It Works -- **`leann`** - Core CLI tool for indexing and searching (installed globally) +The integration consists of three key components working seamlessly together: + +- **`leann`** - Core CLI tool for indexing and searching (installed globally via `uv tool install`) - **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration -- Claude Code calls `leann_mcp`, which executes `leann` commands and returns results +- **Claude Code** - Calls `leann_mcp`, which executes `leann` commands and returns intelligent results -## File Support +## šŸ“ File Support -Python, JavaScript, TypeScript, Java, Go, Rust, SQL, YAML, JSON, and 30+ more file types. +LEANN understands **30+ file types** including: +- **Programming**: Python, JavaScript, TypeScript, Java, Go, Rust, C++, C# +- **Data**: SQL, YAML, JSON, CSV, XML +- **Documentation**: Markdown, TXT, PDF +- **And many more!** -## Storage +## šŸ’¾ Storage & Organization -- Project indexes in `.leann/` directory (like `.git`) -- Global project registry at `~/.leann/projects.json` -- Multi-project support built-in +- **Project indexes**: Stored in `.leann/` directory (just like `.git`) +- **Global registry**: Project tracking at `~/.leann/projects.json` +- **Multi-project support**: Switch between different codebases seamlessly +- **Portable**: Transfer indexes between machines with minimal overhead -## Removing +## šŸ—‘ļø Uninstalling + +To remove the LEANN MCP server from Claude Code: ```bash claude mcp remove leann-server ``` +To remove LEANN: +```bash +uv pip uninstall leann leann-backend-hnsw leann-core +``` From 3ff5aac8e0b7b56d5bdd60415fbe7c2f6492d846 Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Fri, 8 Aug 2025 18:44:07 -0700 Subject: [PATCH 4/4] Add Ollama embedding support to enable local embedding models (#22) * feat: Add Ollama embedding support for local embedding models * docs: Add clear documentation for Ollama embedding usage * feat: Enhance Ollama embedding with better error handling and concurrent processing - Add intelligent model validation and suggestions (inspired by OllamaChat) - Implement concurrent processing for better performance - Add retry mechanism with timeout handling - Provide user-friendly error messages with emojis - Auto-detect and recommend embedding models - Add text truncation for long texts - Improve progress bar display logic * docs: don't mention it in README --- README.md | 5 +- apps/base_rag_example.py | 4 +- docs/configuration-guide.md | 13 +- .../diskann_embedding_server.py | 2 +- .../hnsw_embedding_server.py | 2 +- packages/leann-core/src/leann/cli.py | 8 + .../leann-core/src/leann/embedding_compute.py | 264 +++++++++++++++++- 7 files changed, 289 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 5b08fac..40c07ec 100755 --- a/README.md +++ b/README.md @@ -97,7 +97,6 @@ uv sync - ## Quick Start Our declarative API makes RAG as easy as writing a config file. @@ -189,8 +188,8 @@ All RAG examples share these common parameters.
**Interactive mode** is availabl --force-rebuild # Force rebuild index even if it exists # Embedding Parameters ---embedding-model MODEL # e.g., facebook/contriever, text-embedding-3-small or mlx-community/multilingual-e5-base-mlx ---embedding-mode MODE # sentence-transformers, openai, or mlx +--embedding-model MODEL # e.g., facebook/contriever, text-embedding-3-small, nomic-embed-text, or mlx-community/multilingual-e5-base-mlx +--embedding-mode MODE # sentence-transformers, openai, mlx, or ollama # LLM Parameters (Text generation models) --llm TYPE # LLM backend: openai, ollama, or hf (default: openai) diff --git a/apps/base_rag_example.py b/apps/base_rag_example.py index f5a481c..4bd62b9 100644 --- a/apps/base_rag_example.py +++ b/apps/base_rag_example.py @@ -75,7 +75,7 @@ class BaseRAGExample(ABC): "--embedding-mode", type=str, default="sentence-transformers", - choices=["sentence-transformers", "openai", "mlx"], + choices=["sentence-transformers", "openai", "mlx", "ollama"], help="Embedding backend mode (default: sentence-transformers)", ) @@ -85,7 +85,7 @@ class BaseRAGExample(ABC): "--llm", type=str, default="openai", - choices=["openai", "ollama", "hf"], + choices=["openai", "ollama", "hf", "simulated"], help="LLM backend to use (default: openai)", ) llm_group.add_argument( diff --git a/docs/configuration-guide.md b/docs/configuration-guide.md index 8d910f6..95cb3f0 100644 --- a/docs/configuration-guide.md +++ b/docs/configuration-guide.md @@ -49,14 +49,25 @@ Based on our experience developing LEANN, embedding models fall into three categ - **Cons**: Slower inference, longer index build times - **Use when**: Quality is paramount and you have sufficient compute resources. **Highly recommended** for production use -### Quick Start: OpenAI Embeddings (Fastest Setup) +### Quick Start: Cloud and Local Embedding Options +**OpenAI Embeddings (Fastest Setup)** For immediate testing without local model downloads: ```bash # Set OpenAI embeddings (requires OPENAI_API_KEY) --embedding-mode openai --embedding-model text-embedding-3-small ``` +**Ollama Embeddings (Privacy-Focused)** +For local embeddings with complete privacy: +```bash +# First, pull an embedding model +ollama pull nomic-embed-text + +# Use Ollama embeddings +--embedding-mode ollama --embedding-model nomic-embed-text +``` +
Cloud vs Local Trade-offs diff --git a/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py b/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py index ee7423f..1928dc8 100644 --- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py +++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py @@ -261,7 +261,7 @@ if __name__ == "__main__": "--embedding-mode", type=str, default="sentence-transformers", - choices=["sentence-transformers", "openai", "mlx"], + choices=["sentence-transformers", "openai", "mlx", "ollama"], help="Embedding backend mode", ) parser.add_argument( diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py index 331477f..e9c246c 100644 --- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py @@ -295,7 +295,7 @@ if __name__ == "__main__": "--embedding-mode", type=str, default="sentence-transformers", - choices=["sentence-transformers", "openai", "mlx"], + choices=["sentence-transformers", "openai", "mlx", "ollama"], help="Embedding backend mode", ) diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py index 787cadd..f307204 100644 --- a/packages/leann-core/src/leann/cli.py +++ b/packages/leann-core/src/leann/cli.py @@ -94,6 +94,13 @@ Examples: "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"] ) build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever") + build_parser.add_argument( + "--embedding-mode", + type=str, + default="sentence-transformers", + choices=["sentence-transformers", "openai", "mlx", "ollama"], + help="Embedding backend mode (default: sentence-transformers)", + ) build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild") build_parser.add_argument("--graph-degree", type=int, default=32) build_parser.add_argument("--complexity", type=int, default=64) @@ -469,6 +476,7 @@ Examples: builder = LeannBuilder( backend_name=args.backend, embedding_model=args.embedding_model, + embedding_mode=args.embedding_mode, graph_degree=args.graph_degree, complexity=args.complexity, is_compact=args.compact, diff --git a/packages/leann-core/src/leann/embedding_compute.py b/packages/leann-core/src/leann/embedding_compute.py index 95fa9e4..67f33d1 100644 --- a/packages/leann-core/src/leann/embedding_compute.py +++ b/packages/leann-core/src/leann/embedding_compute.py @@ -6,6 +6,7 @@ Preserves all optimization parameters to ensure performance import logging import os +from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Any import numpy as np @@ -35,7 +36,7 @@ def compute_embeddings( Args: texts: List of texts to compute embeddings for model_name: Model name - mode: Computation mode ('sentence-transformers', 'openai', 'mlx') + mode: Computation mode ('sentence-transformers', 'openai', 'mlx', 'ollama') is_build: Whether this is a build operation (shows progress bar) batch_size: Batch size for processing adaptive_optimization: Whether to use adaptive optimization based on batch size @@ -55,6 +56,8 @@ def compute_embeddings( return compute_embeddings_openai(texts, model_name) elif mode == "mlx": return compute_embeddings_mlx(texts, model_name) + elif mode == "ollama": + return compute_embeddings_ollama(texts, model_name, is_build=is_build) 
else: raise ValueError(f"Unsupported embedding mode: {mode}") @@ -365,3 +368,262 @@ def compute_embeddings_mlx(chunks: list[str], model_name: str, batch_size: int = # Stack numpy arrays return np.stack(all_embeddings) + + +def compute_embeddings_ollama( + texts: list[str], model_name: str, is_build: bool = False, host: str = "http://localhost:11434" +) -> np.ndarray: + """ + Compute embeddings using Ollama API. + + Args: + texts: List of texts to compute embeddings for + model_name: Ollama model name (e.g., "nomic-embed-text", "mxbai-embed-large") + is_build: Whether this is a build operation (shows progress bar) + host: Ollama host URL (default: http://localhost:11434) + + Returns: + Normalized embeddings array, shape: (len(texts), embedding_dim) + """ + try: + import requests + except ImportError: + raise ImportError( + "The 'requests' library is required for Ollama embeddings. Install with: uv pip install requests" + ) + + if not texts: + raise ValueError("Cannot compute embeddings for empty text list") + + logger.info( + f"Computing embeddings for {len(texts)} texts using Ollama API, model: '{model_name}'" + ) + + # Check if Ollama is running + try: + response = requests.get(f"{host}/api/version", timeout=5) + response.raise_for_status() + except requests.exceptions.ConnectionError: + error_msg = ( + f"āŒ Could not connect to Ollama at {host}.\n\n" + "Please ensure Ollama is running:\n" + " • macOS/Linux: ollama serve\n" + " • Windows: Make sure Ollama is running in the system tray\n\n" + "Installation: https://ollama.com/download" + ) + raise RuntimeError(error_msg) + except Exception as e: + raise RuntimeError(f"Unexpected error connecting to Ollama: {e}") + + # Check if model exists and provide helpful suggestions + try: + response = requests.get(f"{host}/api/tags", timeout=5) + response.raise_for_status() + models = response.json() + model_names = [model["name"] for model in models.get("models", [])] + + # Filter for embedding models (models that support embeddings) + embedding_models = [] + suggested_embedding_models = [ + "nomic-embed-text", + "mxbai-embed-large", + "bge-m3", + "all-minilm", + "snowflake-arctic-embed", + ] + + for model in model_names: + # Check if it's an embedding model (by name patterns or known models) + base_name = model.split(":")[0] + if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5"]): + embedding_models.append(model) + + # Check if model exists (handle versioned names) + model_found = any( + model_name == name.split(":")[0] or model_name == name for name in model_names + ) + + if not model_found: + error_msg = f"āŒ Model '{model_name}' not found in local Ollama.\n\n" + + # Suggest pulling the model + error_msg += "šŸ“¦ To install this embedding model:\n" + error_msg += f" ollama pull {model_name}\n\n" + + # Show available embedding models + if embedding_models: + error_msg += "āœ… Available embedding models:\n" + for model in embedding_models[:5]: + error_msg += f" • {model}\n" + if len(embedding_models) > 5: + error_msg += f" ... 
and {len(embedding_models) - 5} more\n" + else: + error_msg += "šŸ’” Popular embedding models to install:\n" + for model in suggested_embedding_models[:3]: + error_msg += f" • ollama pull {model}\n" + + error_msg += "\nšŸ“š Browse more: https://ollama.com/library" + raise ValueError(error_msg) + + # Verify the model supports embeddings by testing it + try: + test_response = requests.post( + f"{host}/api/embeddings", json={"model": model_name, "prompt": "test"}, timeout=10 + ) + if test_response.status_code != 200: + error_msg = ( + f"āš ļø Model '{model_name}' exists but may not support embeddings.\n\n" + f"Please use an embedding model like:\n" + ) + for model in suggested_embedding_models[:3]: + error_msg += f" • {model}\n" + raise ValueError(error_msg) + except requests.exceptions.RequestException: + # If test fails, continue anyway - model might still work + pass + + except requests.exceptions.RequestException as e: + logger.warning(f"Could not verify model existence: {e}") + + # Process embeddings with optimized concurrent processing + import requests + + def get_single_embedding(text_idx_tuple): + """Helper function to get embedding for a single text.""" + text, idx = text_idx_tuple + max_retries = 3 + retry_count = 0 + + # Truncate very long texts to avoid API issues + truncated_text = text[:8000] if len(text) > 8000 else text + + while retry_count < max_retries: + try: + response = requests.post( + f"{host}/api/embeddings", + json={"model": model_name, "prompt": truncated_text}, + timeout=30, + ) + response.raise_for_status() + + result = response.json() + embedding = result.get("embedding") + + if embedding is None: + raise ValueError(f"No embedding returned for text {idx}") + + return idx, embedding + + except requests.exceptions.Timeout: + retry_count += 1 + if retry_count >= max_retries: + logger.warning(f"Timeout for text {idx} after {max_retries} retries") + return idx, None + + except Exception as e: + if retry_count >= max_retries - 1: + logger.error(f"Failed to get embedding for text {idx}: {e}") + return idx, None + retry_count += 1 + + return idx, None + + # Determine if we should use concurrent processing + use_concurrent = ( + len(texts) > 5 and not is_build + ) # Don't use concurrent in build mode to avoid overwhelming + max_workers = min(4, len(texts)) # Limit concurrent requests to avoid overwhelming Ollama + + all_embeddings = [None] * len(texts) # Pre-allocate list to maintain order + failed_indices = [] + + if use_concurrent: + logger.info( + f"Using concurrent processing with {max_workers} workers for {len(texts)} texts" + ) + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all tasks + future_to_idx = { + executor.submit(get_single_embedding, (text, idx)): idx + for idx, text in enumerate(texts) + } + + # Add progress bar for concurrent processing + try: + if is_build or len(texts) > 10: + from tqdm import tqdm + + futures_iterator = tqdm( + as_completed(future_to_idx), + total=len(texts), + desc="Computing Ollama embeddings", + ) + else: + futures_iterator = as_completed(future_to_idx) + except ImportError: + futures_iterator = as_completed(future_to_idx) + + # Collect results as they complete + for future in futures_iterator: + try: + idx, embedding = future.result() + if embedding is not None: + all_embeddings[idx] = embedding + else: + failed_indices.append(idx) + except Exception as e: + idx = future_to_idx[future] + logger.error(f"Exception for text {idx}: {e}") + failed_indices.append(idx) + + else: + # Sequential processing 
with progress bar + show_progress = is_build or len(texts) > 10 + + try: + if show_progress: + from tqdm import tqdm + + iterator = tqdm( + enumerate(texts), total=len(texts), desc="Computing Ollama embeddings" + ) + else: + iterator = enumerate(texts) + except ImportError: + iterator = enumerate(texts) + + for idx, text in iterator: + result_idx, embedding = get_single_embedding((text, idx)) + if embedding is not None: + all_embeddings[idx] = embedding + else: + failed_indices.append(idx) + + # Handle failed embeddings + if failed_indices: + if len(failed_indices) == len(texts): + raise RuntimeError("Failed to compute any embeddings") + + logger.warning(f"Failed to compute embeddings for {len(failed_indices)}/{len(texts)} texts") + + # Use zero embeddings as fallback for failed ones + valid_embedding = next((e for e in all_embeddings if e is not None), None) + if valid_embedding: + embedding_dim = len(valid_embedding) + for idx in failed_indices: + all_embeddings[idx] = [0.0] * embedding_dim + + # Remove None values and convert to numpy array + all_embeddings = [e for e in all_embeddings if e is not None] + + # Convert to numpy array and normalize + embeddings = np.array(all_embeddings, dtype=np.float32) + + # Normalize embeddings (L2 normalization) + norms = np.linalg.norm(embeddings, axis=1, keepdims=True) + embeddings = embeddings / (norms + 1e-8) # Add small epsilon to avoid division by zero + + logger.info(f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}") + + return embeddings
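A minimal usage sketch for the Ollama embedding path introduced in [PATCH 4/4], assuming a local Ollama server on the default `http://localhost:11434` and that `nomic-embed-text` has already been pulled (`ollama pull nomic-embed-text`). The import path `leann.embedding_compute` is an assumption that mirrors the file location `packages/leann-core/src/leann/embedding_compute.py`; the function signature and return shape follow the diff above.

```python
# Sketch: calling the new Ollama embedding backend directly (illustrative, not an official example).
# Assumes `ollama serve` is running locally and the model has been pulled:
#   ollama pull nomic-embed-text
from leann.embedding_compute import compute_embeddings_ollama  # assumed module path

texts = [
    "LEANN builds graph-based vector indexes with selective recomputation.",
    "Ollama serves local embedding models over an HTTP API.",
]

# Returns an L2-normalized float32 array of shape (len(texts), embedding_dim).
embeddings = compute_embeddings_ollama(
    texts,
    "nomic-embed-text",
    is_build=False,                  # <=5 texts, so the sequential path is used
    host="http://localhost:11434",   # default host from the patch
)
print(embeddings.shape)
```

The same backend is reachable end to end through the new `--embedding-mode ollama` flag added to `leann build` in `cli.py`, paired with `--embedding-model nomic-embed-text` as documented in `docs/configuration-guide.md`.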