From 9f2e82a838bc73d43bba1b0279e7b73b059c6250 Mon Sep 17 00:00:00 2001 From: joshuashaffer Date: Fri, 8 Aug 2025 18:31:15 -0400 Subject: [PATCH 1/4] Propagate hosts argument for ollama through chat.py (#21) * Propagate hosts argument for ollama through chat.py * Apply suggestions from code review Good AI slop suggestions. Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/leann-core/src/leann/chat.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py index 2d69bec..3a5acb1 100644 --- a/packages/leann-core/src/leann/chat.py +++ b/packages/leann-core/src/leann/chat.py @@ -17,12 +17,12 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -def check_ollama_models() -> list[str]: +def check_ollama_models(host: str) -> list[str]: """Check available Ollama models and return a list""" try: import requests - response = requests.get("http://localhost:11434/api/tags", timeout=5) + response = requests.get(f"{host}/api/tags", timeout=5) if response.status_code == 200: data = response.json() return [model["name"] for model in data.get("models", [])] @@ -309,10 +309,12 @@ def search_hf_models(query: str, limit: int = 10) -> list[str]: return search_hf_models_fuzzy(query, limit) -def validate_model_and_suggest(model_name: str, llm_type: str) -> str | None: +def validate_model_and_suggest( + model_name: str, llm_type: str, host: str = "http://localhost:11434" +) -> str | None: """Validate model name and provide suggestions if invalid""" if llm_type == "ollama": - available_models = check_ollama_models(host) + available_models = check_ollama_models(host) if available_models and model_name not in available_models: error_msg = f"Model '{model_name}' not found in your local Ollama installation."
@@ -469,7 +471,7 @@ class OllamaChat(LLMInterface): requests.get(host) # Pre-check model availability with helpful suggestions - model_error = validate_model_and_suggest(model, "ollama") + model_error = validate_model_and_suggest(model, "ollama", host) if model_error: raise ValueError(model_error) From b6ab6f19938fec60b72961c7f5ed4397cecced71 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 8 Aug 2025 22:32:27 +0000 Subject: [PATCH 2/4] chore: release v0.2.5 --- packages/leann-backend-diskann/pyproject.toml | 4 ++-- packages/leann-backend-hnsw/pyproject.toml | 4 ++-- packages/leann-core/pyproject.toml | 2 +- packages/leann/pyproject.toml | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/leann-backend-diskann/pyproject.toml b/packages/leann-backend-diskann/pyproject.toml index 955f1fd..5519ac2 100644 --- a/packages/leann-backend-diskann/pyproject.toml +++ b/packages/leann-backend-diskann/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build" [project] name = "leann-backend-diskann" -version = "0.2.4" -dependencies = ["leann-core==0.2.4", "numpy", "protobuf>=3.19.0"] +version = "0.2.5" +dependencies = ["leann-core==0.2.5", "numpy", "protobuf>=3.19.0"] [tool.scikit-build] # Key: simplified CMake path diff --git a/packages/leann-backend-hnsw/pyproject.toml b/packages/leann-backend-hnsw/pyproject.toml index 5c5ce27..89e63eb 100644 --- a/packages/leann-backend-hnsw/pyproject.toml +++ b/packages/leann-backend-hnsw/pyproject.toml @@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build" [project] name = "leann-backend-hnsw" -version = "0.2.4" +version = "0.2.5" description = "Custom-built HNSW (Faiss) backend for the Leann toolkit." dependencies = [ - "leann-core==0.2.4", + "leann-core==0.2.5", "numpy", "pyzmq>=23.0.0", "msgpack>=1.0.0", diff --git a/packages/leann-core/pyproject.toml b/packages/leann-core/pyproject.toml index 8be8484..7e564f4 100644 --- a/packages/leann-core/pyproject.toml +++ b/packages/leann-core/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "leann-core" -version = "0.2.4" +version = "0.2.5" description = "Core API and plugin system for LEANN" readme = "README.md" requires-python = ">=3.9" diff --git a/packages/leann/pyproject.toml b/packages/leann/pyproject.toml index ae634de..17b50d8 100644 --- a/packages/leann/pyproject.toml +++ b/packages/leann/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "leann" -version = "0.2.4" +version = "0.2.5" description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!" 
readme = "README.md" requires-python = ">=3.9" From 67fef60466746b86ffcf7933cad920f4e65a657d Mon Sep 17 00:00:00 2001 From: yichuan520030910320 Date: Fri, 8 Aug 2025 16:03:18 -0700 Subject: [PATCH 3/4] [Readme]More about claude code --- README.md | 2 +- packages/leann-mcp/README.md | 66 ++++++++++++++++++++++++------------ 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 614d60c..5b08fac 100755 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg **Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can semantic search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, **[codebase](#-claude-code-integration-transform-your-development-workflow)**\* , or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy. -\* Claude Code only supports basic `grep`-style keyword search. **LEANN** is a drop-in **semantic search MCP service fully compatible with Claude Code**, unlocking intelligent retrieval without changing your workflow. +\* Claude Code only supports basic `grep`-style keyword search. **LEANN** is a drop-in **semantic search MCP service fully compatible with Claude Code**, unlocking intelligent retrieval without changing your workflow. šŸ”„ Check out [the easy setup →](packages/leann-mcp/README.md) diff --git a/packages/leann-mcp/README.md b/packages/leann-mcp/README.md index d5fa99f..b762ae9 100644 --- a/packages/leann-mcp/README.md +++ b/packages/leann-mcp/README.md @@ -1,18 +1,25 @@ -# LEANN Claude Code Integration +# šŸ”„ LEANN Claude Code Integration -Intelligent code assistance using LEANN's vector search directly in Claude Code. +Transform your development workflow with intelligent code assistance using LEANN's semantic search directly in Claude Code. ## Prerequisites -First, install LEANN CLI globally: +**Step 1:** First, complete the basic LEANN installation following the [šŸ“¦ Installation guide](../../README.md#installation) in the root README: +```bash +uv venv +source .venv/bin/activate +uv pip install leann +``` + +**Step 2:** Install LEANN globally for MCP integration: ```bash uv tool install leann-core ``` This makes the `leann` command available system-wide, which `leann_mcp` requires. 
-## Quick Setup +## šŸš€ Quick Setup Add the LEANN MCP server to Claude Code: @@ -20,23 +27,25 @@ Add the LEANN MCP server to Claude Code: claude mcp add leann-server -- leann_mcp ``` -## Available Tools +## šŸ› ļø Available Tools -- **`leann_list`** - List available indexes across all projects -- **`leann_search`** - Search code and documents with semantic queries -- **`leann_ask`** - Ask questions and get AI-powered answers from your codebase +Once connected, you'll have access to these powerful semantic search tools in Claude Code: -## Quick Start +- **`leann_list`** - List all available indexes across your projects +- **`leann_search`** - Perform semantic searches across code and documents +- **`leann_ask`** - Ask natural language questions and get AI-powered answers from your codebase + +## šŸŽÆ Quick Start Example ```bash -# Build an index for your project -leann build my-project --docs ./ #change to your doc PATH +# Build an index for your project (change to your actual path) +leann build my-project --docs ./ # Start Claude Code claude ``` -Then in Claude Code: +**Try this in Claude Code:** ``` Help me understand this codebase. List available indexes and search for authentication patterns. ``` @@ -46,24 +55,37 @@ Help me understand this codebase. List available indexes and search for authenti

-## How It Works +## 🧠 How It Works -- **`leann`** - Core CLI tool for indexing and searching (installed globally) +The integration consists of three key components working seamlessly together: + +- **`leann`** - Core CLI tool for indexing and searching (installed globally via `uv tool install`) - **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration -- Claude Code calls `leann_mcp`, which executes `leann` commands and returns results +- **Claude Code** - Calls `leann_mcp`, which executes `leann` commands and returns intelligent results -## File Support +## šŸ“ File Support -Python, JavaScript, TypeScript, Java, Go, Rust, SQL, YAML, JSON, and 30+ more file types. +LEANN understands **30+ file types** including: +- **Programming**: Python, JavaScript, TypeScript, Java, Go, Rust, C++, C# +- **Data**: SQL, YAML, JSON, CSV, XML +- **Documentation**: Markdown, TXT, PDF +- **And many more!** -## Storage +## šŸ’¾ Storage & Organization -- Project indexes in `.leann/` directory (like `.git`) -- Global project registry at `~/.leann/projects.json` -- Multi-project support built-in +- **Project indexes**: Stored in `.leann/` directory (just like `.git`) +- **Global registry**: Project tracking at `~/.leann/projects.json` +- **Multi-project support**: Switch between different codebases seamlessly +- **Portable**: Transfer indexes between machines with minimal overhead -## Removing +## šŸ—‘ļø Uninstalling + +To remove the LEANN MCP server from Claude Code: ```bash claude mcp remove leann-server ``` +To remove LEANN: +```bash +uv pip uninstall leann leann-backend-hnsw leann-core +``` From 3ff5aac8e0b7b56d5bdd60415fbe7c2f6492d846 Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Fri, 8 Aug 2025 18:44:07 -0700 Subject: [PATCH 4/4] Add Ollama embedding support to enable local embedding models (#22) * feat: Add Ollama embedding support for local embedding models * docs: Add clear documentation for Ollama embedding usage * feat: Enhance Ollama embedding with better error handling and concurrent processing - Add intelligent model validation and suggestions (inspired by OllamaChat) - Implement concurrent processing for better performance - Add retry mechanism with timeout handling - Provide user-friendly error messages with emojis - Auto-detect and recommend embedding models - Add text truncation for long texts - Improve progress bar display logic * docs: don't mention it in README --- README.md | 5 +- apps/base_rag_example.py | 4 +- docs/configuration-guide.md | 13 +- .../diskann_embedding_server.py | 2 +- .../hnsw_embedding_server.py | 2 +- packages/leann-core/src/leann/cli.py | 8 + .../leann-core/src/leann/embedding_compute.py | 264 +++++++++++++++++- 7 files changed, 289 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 5b08fac..40c07ec 100755 --- a/README.md +++ b/README.md @@ -97,7 +97,6 @@ uv sync - ## Quick Start Our declarative API makes RAG as easy as writing a config file. @@ -189,8 +188,8 @@ All RAG examples share these common parameters.
**Interactive mode** is availabl --force-rebuild # Force rebuild index even if it exists # Embedding Parameters ---embedding-model MODEL # e.g., facebook/contriever, text-embedding-3-small or mlx-community/multilingual-e5-base-mlx ---embedding-mode MODE # sentence-transformers, openai, or mlx +--embedding-model MODEL # e.g., facebook/contriever, text-embedding-3-small, nomic-embed-text, or mlx-community/multilingual-e5-base-mlx +--embedding-mode MODE # sentence-transformers, openai, mlx, or ollama # LLM Parameters (Text generation models) --llm TYPE # LLM backend: openai, ollama, or hf (default: openai) diff --git a/apps/base_rag_example.py b/apps/base_rag_example.py index f5a481c..4bd62b9 100644 --- a/apps/base_rag_example.py +++ b/apps/base_rag_example.py @@ -75,7 +75,7 @@ class BaseRAGExample(ABC): "--embedding-mode", type=str, default="sentence-transformers", - choices=["sentence-transformers", "openai", "mlx"], + choices=["sentence-transformers", "openai", "mlx", "ollama"], help="Embedding backend mode (default: sentence-transformers)", ) @@ -85,7 +85,7 @@ class BaseRAGExample(ABC): "--llm", type=str, default="openai", - choices=["openai", "ollama", "hf"], + choices=["openai", "ollama", "hf", "simulated"], help="LLM backend to use (default: openai)", ) llm_group.add_argument( diff --git a/docs/configuration-guide.md b/docs/configuration-guide.md index 8d910f6..95cb3f0 100644 --- a/docs/configuration-guide.md +++ b/docs/configuration-guide.md @@ -49,14 +49,25 @@ Based on our experience developing LEANN, embedding models fall into three categ - **Cons**: Slower inference, longer index build times - **Use when**: Quality is paramount and you have sufficient compute resources. **Highly recommended** for production use -### Quick Start: OpenAI Embeddings (Fastest Setup) +### Quick Start: Cloud and Local Embedding Options +**OpenAI Embeddings (Fastest Setup)** For immediate testing without local model downloads: ```bash # Set OpenAI embeddings (requires OPENAI_API_KEY) --embedding-mode openai --embedding-model text-embedding-3-small ``` +**Ollama Embeddings (Privacy-Focused)** +For local embeddings with complete privacy: +```bash +# First, pull an embedding model +ollama pull nomic-embed-text + +# Use Ollama embeddings +--embedding-mode ollama --embedding-model nomic-embed-text +``` +
Cloud vs Local Trade-offs diff --git a/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py b/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py index ee7423f..1928dc8 100644 --- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py +++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py @@ -261,7 +261,7 @@ if __name__ == "__main__": "--embedding-mode", type=str, default="sentence-transformers", - choices=["sentence-transformers", "openai", "mlx"], + choices=["sentence-transformers", "openai", "mlx", "ollama"], help="Embedding backend mode", ) parser.add_argument( diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py index 331477f..e9c246c 100644 --- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py @@ -295,7 +295,7 @@ if __name__ == "__main__": "--embedding-mode", type=str, default="sentence-transformers", - choices=["sentence-transformers", "openai", "mlx"], + choices=["sentence-transformers", "openai", "mlx", "ollama"], help="Embedding backend mode", ) diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py index 787cadd..f307204 100644 --- a/packages/leann-core/src/leann/cli.py +++ b/packages/leann-core/src/leann/cli.py @@ -94,6 +94,13 @@ Examples: "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"] ) build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever") + build_parser.add_argument( + "--embedding-mode", + type=str, + default="sentence-transformers", + choices=["sentence-transformers", "openai", "mlx", "ollama"], + help="Embedding backend mode (default: sentence-transformers)", + ) build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild") build_parser.add_argument("--graph-degree", type=int, default=32) build_parser.add_argument("--complexity", type=int, default=64) @@ -469,6 +476,7 @@ Examples: builder = LeannBuilder( backend_name=args.backend, embedding_model=args.embedding_model, + embedding_mode=args.embedding_mode, graph_degree=args.graph_degree, complexity=args.complexity, is_compact=args.compact, diff --git a/packages/leann-core/src/leann/embedding_compute.py b/packages/leann-core/src/leann/embedding_compute.py index 95fa9e4..67f33d1 100644 --- a/packages/leann-core/src/leann/embedding_compute.py +++ b/packages/leann-core/src/leann/embedding_compute.py @@ -6,6 +6,7 @@ Preserves all optimization parameters to ensure performance import logging import os +from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Any import numpy as np @@ -35,7 +36,7 @@ def compute_embeddings( Args: texts: List of texts to compute embeddings for model_name: Model name - mode: Computation mode ('sentence-transformers', 'openai', 'mlx') + mode: Computation mode ('sentence-transformers', 'openai', 'mlx', 'ollama') is_build: Whether this is a build operation (shows progress bar) batch_size: Batch size for processing adaptive_optimization: Whether to use adaptive optimization based on batch size @@ -55,6 +56,8 @@ def compute_embeddings( return compute_embeddings_openai(texts, model_name) elif mode == "mlx": return compute_embeddings_mlx(texts, model_name) + elif mode == "ollama": + return compute_embeddings_ollama(texts, model_name, is_build=is_build) 
else: raise ValueError(f"Unsupported embedding mode: {mode}") @@ -365,3 +368,262 @@ def compute_embeddings_mlx(chunks: list[str], model_name: str, batch_size: int = # Stack numpy arrays return np.stack(all_embeddings) + + +def compute_embeddings_ollama( + texts: list[str], model_name: str, is_build: bool = False, host: str = "http://localhost:11434" +) -> np.ndarray: + """ + Compute embeddings using Ollama API. + + Args: + texts: List of texts to compute embeddings for + model_name: Ollama model name (e.g., "nomic-embed-text", "mxbai-embed-large") + is_build: Whether this is a build operation (shows progress bar) + host: Ollama host URL (default: http://localhost:11434) + + Returns: + Normalized embeddings array, shape: (len(texts), embedding_dim) + """ + try: + import requests + except ImportError: + raise ImportError( + "The 'requests' library is required for Ollama embeddings. Install with: uv pip install requests" + ) + + if not texts: + raise ValueError("Cannot compute embeddings for empty text list") + + logger.info( + f"Computing embeddings for {len(texts)} texts using Ollama API, model: '{model_name}'" + ) + + # Check if Ollama is running + try: + response = requests.get(f"{host}/api/version", timeout=5) + response.raise_for_status() + except requests.exceptions.ConnectionError: + error_msg = ( + f"āŒ Could not connect to Ollama at {host}.\n\n" + "Please ensure Ollama is running:\n" + " • macOS/Linux: ollama serve\n" + " • Windows: Make sure Ollama is running in the system tray\n\n" + "Installation: https://ollama.com/download" + ) + raise RuntimeError(error_msg) + except Exception as e: + raise RuntimeError(f"Unexpected error connecting to Ollama: {e}") + + # Check if model exists and provide helpful suggestions + try: + response = requests.get(f"{host}/api/tags", timeout=5) + response.raise_for_status() + models = response.json() + model_names = [model["name"] for model in models.get("models", [])] + + # Filter for embedding models (models that support embeddings) + embedding_models = [] + suggested_embedding_models = [ + "nomic-embed-text", + "mxbai-embed-large", + "bge-m3", + "all-minilm", + "snowflake-arctic-embed", + ] + + for model in model_names: + # Check if it's an embedding model (by name patterns or known models) + base_name = model.split(":")[0] + if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5"]): + embedding_models.append(model) + + # Check if model exists (handle versioned names) + model_found = any( + model_name == name.split(":")[0] or model_name == name for name in model_names + ) + + if not model_found: + error_msg = f"āŒ Model '{model_name}' not found in local Ollama.\n\n" + + # Suggest pulling the model + error_msg += "šŸ“¦ To install this embedding model:\n" + error_msg += f" ollama pull {model_name}\n\n" + + # Show available embedding models + if embedding_models: + error_msg += "āœ… Available embedding models:\n" + for model in embedding_models[:5]: + error_msg += f" • {model}\n" + if len(embedding_models) > 5: + error_msg += f" ... 
and {len(embedding_models) - 5} more\n" + else: + error_msg += "šŸ’” Popular embedding models to install:\n" + for model in suggested_embedding_models[:3]: + error_msg += f" • ollama pull {model}\n" + + error_msg += "\nšŸ“š Browse more: https://ollama.com/library" + raise ValueError(error_msg) + + # Verify the model supports embeddings by testing it + try: + test_response = requests.post( + f"{host}/api/embeddings", json={"model": model_name, "prompt": "test"}, timeout=10 + ) + if test_response.status_code != 200: + error_msg = ( + f"āš ļø Model '{model_name}' exists but may not support embeddings.\n\n" + f"Please use an embedding model like:\n" + ) + for model in suggested_embedding_models[:3]: + error_msg += f" • {model}\n" + raise ValueError(error_msg) + except requests.exceptions.RequestException: + # If test fails, continue anyway - model might still work + pass + + except requests.exceptions.RequestException as e: + logger.warning(f"Could not verify model existence: {e}") + + # Process embeddings with optimized concurrent processing + import requests + + def get_single_embedding(text_idx_tuple): + """Helper function to get embedding for a single text.""" + text, idx = text_idx_tuple + max_retries = 3 + retry_count = 0 + + # Truncate very long texts to avoid API issues + truncated_text = text[:8000] if len(text) > 8000 else text + + while retry_count < max_retries: + try: + response = requests.post( + f"{host}/api/embeddings", + json={"model": model_name, "prompt": truncated_text}, + timeout=30, + ) + response.raise_for_status() + + result = response.json() + embedding = result.get("embedding") + + if embedding is None: + raise ValueError(f"No embedding returned for text {idx}") + + return idx, embedding + + except requests.exceptions.Timeout: + retry_count += 1 + if retry_count >= max_retries: + logger.warning(f"Timeout for text {idx} after {max_retries} retries") + return idx, None + + except Exception as e: + if retry_count >= max_retries - 1: + logger.error(f"Failed to get embedding for text {idx}: {e}") + return idx, None + retry_count += 1 + + return idx, None + + # Determine if we should use concurrent processing + use_concurrent = ( + len(texts) > 5 and not is_build + ) # Don't use concurrent in build mode to avoid overwhelming + max_workers = min(4, len(texts)) # Limit concurrent requests to avoid overwhelming Ollama + + all_embeddings = [None] * len(texts) # Pre-allocate list to maintain order + failed_indices = [] + + if use_concurrent: + logger.info( + f"Using concurrent processing with {max_workers} workers for {len(texts)} texts" + ) + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all tasks + future_to_idx = { + executor.submit(get_single_embedding, (text, idx)): idx + for idx, text in enumerate(texts) + } + + # Add progress bar for concurrent processing + try: + if is_build or len(texts) > 10: + from tqdm import tqdm + + futures_iterator = tqdm( + as_completed(future_to_idx), + total=len(texts), + desc="Computing Ollama embeddings", + ) + else: + futures_iterator = as_completed(future_to_idx) + except ImportError: + futures_iterator = as_completed(future_to_idx) + + # Collect results as they complete + for future in futures_iterator: + try: + idx, embedding = future.result() + if embedding is not None: + all_embeddings[idx] = embedding + else: + failed_indices.append(idx) + except Exception as e: + idx = future_to_idx[future] + logger.error(f"Exception for text {idx}: {e}") + failed_indices.append(idx) + + else: + # Sequential processing 
with progress bar + show_progress = is_build or len(texts) > 10 + + try: + if show_progress: + from tqdm import tqdm + + iterator = tqdm( + enumerate(texts), total=len(texts), desc="Computing Ollama embeddings" + ) + else: + iterator = enumerate(texts) + except ImportError: + iterator = enumerate(texts) + + for idx, text in iterator: + result_idx, embedding = get_single_embedding((text, idx)) + if embedding is not None: + all_embeddings[idx] = embedding + else: + failed_indices.append(idx) + + # Handle failed embeddings + if failed_indices: + if len(failed_indices) == len(texts): + raise RuntimeError("Failed to compute any embeddings") + + logger.warning(f"Failed to compute embeddings for {len(failed_indices)}/{len(texts)} texts") + + # Use zero embeddings as fallback for failed ones + valid_embedding = next((e for e in all_embeddings if e is not None), None) + if valid_embedding: + embedding_dim = len(valid_embedding) + for idx in failed_indices: + all_embeddings[idx] = [0.0] * embedding_dim + + # Remove None values and convert to numpy array + all_embeddings = [e for e in all_embeddings if e is not None] + + # Convert to numpy array and normalize + embeddings = np.array(all_embeddings, dtype=np.float32) + + # Normalize embeddings (L2 normalization) + norms = np.linalg.norm(embeddings, axis=1, keepdims=True) + embeddings = embeddings / (norms + 1e-8) # Add small epsilon to avoid division by zero + + logger.info(f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}") + + return embeddings
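A minimal usage sketch for the Ollama embedding path introduced in [PATCH 4/4], assuming a local Ollama server on the default `http://localhost:11434` and that `nomic-embed-text` has already been pulled (`ollama pull nomic-embed-text`). The import path `leann.embedding_compute` is an assumption that mirrors the file location `packages/leann-core/src/leann/embedding_compute.py`; the function signature and return shape follow the diff above.

```python
# Sketch: calling the new Ollama embedding backend directly (illustrative, not an official example).
# Assumes `ollama serve` is running locally and the model has been pulled:
#   ollama pull nomic-embed-text
from leann.embedding_compute import compute_embeddings_ollama  # assumed module path

texts = [
    "LEANN builds graph-based vector indexes with selective recomputation.",
    "Ollama serves local embedding models over an HTTP API.",
]

# Returns an L2-normalized float32 array of shape (len(texts), embedding_dim).
embeddings = compute_embeddings_ollama(
    texts,
    "nomic-embed-text",
    is_build=False,                  # <=5 texts, so the sequential path is used
    host="http://localhost:11434",   # default host from the patch
)
print(embeddings.shape)
```

The same backend is reachable end to end through the new `--embedding-mode ollama` flag added to `leann build` in `cli.py`, paired with `--embedding-model nomic-embed-text` as documented in `docs/configuration-guide.md`.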