Compare commits


1 Commit

Author: Andy Lee
Commit: 2c6b65d69f
Date: 2025-08-11 17:36:44 -07:00

fix: detect and report Ollama embedding dimension inconsistency

- Add validation for embedding dimension consistency in Ollama mode
- Provide a clear error message with troubleshooting steps when dimensions mismatch
- Fail fast instead of falling back silently, to prevent data corruption

Fixes #31
13 changed files with 305 additions and 739 deletions
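The core of the fix is a fail-fast consistency check over the vectors Ollama returns: if any embedding's length differs from the first one, the build aborts with a diagnostic instead of silently padding or truncating. A minimal sketch of the check (function name and error text here are illustrative, not the exact ones in the patch):

```python
import numpy as np

def validate_embedding_dims(all_embeddings: list) -> np.ndarray:
    """Fail fast if the server returned vectors of differing lengths."""
    expected_dim = len(all_embeddings[0])
    inconsistent = [(i, len(e)) for i, e in enumerate(all_embeddings)
                    if len(e) != expected_dim]
    if inconsistent:
        # Surface the problem immediately instead of silently coercing the
        # vectors, which would corrupt the index.
        raise ValueError(
            f"Inconsistent embedding dimensions: expected {expected_dim}, "
            f"got {inconsistent[:10]}"
        )
    return np.array(all_embeddings, dtype=np.float32)
```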

View File

@@ -64,16 +64,6 @@ jobs:
         python: '3.12'
       - os: macos-14
         python: '3.13'
-      - os: macos-15
-        python: '3.9'
-      - os: macos-15
-        python: '3.10'
-      - os: macos-15
-        python: '3.11'
-      - os: macos-15
-        python: '3.12'
-      - os: macos-15
-        python: '3.13'
       - os: macos-13
         python: '3.9'
       - os: macos-13
@@ -157,14 +147,7 @@ jobs:
           # Use system clang for better compatibility
           export CC=clang
           export CXX=clang++
-          # Homebrew libraries on each macOS version require matching minimum version
-          if [[ "${{ matrix.os }}" == "macos-13" ]]; then
-            export MACOSX_DEPLOYMENT_TARGET=13.0
-          elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
-            export MACOSX_DEPLOYMENT_TARGET=14.0
-          elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
-            export MACOSX_DEPLOYMENT_TARGET=15.0
-          fi
+          export MACOSX_DEPLOYMENT_TARGET=11.0
           uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
         else
           uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
@@ -178,14 +161,7 @@ jobs:
           export CC=clang
           export CXX=clang++
           # DiskANN requires macOS 13.3+ for sgesdd_ LAPACK function
-          # But Homebrew libraries on each macOS version require matching minimum version
-          if [[ "${{ matrix.os }}" == "macos-13" ]]; then
-            export MACOSX_DEPLOYMENT_TARGET=13.3
-          elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
-            export MACOSX_DEPLOYMENT_TARGET=14.0
-          elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
-            export MACOSX_DEPLOYMENT_TARGET=15.0
-          fi
+          export MACOSX_DEPLOYMENT_TARGET=13.3
           uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
         else
           uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
@@ -221,24 +197,10 @@ jobs:
       - name: Repair wheels (macOS)
         if: runner.os == 'macOS'
         run: |
-          # Determine deployment target based on runner OS
-          # Must match the Homebrew libraries for each macOS version
-          if [[ "${{ matrix.os }}" == "macos-13" ]]; then
-            HNSW_TARGET="13.0"
-            DISKANN_TARGET="13.3"
-          elif [[ "${{ matrix.os }}" == "macos-14" ]]; then
-            HNSW_TARGET="14.0"
-            DISKANN_TARGET="14.0"
-          elif [[ "${{ matrix.os }}" == "macos-15" ]]; then
-            HNSW_TARGET="15.0"
-            DISKANN_TARGET="15.0"
-          fi
           # Repair HNSW wheel
           cd packages/leann-backend-hnsw
           if [ -d dist ]; then
-            export MACOSX_DEPLOYMENT_TARGET=$HNSW_TARGET
-            delocate-wheel -w dist_repaired -v --require-target-macos-version $HNSW_TARGET dist/*.whl
+            delocate-wheel -w dist_repaired -v dist/*.whl
             rm -rf dist
             mv dist_repaired dist
           fi
@@ -247,8 +209,7 @@ jobs:
           # Repair DiskANN wheel
           cd packages/leann-backend-diskann
           if [ -d dist ]; then
-            export MACOSX_DEPLOYMENT_TARGET=$DISKANN_TARGET
-            delocate-wheel -w dist_repaired -v --require-target-macos-version $DISKANN_TARGET dist/*.whl
+            delocate-wheel -w dist_repaired -v dist/*.whl
             rm -rf dist
             mv dist_repaired dist
           fi
@@ -288,8 +249,8 @@ jobs:
           # Activate virtual environment
           source .venv/bin/activate || source .venv/Scripts/activate
-          # Run tests
-          pytest -v tests/
+          # Run all tests
+          pytest tests/
       - name: Run sanity checks (optional)
         run: |

View File

@@ -71,8 +71,6 @@ source .venv/bin/activate
 uv pip install leann
 ```
-
-> Low-resource? See “Low-resource setups” in the [Configuration Guide](docs/configuration-guide.md#low-resource-setups).
 
 <details>
 <summary>
 <strong>🔧 Build from Source (Recommended for development)</strong>
@@ -470,7 +468,7 @@ leann --help
 ### Usage Examples
 
 ```bash
-# build from a specific directory, and my_docs is the index name (here you can also build from multiple directories or files)
+# build from a specific directory, and my_docs is the index name
 leann build my-docs --docs ./your_documents
 
 # Search your documents

View File

@@ -259,80 +259,24 @@ Every configuration choice involves trade-offs:
 The key is finding the right balance for your specific use case. Start small and simple, measure performance, then scale up only where needed.
 
-## Low-resource setups
-
-If you don't have a local GPU or builds/searches are too slow, use one or more of the options below.
-
-### 1) Use OpenAI embeddings (no local compute)
-
-Fastest path with zero local GPU requirements. Set your API key and use OpenAI embeddings during build and search:
-
-```bash
-export OPENAI_API_KEY=sk-...
-
-# Build with OpenAI embeddings
-leann build my-index \
-  --embedding-mode openai \
-  --embedding-model text-embedding-3-small
-
-# Search with OpenAI embeddings (recompute at query time)
-leann search my-index "your query" \
-  --recompute-embeddings
-```
-
-### 2) Run remote builds with SkyPilot (cloud GPU)
-
-Offload embedding generation and index building to a GPU VM using SkyPilot. A template is provided at `sky/leann-build.yaml`.
-
-```bash
-# One-time: install and configure SkyPilot
-pip install skypilot
-sky launch -c leann-gpu sky/leann-build.yaml
-
-# Build remotely (template installs uv + leann CLI)
-sky exec leann-gpu -- "leann build my-index --docs ~/leann-data --backend hnsw --complexity 64 --graph-degree 32"
-```
-
-Details: see “Running Builds on SkyPilot (Optional)” below.
-
-### 3) Disable recomputation to trade storage for speed
-
-If you need lower latency and have more storage/memory, disable recomputation. This stores full embeddings and avoids recomputing at search time.
-
-```bash
-# Build without recomputation (HNSW requires non-compact in this mode)
-leann build my-index --no-recompute --no-compact
-
-# Search without recomputation
-leann search my-index "your query" --no-recompute
-```
-
-Trade-offs: lower query-time latency, but significantly higher storage usage.
-
-## Running Builds on SkyPilot (Optional)
-
-You can offload embedding generation and index building to a cloud GPU VM using SkyPilot, without changing any LEANN code. This is useful when your local machine lacks a GPU or you want faster throughput.
-
-### Quick Start
-
-1) Install SkyPilot by following their docs (`pip install skypilot`), then configure cloud credentials.
-2) Use the provided SkyPilot template:
-
-```bash
-sky launch -c leann-gpu sky/leann-build.yaml
-```
-
-3) On the remote, either put your data under the mounted path or adjust `file_mounts` in `sky/leann-build.yaml`. Then run the LEANN build:
-
-```bash
-sky exec leann-gpu -- "leann build my-index --docs ~/leann-data --backend hnsw --complexity 64 --graph-degree 32"
-```
-
-Notes:
-- The template installs `uv` and the `leann` CLI globally on the remote instance.
-- Change the `accelerators` and `cloud` settings in `sky/leann-build.yaml` to match your budget/availability (e.g., `A10G:1`, `A100:1`, or CPU-only if you prefer).
-- You can also build with `diskann` by switching `--backend diskann`.
-
+## Deep Dive: Critical Configuration Decisions
+
+### When to Disable Recomputation
+
+LEANN's recomputation feature provides exact distance calculations but can be disabled for extreme QPS requirements:
+
+```bash
+--no-recompute  # Disable selective recomputation
+```
+
+**Trade-offs**:
+- **With recomputation** (default): Exact distances, best quality, higher latency, minimal storage (only stores metadata, recomputes embeddings on-demand)
+- **Without recomputation**: Must store full embeddings, significantly higher memory and storage usage (10-100x more), but faster search
+
+**Disable when**:
+- You have abundant storage and memory
+- Need extremely low latency (< 100ms)
+- Running a read-heavy workload where storage cost is acceptable
+
 ## Further Reading
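The "10-100x more" storage figure in the added text is easy to sanity-check: stored float32 embeddings grow linearly with corpus size and dimension. A quick back-of-the-envelope sketch (corpus size and dimension here are hypothetical):

```python
# Rough storage estimate for stored float32 embeddings vs. metadata-only.
num_chunks = 1_000_000   # hypothetical number of indexed passages
dim = 768                # hypothetical embedding dimension

embedding_bytes = num_chunks * dim * 4  # float32 = 4 bytes per component
print(f"Full embeddings: {embedding_bytes / 1e9:.1f} GB")  # ~3.1 GB

# With recomputation enabled, only passage text/metadata and the graph are
# stored; the vectors themselves are recomputed on demand at query time.
```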

View File

@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
 
 [project]
 name = "leann-backend-diskann"
-version = "0.2.9"
-dependencies = ["leann-core==0.2.9", "numpy", "protobuf>=3.19.0"]
+version = "0.2.7"
+dependencies = ["leann-core==0.2.7", "numpy", "protobuf>=3.19.0"]
 
 [tool.scikit-build]
 # Key: simplified CMake path

View File

@@ -95,8 +95,6 @@ def create_hnsw_embedding_server(
             passage_sources.append(source_copy)
     passages = PassageManager(passage_sources)
-    # Use index dimensions from metadata for shaping fallback responses
-    embedding_dim: int = int(meta.get("dimensions", 0))
     logger.info(
         f"Loaded PassageManager with {len(passages.global_offset_map)} passages from metadata"
     )
@@ -111,9 +109,6 @@ def create_hnsw_embedding_server(
     socket.setsockopt(zmq.RCVTIMEO, 300000)
     socket.setsockopt(zmq.SNDTIMEO, 300000)
-    # Track last request type for safe fallback responses on exceptions
-    last_request_type = "unknown"  # one of: 'text', 'distance', 'embedding', 'unknown'
-    last_request_length = 0
     while True:
         try:
             message_bytes = socket.recv()
@@ -126,8 +121,6 @@ def create_hnsw_embedding_server(
             if isinstance(request_payload, list) and len(request_payload) > 0:
                 # Check if this is a direct text request (list of strings)
                 if all(isinstance(item, str) for item in request_payload):
-                    last_request_type = "text"
-                    last_request_length = len(request_payload)
                     logger.info(
                         f"Processing direct text embedding request for {len(request_payload)} texts in {embedding_mode} mode"
                     )
@@ -152,66 +145,43 @@ def create_hnsw_embedding_server(
                 ):
                     node_ids = request_payload[0]
                     query_vector = np.array(request_payload[1], dtype=np.float32)
-                    last_request_type = "distance"
-                    last_request_length = len(node_ids)
                     logger.debug("Distance calculation request received")
                     logger.debug(f"  Node IDs: {node_ids}")
                     logger.debug(f"  Query vector dim: {len(query_vector)}")
-                    # Get embeddings for node IDs, tolerate missing IDs
-                    texts: list[str] = []
-                    found_indices: list[int] = []
-                    for idx, nid in enumerate(node_ids):
+                    # Get embeddings for node IDs
+                    texts = []
+                    for nid in node_ids:
                         try:
                             passage_data = passages.get_passage(str(nid))
-                            txt = passage_data.get("text", "")
-                            if isinstance(txt, str) and len(txt) > 0:
-                                texts.append(txt)
-                                found_indices.append(idx)
-                            else:
-                                logger.error(f"Empty text for passage ID {nid}")
+                            txt = passage_data["text"]
+                            texts.append(txt)
                         except KeyError:
                             logger.error(f"Passage ID {nid} not found")
+                            raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
                         except Exception as e:
                             logger.error(f"Exception looking up passage ID {nid}: {e}")
+                            raise
-                    # Prepare full-length response distances with safe fallbacks
-                    large_distance = 1e9
-                    response_distances = [large_distance] * len(node_ids)
-
-                    if texts:
-                        try:
-                            # Process embeddings only for found indices
-                            embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
-                            logger.info(
-                                f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
-                            )
-                            # Calculate distances for found embeddings only
-                            if distance_metric == "l2":
-                                partial_distances = np.sum(
-                                    np.square(embeddings - query_vector.reshape(1, -1)), axis=1
-                                )
-                            else:  # mips or cosine
-                                partial_distances = -np.dot(embeddings, query_vector)
-                            # Place computed distances back into the full response array
-                            for pos, dval in zip(
-                                found_indices, partial_distances.flatten().tolist()
-                            ):
-                                response_distances[pos] = float(dval)
-                        except Exception as e:
-                            logger.error(
-                                f"Distance computation error, falling back to large distances: {e}"
-                            )
-                    # Always reply with exactly len(node_ids) distances
-                    response_bytes = msgpack.packb([response_distances], use_single_float=True)
-                    logger.debug(
-                        f"Sending distance response with {len(response_distances)} distances (found={len(found_indices)})"
-                    )
+                    # Process embeddings
+                    embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
+                    logger.info(
+                        f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
+                    )
+                    # Calculate distances
+                    if distance_metric == "l2":
+                        distances = np.sum(
+                            np.square(embeddings - query_vector.reshape(1, -1)), axis=1
+                        )
+                    else:  # mips or cosine
+                        distances = -np.dot(embeddings, query_vector)
+                    response_payload = distances.flatten().tolist()
+                    response_bytes = msgpack.packb([response_payload], use_single_float=True)
+                    logger.debug(f"Sending distance response with {len(distances)} distances")
                     socket.send(response_bytes)
                     e2e_end = time.time()
                     logger.info(f"⏱️ Distance calculation E2E time: {e2e_end - e2e_start:.6f}s")
@@ -231,61 +201,40 @@ def create_hnsw_embedding_server(
                     node_ids = request_payload[0]
                     logger.debug(f"Request for {len(node_ids)} node embeddings")
-                    last_request_type = "embedding"
-                    last_request_length = len(node_ids)
-                    # Allocate output buffer (B, D) and fill with zeros for robustness
-                    if embedding_dim <= 0:
-                        logger.error("Embedding dimension unknown; cannot serve embedding request")
-                        dims = [0, 0]
-                        data = []
-                    else:
-                        dims = [len(node_ids), embedding_dim]
-                        data = [0.0] * (dims[0] * dims[1])
-                    # Look up texts by node IDs; compute embeddings where available
-                    texts: list[str] = []
-                    found_indices: list[int] = []
-                    for idx, nid in enumerate(node_ids):
+                    # Look up texts by node IDs
+                    texts = []
+                    for nid in node_ids:
                         try:
                             passage_data = passages.get_passage(str(nid))
-                            txt = passage_data.get("text", "")
-                            if isinstance(txt, str) and len(txt) > 0:
-                                texts.append(txt)
-                                found_indices.append(idx)
-                            else:
-                                logger.error(f"Empty text for passage ID {nid}")
+                            txt = passage_data["text"]
+                            if not txt:
+                                raise RuntimeError(f"FATAL: Empty text for passage ID {nid}")
+                            texts.append(txt)
                         except KeyError:
-                            logger.error(f"Passage with ID {nid} not found")
+                            raise RuntimeError(f"FATAL: Passage with ID {nid} not found")
                         except Exception as e:
                             logger.error(f"Exception looking up passage ID {nid}: {e}")
+                            raise
-                    if texts:
-                        try:
-                            # Process embeddings for found texts only
-                            embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
-                            logger.info(
-                                f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
-                            )
-                            if np.isnan(embeddings).any() or np.isinf(embeddings).any():
-                                logger.error(
-                                    f"NaN or Inf detected in embeddings! Requested IDs: {node_ids[:5]}..."
-                                )
-                                dims = [0, embedding_dim]
-                                data = []
-                            else:
-                                # Copy computed embeddings into the correct positions
-                                emb_f32 = np.ascontiguousarray(embeddings, dtype=np.float32)
-                                flat = emb_f32.flatten().tolist()
-                                for j, pos in enumerate(found_indices):
-                                    start = pos * embedding_dim
-                                    end = start + embedding_dim
-                                    data[start:end] = flat[j * embedding_dim : (j + 1) * embedding_dim]
-                        except Exception as e:
-                            logger.error(f"Embedding computation error, returning zeros: {e}")
-
-                    response_payload = [dims, data]
+                    # Process embeddings
+                    embeddings = compute_embeddings(texts, model_name, mode=embedding_mode)
+                    logger.info(
+                        f"Computed embeddings for {len(texts)} texts, shape: {embeddings.shape}"
+                    )
+
+                    # Serialization and response
+                    if np.isnan(embeddings).any() or np.isinf(embeddings).any():
+                        logger.error(
+                            f"NaN or Inf detected in embeddings! Requested IDs: {node_ids[:5]}..."
+                        )
+                        raise AssertionError()
+
+                    hidden_contiguous_f32 = np.ascontiguousarray(embeddings, dtype=np.float32)
+                    response_payload = [
+                        list(hidden_contiguous_f32.shape),
+                        hidden_contiguous_f32.flatten().tolist(),
+                    ]
                     response_bytes = msgpack.packb(response_payload, use_single_float=True)
                     socket.send(response_bytes)
@@ -300,22 +249,7 @@ def create_hnsw_embedding_server(
                 import traceback
 
                 traceback.print_exc()
-                # Fallback to a safe, minimal-structure response to avoid client crashes
-                if last_request_type == "distance":
-                    # Return a vector of large distances with the expected length
-                    fallback_len = max(0, int(last_request_length))
-                    large_distance = 1e9
-                    safe_response = [[large_distance] * fallback_len]
-                elif last_request_type == "embedding":
-                    # Return an empty embedding block with known dimension if available
-                    if embedding_dim > 0:
-                        safe_response = [[0, embedding_dim], []]
-                    else:
-                        safe_response = [[0, 0], []]
-                else:
-                    # Unknown request type: default to empty embedding structure
-                    safe_response = [[0, int(embedding_dim) if embedding_dim > 0 else 0], []]
-                socket.send(msgpack.packb(safe_response, use_single_float=True))
+                socket.send(msgpack.packb([[], []]))
 
     zmq_thread = threading.Thread(target=zmq_server_thread, daemon=True)
     zmq_thread.start()
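The two metric branches in the distance hunk above reduce to a squared-L2 form and a negated inner product (so that higher similarity sorts as smaller distance). A standalone sketch of just that arithmetic, on made-up data:

```python
import numpy as np

embeddings = np.random.rand(4, 8).astype(np.float32)  # 4 candidate vectors, dim 8
query = np.random.rand(8).astype(np.float32)

# Squared L2 distance per candidate (smaller = closer)
l2 = np.sum(np.square(embeddings - query.reshape(1, -1)), axis=1)

# MIPS/cosine branch: negate the inner product so the most similar
# candidate has the smallest "distance"
mips = -np.dot(embeddings, query)

print(l2.shape, mips.shape)  # (4,) (4,)
```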

View File

@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
 
 [project]
 name = "leann-backend-hnsw"
-version = "0.2.9"
+version = "0.2.7"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.2.9",
+    "leann-core==0.2.7",
     "numpy",
     "pyzmq>=23.0.0",
     "msgpack>=1.0.0",

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "leann-core"
-version = "0.2.9"
+version = "0.2.7"
 description = "Core API and plugin system for LEANN"
 readme = "README.md"
 requires-python = ">=3.9"

View File

@@ -5,7 +5,6 @@ from typing import Union
 from llama_index.core import SimpleDirectoryReader
 from llama_index.core.node_parser import SentenceSplitter
-from tqdm import tqdm
 
 from .api import LeannBuilder, LeannChat, LeannSearcher
@@ -76,14 +75,11 @@ class LeannCLI:
             formatter_class=argparse.RawDescriptionHelpFormatter,
             epilog="""
 Examples:
-  leann build my-docs --docs ./documents                        # Build index from directory
-  leann build my-code --docs ./src ./tests ./config             # Build index from multiple directories
-  leann build my-files --docs ./file1.py ./file2.txt ./docs/    # Build index from files and directories
-  leann build my-mixed --docs ./readme.md ./src/ ./config.json  # Build index from mixed files/dirs
-  leann build my-ppts --docs ./ --file-types .pptx,.pdf         # Index only PowerPoint and PDF files
-  leann search my-docs "query"                                  # Search in my-docs index
-  leann ask my-docs "question"                                  # Ask my-docs index
-  leann list                                                    # List all stored indexes
+  leann build my-docs --docs ./documents                 # Build index named my-docs
+  leann build my-ppts --docs ./ --file-types .pptx,.pdf  # Index only PowerPoint and PDF files
+  leann search my-docs "query"                           # Search in my-docs index
+  leann ask my-docs "question"                           # Ask my-docs index
+  leann list                                             # List all stored indexes
 """,
         )
@@ -95,11 +91,7 @@ Examples:
"index_name", nargs="?", help="Index name (default: current directory name)" "index_name", nargs="?", help="Index name (default: current directory name)"
) )
build_parser.add_argument( build_parser.add_argument(
"--docs", "--docs", type=str, default=".", help="Documents directory (default: current directory)"
type=str,
nargs="+",
default=["."],
help="Documents directories and/or files (default: current directory)",
) )
build_parser.add_argument( build_parser.add_argument(
"--backend", type=str, default="hnsw", choices=["hnsw", "diskann"] "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
@@ -117,19 +109,7 @@ Examples:
build_parser.add_argument("--complexity", type=int, default=64) build_parser.add_argument("--complexity", type=int, default=64)
build_parser.add_argument("--num-threads", type=int, default=1) build_parser.add_argument("--num-threads", type=int, default=1)
build_parser.add_argument("--compact", action="store_true", default=True) build_parser.add_argument("--compact", action="store_true", default=True)
build_parser.add_argument(
"--no-compact",
dest="compact",
action="store_false",
help="Disable compact index storage (store full embeddings; higher storage)",
)
build_parser.add_argument("--recompute", action="store_true", default=True) build_parser.add_argument("--recompute", action="store_true", default=True)
build_parser.add_argument(
"--no-recompute",
dest="recompute",
action="store_false",
help="Disable embedding recomputation (store full embeddings; lower query latency)",
)
build_parser.add_argument( build_parser.add_argument(
"--file-types", "--file-types",
type=str, type=str,
@@ -150,18 +130,6 @@ Examples:
             default=True,
             help="Recompute embeddings (default: True)",
         )
-        search_parser.add_argument(
-            "--no-recompute-embeddings",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Disable embedding recomputation during search",
-        )
-        search_parser.add_argument(
-            "--no-recompute",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Alias for --no-recompute-embeddings",
-        )
         search_parser.add_argument(
             "--pruning-strategy",
             choices=["global", "local", "proportional"],
@@ -190,18 +158,6 @@ Examples:
             default=True,
             help="Recompute embeddings (default: True)",
         )
-        ask_parser.add_argument(
-            "--no-recompute-embeddings",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Disable embedding recomputation during ask",
-        )
-        ask_parser.add_argument(
-            "--no-recompute",
-            dest="recompute_embeddings",
-            action="store_false",
-            help="Alias for --no-recompute-embeddings",
-        )
         ask_parser.add_argument(
             "--pruning-strategy",
             choices=["global", "local", "proportional"],
@@ -279,32 +235,6 @@ Examples:
"""Check if a file should be excluded using gitignore parser.""" """Check if a file should be excluded using gitignore parser."""
return gitignore_matches(str(relative_path)) return gitignore_matches(str(relative_path))
def _is_git_submodule(self, path: Path) -> bool:
"""Check if a path is a git submodule."""
try:
# Find the git repo root
current_dir = Path.cwd()
while current_dir != current_dir.parent:
if (current_dir / ".git").exists():
gitmodules_path = current_dir / ".gitmodules"
if gitmodules_path.exists():
# Read .gitmodules to check if this path is a submodule
gitmodules_content = gitmodules_path.read_text()
# Convert path to relative to git root
try:
relative_path = path.resolve().relative_to(current_dir)
# Check if this path appears in .gitmodules
return f"path = {relative_path}" in gitmodules_content
except ValueError:
# Path is not under git root
return False
break
current_dir = current_dir.parent
return False
except Exception:
# If anything goes wrong, assume it's not a submodule
return False
def list_indexes(self): def list_indexes(self):
print("Stored LEANN indexes:") print("Stored LEANN indexes:")
@@ -334,9 +264,7 @@ Examples:
                     valid_projects.append(current_path)
 
         if not valid_projects:
-            print(
-                "No indexes found. Use 'leann build <name> --docs <dir> [<dir2> ...]' to create one."
-            )
+            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
             return
 
         total_indexes = 0
@@ -383,88 +311,56 @@ Examples:
         print(f'  leann search {example_name} "your query"')
         print(f"  leann ask {example_name} --interactive")
 
-    def load_documents(
-        self, docs_paths: Union[str, list], custom_file_types: Union[str, None] = None
-    ):
-        # Handle both single path (string) and multiple paths (list) for backward compatibility
-        if isinstance(docs_paths, str):
-            docs_paths = [docs_paths]
-
-        # Separate files and directories
-        files = []
-        directories = []
-        for path in docs_paths:
-            path_obj = Path(path)
-            if path_obj.is_file():
-                files.append(str(path_obj))
-            elif path_obj.is_dir():
-                # Check if this is a git submodule - if so, skip it
-                if self._is_git_submodule(path_obj):
-                    print(f"⚠️ Skipping git submodule: {path}")
-                    continue
-                directories.append(str(path_obj))
-            else:
-                print(f"⚠️ Warning: Path '{path}' does not exist, skipping...")
-                continue
-
-        # Print summary of what we're processing
-        total_items = len(files) + len(directories)
-        items_desc = []
-        if files:
-            items_desc.append(f"{len(files)} file{'s' if len(files) > 1 else ''}")
-        if directories:
-            items_desc.append(
-                f"{len(directories)} director{'ies' if len(directories) > 1 else 'y'}"
-            )
-        print(f"Loading documents from {' and '.join(items_desc)} ({total_items} total):")
-        if files:
-            print(f"  📄 Files: {', '.join([Path(f).name for f in files])}")
-        if directories:
-            print(f"  📁 Directories: {', '.join(directories)}")
+    def load_documents(self, docs_dir: str, custom_file_types: Union[str, None] = None):
+        print(f"Loading documents from {docs_dir}...")
 
         if custom_file_types:
             print(f"Using custom file types: {custom_file_types}")
 
-        all_documents = []
-
-        # First, process individual files if any
-        if files:
-            print(f"\n🔄 Processing {len(files)} individual file{'s' if len(files) > 1 else ''}...")
-            # Load individual files using SimpleDirectoryReader with input_files
-            # Note: We skip gitignore filtering for explicitly specified files
-            try:
-                # Group files by their parent directory for efficient loading
-                from collections import defaultdict
-
-                files_by_dir = defaultdict(list)
-                for file_path in files:
-                    parent_dir = str(Path(file_path).parent)
-                    files_by_dir[parent_dir].append(file_path)
-
-                # Load files from each parent directory
-                for parent_dir, file_list in files_by_dir.items():
-                    print(
-                        f"  Loading {len(file_list)} file{'s' if len(file_list) > 1 else ''} from {parent_dir}"
-                    )
-                    try:
-                        file_docs = SimpleDirectoryReader(
-                            parent_dir,
-                            input_files=file_list,
-                            filename_as_id=True,
-                        ).load_data()
-                        all_documents.extend(file_docs)
-                        print(
-                            f"    ✅ Loaded {len(file_docs)} document{'s' if len(file_docs) > 1 else ''}"
-                        )
-                    except Exception as e:
-                        print(f"Warning: Could not load files from {parent_dir}: {e}")
-            except Exception as e:
-                print(f"❌ Error processing individual files: {e}")
-
-        # Define file extensions to process
+        # Build gitignore parser
+        gitignore_matches = self._build_gitignore_parser(docs_dir)
+
+        # Try to use better PDF parsers first, but only if PDFs are requested
+        documents = []
+        docs_path = Path(docs_dir)
+
+        # Check if we should process PDFs
+        should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types
+
+        if should_process_pdfs:
+            for file_path in docs_path.rglob("*.pdf"):
+                # Check if file matches any exclude pattern
+                relative_path = file_path.relative_to(docs_path)
+                if self._should_exclude_file(relative_path, gitignore_matches):
+                    continue
+
+                print(f"Processing PDF: {file_path}")
+
+                # Try PyMuPDF first (best quality)
+                text = extract_pdf_text_with_pymupdf(str(file_path))
+                if text is None:
+                    # Try pdfplumber
+                    text = extract_pdf_text_with_pdfplumber(str(file_path))
+
+                if text:
+                    # Create a simple document structure
+                    from llama_index.core import Document
+
+                    doc = Document(text=text, metadata={"source": str(file_path)})
+                    documents.append(doc)
+                else:
+                    # Fallback to default reader
+                    print(f"Using default reader for {file_path}")
+                    try:
+                        default_docs = SimpleDirectoryReader(
+                            str(file_path.parent),
+                            filename_as_id=True,
+                            required_exts=[file_path.suffix],
+                        ).load_data()
+                        documents.extend(default_docs)
+                    except Exception as e:
+                        print(f"Warning: Could not process {file_path}: {e}")
+
+        # Load other file types with default reader
         if custom_file_types:
             # Parse custom file types from comma-separated string
             code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
@@ -526,106 +422,41 @@ Examples:
".py", ".py",
".jl", ".jl",
] ]
# Try to load other file types, but don't fail if none are found
try:
# Create a custom file filter function using our PathSpec
def file_filter(file_path: str) -> bool:
"""Return True if file should be included (not excluded)"""
try:
docs_path_obj = Path(docs_dir)
file_path_obj = Path(file_path)
relative_path = file_path_obj.relative_to(docs_path_obj)
return not self._should_exclude_file(relative_path, gitignore_matches)
except (ValueError, OSError):
return True # Include files that can't be processed
# Process each directory other_docs = SimpleDirectoryReader(
if directories: docs_dir,
print( recursive=True,
f"\n🔄 Processing {len(directories)} director{'ies' if len(directories) > 1 else 'y'}..." encoding="utf-8",
) required_exts=code_extensions,
file_extractor={}, # Use default extractors
filename_as_id=True,
).load_data(show_progress=True)
for docs_dir in directories: # Filter documents after loading based on gitignore rules
print(f"Processing directory: {docs_dir}") filtered_docs = []
# Build gitignore parser for each directory for doc in other_docs:
gitignore_matches = self._build_gitignore_parser(docs_dir) file_path = doc.metadata.get("file_path", "")
if file_filter(file_path):
filtered_docs.append(doc)
# Try to use better PDF parsers first, but only if PDFs are requested documents.extend(filtered_docs)
documents = [] except ValueError as e:
docs_path = Path(docs_dir) if "No files found" in str(e):
print("No additional files found for other supported types.")
# Check if we should process PDFs else:
should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types raise e
if should_process_pdfs:
for file_path in docs_path.rglob("*.pdf"):
# Check if file matches any exclude pattern
try:
relative_path = file_path.relative_to(docs_path)
if self._should_exclude_file(relative_path, gitignore_matches):
continue
except ValueError:
# Skip files that can't be made relative to docs_path
print(f"⚠️ Skipping file outside directory scope: {file_path}")
continue
print(f"Processing PDF: {file_path}")
# Try PyMuPDF first (best quality)
text = extract_pdf_text_with_pymupdf(str(file_path))
if text is None:
# Try pdfplumber
text = extract_pdf_text_with_pdfplumber(str(file_path))
if text:
# Create a simple document structure
from llama_index.core import Document
doc = Document(text=text, metadata={"source": str(file_path)})
documents.append(doc)
else:
# Fallback to default reader
print(f"Using default reader for {file_path}")
try:
default_docs = SimpleDirectoryReader(
str(file_path.parent),
filename_as_id=True,
required_exts=[file_path.suffix],
).load_data()
documents.extend(default_docs)
except Exception as e:
print(f"Warning: Could not process {file_path}: {e}")
# Load other file types with default reader
try:
# Create a custom file filter function using our PathSpec
def file_filter(
file_path: str, docs_dir=docs_dir, gitignore_matches=gitignore_matches
) -> bool:
"""Return True if file should be included (not excluded)"""
try:
docs_path_obj = Path(docs_dir)
file_path_obj = Path(file_path)
relative_path = file_path_obj.relative_to(docs_path_obj)
return not self._should_exclude_file(relative_path, gitignore_matches)
except (ValueError, OSError):
return True # Include files that can't be processed
other_docs = SimpleDirectoryReader(
docs_dir,
recursive=True,
encoding="utf-8",
required_exts=code_extensions,
file_extractor={}, # Use default extractors
filename_as_id=True,
).load_data(show_progress=True)
# Filter documents after loading based on gitignore rules
filtered_docs = []
for doc in other_docs:
file_path = doc.metadata.get("file_path", "")
if file_filter(file_path):
filtered_docs.append(doc)
documents.extend(filtered_docs)
except ValueError as e:
if "No files found" in str(e):
print(f"No additional files found for other supported types in {docs_dir}.")
else:
raise e
all_documents.extend(documents)
print(f"Loaded {len(documents)} documents from {docs_dir}")
documents = all_documents
all_texts = [] all_texts = []
@@ -676,9 +507,7 @@ Examples:
".jl", ".jl",
} }
print("start chunking documents") for doc in documents:
# Add progress bar for document chunking
for doc in tqdm(documents, desc="Chunking documents", unit="doc"):
# Check if this is a code file based on source path # Check if this is a code file based on source path
source_path = doc.metadata.get("source", "") source_path = doc.metadata.get("source", "")
is_code_file = any(source_path.endswith(ext) for ext in code_file_exts) is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)
@@ -694,7 +523,7 @@ Examples:
         return all_texts
 
     async def build_index(self, args):
-        docs_paths = args.docs
+        docs_dir = args.docs
 
         # Use current directory name if index_name not provided
         if args.index_name:
             index_name = args.index_name
@@ -705,25 +534,13 @@ Examples:
         index_dir = self.indexes_dir / index_name
         index_path = self.get_index_path(index_name)
 
-        # Display all paths being indexed with file/directory distinction
-        files = [p for p in docs_paths if Path(p).is_file()]
-        directories = [p for p in docs_paths if Path(p).is_dir()]
-
-        print(f"📂 Indexing {len(docs_paths)} path{'s' if len(docs_paths) > 1 else ''}:")
-        if files:
-            print(f"  📄 Files ({len(files)}):")
-            for i, file_path in enumerate(files, 1):
-                print(f"    {i}. {Path(file_path).resolve()}")
-        if directories:
-            print(f"  📁 Directories ({len(directories)}):")
-            for i, dir_path in enumerate(directories, 1):
-                print(f"    {i}. {Path(dir_path).resolve()}")
+        print(f"📂 Indexing: {Path(docs_dir).resolve()}")
 
         if index_dir.exists() and not args.force:
             print(f"Index '{index_name}' already exists. Use --force to rebuild.")
             return
 
-        all_texts = self.load_documents(docs_paths, args.file_types)
+        all_texts = self.load_documents(docs_dir, args.file_types)
         if not all_texts:
             print("No documents found")
             return
@@ -759,7 +576,7 @@ Examples:
         if not self.index_exists(index_name):
             print(
-                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
+                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
             )
             return
@@ -786,7 +603,7 @@ Examples:
         if not self.index_exists(index_name):
             print(
-                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
+                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
             )
             return
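The removed `--no-recompute`/`--no-recompute-embeddings` options above follow the standard argparse paired-flag idiom: a positive and a negative flag writing to the same `dest`. A self-contained sketch of the pattern (parser setup and help text are illustrative):

```python
import argparse

parser = argparse.ArgumentParser()
# Positive flag: sets recompute=True (also the default)
parser.add_argument("--recompute", action="store_true", default=True)
# Negative flag: same destination, flips it to False
parser.add_argument(
    "--no-recompute",
    dest="recompute",
    action="store_false",
    help="Disable embedding recomputation",
)

print(parser.parse_args([]).recompute)                   # True
print(parser.parse_args(["--no-recompute"]).recompute)   # False
```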

View File

@@ -6,6 +6,7 @@ Preserves all optimization parameters to ensure performance
 import logging
 import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any
 
 import numpy as np
@@ -373,9 +374,7 @@ def compute_embeddings_ollama(
     texts: list[str], model_name: str, is_build: bool = False, host: str = "http://localhost:11434"
 ) -> np.ndarray:
     """
-    Compute embeddings using Ollama API with simplified batch processing.
-    Uses batch size of 32 for MPS/CPU and 128 for CUDA to optimize performance.
+    Compute embeddings using Ollama API.
 
     Args:
         texts: List of texts to compute embeddings for
@@ -439,19 +438,12 @@ def compute_embeddings_ollama(
             if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5"]):
                 embedding_models.append(model)
 
-        # Check if model exists (handle versioned names) and resolve to full name
-        resolved_model_name = None
-        for name in model_names:
-            # Exact match
-            if model_name == name:
-                resolved_model_name = name
-                break
-            # Match without version tag (use the versioned name)
-            elif model_name == name.split(":")[0]:
-                resolved_model_name = name
-                break
-
-        if not resolved_model_name:
+        # Check if model exists (handle versioned names)
+        model_found = any(
+            model_name == name.split(":")[0] or model_name == name for name in model_names
+        )
+
+        if not model_found:
             error_msg = f"❌ Model '{model_name}' not found in local Ollama.\n\n"
 
             # Suggest pulling the model
@@ -473,11 +465,6 @@ def compute_embeddings_ollama(
             error_msg += "\n📚 Browse more: https://ollama.com/library"
             raise ValueError(error_msg)
 
-        # Use the resolved model name for all subsequent operations
-        if resolved_model_name != model_name:
-            logger.info(f"Resolved model name '{model_name}' to '{resolved_model_name}'")
-            model_name = resolved_model_name
-
     # Verify the model supports embeddings by testing it
     try:
         test_response = requests.post(
@@ -498,147 +485,162 @@ def compute_embeddings_ollama(
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.warning(f"Could not verify model existence: {e}") logger.warning(f"Could not verify model existence: {e}")
-    # Determine batch size based on device availability
-    # Check for CUDA/MPS availability using torch if available
-    batch_size = 32  # Default for MPS/CPU
-    try:
-        import torch
-
-        if torch.cuda.is_available():
-            batch_size = 128  # CUDA gets larger batch size
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            batch_size = 32  # MPS gets smaller batch size
-    except ImportError:
-        # If torch is not available, use conservative batch size
-        batch_size = 32
-
-    logger.info(f"Using batch size: {batch_size}")
-
-    def get_batch_embeddings(batch_texts):
-        """Get embeddings for a batch of texts."""
-        all_embeddings = []
-        failed_indices = []
-
-        for i, text in enumerate(batch_texts):
-            max_retries = 3
-            retry_count = 0
-
-            # Truncate very long texts to avoid API issues
-            truncated_text = text[:8000] if len(text) > 8000 else text
-
-            while retry_count < max_retries:
-                try:
-                    response = requests.post(
-                        f"{host}/api/embeddings",
-                        json={"model": model_name, "prompt": truncated_text},
-                        timeout=30,
-                    )
-                    response.raise_for_status()
-
-                    result = response.json()
-                    embedding = result.get("embedding")
-
-                    if embedding is None:
-                        raise ValueError(f"No embedding returned for text {i}")
-                    if not isinstance(embedding, list) or len(embedding) == 0:
-                        raise ValueError(f"Invalid embedding format for text {i}")
-
-                    all_embeddings.append(embedding)
-                    break
-
-                except requests.exceptions.Timeout:
-                    retry_count += 1
-                    if retry_count >= max_retries:
-                        logger.warning(f"Timeout for text {i} after {max_retries} retries")
-                        failed_indices.append(i)
-                        all_embeddings.append(None)
-                        break
-                except Exception as e:
-                    retry_count += 1
-                    if retry_count >= max_retries:
-                        logger.error(f"Failed to get embedding for text {i}: {e}")
-                        failed_indices.append(i)
-                        all_embeddings.append(None)
-                        break
-
-        return all_embeddings, failed_indices
-
-    # Process texts in batches
-    all_embeddings = []
-    all_failed_indices = []
-
-    # Setup progress bar if needed
-    show_progress = is_build or len(texts) > 10
-    try:
-        if show_progress:
-            from tqdm import tqdm
-    except ImportError:
-        show_progress = False
-
-    # Process batches
-    num_batches = (len(texts) + batch_size - 1) // batch_size
-    if show_progress:
-        batch_iterator = tqdm(range(num_batches), desc="Computing Ollama embeddings")
-    else:
-        batch_iterator = range(num_batches)
-
-    for batch_idx in batch_iterator:
-        start_idx = batch_idx * batch_size
-        end_idx = min(start_idx + batch_size, len(texts))
-        batch_texts = texts[start_idx:end_idx]
-
-        batch_embeddings, batch_failed = get_batch_embeddings(batch_texts)
-
-        # Adjust failed indices to global indices
-        global_failed = [start_idx + idx for idx in batch_failed]
-        all_failed_indices.extend(global_failed)
-        all_embeddings.extend(batch_embeddings)
+    # Process embeddings with optimized concurrent processing
+    import requests
+
+    def get_single_embedding(text_idx_tuple):
+        """Helper function to get embedding for a single text."""
+        text, idx = text_idx_tuple
+        max_retries = 3
+        retry_count = 0
+
+        # Truncate very long texts to avoid API issues
+        truncated_text = text[:8000] if len(text) > 8000 else text
+
+        while retry_count < max_retries:
+            try:
+                response = requests.post(
+                    f"{host}/api/embeddings",
+                    json={"model": model_name, "prompt": truncated_text},
+                    timeout=30,
+                )
+                response.raise_for_status()
+
+                result = response.json()
+                embedding = result.get("embedding")
+
+                if embedding is None:
+                    raise ValueError(f"No embedding returned for text {idx}")
+
+                return idx, embedding
+
+            except requests.exceptions.Timeout:
+                retry_count += 1
+                if retry_count >= max_retries:
+                    logger.warning(f"Timeout for text {idx} after {max_retries} retries")
+                    return idx, None
+            except Exception as e:
+                if retry_count >= max_retries - 1:
+                    logger.error(f"Failed to get embedding for text {idx}: {e}")
+                    return idx, None
+                retry_count += 1
+
+        return idx, None
+
+    # Determine if we should use concurrent processing
+    use_concurrent = (
+        len(texts) > 5 and not is_build
+    )  # Don't use concurrent in build mode to avoid overwhelming
+    max_workers = min(4, len(texts))  # Limit concurrent requests to avoid overwhelming Ollama
+
+    all_embeddings = [None] * len(texts)  # Pre-allocate list to maintain order
+    failed_indices = []
+
+    if use_concurrent:
+        logger.info(
+            f"Using concurrent processing with {max_workers} workers for {len(texts)} texts"
+        )
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all tasks
+            future_to_idx = {
+                executor.submit(get_single_embedding, (text, idx)): idx
+                for idx, text in enumerate(texts)
+            }
+
+            # Add progress bar for concurrent processing
+            try:
+                if is_build or len(texts) > 10:
+                    from tqdm import tqdm
+
+                    futures_iterator = tqdm(
+                        as_completed(future_to_idx),
+                        total=len(texts),
+                        desc="Computing Ollama embeddings",
+                    )
+                else:
+                    futures_iterator = as_completed(future_to_idx)
+            except ImportError:
+                futures_iterator = as_completed(future_to_idx)
+
+            # Collect results as they complete
+            for future in futures_iterator:
+                try:
+                    idx, embedding = future.result()
+                    if embedding is not None:
+                        all_embeddings[idx] = embedding
+                    else:
+                        failed_indices.append(idx)
+                except Exception as e:
+                    idx = future_to_idx[future]
+                    logger.error(f"Exception for text {idx}: {e}")
+                    failed_indices.append(idx)
+    else:
+        # Sequential processing with progress bar
+        show_progress = is_build or len(texts) > 10
+        try:
+            if show_progress:
+                from tqdm import tqdm
+
+                iterator = tqdm(
+                    enumerate(texts), total=len(texts), desc="Computing Ollama embeddings"
+                )
+            else:
+                iterator = enumerate(texts)
+        except ImportError:
+            iterator = enumerate(texts)
+
+        for idx, text in iterator:
+            result_idx, embedding = get_single_embedding((text, idx))
+            if embedding is not None:
+                all_embeddings[idx] = embedding
+            else:
+                failed_indices.append(idx)
 
     # Handle failed embeddings
-    if all_failed_indices:
-        if len(all_failed_indices) == len(texts):
+    if failed_indices:
+        if len(failed_indices) == len(texts):
             raise RuntimeError("Failed to compute any embeddings")
-        logger.warning(
-            f"Failed to compute embeddings for {len(all_failed_indices)}/{len(texts)} texts"
-        )
+        logger.warning(f"Failed to compute embeddings for {len(failed_indices)}/{len(texts)} texts")
 
         # Use zero embeddings as fallback for failed ones
         valid_embedding = next((e for e in all_embeddings if e is not None), None)
         if valid_embedding:
             embedding_dim = len(valid_embedding)
-            for i, embedding in enumerate(all_embeddings):
-                if embedding is None:
-                    all_embeddings[i] = [0.0] * embedding_dim
+            for idx in failed_indices:
+                all_embeddings[idx] = [0.0] * embedding_dim
 
-    # Remove None values
+    # Remove None values and convert to numpy array
     all_embeddings = [e for e in all_embeddings if e is not None]
-    if not all_embeddings:
-        raise RuntimeError("No valid embeddings were computed")
-
-    # Validate embedding dimensions
-    expected_dim = len(all_embeddings[0])
-    inconsistent_dims = []
-    for i, embedding in enumerate(all_embeddings):
-        if len(embedding) != expected_dim:
-            inconsistent_dims.append((i, len(embedding)))
-
-    if inconsistent_dims:
-        error_msg = f"Ollama returned inconsistent embedding dimensions. Expected {expected_dim}, but got:\n"
-        for idx, dim in inconsistent_dims[:10]:  # Show first 10 inconsistent ones
-            error_msg += f"  - Text {idx}: {dim} dimensions\n"
-        if len(inconsistent_dims) > 10:
-            error_msg += f"  ... and {len(inconsistent_dims) - 10} more\n"
-        error_msg += f"\nThis is likely an Ollama API bug with model '{model_name}'. Please try:\n"
-        error_msg += "1. Restart Ollama service: 'ollama serve'\n"
-        error_msg += f"2. Re-pull the model: 'ollama pull {model_name}'\n"
-        error_msg += (
-            "3. Use sentence-transformers instead: --embedding-mode sentence-transformers\n"
-        )
-        error_msg += "4. Report this issue to Ollama: https://github.com/ollama/ollama/issues"
-        raise ValueError(error_msg)
+
+    # Validate embedding dimensions before creating numpy array
+    if all_embeddings:
+        expected_dim = len(all_embeddings[0])
+        inconsistent_dims = []
+        for i, embedding in enumerate(all_embeddings):
+            if len(embedding) != expected_dim:
+                inconsistent_dims.append((i, len(embedding)))
+
+        if inconsistent_dims:
+            error_msg = f"Ollama returned inconsistent embedding dimensions. Expected {expected_dim}, but got:\n"
+            for idx, dim in inconsistent_dims[:10]:  # Show first 10 inconsistent ones
+                error_msg += f"  - Text {idx}: {dim} dimensions\n"
+            if len(inconsistent_dims) > 10:
+                error_msg += f"  ... and {len(inconsistent_dims) - 10} more\n"
+            error_msg += (
+                f"\nThis is likely an Ollama API bug with model '{model_name}'. Please try:\n"
+            )
+            error_msg += "1. Restart Ollama service: 'ollama serve'\n"
+            error_msg += f"2. Re-pull the model: 'ollama pull {model_name}'\n"
+            error_msg += (
+                "3. Use sentence-transformers instead: --embedding-mode sentence-transformers\n"
            )
+            error_msg += "4. Report this issue to Ollama: https://github.com/ollama/ollama/issues"
+            raise ValueError(error_msg)
 
     # Convert to numpy array and normalize
     embeddings = np.array(all_embeddings, dtype=np.float32)
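Both the batch path and the concurrent path in this hunk rest on the same primitive: one POST per text to Ollama's `/api/embeddings` endpoint, with retries on timeout. A stripped-down sketch of that call (the model name and retry policy shown are placeholders, not the patch's exact values):

```python
import requests

def ollama_embedding(text: str, model: str = "nomic-embed-text",
                     host: str = "http://localhost:11434") -> list:
    """Fetch one embedding from a local Ollama server; retry timeouts up to 3 times."""
    for attempt in range(3):
        try:
            resp = requests.post(
                f"{host}/api/embeddings",
                json={"model": model, "prompt": text[:8000]},  # truncate very long inputs
                timeout=30,
            )
            resp.raise_for_status()
            embedding = resp.json().get("embedding")
            if embedding is None:
                raise ValueError("no embedding in response")
            return embedding
        except requests.exceptions.Timeout:
            continue  # retry on timeout
    raise RuntimeError("embedding request timed out 3 times")
```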

View File

@@ -4,12 +4,20 @@ Transform your development workflow with intelligent code assistance using LEANN
 ## Prerequisites
 
-Install LEANN globally for MCP integration (with default backend):
+**Step 1:** First, complete the basic LEANN installation following the [📦 Installation guide](../../README.md#installation) in the root README:
 
 ```bash
-uv tool install leann-core --with leann
+uv venv
+source .venv/bin/activate
+uv pip install leann
 ```
 
-This installs the `leann` CLI into an isolated tool environment and includes both backends so `leann build` works out-of-the-box.
+**Step 2:** Install LEANN globally for MCP integration:
+
+```bash
+uv tool install leann-core
+```
+
+This makes the `leann` command available system-wide, which `leann_mcp` requires.
 
 ## 🚀 Quick Setup
@@ -37,42 +45,6 @@ leann build my-project --docs ./
 claude
 ```
 
-## 🚀 Advanced Usage Examples
-
-### Index Entire Git Repository
-```bash
-# Index all tracked files in your git repository (note: git submodules are currently skipped)
-leann build my-repo --docs $(git ls-files) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Index only specific file types from git
-leann build my-python-code --docs $(git ls-files "*.py") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-```
-
-### Multiple Directories and Files
-```bash
-# Index multiple directories
-leann build my-codebase --docs ./src ./tests ./docs ./config --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Mix files and directories
-leann build my-project --docs ./README.md ./src/ ./package.json ./docs/ --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Specific files only
-leann build my-configs --docs ./tsconfig.json ./package.json ./webpack.config.js --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-```
-
-### Advanced Git Integration
-```bash
-# Index recently modified files
-leann build recent-changes --docs $(git diff --name-only HEAD~10..HEAD) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Index files matching pattern
-leann build frontend --docs $(git ls-files "*.tsx" "*.ts" "*.jsx" "*.js") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-
-# Index documentation and config files
-leann build docs-and-configs --docs $(git ls-files "*.md" "*.yml" "*.yaml" "*.json" "*.toml") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
-```
-
 **Try this in Claude Code:**
 ```
 Help me understand this codebase. List available indexes and search for authentication patterns.

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "leann"
-version = "0.2.9"
+version = "0.2.7"
 description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
 readme = "README.md"
 requires-python = ">=3.9"

View File

@@ -1,62 +0,0 @@
-name: leann-build
-
-resources:
-  # Choose a GPU for fast embeddings (examples: L4, A10G, A100). CPU also works but is slower.
-  accelerators: L4:1
-  # Optionally pin a cloud, otherwise SkyPilot will auto-select
-  # cloud: aws
-  disk_size: 100
-
-env:
-  # Build parameters (override with: sky launch -c leann-gpu sky/leann-build.yaml -e key=value)
-  index_name: my-index
-  docs: ./data
-  backend: hnsw  # hnsw | diskann
-  complexity: 64
-  graph_degree: 32
-  num_threads: 8
-  # Embedding selection
-  embedding_mode: sentence-transformers  # sentence-transformers | openai | mlx | ollama
-  embedding_model: facebook/contriever
-  # Storage/latency knobs
-  recompute: true  # true => selective recomputation; false => store full embeddings
-  compact: true  # for HNSW only: false when recompute=false
-  # Optional pass-through
-  extra_args: ""
-
-# Sync local paths to the remote VM. Adjust as needed.
-file_mounts:
-  # Example: mount your local data directory used for building
-  ~/leann-data: ${docs}
-
-setup: |
-  set -e
-  # Install uv (package manager)
-  curl -LsSf https://astral.sh/uv/install.sh | sh
-  export PATH="$HOME/.local/bin:$PATH"
-  # Install the LEANN CLI globally on the remote machine
-  uv tool install leann
-
-run: |
-  export PATH="$HOME/.local/bin:$PATH"
-  # Derive flags from env
-  recompute_flag=""
-  if [ "${recompute}" = "false" ] || [ "${recompute}" = "0" ]; then
-    recompute_flag="--no-recompute"
-  fi
-  compact_flag=""
-  if [ "${compact}" = "false" ] || [ "${compact}" = "0" ]; then
-    compact_flag="--no-compact"
-  fi
-  # Build command
-  leann build ${index_name} \
-    --docs ~/leann-data \
-    --backend ${backend} \
-    --complexity ${complexity} \
-    --graph-degree ${graph_degree} \
-    --num-threads ${num_threads} \
-    --embedding-mode ${embedding_mode} \
-    --embedding-model ${embedding_model} \
-    ${recompute_flag} ${compact_flag} ${extra_args}