Compare commits
2 Commits
refactor-a
...
feat/diska
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fcbcde1ea8 | ||
|
|
54df6310c5 |
@@ -7,6 +7,7 @@ from pathlib import Path
|
|||||||
from typing import Any, Literal
|
from typing import Any, Literal
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import psutil
|
||||||
from leann.interface import (
|
from leann.interface import (
|
||||||
LeannBackendBuilderInterface,
|
LeannBackendBuilderInterface,
|
||||||
LeannBackendFactoryInterface,
|
LeannBackendFactoryInterface,
|
||||||
@@ -84,6 +85,43 @@ def _write_vectors_to_bin(data: np.ndarray, file_path: Path):
|
|||||||
f.write(data.tobytes())
|
f.write(data.tobytes())
|
||||||
|
|
||||||
|
|
||||||
|
def _calculate_smart_memory_config(data: np.ndarray) -> tuple[float, float]:
    """
    Calculate smart memory configuration for DiskANN based on data size and system specs.

    Args:
        data: The embedding data array, shaped (num_vectors, dim). Its storage
            size is estimated at 4 bytes per element (float32), independent of
            the array's actual dtype.

    Returns:
        tuple: (search_memory_maximum, build_memory_maximum) in GB

    Raises:
        ValueError: If ``data`` is not a 2-D array.
    """
    if data.ndim != 2:
        # Fail with a readable message instead of an opaque tuple-unpacking error.
        raise ValueError(
            f"Expected a 2-D embedding array (num_vectors, dim), got shape {data.shape}"
        )
    num_vectors, dim = data.shape

    bytes_per_gb = 1024**3

    # Calculate embedding storage size
    embedding_size_bytes = num_vectors * dim * 4  # float32 = 4 bytes
    embedding_size_gb = embedding_size_bytes / bytes_per_gb

    # search_memory_maximum: 1/10 of embedding size for optimal PQ compression
    # This controls Product Quantization size - smaller means more compression
    search_memory_gb = max(0.1, embedding_size_gb / 10)  # At least 100MB

    # build_memory_maximum: Based on available system RAM for sharding control
    # This controls how much memory DiskANN uses during index construction
    # Take a single snapshot so "available" and "total" describe the same moment.
    memory = psutil.virtual_memory()
    available_memory_gb = memory.available / bytes_per_gb
    total_memory_gb = memory.total / bytes_per_gb

    # Use 50% of available memory, but at least 2GB and at most 75% of total.
    # NOTE(review): the 2GB floor can exceed what is actually available on
    # small machines - confirm that is intended.
    build_memory_gb = max(2.0, min(available_memory_gb * 0.5, total_memory_gb * 0.75))

    logger.info(
        f"Smart memory config - Data: {embedding_size_gb:.2f}GB, "
        f"Search mem: {search_memory_gb:.2f}GB (PQ control), "
        f"Build mem: {build_memory_gb:.2f}GB (sharding control)"
    )

    return search_memory_gb, build_memory_gb
|
||||||
|
|
||||||
|
|
||||||
@register_backend("diskann")
|
@register_backend("diskann")
|
||||||
class DiskannBackend(LeannBackendFactoryInterface):
|
class DiskannBackend(LeannBackendFactoryInterface):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -121,6 +159,16 @@ class DiskannBuilder(LeannBackendBuilderInterface):
|
|||||||
f"Unsupported distance_metric '{build_kwargs.get('distance_metric', 'unknown')}'."
|
f"Unsupported distance_metric '{build_kwargs.get('distance_metric', 'unknown')}'."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Calculate smart memory defaults when either memory limit is missing from build_kwargs; explicitly provided values still take precedence below
|
||||||
|
if (
|
||||||
|
"search_memory_maximum" not in build_kwargs
|
||||||
|
or "build_memory_maximum" not in build_kwargs
|
||||||
|
):
|
||||||
|
smart_search_mem, smart_build_mem = _calculate_smart_memory_config(data)
|
||||||
|
else:
|
||||||
|
smart_search_mem = build_kwargs.get("search_memory_maximum", 4.0)
|
||||||
|
smart_build_mem = build_kwargs.get("build_memory_maximum", 8.0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from . import _diskannpy as diskannpy # type: ignore
|
from . import _diskannpy as diskannpy # type: ignore
|
||||||
|
|
||||||
@@ -131,8 +179,8 @@ class DiskannBuilder(LeannBackendBuilderInterface):
|
|||||||
index_prefix,
|
index_prefix,
|
||||||
build_kwargs.get("complexity", 64),
|
build_kwargs.get("complexity", 64),
|
||||||
build_kwargs.get("graph_degree", 32),
|
build_kwargs.get("graph_degree", 32),
|
||||||
build_kwargs.get("search_memory_maximum", 4.0),
|
build_kwargs.get("search_memory_maximum", smart_search_mem),
|
||||||
build_kwargs.get("build_memory_maximum", 8.0),
|
build_kwargs.get("build_memory_maximum", smart_build_mem),
|
||||||
build_kwargs.get("num_threads", 8),
|
build_kwargs.get("num_threads", 8),
|
||||||
build_kwargs.get("pq_disk_bytes", 0),
|
build_kwargs.get("pq_disk_bytes", 0),
|
||||||
"",
|
"",
|
||||||
|
|||||||
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: af2a26481e...67a2611ad1
@@ -542,14 +542,41 @@ class HFChat(LLMInterface):
|
|||||||
self.device = "cpu"
|
self.device = "cpu"
|
||||||
logger.info("No GPU detected. Using CPU.")
|
logger.info("No GPU detected. Using CPU.")
|
||||||
|
|
||||||
# Load tokenizer and model
|
# Load tokenizer and model with timeout protection
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
try:
|
||||||
self.model = AutoModelForCausalLM.from_pretrained(
|
import signal
|
||||||
model_name,
|
|
||||||
torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
|
def timeout_handler(signum, frame):
|
||||||
device_map="auto" if self.device != "cpu" else None,
|
raise TimeoutError("Model download/loading timed out")
|
||||||
trust_remote_code=True,
|
|
||||||
)
|
# Set timeout for model loading (60 seconds). NOTE: SIGALRM is Unix-only and signal handlers can only be installed from the main thread.
|
||||||
|
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
|
||||||
|
signal.alarm(60)
|
||||||
|
|
||||||
|
try:
|
||||||
|
logger.info(f"Loading tokenizer for {model_name}...")
|
||||||
|
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||||
|
|
||||||
|
logger.info(f"Loading model {model_name}...")
|
||||||
|
self.model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
model_name,
|
||||||
|
torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
|
||||||
|
device_map="auto" if self.device != "cpu" else None,
|
||||||
|
trust_remote_code=True,
|
||||||
|
)
|
||||||
|
logger.info(f"Successfully loaded {model_name}")
|
||||||
|
finally:
|
||||||
|
signal.alarm(0) # Cancel the alarm
|
||||||
|
signal.signal(signal.SIGALRM, old_handler) # Restore old handler
|
||||||
|
|
||||||
|
except TimeoutError:
|
||||||
|
logger.error(f"Model loading timed out for {model_name}")
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Model loading timed out for {model_name}. Please check your internet connection or try a smaller model."
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to load model {model_name}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
# Move model to device if not using device_map
|
# Move model to device if not using device_map
|
||||||
if self.device != "cpu" and "device_map" not in str(self.model):
|
if self.device != "cpu" and "device_map" not in str(self.model):
|
||||||
|
|||||||
@@ -354,13 +354,21 @@ class EmbeddingServerManager:
|
|||||||
self.server_process.terminate()
|
self.server_process.terminate()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.server_process.wait(timeout=5)
|
self.server_process.wait(timeout=3)
|
||||||
logger.info(f"Server process {self.server_process.pid} terminated.")
|
logger.info(f"Server process {self.server_process.pid} terminated.")
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Server process {self.server_process.pid} did not terminate gracefully, killing it."
|
f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
|
||||||
)
|
)
|
||||||
self.server_process.kill()
|
self.server_process.kill()
|
||||||
|
try:
|
||||||
|
self.server_process.wait(timeout=2)
|
||||||
|
logger.info(f"Server process {self.server_process.pid} killed successfully.")
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
logger.error(
|
||||||
|
f"Failed to kill server process {self.server_process.pid} - it may be hung"
|
||||||
|
)
|
||||||
|
# Don't hang indefinitely
|
||||||
|
|
||||||
# Clean up process resources to prevent resource tracker warnings
|
# Clean up process resources to prevent resource tracker warnings
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -5,11 +5,8 @@ LEANN is a revolutionary vector database that democratizes personal AI. Transfor
|
|||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Default installation (HNSW backend, recommended)
|
# Default installation (includes both HNSW and DiskANN backends)
|
||||||
uv pip install leann
|
uv pip install leann
|
||||||
|
|
||||||
# With DiskANN backend (for large-scale deployments)
|
|
||||||
uv pip install leann[diskann]
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
@@ -19,8 +16,8 @@ from leann import LeannBuilder, LeannSearcher, LeannChat
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
INDEX_PATH = str(Path("./").resolve() / "demo.leann")
|
INDEX_PATH = str(Path("./").resolve() / "demo.leann")
|
||||||
|
|
||||||
# Build an index
|
# Build an index (choose backend: "hnsw" or "diskann")
|
||||||
builder = LeannBuilder(backend_name="hnsw")
|
builder = LeannBuilder(backend_name="hnsw") # or "diskann" for large-scale deployments
|
||||||
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
|
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
|
||||||
builder.add_text("Tung Tung Tung Sahur called—they need their banana‑crocodile hybrid back")
|
builder.add_text("Tung Tung Tung Sahur called—they need their banana‑crocodile hybrid back")
|
||||||
builder.build_index(INDEX_PATH)
|
builder.build_index(INDEX_PATH)
|
||||||
|
|||||||
@@ -24,16 +24,15 @@ classifiers = [
|
|||||||
"Programming Language :: Python :: 3.12",
|
"Programming Language :: Python :: 3.12",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Default installation: core + hnsw
|
# Default installation: core + hnsw + diskann
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"leann-core>=0.1.0",
|
"leann-core>=0.1.0",
|
||||||
"leann-backend-hnsw>=0.1.0",
|
"leann-backend-hnsw>=0.1.0",
|
||||||
|
"leann-backend-diskann>=0.1.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
diskann = [
|
# All backends now included by default
|
||||||
"leann-backend-diskann>=0.1.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Repository = "https://github.com/yichuan-w/LEANN"
|
Repository = "https://github.com/yichuan-w/LEANN"
|
||||||
|
|||||||
Reference in New Issue
Block a user