perf: reuse embedding server for query embed
This commit is contained in:
@@ -2,13 +2,16 @@ from abc import ABC, abstractmethod
|
||||
import numpy as np
|
||||
from typing import Dict, Any, List, Literal
|
||||
|
||||
|
||||
class LeannBackendBuilderInterface(ABC):
|
||||
"""Backend interface for building indexes"""
|
||||
|
||||
@abstractmethod
|
||||
def build(self, data: np.ndarray, ids: List[str], index_path: str, **kwargs) -> None:
|
||||
|
||||
@abstractmethod
|
||||
def build(
|
||||
self, data: np.ndarray, ids: List[str], index_path: str, **kwargs
|
||||
) -> None:
|
||||
"""Build index
|
||||
|
||||
|
||||
Args:
|
||||
data: Vector data (N, D)
|
||||
ids: List of string IDs for each vector
|
||||
@@ -17,30 +20,35 @@ class LeannBackendBuilderInterface(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class LeannBackendSearcherInterface(ABC):
|
||||
"""Backend interface for searching"""
|
||||
|
||||
|
||||
@abstractmethod
|
||||
def __init__(self, index_path: str, **kwargs):
|
||||
"""Initialize searcher
|
||||
|
||||
|
||||
Args:
|
||||
index_path: Path to index file
|
||||
**kwargs: Backend-specific loading parameters
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@abstractmethod
|
||||
def search(self, query: np.ndarray, top_k: int,
|
||||
complexity: int = 64,
|
||||
beam_width: int = 1,
|
||||
prune_ratio: float = 0.0,
|
||||
recompute_embeddings: bool = False,
|
||||
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
||||
zmq_port: int = 5557,
|
||||
**kwargs) -> Dict[str, Any]:
|
||||
def search(
|
||||
self,
|
||||
query: np.ndarray,
|
||||
top_k: int,
|
||||
complexity: int = 64,
|
||||
beam_width: int = 1,
|
||||
prune_ratio: float = 0.0,
|
||||
recompute_embeddings: bool = False,
|
||||
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
||||
zmq_port: int = 5557,
|
||||
**kwargs,
|
||||
) -> Dict[str, Any]:
|
||||
"""Search for nearest neighbors
|
||||
|
||||
|
||||
Args:
|
||||
query: Query vectors (B, D) where B is batch size, D is dimension
|
||||
top_k: Number of nearest neighbors to return
|
||||
@@ -51,23 +59,40 @@ class LeannBackendSearcherInterface(ABC):
|
||||
pruning_strategy: PQ candidate selection strategy - "global" (default), "local", or "proportional"
|
||||
zmq_port: ZMQ port for embedding server communication
|
||||
**kwargs: Backend-specific parameters
|
||||
|
||||
|
||||
Returns:
|
||||
{"labels": [...], "distances": [...]}
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def compute_query_embedding(
|
||||
self, query: str, zmq_port: int = 5557, use_server_if_available: bool = True
|
||||
) -> np.ndarray:
|
||||
"""Compute embedding for a query string
|
||||
|
||||
Args:
|
||||
query: The query string to embed
|
||||
zmq_port: ZMQ port for embedding server
|
||||
use_server_if_available: Whether to try using embedding server first
|
||||
|
||||
Returns:
|
||||
Query embedding as numpy array with shape (1, D)
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class LeannBackendFactoryInterface(ABC):
|
||||
"""Backend factory interface"""
|
||||
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def builder(**kwargs) -> LeannBackendBuilderInterface:
|
||||
"""Create Builder instance"""
|
||||
pass
|
||||
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
@abstractmethod
|
||||
def searcher(index_path: str, **kwargs) -> LeannBackendSearcherInterface:
|
||||
"""Create Searcher instance"""
|
||||
pass
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user