fix: cache the loaded model

This commit is contained in:
Andy Lee
2025-07-21 21:20:53 -07:00
parent 727724990e
commit b3970793cf
9 changed files with 163 additions and 146 deletions

View File

@@ -1,5 +1,4 @@
import json
import pickle
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, Any, Literal, Optional
@@ -88,15 +87,15 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
def compute_query_embedding(
self,
query: str,
expected_zmq_port: int = 5557,
use_server_if_available: bool = True,
zmq_port: int = 5557,
) -> np.ndarray:
"""
Compute embedding for a query string.
Args:
query: The query string to embed
expected_zmq_port: ZMQ port for embedding server
zmq_port: ZMQ port for embedding server
use_server_if_available: Whether to try using embedding server first
Returns:
@@ -110,7 +109,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
self.index_dir / f"{self.index_path.name}.meta.json"
)
zmq_port = self._ensure_server_running(
str(passages_source_file), expected_zmq_port
str(passages_source_file), zmq_port
)
return self._compute_embedding_via_server([query], zmq_port)[
@@ -168,7 +167,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
prune_ratio: float = 0.0,
recompute_embeddings: bool = False,
pruning_strategy: Literal["global", "local", "proportional"] = "global",
expected_zmq_port: Optional[int] = None,
zmq_port: Optional[int] = None,
**kwargs,
) -> Dict[str, Any]:
"""
@@ -182,7 +181,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
prune_ratio: Ratio of neighbors to prune via approximate distance (0.0-1.0)
recompute_embeddings: Whether to fetch fresh embeddings from server vs use stored PQ codes
pruning_strategy: PQ candidate selection strategy - "global" (default), "local", or "proportional"
expected_zmq_port: ZMQ port for embedding server communication
zmq_port: ZMQ port for embedding server communication
**kwargs: Backend-specific parameters (e.g., batch_size, dedup_node_dis, etc.)
Returns: