fix: do not reuse emb_server and close it properly

This commit is contained in:
Andy Lee
2025-07-20 18:07:51 -07:00
parent f4998bb316
commit 7e226a51c9
7 changed files with 58 additions and 130 deletions

View File

@@ -59,10 +59,6 @@ class HNSWBuilder(LeannBackendBuilderInterface):
if data.dtype != np.float32:
data = data.astype(np.float32)
label_map = {i: str_id for i, str_id in enumerate(ids)}
label_map_file = index_dir / "leann.labels.map"
with open(label_map_file, "wb") as f:
pickle.dump(label_map, f)
metric_enum = get_metric_map().get(self.distance_metric.lower())
if metric_enum is None:
@@ -142,13 +138,6 @@ class HNSWSearcher(BaseSearcher):
self._index = faiss.read_index(str(index_file), faiss.IO_FLAG_MMAP, hnsw_config)
# Load label mapping
label_map_file = self.index_dir / "leann.labels.map"
if not label_map_file.exists():
raise FileNotFoundError(f"Label map file not found at {label_map_file}")
with open(label_map_file, "rb") as f:
self.label_map = pickle.load(f)
def search(
self,
@@ -239,10 +228,7 @@ class HNSWSearcher(BaseSearcher):
)
string_labels = [
[
self.label_map.get(int_label, f"unknown_{int_label}")
for int_label in batch_labels
]
[str(int_label) for int_label in batch_labels]
for batch_labels in labels
]

View File

@@ -114,25 +114,11 @@ def load_passages_from_metadata(meta_file: str) -> SimplePassageLoader:
finally:
sys.path.pop(0)
# Load label map
passages_dir = Path(meta_file).parent
label_map_file = passages_dir / "leann.labels.map"
if label_map_file.exists():
import pickle
with open(label_map_file, "rb") as f:
label_map = pickle.load(f)
print(f"Loaded label map with {len(label_map)} entries")
else:
raise FileNotFoundError(f"Label map file not found: {label_map_file}")
print(f"Initialized lazy passage loading for {len(label_map)} passages")
print(f"Initialized lazy passage loading for {len(passage_manager.global_offset_map)} passages")
class LazyPassageLoader(SimplePassageLoader):
def __init__(self, passage_manager, label_map):
def __init__(self, passage_manager):
self.passage_manager = passage_manager
self.label_map = label_map
# Initialize parent with empty data
super().__init__({})
@@ -140,28 +126,24 @@ def load_passages_from_metadata(meta_file: str) -> SimplePassageLoader:
"""Get passage by ID with lazy loading"""
try:
int_id = int(passage_id)
if int_id in self.label_map:
string_id = self.label_map[int_id]
passage_data = self.passage_manager.get_passage(string_id)
if passage_data and passage_data.get("text"):
return {"text": passage_data["text"]}
else:
logger.debug(f"Empty text for ID {int_id} -> {string_id}")
return {"text": ""}
string_id = str(int_id)
passage_data = self.passage_manager.get_passage(string_id)
if passage_data and passage_data.get("text"):
return {"text": passage_data["text"]}
else:
logger.debug(f"ID {int_id} not found in label_map")
logger.debug(f"Empty text for ID {int_id} -> {string_id}")
return {"text": ""}
except Exception as e:
logger.debug(f"Exception getting passage {passage_id}: {e}")
return {"text": ""}
def __len__(self) -> int:
return len(self.label_map)
return len(self.passage_manager.global_offset_map)
def keys(self):
return self.label_map.keys()
return self.passage_manager.global_offset_map.keys()
return LazyPassageLoader(passage_manager, label_map)
return LazyPassageLoader(passage_manager)
def create_hnsw_embedding_server(