reader: non-destructive portability (relative hints + fallback); fix comments; sky: refine yaml
This commit is contained in:
@@ -82,17 +82,35 @@ def create_hnsw_embedding_server(
|
||||
with open(passages_file) as f:
|
||||
meta = json.load(f)
|
||||
|
||||
# Convert relative paths to absolute paths based on metadata file location
|
||||
metadata_dir = Path(passages_file).parent.parent # Go up one level from the metadata file
|
||||
# Resolve passage files for cross-machine portability
|
||||
metadata_dir = Path(passages_file).parent # Same directory as meta.json
|
||||
passage_sources = []
|
||||
for source in meta["passage_sources"]:
|
||||
source_copy = source.copy()
|
||||
# Convert relative paths to absolute paths
|
||||
if not Path(source_copy["path"]).is_absolute():
|
||||
source_copy["path"] = str(metadata_dir / source_copy["path"])
|
||||
if not Path(source_copy["index_path"]).is_absolute():
|
||||
source_copy["index_path"] = str(metadata_dir / source_copy["index_path"])
|
||||
passage_sources.append(source_copy)
|
||||
src = dict(source)
|
||||
# Candidate paths as recorded in meta (may be absolute paths from another machine)
|
||||
cand_path = Path(src.get("path", ""))
|
||||
cand_idx = Path(src.get("index_path", ""))
|
||||
# Relative hints if provided
|
||||
rel_path = src.get("path_relative")
|
||||
rel_idx = src.get("index_path_relative")
|
||||
# Defaults (siblings of meta)
|
||||
default_path = metadata_dir / "documents.leann.passages.jsonl"
|
||||
default_idx = metadata_dir / "documents.leann.passages.idx"
|
||||
|
||||
# Normalize path
|
||||
if not cand_path.exists():
|
||||
if rel_path and (metadata_dir / rel_path).exists():
|
||||
src["path"] = str(metadata_dir / rel_path)
|
||||
elif default_path.exists():
|
||||
src["path"] = str(default_path)
|
||||
# Normalize index_path
|
||||
if not cand_idx.exists():
|
||||
if rel_idx and (metadata_dir / rel_idx).exists():
|
||||
src["index_path"] = str(metadata_dir / rel_idx)
|
||||
elif default_idx.exists():
|
||||
src["index_path"] = str(default_idx)
|
||||
|
||||
passage_sources.append(src)
|
||||
|
||||
passages = PassageManager(passage_sources)
|
||||
logger.info(
|
||||
|
||||
@@ -328,6 +328,9 @@ class LeannBuilder:
|
||||
"type": "jsonl",
|
||||
"path": str(passages_file),
|
||||
"index_path": str(offset_file),
|
||||
# Relative hints for cross-machine portability (non-breaking addition)
|
||||
"path_relative": f"{index_name}.passages.jsonl",
|
||||
"index_path_relative": f"{index_name}.passages.idx",
|
||||
}
|
||||
],
|
||||
}
|
||||
@@ -444,6 +447,9 @@ class LeannBuilder:
|
||||
"type": "jsonl",
|
||||
"path": str(passages_file),
|
||||
"index_path": str(offset_file),
|
||||
# Relative hints for cross-machine portability (non-breaking addition)
|
||||
"path_relative": f"{index_name}.passages.jsonl",
|
||||
"index_path_relative": f"{index_name}.passages.idx",
|
||||
}
|
||||
],
|
||||
"built_from_precomputed_embeddings": True,
|
||||
@@ -485,6 +491,42 @@ class LeannSearcher:
|
||||
self.embedding_model = self.meta_data["embedding_model"]
|
||||
# Support both old and new format
|
||||
self.embedding_mode = self.meta_data.get("embedding_mode", "sentence-transformers")
|
||||
# Best-effort portability: if meta contains absolute paths from another machine,
|
||||
# and those paths do not exist locally, try relative hints or fallback sibling filenames.
|
||||
try:
|
||||
idx_path_obj = Path(self.meta_path_str).with_suffix("").with_suffix("")
|
||||
index_dir = idx_path_obj.parent
|
||||
index_name = idx_path_obj.name
|
||||
default_passages = index_dir / f"{index_name}.passages.jsonl"
|
||||
default_offsets = index_dir / f"{index_name}.passages.idx"
|
||||
|
||||
sources = self.meta_data.get("passage_sources", [])
|
||||
normalized_sources: list[dict[str, Any]] = []
|
||||
for src in sources:
|
||||
new_src = dict(src)
|
||||
raw_path = Path(new_src.get("path", ""))
|
||||
raw_idx = Path(new_src.get("index_path", ""))
|
||||
rel_path = new_src.get("path_relative")
|
||||
rel_idx = new_src.get("index_path_relative")
|
||||
|
||||
# Normalize path
|
||||
if not raw_path.exists():
|
||||
cand = index_dir / rel_path if rel_path else default_passages
|
||||
if cand.exists():
|
||||
new_src["path"] = str(cand)
|
||||
# Normalize index_path
|
||||
if not raw_idx.exists():
|
||||
cand = index_dir / rel_idx if rel_idx else default_offsets
|
||||
if cand.exists():
|
||||
new_src["index_path"] = str(cand)
|
||||
|
||||
normalized_sources.append(new_src)
|
||||
|
||||
# Only override in-memory view; do not rewrite meta file (non-destructive)
|
||||
self.meta_data["passage_sources"] = normalized_sources
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self.passage_manager = PassageManager(self.meta_data.get("passage_sources", []))
|
||||
backend_factory = BACKEND_REGISTRY.get(backend_name)
|
||||
if backend_factory is None:
|
||||
|
||||
Reference in New Issue
Block a user