import os import struct from pathlib import Path from .convert_to_csr import ( EXPECTED_HNSW_FOURCCS, NULL_INDEX_FOURCC, read_struct, read_vector_raw, ) def _write_vector_raw(f_out, count: int, data_bytes: bytes) -> None: """Write a vector in the same binary layout as read_vector_raw reads: + raw bytes.""" f_out.write(struct.pack(" 0 and data_bytes: f_out.write(data_bytes) def prune_embeddings_preserve_graph(input_filename: str, output_filename: str) -> bool: """ Copy an original (non-compact) HNSW index file while pruning the trailing embedding storage. Preserves the graph structure and metadata exactly; only writes a NULL storage marker instead of the original storage fourcc and payload. Returns True on success. """ print(f"Pruning embeddings from {input_filename} to {output_filename}") print("--------------------------------") # running in mode is-recompute=True and is-compact=False in_path = Path(input_filename) out_path = Path(output_filename) try: with open(in_path, "rb") as f_in, open(out_path, "wb") as f_out: # Header index_fourcc = read_struct(f_in, " 1: metric_arg = read_struct(f_in, " bool: """ Convenience wrapper: write pruned file to a temporary path next to the original, then atomically replace on success. """ print(f"Pruning embeddings from {index_file_path} to {index_file_path}") print("--------------------------------") # running in mode is-recompute=True and is-compact=False src = Path(index_file_path) tmp = src.with_suffix(".pruned.tmp") ok = prune_embeddings_preserve_graph(str(src), str(tmp)) if not ok: if tmp.exists(): try: tmp.unlink() except OSError: pass return False try: os.replace(str(tmp), str(src)) except Exception: # Rollback on failure try: if tmp.exists(): tmp.unlink() except OSError: pass return False return True