feat: hnsw embedding server and csr format

Andy Lee
2025-07-05 23:04:41 +00:00
parent 368474d036
commit 0aa84e147b
9 changed files with 959 additions and 154 deletions


@@ -0,0 +1,543 @@
import struct
import sys
import numpy as np
import os
import argparse
import gc # Import garbage collector interface
import time
# --- FourCCs (add more if needed) ---
INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b'IHNf', 'little')
# Add other HNSW fourccs if you expect different storage types inside HNSW
# INDEX_HNSW_PQ_FOURCC = int.from_bytes(b'IHNp', 'little')
# INDEX_HNSW_SQ_FOURCC = int.from_bytes(b'IHNs', 'little')
# INDEX_HNSW_CAGRA_FOURCC = int.from_bytes(b'IHNc', 'little') # Example
EXPECTED_HNSW_FOURCCS = {INDEX_HNSW_FLAT_FOURCC} # Modify if needed
NULL_INDEX_FOURCC = int.from_bytes(b'null', 'little')
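# For reference (values derived from the definitions above, not from any file):
#   int.from_bytes(b'IHNf', 'little') == 0x664E4849  -> file begins with bytes 49 48 4E 66
#   int.from_bytes(b'null', 'little') == 0x6C6C756E  -> pruned-storage marker bytes 6E 75 6C 6C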
# --- Helper functions for reading/writing binary data ---
def read_struct(f, fmt):
"""Reads data according to the struct format."""
size = struct.calcsize(fmt)
data = f.read(size)
if len(data) != size:
raise EOFError(f"File ended unexpectedly reading struct fmt '{fmt}'. Expected {size} bytes, got {len(data)}.")
return struct.unpack(fmt, data)[0]
def read_vector_raw(f, element_fmt_char):
"""Reads a vector (size followed by data), returns count and raw bytes."""
count = -1 # Initialize count
total_bytes = -1 # Initialize total_bytes
try:
count = read_struct(f, '<Q') # size_t usually 64-bit unsigned
element_size = struct.calcsize(element_fmt_char)
# --- FIX for MemoryError: Check for unreasonably large count ---
max_reasonable_count = 10 * (10**9) # ~10 billion elements limit
if count > max_reasonable_count or count < 0:
raise MemoryError(f"Vector count {count} seems unreasonably large, possibly due to file corruption or incorrect format read.")
total_bytes = count * element_size
# --- FIX for MemoryError: Check for huge byte size before allocation ---
max_reasonable_bytes = 50 * (1024**3) # ~50 GB limit
if total_bytes > max_reasonable_bytes or total_bytes < 0: # Check for overflow
raise MemoryError(f"Attempting to read {total_bytes} bytes ({count} elements * {element_size} bytes/element), which exceeds the safety limit. File might be corrupted or format mismatch.")
data_bytes = f.read(total_bytes)
if len(data_bytes) != total_bytes:
raise EOFError(f"File ended unexpectedly reading vector data. Expected {total_bytes} bytes, got {len(data_bytes)}.")
return count, data_bytes
except (MemoryError, OverflowError) as e:
# Add context to the error message
print(f"\nError during raw vector read (element_fmt='{element_fmt_char}', count={count}, total_bytes={total_bytes}): {e}", file=sys.stderr)
raise e # Re-raise the original error type
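# Illustrative sketch of the vector layout consumed above (assumed FAISS-style
# serialization: a uint64 element count followed by the raw element bytes):
#   struct.pack('<Q', 3) + np.array([7, 8, 9], dtype=np.int32).tobytes()
# would be parsed by read_vector_raw(f, 'i') as count=3 followed by 12 data bytes.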
def read_numpy_vector(f, np_dtype, struct_fmt_char):
"""Reads a vector into a NumPy array."""
count = -1 # Initialize count for robust error handling
print(f" Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ", end='', flush=True)
try:
count, data_bytes = read_vector_raw(f, struct_fmt_char)
print(f"Count={count}, Bytes={len(data_bytes)}")
if count > 0 and len(data_bytes) > 0:
arr = np.frombuffer(data_bytes, dtype=np_dtype)
if arr.size != count:
raise ValueError(f"Inconsistent array size after reading. Expected {count}, got {arr.size}")
return arr
elif count == 0:
return np.array([], dtype=np_dtype)
else:
raise ValueError("Read zero bytes but count > 0.")
except MemoryError as e:
# Now count should be defined (or -1 if error was in read_struct)
print(f"\nMemoryError creating NumPy array (dtype={np_dtype}, count={count}). {e}", file=sys.stderr)
raise e
except Exception as e: # Catch other potential errors like ValueError
print(f"\nError reading numpy vector (dtype={np_dtype}, fmt='{struct_fmt_char}', count={count}): {e}", file=sys.stderr)
raise e
def write_numpy_vector(f, arr, struct_fmt_char):
"""Writes a NumPy array as a vector (size followed by data)."""
count = arr.size
f.write(struct.pack('<Q', count))
try:
expected_dtype = np.dtype(struct_fmt_char)
if arr.dtype != expected_dtype:
data_to_write = arr.astype(expected_dtype).tobytes()
else:
data_to_write = arr.tobytes()
f.write(data_to_write)
del data_to_write # Hint GC
except MemoryError as e:
print(f"\nMemoryError converting NumPy array to bytes for writing (size={count}, dtype={arr.dtype}). {e}", file=sys.stderr)
raise e
def write_list_vector(f, lst, struct_fmt_char):
"""Writes a Python list as a vector iteratively."""
count = len(lst)
f.write(struct.pack('<Q', count))
fmt = '<' + struct_fmt_char
chunk_size = 1024 * 1024
element_size = struct.calcsize(fmt)
# Allocate buffer outside the loop if possible, or handle MemoryError during allocation
try:
buffer = bytearray(chunk_size * element_size)
except MemoryError:
print(f"MemoryError: Cannot allocate buffer for writing list vector chunk (size {chunk_size * element_size} bytes).", file=sys.stderr)
raise
buffer_count = 0
for i, item in enumerate(lst):
try:
offset = buffer_count * element_size
struct.pack_into(fmt, buffer, offset, item)
buffer_count += 1
if buffer_count == chunk_size or i == count - 1:
f.write(buffer[:buffer_count * element_size])
buffer_count = 0
except struct.error as e:
print(f"\nStruct packing error for item {item} at index {i} with format '{fmt}'. {e}", file=sys.stderr)
raise e
def get_cum_neighbors(cum_nneighbor_per_level_np, level):
"""Helper to get cumulative neighbors count, matching C++ logic."""
if level < 0: return 0
if level < len(cum_nneighbor_per_level_np):
return cum_nneighbor_per_level_np[level]
else:
return cum_nneighbor_per_level_np[-1] if len(cum_nneighbor_per_level_np) > 0 else 0
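# Worked example (illustrative, assuming FAISS's usual layout where level 0
# holds 2*M neighbor slots and every higher level holds M): with M=32,
# cum_nneighbor_per_level looks like [0, 64, 96, 128, ...], so
# get_cum_neighbors(cum, 1) - get_cum_neighbors(cum, 0) == 64 slots at level 0.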
def write_compact_format(f_out, original_hnsw_data, assign_probas_np, cum_nneighbor_per_level_np,
levels_np, compact_level_ptr, compact_node_offsets_np,
compact_neighbors_data, storage_fourcc, storage_data):
"""Write HNSW data in compact format following C++ read order exactly."""
# Write IndexHNSW Header
f_out.write(struct.pack('<I', original_hnsw_data['index_fourcc']))
f_out.write(struct.pack('<i', original_hnsw_data['d']))
f_out.write(struct.pack('<q', original_hnsw_data['ntotal']))
f_out.write(struct.pack('<q', original_hnsw_data['dummy1']))
f_out.write(struct.pack('<q', original_hnsw_data['dummy2']))
f_out.write(struct.pack('<?', original_hnsw_data['is_trained']))
f_out.write(struct.pack('<i', original_hnsw_data['metric_type']))
if original_hnsw_data['metric_type'] > 1:
f_out.write(struct.pack('<f', original_hnsw_data['metric_arg']))
# Write HNSW struct parts (standard order)
write_numpy_vector(f_out, assign_probas_np, 'd')
write_numpy_vector(f_out, cum_nneighbor_per_level_np, 'i')
write_numpy_vector(f_out, levels_np, 'i')
# Write compact format flag
f_out.write(struct.pack('<?', True)) # storage_is_compact = True
# Write compact data in CORRECT C++ read order: level_ptr, node_offsets FIRST
if isinstance(compact_level_ptr, np.ndarray):
write_numpy_vector(f_out, compact_level_ptr, 'Q')
else:
write_list_vector(f_out, compact_level_ptr, 'Q')
write_numpy_vector(f_out, compact_node_offsets_np, 'Q')
# Write HNSW scalar parameters
f_out.write(struct.pack('<i', original_hnsw_data['entry_point']))
f_out.write(struct.pack('<i', original_hnsw_data['max_level']))
f_out.write(struct.pack('<i', original_hnsw_data['efConstruction']))
f_out.write(struct.pack('<i', original_hnsw_data['efSearch']))
f_out.write(struct.pack('<i', original_hnsw_data['dummy_upper_beam']))
# Write storage fourcc (this determines how to read what follows)
f_out.write(struct.pack('<I', storage_fourcc))
# Write compact neighbors data AFTER storage fourcc
write_list_vector(f_out, compact_neighbors_data, 'i')
# Write storage data if not NULL (only after neighbors)
if storage_fourcc != NULL_INDEX_FOURCC and storage_data:
f_out.write(storage_data)
# --- Main Conversion Logic ---
def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=True):
"""
Converts an HNSW graph file to the CSR format.
Supports both original and already-compact formats (backward compatibility).
Args:
input_filename: Input HNSW index file
output_filename: Output CSR index file
prune_embeddings: Whether to prune embedding storage (write NULL storage marker)
"""
print(f"Starting conversion: {input_filename} -> {output_filename}")
start_time = time.time()
original_hnsw_data = {}
neighbors_np = None # Initialize to allow check in finally block
try:
with open(input_filename, 'rb') as f_in, open(output_filename, 'wb') as f_out:
# --- Read IndexHNSW FourCC and Header ---
print(f"[{time.time() - start_time:.2f}s] Reading Index HNSW header...")
            # Read the IndexHNSW header fields in FAISS write order
hnsw_index_fourcc = read_struct(f_in, '<I')
if hnsw_index_fourcc not in EXPECTED_HNSW_FOURCCS:
print(f"Error: Expected HNSW Index FourCC ({list(EXPECTED_HNSW_FOURCCS)}), got {hnsw_index_fourcc:08x}.", file=sys.stderr)
return False
original_hnsw_data['index_fourcc'] = hnsw_index_fourcc
original_hnsw_data['d'] = read_struct(f_in, '<i')
original_hnsw_data['ntotal'] = read_struct(f_in, '<q')
original_hnsw_data['dummy1'] = read_struct(f_in, '<q')
original_hnsw_data['dummy2'] = read_struct(f_in, '<q')
            original_hnsw_data['is_trained'] = read_struct(f_in, '<?')
original_hnsw_data['metric_type'] = read_struct(f_in, '<i')
original_hnsw_data['metric_arg'] = 0.0
if original_hnsw_data['metric_type'] > 1:
original_hnsw_data['metric_arg'] = read_struct(f_in, '<f')
print(f"[{time.time() - start_time:.2f}s] Header read: d={original_hnsw_data['d']}, ntotal={original_hnsw_data['ntotal']}")
# --- Read original HNSW struct data ---
print(f"[{time.time() - start_time:.2f}s] Reading HNSW struct vectors...")
assign_probas_np = read_numpy_vector(f_in, np.float64, 'd')
print(f"[{time.time() - start_time:.2f}s] Read assign_probas ({assign_probas_np.size})")
gc.collect()
cum_nneighbor_per_level_np = read_numpy_vector(f_in, np.int32, 'i')
print(f"[{time.time() - start_time:.2f}s] Read cum_nneighbor_per_level ({cum_nneighbor_per_level_np.size})")
gc.collect()
levels_np = read_numpy_vector(f_in, np.int32, 'i')
print(f"[{time.time() - start_time:.2f}s] Read levels ({levels_np.size})")
gc.collect()
ntotal = len(levels_np)
if ntotal != original_hnsw_data['ntotal']:
print(f"Warning: ntotal mismatch! Header says {original_hnsw_data['ntotal']}, levels vector size is {ntotal}. Using levels vector size.", file=sys.stderr)
original_hnsw_data['ntotal'] = ntotal
# --- Check for compact format flag ---
print(f"[{time.time() - start_time:.2f}s] Probing for compact storage flag...")
pos_before_compact = f_in.tell()
try:
is_compact_flag = read_struct(f_in, '<?')
print(f"[{time.time() - start_time:.2f}s] Found compact flag: {is_compact_flag}")
if is_compact_flag:
# Input is already in compact format - read compact data
print(f"[{time.time() - start_time:.2f}s] Input is already in compact format, reading compact data...")
compact_level_ptr = read_numpy_vector(f_in, np.uint64, 'Q')
print(f"[{time.time() - start_time:.2f}s] Read compact_level_ptr ({compact_level_ptr.size})")
compact_node_offsets_np = read_numpy_vector(f_in, np.uint64, 'Q')
print(f"[{time.time() - start_time:.2f}s] Read compact_node_offsets ({compact_node_offsets_np.size})")
# Read scalar parameters
original_hnsw_data['entry_point'] = read_struct(f_in, '<i')
original_hnsw_data['max_level'] = read_struct(f_in, '<i')
original_hnsw_data['efConstruction'] = read_struct(f_in, '<i')
original_hnsw_data['efSearch'] = read_struct(f_in, '<i')
original_hnsw_data['dummy_upper_beam'] = read_struct(f_in, '<i')
print(f"[{time.time() - start_time:.2f}s] Read scalar params (ep={original_hnsw_data['entry_point']}, max_lvl={original_hnsw_data['max_level']})")
# Read storage fourcc
storage_fourcc = read_struct(f_in, '<I')
print(f"[{time.time() - start_time:.2f}s] Found storage fourcc: {storage_fourcc:08x}")
if prune_embeddings and storage_fourcc != NULL_INDEX_FOURCC:
# Read compact neighbors data
compact_neighbors_data_np = read_numpy_vector(f_in, np.int32, 'i')
print(f"[{time.time() - start_time:.2f}s] Read compact neighbors data ({compact_neighbors_data_np.size})")
compact_neighbors_data = compact_neighbors_data_np.tolist()
del compact_neighbors_data_np
# Skip storage data and write with NULL marker
print(f"[{time.time() - start_time:.2f}s] Pruning embeddings: Writing NULL storage marker.")
storage_fourcc = NULL_INDEX_FOURCC
elif not prune_embeddings:
# Read and preserve compact neighbors and storage
compact_neighbors_data_np = read_numpy_vector(f_in, np.int32, 'i')
compact_neighbors_data = compact_neighbors_data_np.tolist()
del compact_neighbors_data_np
# Read remaining storage data
storage_data = f_in.read()
else:
# Already pruned (NULL storage)
compact_neighbors_data_np = read_numpy_vector(f_in, np.int32, 'i')
compact_neighbors_data = compact_neighbors_data_np.tolist()
del compact_neighbors_data_np
storage_data = b''
# Write the updated compact format
print(f"[{time.time() - start_time:.2f}s] Writing updated compact format...")
write_compact_format(f_out, original_hnsw_data, assign_probas_np, cum_nneighbor_per_level_np,
levels_np, compact_level_ptr, compact_node_offsets_np,
compact_neighbors_data, storage_fourcc, storage_data if not prune_embeddings else b'')
print(f"[{time.time() - start_time:.2f}s] Conversion complete.")
return True
else:
# is_compact=False, rewind and read original format
f_in.seek(pos_before_compact)
print(f"[{time.time() - start_time:.2f}s] Compact flag is False, reading original format...")
except EOFError:
# No compact flag found, assume original format
f_in.seek(pos_before_compact)
print(f"[{time.time() - start_time:.2f}s] No compact flag found, assuming original format...")
# --- Handle potential extra byte in original format (like C++ code) ---
print(f"[{time.time() - start_time:.2f}s] Probing for potential extra byte before non-compact offsets...")
pos_before_probe = f_in.tell()
try:
suspected_flag = read_struct(f_in, '<B') # Read 1 byte
if suspected_flag == 0x00:
print(f"[{time.time() - start_time:.2f}s] Found and consumed an unexpected 0x00 byte.")
elif suspected_flag == 0x01:
print(f"[{time.time() - start_time:.2f}s] ERROR: Found 0x01 but is_compact should be False")
raise ValueError("Inconsistent compact flag state")
else:
# Rewind - this byte is part of offsets data
f_in.seek(pos_before_probe)
print(f"[{time.time() - start_time:.2f}s] Rewound to original position (byte was 0x{suspected_flag:02x})")
except EOFError:
f_in.seek(pos_before_probe)
print(f"[{time.time() - start_time:.2f}s] No extra byte found (EOF), proceeding with offsets read")
# --- Read original format data ---
offsets_np = read_numpy_vector(f_in, np.uint64, 'Q')
print(f"[{time.time() - start_time:.2f}s] Read offsets ({offsets_np.size})")
if len(offsets_np) != ntotal + 1:
raise ValueError(f"Inconsistent offsets size: len(levels)={ntotal} but len(offsets)={len(offsets_np)}")
gc.collect()
print(f"[{time.time() - start_time:.2f}s] Attempting to read neighbors vector...")
neighbors_np = read_numpy_vector(f_in, np.int32, 'i')
print(f"[{time.time() - start_time:.2f}s] Read neighbors ({neighbors_np.size})")
expected_neighbors_size = offsets_np[-1] if ntotal > 0 else 0
if neighbors_np.size != expected_neighbors_size:
print(f"Warning: neighbors vector size mismatch. Expected {expected_neighbors_size} based on offsets, got {neighbors_np.size}.")
gc.collect()
original_hnsw_data['entry_point'] = read_struct(f_in, '<i')
original_hnsw_data['max_level'] = read_struct(f_in, '<i')
original_hnsw_data['efConstruction'] = read_struct(f_in, '<i')
original_hnsw_data['efSearch'] = read_struct(f_in, '<i')
original_hnsw_data['dummy_upper_beam'] = read_struct(f_in, '<i')
print(f"[{time.time() - start_time:.2f}s] Read scalar params (ep={original_hnsw_data['entry_point']}, max_lvl={original_hnsw_data['max_level']})")
print(f"[{time.time() - start_time:.2f}s] Checking for storage data...")
storage_fourcc = None
try:
storage_fourcc = read_struct(f_in, '<I')
print(f"[{time.time() - start_time:.2f}s] Found storage fourcc: {storage_fourcc:08x}.")
except EOFError:
print(f"[{time.time() - start_time:.2f}s] No storage data found (EOF).")
except Exception as e:
print(f"[{time.time() - start_time:.2f}s] Error reading potential storage data: {e}")
# --- Perform Conversion ---
print(f"[{time.time() - start_time:.2f}s] Converting to CSR format...")
# Use lists for potentially huge data, np for offsets
compact_neighbors_data = []
compact_level_ptr = []
compact_node_offsets_np = np.zeros(ntotal + 1, dtype=np.uint64)
current_level_ptr_idx = 0
current_data_idx = 0
total_valid_neighbors_counted = 0 # For validation
# Optimize calculation by getting slices once per node if possible
for i in range(ntotal):
if i > 0 and i % (ntotal // 100 or 1) == 0: # Log progress roughly every 1%
progress = (i / ntotal) * 100
elapsed = time.time() - start_time
print(f"\r[{elapsed:.2f}s] Converting node {i}/{ntotal} ({progress:.1f}%)...", end="")
node_max_level = levels_np[i] - 1
if node_max_level < -1: node_max_level = -1
node_ptr_start_index = current_level_ptr_idx
compact_node_offsets_np[i] = node_ptr_start_index
original_offset_start = offsets_np[i]
num_pointers_expected = (node_max_level + 1) + 1
for level in range(node_max_level + 1):
compact_level_ptr.append(current_data_idx)
begin_orig_np = original_offset_start + get_cum_neighbors(cum_nneighbor_per_level_np, level)
end_orig_np = original_offset_start + get_cum_neighbors(cum_nneighbor_per_level_np, level + 1)
begin_orig = int(begin_orig_np)
end_orig = int(end_orig_np)
neighbors_len = len(neighbors_np) # Cache length
begin_orig = min(max(0, begin_orig), neighbors_len)
end_orig = min(max(begin_orig, end_orig), neighbors_len)
if begin_orig < end_orig:
# Slicing creates a copy, could be memory intensive for large M
# Consider iterating if memory becomes an issue here
level_neighbors_slice = neighbors_np[begin_orig:end_orig]
valid_neighbors_mask = level_neighbors_slice >= 0
num_valid = np.count_nonzero(valid_neighbors_mask)
if num_valid > 0:
# Append valid neighbors
compact_neighbors_data.extend(level_neighbors_slice[valid_neighbors_mask])
current_data_idx += num_valid
total_valid_neighbors_counted += num_valid
compact_level_ptr.append(current_data_idx)
current_level_ptr_idx += num_pointers_expected
compact_node_offsets_np[ntotal] = current_level_ptr_idx
print(f"\r[{time.time() - start_time:.2f}s] Conversion loop finished. ") # Clear progress line
# --- Validation Checks ---
print(f"[{time.time() - start_time:.2f}s] Running validation checks...")
valid_check_passed = True
# Check 1: Total valid neighbors count
print(f" Checking total valid neighbor count...")
expected_valid_count = np.sum(neighbors_np >= 0)
if total_valid_neighbors_counted != len(compact_neighbors_data):
print(f"Error: Mismatch between counted valid neighbors ({total_valid_neighbors_counted}) and final compact_data size ({len(compact_neighbors_data)})!", file=sys.stderr)
valid_check_passed = False
if expected_valid_count != len(compact_neighbors_data):
print(f"Error: Mismatch between NumPy count of valid neighbors ({expected_valid_count}) and final compact_data size ({len(compact_neighbors_data)})!", file=sys.stderr)
valid_check_passed = False
else:
print(f" OK: Total valid neighbors = {len(compact_neighbors_data)}")
# Check 2: Final pointer indices consistency
print(f" Checking final pointer indices...")
if compact_node_offsets_np[ntotal] != len(compact_level_ptr):
print(f"Error: Final node offset ({compact_node_offsets_np[ntotal]}) doesn't match level_ptr size ({len(compact_level_ptr)})!", file=sys.stderr)
valid_check_passed = False
if (len(compact_level_ptr) > 0 and compact_level_ptr[-1] != len(compact_neighbors_data)) or \
(len(compact_level_ptr) == 0 and len(compact_neighbors_data) != 0):
last_ptr = compact_level_ptr[-1] if len(compact_level_ptr) > 0 else -1
print(f"Error: Last level pointer ({last_ptr}) doesn't match compact_data size ({len(compact_neighbors_data)})!", file=sys.stderr)
valid_check_passed = False
else:
print(f" OK: Final pointers match data size.")
if not valid_check_passed:
print("Error: Validation checks failed. Output file might be incorrect.", file=sys.stderr)
# Optional: Exit here if validation fails
# return False
# --- Explicitly delete large intermediate arrays ---
print(f"[{time.time() - start_time:.2f}s] Deleting original neighbors and offsets arrays...")
del neighbors_np
del offsets_np
gc.collect()
print(f" CSR Stats: |data|={len(compact_neighbors_data)}, |level_ptr|={len(compact_level_ptr)}")
# --- Write CSR HNSW graph data using unified function ---
print(f"[{time.time() - start_time:.2f}s] Writing CSR HNSW graph data in FAISS-compatible order...")
            # Determine the storage fourcc to write: fall back to NULL when the
            # input had no storage section or when embeddings are being pruned.
            if prune_embeddings or storage_fourcc is None:
                output_storage_fourcc = NULL_INDEX_FOURCC
            else:
                output_storage_fourcc = storage_fourcc
            if prune_embeddings:
                print("  Pruning embeddings: Writing NULL storage marker.")
                storage_data = b''
# Use the unified write function
write_compact_format(f_out, original_hnsw_data, assign_probas_np, cum_nneighbor_per_level_np,
levels_np, compact_level_ptr, compact_node_offsets_np,
compact_neighbors_data, output_storage_fourcc, storage_data if not prune_embeddings else b'')
# Clean up memory
del assign_probas_np, cum_nneighbor_per_level_np, levels_np
del compact_neighbors_data, compact_level_ptr, compact_node_offsets_np
gc.collect()
end_time = time.time()
print(f"[{end_time - start_time:.2f}s] Conversion complete.")
return True
except FileNotFoundError:
print(f"Error: Input file not found: {input_filename}", file=sys.stderr)
return False
except MemoryError as e:
print(f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.", file=sys.stderr)
# Clean up potentially partially written output file?
try: os.remove(output_filename)
except OSError: pass
return False
except EOFError as e:
print(f"Error: Reached end of file unexpectedly reading {input_filename}. {e}", file=sys.stderr)
try: os.remove(output_filename)
except OSError: pass
return False
except Exception as e:
print(f"An unexpected error occurred during conversion: {e}", file=sys.stderr)
import traceback
traceback.print_exc()
try:
os.remove(output_filename)
except OSError: pass
return False
# Ensure neighbors_np is deleted even if an error occurs after its allocation
finally:
if 'neighbors_np' in locals() and neighbors_np is not None:
del neighbors_np
gc.collect()
# --- Script Execution ---
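# Example invocation (illustrative paths; adjust to your own files):
#   python convert_to_csr.py my_index.index my_index.csr.index
#   python convert_to_csr.py my_index.index my_index.csr.index --keep-embeddings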
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Convert a Faiss IndexHNSWFlat file to a CSR-based HNSW graph file.")
parser.add_argument("input_index_file", help="Path to the input IndexHNSWFlat file")
parser.add_argument("output_csr_graph_file", help="Path to write the output CSR HNSW graph file")
parser.add_argument("--prune-embeddings", action="store_true", default=True,
help="Prune embedding storage (write NULL storage marker)")
parser.add_argument("--keep-embeddings", action="store_true",
help="Keep embedding storage (overrides --prune-embeddings)")
args = parser.parse_args()
if not os.path.exists(args.input_index_file):
print(f"Error: Input file not found: {args.input_index_file}", file=sys.stderr)
sys.exit(1)
if os.path.abspath(args.input_index_file) == os.path.abspath(args.output_csr_graph_file):
print(f"Error: Input and output filenames cannot be the same.", file=sys.stderr)
sys.exit(1)
prune_embeddings = args.prune_embeddings and not args.keep_embeddings
success = convert_hnsw_graph_to_csr(args.input_index_file, args.output_csr_graph_file, prune_embeddings)
if not success:
sys.exit(1)


@@ -3,7 +3,7 @@ import os
import json
import struct
from pathlib import Path
from typing import Dict
from typing import Dict, Any
import contextlib
import threading
import time
@@ -12,9 +12,7 @@ import socket
import subprocess
import sys
# File: packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
# ... (other imports unchanged) ...
from .convert_to_csr import convert_hnsw_graph_to_csr
from leann.registry import register_backend
from leann.interface import (
@@ -28,7 +26,7 @@ def get_metric_map():
return {
"mips": faiss.METRIC_INNER_PRODUCT,
"l2": faiss.METRIC_L2,
"cosine": faiss.METRIC_INNER_PRODUCT, # Will need normalization
"cosine": faiss.METRIC_INNER_PRODUCT,
}
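# Note: "cosine" deliberately maps to METRIC_INNER_PRODUCT; the builder
# L2-normalizes vectors first (see faiss.normalize_L2 in HNSWBuilder.build),
# and inner product on unit-length vectors equals cosine similarity.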
def _check_port(port: int) -> bool:
@@ -69,12 +67,11 @@ class HNSWEmbeddingServerManager:
try:
command = [
sys.executable,
"-m", "packages.leann-backend-hnsw.src.leann_backend_hnsw.hnsw_embedding_server",
"-m", "leann_backend_hnsw.hnsw_embedding_server",
"--zmq-port", str(port),
"--model-name", model_name
]
# Add passages file if provided
if passages_file:
command.extend(["--passages-file", str(passages_file)])
@@ -172,7 +169,29 @@ class HNSWBackend(LeannBackendFactoryInterface):
class HNSWBuilder(LeannBackendBuilderInterface):
def __init__(self, **kwargs):
self.build_params = kwargs
self.build_params = kwargs.copy()
# --- Configuration defaults with standardized names ---
# Apply defaults and write them back to the build_params dict
# so they can be saved in the metadata file by LeannBuilder.
self.is_compact = self.build_params.setdefault("is_compact", True)
self.is_recompute = self.build_params.setdefault("is_recompute", True) # Default: prune embeddings
# --- Additional Options ---
self.is_skip_neighbors = self.build_params.setdefault("is_skip_neighbors", False)
self.disk_cache_ratio = self.build_params.setdefault("disk_cache_ratio", 0.0)
self.external_storage_path = self.build_params.get("external_storage_path", None)
# --- Standard HNSW parameters ---
self.M = self.build_params.setdefault("M", 32)
self.efConstruction = self.build_params.setdefault("efConstruction", 200)
self.distance_metric = self.build_params.setdefault("distance_metric", "mips")
if self.is_skip_neighbors and not self.is_compact:
raise ValueError("is_skip_neighbors can only be used with is_compact=True")
if self.is_recompute and not self.is_compact:
raise ValueError("is_recompute requires is_compact=True for efficiency")
def build(self, data: np.ndarray, index_path: str, **kwargs):
"""Build HNSW index using FAISS"""
@@ -189,97 +208,297 @@ class HNSWBuilder(LeannBackendBuilderInterface):
if not data.flags['C_CONTIGUOUS']:
data = np.ascontiguousarray(data)
build_kwargs = {**self.build_params, **kwargs}
metric_str = build_kwargs.get("distance_metric", "mips").lower()
metric_str = self.distance_metric.lower()
metric_enum = get_metric_map().get(metric_str)
        print(f"INFO: metric_str={metric_str}, metric_enum={metric_enum}")
if metric_enum is None:
raise ValueError(f"Unsupported distance_metric '{metric_str}'.")
# HNSW parameters
M = build_kwargs.get("M", 32) # Max connections per layer
efConstruction = build_kwargs.get("efConstruction", 200) # Size of the dynamic candidate list for construction
M = self.M
efConstruction = self.efConstruction
dim = data.shape[1]
print(f"INFO: Building HNSW index for {data.shape[0]} vectors with metric {metric_enum}...")
try:
            # Create the HNSW index (same constructor for both inner-product and L2 metrics)
            index = faiss.IndexHNSWFlat(dim, M, metric_enum)
# Set construction parameters
index.hnsw.efConstruction = efConstruction
# Normalize vectors if using cosine similarity
if metric_str == "cosine":
faiss.normalize_L2(data)
# Add vectors to index
            print(f"INFO: Adding {data.shape[0]} vectors to the index...")
            index.add(data.shape[0], faiss.swig_ptr(data))
            print("INFO: Vectors added to the index.")
# Save index
index_file = index_dir / f"{index_prefix}.index"
faiss.write_index(index, str(index_file))
print(f"✅ HNSW index built successfully at '{index_file}'")
if self.is_compact:
self._convert_to_csr(index_file)
# Generate passages file for recompute mode
if self.is_recompute:
self._generate_passages_file(index_dir, index_prefix, **kwargs)
except Exception as e:
print(f"💥 ERROR: HNSW index build failed. Exception: {e}")
raise
def _convert_to_csr(self, index_file: Path):
"""Convert built index to CSR format"""
try:
mode_str = "CSR-pruned" if self.is_recompute else "CSR-standard"
print(f"INFO: Converting HNSW index to {mode_str} format...")
csr_temp_file = index_file.with_suffix(".csr.tmp")
success = convert_hnsw_graph_to_csr(
str(index_file),
str(csr_temp_file),
prune_embeddings=self.is_recompute
)
if success:
print("✅ CSR conversion successful.")
import shutil
shutil.move(str(csr_temp_file), str(index_file))
print(f"INFO: Replaced original index with {mode_str} version at '{index_file}'")
else:
# Clean up and fail fast
if csr_temp_file.exists():
os.remove(csr_temp_file)
raise RuntimeError("CSR conversion failed - cannot proceed with compact format")
except Exception as e:
print(f"💥 ERROR: CSR conversion failed. Exception: {e}")
raise
def _generate_passages_file(self, index_dir: Path, index_prefix: str, **kwargs):
"""Generate passages file for recompute mode"""
try:
chunks = kwargs.get('chunks', [])
if not chunks:
print("INFO: No chunks data provided, skipping passages file generation")
return
# Generate node_id to text mapping
passages_data = {}
for node_id, chunk in enumerate(chunks):
passages_data[str(node_id)] = chunk["text"]
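            # Resulting JSON shape (illustrative): {"0": "text of chunk 0", "1": "text of chunk 1", ...}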
# Save passages file
passages_file = index_dir / f"{index_prefix}.passages.json"
with open(passages_file, 'w', encoding='utf-8') as f:
json.dump(passages_data, f, ensure_ascii=False, indent=2)
print(f"✅ Generated passages file for recompute mode at '{passages_file}' ({len(passages_data)} passages)")
except Exception as e:
print(f"💥 ERROR: Failed to generate passages file. Exception: {e}")
            # Don't raise - the passages file is not critical for index building
class HNSWSearcher(LeannBackendSearcherInterface):
def _get_index_storage_status(self, index_file: Path) -> tuple[bool, bool]:
"""
Robustly determines the index's storage status by parsing the file.
Returns:
A tuple (is_compact, is_pruned).
"""
if not index_file.exists():
return False, False
with open(index_file, 'rb') as f:
try:
def read_struct(fmt):
size = struct.calcsize(fmt)
data = f.read(size)
if len(data) != size:
raise EOFError(f"File ended unexpectedly reading struct fmt '{fmt}'.")
return struct.unpack(fmt, data)[0]
def skip_vector(element_size):
count = read_struct('<Q')
f.seek(count * element_size, 1)
# 1. Read up to the compact flag
read_struct('<I'); read_struct('<i'); read_struct('<q');
read_struct('<q'); read_struct('<q'); read_struct('<?')
metric_type = read_struct('<i')
if metric_type > 1: read_struct('<f')
skip_vector(8); skip_vector(4); skip_vector(4)
# 2. Check if there's a compact flag byte
# Try to read the compact flag, but handle both old and new formats
pos_before_compact = f.tell()
try:
is_compact = read_struct('<?')
print(f"INFO: Detected is_compact flag as: {is_compact}")
except (EOFError, struct.error):
# Old format without compact flag - assume non-compact
f.seek(pos_before_compact)
is_compact = False
print(f"INFO: No compact flag found, assuming is_compact=False")
# 3. Read storage FourCC to determine if pruned
is_pruned = False
try:
if is_compact:
# For compact, we need to skip pointers and scalars to get to the storage FourCC
skip_vector(8) # level_ptr
skip_vector(8) # node_offsets
read_struct('<i'); read_struct('<i'); read_struct('<i');
read_struct('<i'); read_struct('<i')
storage_fourcc = read_struct('<I')
else:
# For non-compact, we need to read the flag probe, then skip offsets and neighbors
pos_before_probe = f.tell()
flag_byte = f.read(1)
if not (flag_byte and flag_byte == b'\x00'):
f.seek(pos_before_probe)
skip_vector(8); skip_vector(4) # offsets, neighbors
read_struct('<i'); read_struct('<i'); read_struct('<i');
read_struct('<i'); read_struct('<i')
# Now we are at the storage. The entire rest is storage blob.
storage_fourcc = struct.unpack('<I', f.read(4))[0]
NULL_INDEX_FOURCC = int.from_bytes(b'null', 'little')
if storage_fourcc == NULL_INDEX_FOURCC:
is_pruned = True
except (EOFError, struct.error):
# Cannot determine pruning status, assume not pruned
pass
print(f"INFO: Detected is_pruned as: {is_pruned}")
return is_compact, is_pruned
except (EOFError, struct.error) as e:
print(f"WARNING: Could not parse index file to detect format: {e}. Assuming standard, not pruned.")
return False, False
def __init__(self, index_path: str, **kwargs):
from . import faiss
path = Path(index_path)
index_dir = path.parent
index_prefix = path.stem
metric_str = kwargs.get("distance_metric", "mips").lower()
# Store configuration and paths for later use
self.config = kwargs.copy()
self.config["index_path"] = index_path
self.index_dir = index_dir
self.index_prefix = index_prefix
metric_str = self.config.get("distance_metric", "mips").lower()
metric_enum = get_metric_map().get(metric_str)
if metric_enum is None:
raise ValueError(f"Unsupported distance_metric '{metric_str}'.")
dimensions = kwargs.get("dimensions")
dimensions = self.config.get("dimensions")
if not dimensions:
raise ValueError("Vector dimension not provided to HNSWSearcher.")
try:
# Load FAISS HNSW index
index_file = index_dir / f"{index_prefix}.index"
if not index_file.exists():
raise FileNotFoundError(f"HNSW index file not found at {index_file}")
self._index = faiss.read_index(str(index_file))
self.metric_str = metric_str
self.embedding_server_manager = HNSWEmbeddingServerManager()
print("✅ HNSW index loaded successfully.")
except Exception as e:
print(f"💥 ERROR: Failed to load HNSW index. Exception: {e}")
raise
index_file = index_dir / f"{index_prefix}.index"
if not index_file.exists():
raise FileNotFoundError(f"HNSW index file not found at {index_file}")
def search(self, query: np.ndarray, top_k: int, **kwargs) -> Dict[str, any]:
self.is_compact, self.is_pruned = self._get_index_storage_status(index_file)
# Validate configuration constraints
if not self.is_compact and self.config.get("is_skip_neighbors", False):
raise ValueError("is_skip_neighbors can only be used with is_compact=True")
if self.config.get("is_recompute", False) and self.config.get("external_storage_path"):
raise ValueError("Cannot use both is_recompute and external_storage_path simultaneously")
hnsw_config = faiss.HNSWIndexConfig()
hnsw_config.is_compact = self.is_compact
# Apply additional configuration options with strict validation
hnsw_config.is_skip_neighbors = self.config.get("is_skip_neighbors", False)
# If index is pruned, force recompute mode regardless of user setting
hnsw_config.is_recompute = self.is_pruned or self.config.get("is_recompute", False)
hnsw_config.disk_cache_ratio = self.config.get("disk_cache_ratio", 0.0)
hnsw_config.external_storage_path = self.config.get("external_storage_path")
hnsw_config.zmq_port = self.config.get("zmq_port", 5557)
# CRITICAL ASSERTION: If index is pruned, recompute MUST be enabled
if self.is_pruned and not hnsw_config.is_recompute:
raise RuntimeError("Index is pruned (embeddings removed) but recompute is disabled. This is impossible - recompute must be enabled for pruned indices.")
print(f"INFO: Loading index with is_compact={self.is_compact}, is_pruned={self.is_pruned}")
print(f"INFO: Config - skip_neighbors={hnsw_config.is_skip_neighbors}, recompute={hnsw_config.is_recompute}")
self._index = faiss.read_index(str(index_file), faiss.IO_FLAG_MMAP, hnsw_config)
if self.is_compact:
print("✅ Compact CSR format HNSW index loaded successfully.")
else:
print("✅ Standard HNSW index loaded successfully.")
self.metric_str = metric_str
self.embedding_server_manager = HNSWEmbeddingServerManager()
def _get_index_file(self, index_dir: Path, index_prefix: str) -> Path:
"""Get the appropriate index file path based on format"""
# We always use the same filename now, format is detected internally
return index_dir / f"{index_prefix}.index"
def search(self, query: np.ndarray, top_k: int, **kwargs) -> Dict[str, Any]:
"""Search using HNSW index with optional recompute functionality"""
from . import faiss
ef = kwargs.get("ef", 200) # Size of the dynamic candidate list for search
# Merge config with search-time kwargs
search_config = self.config.copy()
search_config.update(kwargs)
ef = search_config.get("ef", 200) # Size of the dynamic candidate list for search
# Recompute parameters
recompute_neighbor_embeddings = kwargs.get("recompute_neighbor_embeddings", False)
zmq_port = kwargs.get("zmq_port", 5556)
embedding_model = kwargs.get("embedding_model", "sentence-transformers/all-mpnet-base-v2")
passages_file = kwargs.get("passages_file", None)
zmq_port = search_config.get("zmq_port", 5557)
embedding_model = search_config.get("embedding_model", "sentence-transformers/all-mpnet-base-v2")
passages_file = search_config.get("passages_file", None)
if recompute_neighbor_embeddings:
print(f"INFO: HNSW ZMQ mode enabled - ensuring embedding server is running")
# For recompute mode, try to find the passages file automatically
if self.is_pruned and not passages_file:
potential_passages_file = self.index_dir / f"{self.index_prefix}.passages.json"
print(f"DEBUG: Checking for passages file at: {potential_passages_file}")
if potential_passages_file.exists():
passages_file = str(potential_passages_file)
print(f"INFO: Found passages file for recompute mode: {passages_file}")
else:
print(f"WARNING: No passages file found for recompute mode at {potential_passages_file}")
# If index is pruned (embeddings removed), we MUST start embedding server for recompute
if self.is_pruned:
print(f"INFO: Index is pruned - starting embedding server for recompute")
if not self.embedding_server_manager.start_server(zmq_port, embedding_model, passages_file):
print(f"WARNING: Failed to start HNSW embedding server, falling back to standard search")
kwargs['recompute_neighbor_embeddings'] = False
# CRITICAL: Check passages file exists - fail fast if not
if not passages_file:
raise RuntimeError(f"FATAL: Index is pruned but no passages file found. Cannot proceed with recompute mode.")
# Check if server is already running first
if _check_port(zmq_port):
print(f"INFO: Embedding server already running on port {zmq_port}")
else:
if not self.embedding_server_manager.start_server(zmq_port, embedding_model, passages_file):
raise RuntimeError(f"Failed to start HNSW embedding server on port {zmq_port}")
# Give server extra time to fully initialize
print(f"INFO: Waiting for embedding server to fully initialize...")
time.sleep(3)
# Final verification
if not _check_port(zmq_port):
raise RuntimeError(f"Embedding server failed to start listening on port {zmq_port}")
else:
print(f"INFO: Index has embeddings stored - no recompute needed")
if query.dtype != np.float32:
query = query.astype(np.float32)
@@ -299,23 +518,14 @@ class HNSWSearcher(LeannBackendSearcherInterface):
distances = np.empty((batch_size, top_k), dtype=np.float32)
labels = np.empty((batch_size, top_k), dtype=np.int64)
if recompute_neighbor_embeddings:
# Use custom search with recompute
# This would require implementing custom HNSW search logic
# For now, we'll fall back to standard search
print("WARNING: Recompute functionality for HNSW not yet implemented, using standard search")
self._index.search(query.shape[0], faiss.swig_ptr(query), top_k, faiss.swig_ptr(distances), faiss.swig_ptr(labels))
else:
# Standard FAISS search using SWIG API
self._index.search(query.shape[0], faiss.swig_ptr(query), top_k, faiss.swig_ptr(distances), faiss.swig_ptr(labels))
# Use standard FAISS search - recompute is handled internally by FAISS
self._index.search(query.shape[0], faiss.swig_ptr(query), top_k, faiss.swig_ptr(distances), faiss.swig_ptr(labels))
return {"labels": labels, "distances": distances}
except Exception as e:
print(f"💥 ERROR: HNSW search failed. Exception: {e}")
batch_size = query.shape[0]
return {"labels": np.full((batch_size, top_k), -1, dtype=np.int64),
"distances": np.full((batch_size, top_k), float('inf'), dtype=np.float32)}
raise
def __del__(self):
if hasattr(self, 'embedding_server_manager'):


@@ -101,7 +101,9 @@ def create_hnsw_embedding_server(
model_name: Transformer model name
custom_max_length_param: Custom max sequence length
"""
print(f"Loading tokenizer for {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
print(f"Tokenizer loaded successfully!")
# Device setup
mps_available = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
@@ -122,7 +124,9 @@ def create_hnsw_embedding_server(
# Load model to the appropriate device
print(f"Starting HNSW server on port {zmq_port} with model {model_name}")
print(f"Loading model {model_name}... (this may take a while if downloading)")
model = AutoModel.from_pretrained(model_name).to(device).eval()
print(f"Model {model_name} loaded successfully!")
# Check port availability
import socket
@@ -364,13 +368,14 @@ def create_hnsw_embedding_server(
missing_ids = []
with lookup_timer.timing():
for nid in node_ids:
txtinfo = passages[nid]
if txtinfo is None or txtinfo["text"] == "":
print(f"Warning: Passage with ID {nid} not found")
missing_ids.append(nid)
txt = ""
else:
txt = txtinfo["text"]
try:
txtinfo = passages[nid]
if txtinfo is None or txtinfo["text"] == "":
raise RuntimeError(f"FATAL: Passage with ID {nid} not found - failing fast")
else:
txt = txtinfo["text"]
except (KeyError, IndexError):
raise RuntimeError(f"FATAL: Passage with ID {nid} not found - failing fast")
texts.append(txt)
lookup_timer.print_elapsed()
@@ -450,13 +455,14 @@ def create_hnsw_embedding_server(
missing_ids = []
with lookup_timer.timing():
for nid in node_ids:
txtinfo = passages[nid]
if txtinfo is None or txtinfo["text"] == "":
print(f"Warning: Passage with ID {nid} not found")
missing_ids.append(nid)
txt = ""
else:
txt = txtinfo["text"]
try:
txtinfo = passages[nid]
if txtinfo is None or txtinfo["text"] == "":
raise RuntimeError(f"FATAL: Passage with ID {nid} not found - failing fast")
else:
txt = txtinfo["text"]
except (KeyError, IndexError):
raise RuntimeError(f"FATAL: Passage with ID {nid} not found - failing fast")
texts.append(txt)
lookup_timer.print_elapsed()