"""Split a serialized FAISS HNSW index file into a graph file and a storage file.

Given ``<name>.<ext>``, the script writes ``<name>.hnsw_graph`` (index header
plus HNSW graph structure) and ``<name>.flat_storage`` (the embedded storage
index, copied verbatim).

NOTE(review): the copy of this script that was reviewed had lost every
``<...>`` span (struct format strings, parts of ``read_vector`` and most of
``separate_hnsw_flat``).  Those spans were rebuilt from the FAISS on-disk
layout for IndexHNSW; every rebuilt section is marked with a NOTE(review)
comment and should be confirmed against the original file.
"""
import os
import shutil
import struct
import sys
from contextlib import ExitStack

import numpy as np  # noqa: F401 - kept from the original import list

INDEX_FLAT_L2_FOURCC = int.from_bytes(b'IxF2', 'little')
INDEX_FLAT_IP_FOURCC = int.from_bytes(b'IxFI', 'little')
INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b'IHNf', 'little')
INDEX_HNSW_PQ_FOURCC = int.from_bytes(b'IHNp', 'little')
INDEX_HNSW_SQ_FOURCC = int.from_bytes(b'IHNs', 'little')
INDEX_HNSW_2L_FOURCC = int.from_bytes(b'IHN2', 'little')
INDEX_HNSW_CAGRA_FOURCC = int.from_bytes(b'IHNc', 'little')
NULL_INDEX_FOURCC = int.from_bytes(b'null', 'little')

HNSW_FOURCCS = {
    INDEX_HNSW_FLAT_FOURCC,
    INDEX_HNSW_PQ_FOURCC,
    INDEX_HNSW_SQ_FOURCC,
    INDEX_HNSW_2L_FOURCC,
    INDEX_HNSW_CAGRA_FOURCC,
}
FLAT_FOURCCS = {INDEX_FLAT_L2_FOURCC, INDEX_FLAT_IP_FOURCC}

# HNSW struct vectors in the order they are read, with their element format.
# The order and the comments come from the surviving text; the format codes
# were stripped from the reviewed copy.
# NOTE(review): format codes assume double / int32 / size_t(64-bit) / int32.
_HNSW_VECTORS = (
    ("assign_probas", '<d'),
    ("cum_nneighbor_per_level", '<i'),
    ("levels", '<i'),      # node levels
    ("offsets", '<Q'),     # offsets for neighbors
    ("neighbors", '<i'),   # storage_idx_t -> int32_t typically
)

# Scalar fields that follow the vectors.
# NOTE(review): rebuilt from the FAISS HNSW layout (five int32 values); this
# part of the original script was lost - confirm.
_HNSW_SCALARS = (
    "entry_point",
    "max_level",
    "efConstruction",
    "efSearch",
    "upper_beam",
)


# --- Helper functions for reading binary data ---

def _read_exact(f, size):
    """Read exactly ``size`` bytes from ``f`` or raise ``EOFError``."""
    data = f.read(size)
    if len(data) != size:
        raise EOFError("File ended unexpectedly.")
    return data


def read_struct(f, fmt):
    """Reads data according to the struct format.

    Args:
        f: binary file object positioned at the value.
        fmt: ``struct`` format string; only the first unpacked field is
            returned.

    Returns:
        The first value unpacked from the bytes read.

    Raises:
        EOFError: if fewer than ``struct.calcsize(fmt)`` bytes are available.
    """
    return struct.unpack(fmt, _read_exact(f, struct.calcsize(fmt)))[0]


def read_vector(f, element_fmt):
    """Reads a vector (size followed by data).

    Args:
        f: binary file object positioned at the vector's element count.
        element_fmt: ``struct`` format of one element; used only for its size.

    Returns:
        ``(count, data)`` where ``data`` holds the raw, undecoded element
        bytes.

    Raises:
        EOFError: if the file ends before the count or the data is complete.
    """
    # NOTE(review): the count format was stripped from the reviewed copy;
    # a 64-bit unsigned little-endian size_t is assumed.
    count = read_struct(f, '<Q')
    data = _read_exact(f, count * struct.calcsize(element_fmt))
    return count, data


def _copy_struct(f_in, f_out, fmt):
    """Copy one fixed-size field verbatim to ``f_out``; return its value."""
    raw = _read_exact(f_in, struct.calcsize(fmt))
    # The raw bytes are written rather than re-packed so the output is
    # byte-identical even for unusual encodings (e.g. a bool byte != 0/1).
    f_out.write(raw)
    return struct.unpack(fmt, raw)[0]


def _copy_vector(f_in, f_out, element_fmt):
    """Copy one size-prefixed vector to ``f_out``; return its element count."""
    count, data = read_vector(f_in, element_fmt)
    f_out.write(struct.pack('<Q', count))
    f_out.write(data)
    return count


def _fourcc_repr(value):
    """Render a fourcc integer as its four characters for messages."""
    return repr(value.to_bytes(4, 'little').decode('ascii', 'replace'))


def _split_index(f_in, f_graph_out, f_storage_out):
    """Stream one HNSW index from ``f_in`` into the two output files.

    Raises:
        ValueError: if the input is not an HNSW index or has no storage.
        EOFError: if the input is truncated.
    """
    # 1. HNSW fourcc.
    # NOTE(review): rebuilt section.  Some FAISS versions write extra fields
    # between the header and the HNSW struct for 'IHN2' / 'IHNc' indexes;
    # those are not handled here - confirm which fourccs must be supported.
    hnsw_fourcc = read_struct(f_in, '<I')
    if hnsw_fourcc not in HNSW_FOURCCS:
        raise ValueError(
            f"not an HNSW index (fourcc {_fourcc_repr(hnsw_fourcc)})")
    f_graph_out.write(struct.pack('<I', hnsw_fourcc))

    # 2. Index header, copied into the graph file.
    # NOTE(review): field list rebuilt from the FAISS index header
    # (d, ntotal, two dummy idx_t, is_trained, metric_type[, metric_arg]).
    d = _copy_struct(f_in, f_graph_out, '<i')
    ntotal = _copy_struct(f_in, f_graph_out, '<q')
    _copy_struct(f_in, f_graph_out, '<q')  # dummy
    _copy_struct(f_in, f_graph_out, '<q')  # dummy
    is_trained = _copy_struct(f_in, f_graph_out, '<?')  # bool -> 1 byte
    metric_type = _copy_struct(f_in, f_graph_out, '<i')
    print(f" Header: d={d}, ntotal={ntotal}, is_trained={is_trained}, "
          f"metric_type={metric_type}")
    # metric_arg is only present in the file for metric types above 1.
    if metric_type > 1:
        metric_arg = _copy_struct(f_in, f_graph_out, '<f')
        print(f" Metric arg: {metric_arg}")

    # 3. Read and write HNSW struct data
    print(" Reading HNSW graph data...")
    for label, element_fmt in _HNSW_VECTORS:
        count = _copy_vector(f_in, f_graph_out, element_fmt)
        print(f" {label}: {count} elements")
    for label in _HNSW_SCALARS:
        value = _copy_struct(f_in, f_graph_out, '<i')
        print(f" {label}: {value}")

    # 4. Storage index.
    # NOTE(review): rebuilt section.  The graph file is terminated with the
    # 'null' fourcc in place of the storage index, and the storage index
    # (fourcc included) is copied unchanged to the storage file - confirm
    # this matches what the consumers of both files expect.
    storage_fourcc = read_struct(f_in, '<I')
    if storage_fourcc == NULL_INDEX_FOURCC:
        raise ValueError("index has no embedded storage to separate")
    if storage_fourcc not in FLAT_FOURCCS:
        print(f"Warning: storage is not a flat index "
              f"(fourcc {_fourcc_repr(storage_fourcc)}); copying it as-is.",
              file=sys.stderr)
    f_graph_out.write(struct.pack('<I', NULL_INDEX_FOURCC))
    f_storage_out.write(struct.pack('<I', storage_fourcc))
    # Stream the remainder so large vector payloads are never held in memory.
    shutil.copyfileobj(f_in, f_storage_out)


def _remove_outputs(paths):
    """Best-effort removal of partially written output files."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            # Cleanup must not mask the error that is already being reported.
            pass


# --- Main separation logic ---

def separate_hnsw_flat(input_filename, graph_output_filename,
                       storage_output_filename):
    """Separate an HNSW index file into graph and storage components.

    NOTE(review): the original ``def`` line was lost; the parameter names are
    reconstructed (the visible call site passes three positional paths).

    Args:
        input_filename: path of the serialized index to read.
        graph_output_filename: path that receives header + HNSW graph data.
        storage_output_filename: path that receives the storage index.

    Returns:
        True on success.  False if the input cannot be read, is truncated or
        is not a supported index, or if an output path would overwrite the
        input; errors are reported on stderr and partial outputs created by
        this call are removed.
    """
    print(f"Processing: {input_filename}")

    # Opening an output with 'wb' truncates it, so an output path equal to
    # the input (or to the other output) would destroy data before any read.
    source = os.path.abspath(input_filename)
    targets = [os.path.abspath(graph_output_filename),
               os.path.abspath(storage_output_filename)]
    if source in targets or targets[0] == targets[1]:
        print("Error: input and output paths must all be different.",
              file=sys.stderr)
        return False

    created = []
    try:
        with ExitStack() as stack:
            f_in = stack.enter_context(open(input_filename, 'rb'))
            f_graph_out = stack.enter_context(
                open(graph_output_filename, 'wb'))
            created.append(graph_output_filename)
            f_storage_out = stack.enter_context(
                open(storage_output_filename, 'wb'))
            created.append(storage_output_filename)
            _split_index(f_in, f_graph_out, f_storage_out)
    except (EOFError, OSError, ValueError, struct.error) as exc:
        print(f"Error: failed to separate {input_filename}: {exc}",
              file=sys.stderr)
        # Files are closed here (the ExitStack has exited), so removal is
        # safe on every platform.
        _remove_outputs(created)
        return False

    print(f" Graph written to: {graph_output_filename}")
    print(f" Storage written to: {storage_output_filename}")
    return True


if __name__ == "__main__":
    if len(sys.argv) != 2:
        # NOTE(review): the usage text was partly lost; wording reconstructed.
        print(f"Usage: python {sys.argv[0]} <input_index_file>")
        sys.exit(1)

    input_file = sys.argv[1]
    base_name = os.path.splitext(input_file)[0]
    graph_file = base_name + ".hnsw_graph"
    storage_file = base_name + ".flat_storage"

    if not os.path.exists(input_file):
        print(f"Error: Input file not found: {input_file}", file=sys.stderr)
        sys.exit(1)

    success = separate_hnsw_flat(input_file, graph_file, storage_file)
    if not success:
        sys.exit(1)