Initial commit

yichuan520030910320
2025-06-30 09:05:05 +00:00
commit 46f6cc100b
1231 changed files with 278432 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
add_executable(demo_imi_flat EXCLUDE_FROM_ALL demo_imi_flat.cpp)
target_link_libraries(demo_imi_flat PRIVATE faiss)
add_executable(demo_imi_pq EXCLUDE_FROM_ALL demo_imi_pq.cpp)
target_link_libraries(demo_imi_pq PRIVATE faiss)
add_executable(demo_ivfpq_indexing EXCLUDE_FROM_ALL demo_ivfpq_indexing.cpp)
target_link_libraries(demo_ivfpq_indexing PRIVATE faiss)
add_executable(demo_nndescent EXCLUDE_FROM_ALL demo_nndescent.cpp)
target_link_libraries(demo_nndescent PRIVATE faiss)
add_executable(demo_sift1M EXCLUDE_FROM_ALL demo_sift1M.cpp)
target_link_libraries(demo_sift1M PRIVATE faiss)
add_executable(demo_weighted_kmeans EXCLUDE_FROM_ALL demo_weighted_kmeans.cpp)
target_link_libraries(demo_weighted_kmeans PRIVATE faiss)
add_executable(demo_residual_quantizer EXCLUDE_FROM_ALL demo_residual_quantizer.cpp)
target_link_libraries(demo_residual_quantizer PRIVATE faiss)
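# Note: the demo targets are marked EXCLUDE_FROM_ALL, so they are not built by
# default; build one explicitly, e.g. with `cmake --build . --target demo_sift1M`.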

View File

@@ -0,0 +1,28 @@
Demos for a few Faiss functionalities
=====================================
demo_auto_tune.py
-----------------
Demonstrates the auto-tuning functionality of Faiss
demo_ondisk_ivf.py
------------------
Shows how to construct a Faiss index that stores the inverted file
data on disk, e.g. when it does not fit in RAM. The script works on a
small dataset (sift1M) for demonstration and proceeds in stages:
0: train on the dataset
1-4: build 4 indexes, each containing 1/4 of the dataset. This can be
done in parallel on several machines
5: merge the 4 indexes into one that is written directly to disk
(it need not fit in RAM)
6: load and test the index
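Since each stage is selected with a single command-line argument, a minimal sequential driver could look like the sketch below (illustrative only, not part of the demo; it assumes demo_ondisk_ivf.py and the sift1M/ data are in the working directory):

```python
import subprocess

# run the stages one after the other; stages 1-4 could instead be
# launched in parallel on several machines
for stage in range(7):
    subprocess.run(["python", "demo_ondisk_ivf.py", str(stage)], check=True)
```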

View File

@@ -0,0 +1,169 @@
#!/usr/bin/env python2
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import print_function
import os
import time
import numpy as np
try:
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot
graphical_output = True
except ImportError:
graphical_output = False
import faiss
#################################################################
# Small I/O functions
#################################################################
def ivecs_read(fname):
a = np.fromfile(fname, dtype="int32")
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
def plot_OperatingPoints(ops, nq, **kwargs):
ops = ops.optimal_pts
n = ops.size() * 2 - 1
pyplot.plot([ops.at( i // 2).perf for i in range(n)],
[ops.at((i + 1) // 2).t / nq * 1000 for i in range(n)],
**kwargs)
#################################################################
# prepare common data for all indexes
#################################################################
t0 = time.time()
print("load data")
xt = fvecs_read("sift1M/sift_learn.fvecs")
xb = fvecs_read("sift1M/sift_base.fvecs")
xq = fvecs_read("sift1M/sift_query.fvecs")
d = xt.shape[1]
print("load GT")
gt = ivecs_read("sift1M/sift_groundtruth.ivecs")
gt = gt.astype('int64')
k = gt.shape[1]
print("prepare criterion")
# criterion = 1-recall at 1
crit = faiss.OneRecallAtRCriterion(xq.shape[0], 1)
crit.set_groundtruth(None, gt)
crit.nnn = k
# indexes that are useful when there is no limitation on memory usage
unlimited_mem_keys = [
"IMI2x10,Flat", "IMI2x11,Flat",
"IVF4096,Flat", "IVF16384,Flat",
"PCA64,IMI2x10,Flat"]
# memory limited to 16 bytes / vector
keys_mem_16 = [
'IMI2x10,PQ16', 'IVF4096,PQ16',
'IMI2x10,PQ8+8', 'OPQ16_64,IMI2x10,PQ16'
]
# limited to 32 bytes / vector
keys_mem_32 = [
'IMI2x10,PQ32', 'IVF4096,PQ32', 'IVF16384,PQ32',
'IMI2x10,PQ16+16',
'OPQ32,IVF4096,PQ32', 'IVF4096,PQ16+16', 'OPQ16,IMI2x10,PQ16+16'
]
# indexes that can run on the GPU
keys_gpu = [
"PCA64,IVF4096,Flat",
"PCA64,Flat", "Flat", "IVF4096,Flat", "IVF16384,Flat",
"IVF4096,PQ32"]
keys_to_test = unlimited_mem_keys
use_gpu = False
if use_gpu:
# if this fails, it means that the GPU version was not compiled
assert faiss.StandardGpuResources, \
"Faiss was not compiled with GPU support, or loading _swigfaiss_gpu.so failed"
res = faiss.StandardGpuResources()
dev_no = 0
# remember results from other index types
op_per_key = []
# keep track of optimal operating points seen so far
op = faiss.OperatingPoints()
for index_key in keys_to_test:
print("============ key", index_key)
# make the index described by the key
index = faiss.index_factory(d, index_key)
if use_gpu:
# transfer to GPU (may be partial)
index = faiss.index_cpu_to_gpu(res, dev_no, index)
params = faiss.GpuParameterSpace()
else:
params = faiss.ParameterSpace()
params.initialize(index)
print("[%.3f s] train & add" % (time.time() - t0))
index.train(xt)
index.add(xb)
print("[%.3f s] explore op points" % (time.time() - t0))
# find operating points for this index
opi = params.explore(index, xq, crit)
print("[%.3f s] result operating points:" % (time.time() - t0))
opi.display()
# update best operating points so far
op.merge_with(opi, index_key + " ")
op_per_key.append((index_key, opi))
if graphical_output:
# graphical output (to tmp/ subdirectory)
fig = pyplot.figure(figsize=(12, 9))
pyplot.xlabel("1-recall at 1")
pyplot.ylabel("search time (ms/query, %d threads)" % faiss.omp_get_max_threads())
pyplot.gca().set_yscale('log')
pyplot.grid()
for i2, opi2 in op_per_key:
plot_OperatingPoints(opi2, crit.nq, label = i2, marker = 'o')
# plot_OperatingPoints(op, crit.nq, label = 'best', marker = 'o', color = 'r')
pyplot.legend(loc=2)
fig.savefig('tmp/demo_auto_tune.png')
print("[%.3f s] final result:" % (time.time() - t0))
op.display()

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys
import numpy as np
import faiss
from faiss.contrib.client_server import run_index_server, ClientIndex
#################################################################
# Small I/O functions
#################################################################
def ivecs_read(fname):
a = np.fromfile(fname, dtype='int32')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
#################################################################
# Main program
#################################################################
stage = int(sys.argv[1])
tmpdir = '/tmp/'
if stage == 0:
# train the index
xt = fvecs_read("sift1M/sift_learn.fvecs")
index = faiss.index_factory(xt.shape[1], "IVF4096,Flat")
print("training index")
index.train(xt)
print("write " + tmpdir + "trained.index")
faiss.write_index(index, tmpdir + "trained.index")
if 1 <= stage <= 4:
# add 1/4 of the database to 4 independent indexes
bno = stage - 1
xb = fvecs_read("sift1M/sift_base.fvecs")
i0, i1 = int(bno * xb.shape[0] / 4), int((bno + 1) * xb.shape[0] / 4)
index = faiss.read_index(tmpdir + "trained.index")
print("adding vectors %d:%d" % (i0, i1))
index.add_with_ids(xb[i0:i1], np.arange(i0, i1))
print("write " + tmpdir + "block_%d.index" % bno)
faiss.write_index(index, tmpdir + "block_%d.index" % bno)
machine_ports = [
('localhost', 12010),
('localhost', 12011),
('localhost', 12012),
('localhost', 12013),
]
v6 = False
if 5 <= stage <= 8:
# load an index slice and launch index
bno = stage - 5
fname = tmpdir + "block_%d.index" % bno
print("read " + fname)
index = faiss.read_index(fname)
port = machine_ports[bno][1]
run_index_server(index, port, v6=v6)
if stage == 9:
client_index = ClientIndex(machine_ports)
print('index size:', client_index.ntotal)
client_index.set_nprobe(16)
# load query vectors and ground-truth
xq = fvecs_read("sift1M/sift_query.fvecs")
gt = ivecs_read("sift1M/sift_groundtruth.ivecs")
D, I = client_index.search(xq, 5)
recall_at_1 = (I[:, :1] == gt[:, :1]).sum() / float(xq.shape[0])
print("recall@1: %.3f" % recall_at_1)

View File

@@ -0,0 +1,173 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import torch
import torch.distributed
import faiss
import faiss.contrib.torch_utils
from faiss.contrib.torch import clustering
from faiss.contrib import datasets
class DatasetAssignDistributedGPU(clustering.DatasetAssign):
"""
There is one instance per worker, each worker has a dataset shard.
The non-master workers do not run through the k-means function, so some
code has to be run on them to keep the workers in sync.
"""
def __init__(self, res, x, rank, nproc):
clustering.DatasetAssign.__init__(self, x)
self.res = res
self.rank = rank
self.nproc = nproc
self.device = x.device
n = len(x)
sizes = torch.zeros(nproc, device=self.device, dtype=torch.int64)
sizes[rank] = n
torch.distributed.all_gather(
[sizes[i:i + 1] for i in range(nproc)], sizes[rank:rank + 1])
self.sizes = sizes.cpu().numpy()
# begin & end of each shard
self.cs = np.zeros(nproc + 1, dtype='int64')
self.cs[1:] = np.cumsum(self.sizes)
def count(self):
return int(self.sizes.sum())
def int_to_slaves(self, i):
" broadcast an int to all workers "
rank = self.rank
tab = torch.zeros(1, device=self.device, dtype=torch.int64)
if rank == 0:
tab[0] = i
else:
assert i is None
torch.distributed.broadcast(tab, 0)
return tab.item()
def get_subset(self, indices):
rank = self.rank
assert rank == 0 or indices is None
len_indices = self.int_to_slaves(len(indices) if rank == 0 else None)
if rank == 0:
indices = torch.from_numpy(indices).to(self.device)
else:
indices = torch.zeros(
len_indices, dtype=torch.int64, device=self.device)
torch.distributed.broadcast(indices, 0)
# select subset of indices
i0, i1 = self.cs[rank], self.cs[rank + 1]
mask = torch.logical_and(indices < i1, indices >= i0)
output = torch.zeros(
len_indices, self.x.shape[1],
dtype=self.x.dtype, device=self.device)
output[mask] = self.x[indices[mask] - i0]
torch.distributed.reduce(output, 0) # sum
if rank == 0:
return output
else:
return None
def perform_search(self, centroids):
assert False, "should not be called"
def assign_to(self, centroids, weights=None):
assert weights is None
rank, nproc = self.rank, self.nproc
assert rank == 0 or centroids is None
nc = self.int_to_slaves(len(centroids) if rank == 0 else None)
if rank != 0:
centroids = torch.zeros(
nc, self.x.shape[1], dtype=self.x.dtype, device=self.device)
torch.distributed.broadcast(centroids, 0)
# perform search
D, I = faiss.knn_gpu(
self.res, self.x, centroids, 1, device=self.device.index)
I = I.ravel()
D = D.ravel()
sum_per_centroid = torch.zeros_like(centroids)
if weights is None:
sum_per_centroid.index_add_(0, I, self.x)
else:
sum_per_centroid.index_add_(0, I, self.x * weights[:, None])
torch.distributed.reduce(sum_per_centroid, 0)
if rank == 0:
# gather does not support tensors of different sizes
# should be implemented with point-to-point communication
assert np.all(self.sizes == self.sizes[0])
device = self.device
all_I = torch.zeros(self.count(), dtype=I.dtype, device=device)
all_D = torch.zeros(self.count(), dtype=D.dtype, device=device)
torch.distributed.gather(
I, [all_I[self.cs[r]:self.cs[r + 1]] for r in range(nproc)],
dst=0,
)
torch.distributed.gather(
D, [all_D[self.cs[r]:self.cs[r + 1]] for r in range(nproc)],
dst=0,
)
return all_I.cpu().numpy(), all_D, sum_per_centroid
else:
torch.distributed.gather(I, None, dst=0)
torch.distributed.gather(D, None, dst=0)
return None
if __name__ == "__main__":
torch.distributed.init_process_group(
backend="nccl",
)
rank = torch.distributed.get_rank()
nproc = torch.distributed.get_world_size()
# current version does only support shards of the same size
ds = datasets.SyntheticDataset(32, 10000, 0, 0, seed=1234 + rank)
x = ds.get_train()
device = torch.device(f"cuda:{rank}")
torch.cuda.set_device(device)
x = torch.from_numpy(x).to(device)
res = faiss.StandardGpuResources()
da = DatasetAssignDistributedGPU(res, x, rank, nproc)
k = 1000
niter = 25
if rank == 0:
print(f"sizes = {da.sizes}")
centroids, iteration_stats = clustering.kmeans(
k, da, niter=niter, return_stats=True)
print("clusters:", centroids.cpu().numpy())
else:
# make sure the iterations are aligned with master
da.get_subset(None)
for _ in range(niter):
da.assign_to(None)
torch.distributed.barrier()
print("Done")

View File

@@ -0,0 +1,155 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <sys/time.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexPQ.h>
double elapsed() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return tv.tv_sec + tv.tv_usec * 1e-6;
}
int main() {
double t0 = elapsed();
// dimension of the vectors to index
int d = 128;
// size of the database we plan to index
size_t nb = 1000 * 1000;
// make a set of nt training vectors in the unit cube
// (could be the database)
size_t nt = 100 * 1000;
//---------------------------------------------------------------
// Define the core quantizer
// We choose a multiple inverted index for faster training with less data
// and because it usually offers the best accuracy/speed trade-offs
//
// We assume here that the lifespan of this coarse quantizer will cover the
// lifespan of the inverted-file index IndexIVFFlat below.
// With dynamic allocation, one may give the responsibility to free the
// quantizer to the inverted-file index (with attribute do_delete_quantizer)
//
// Note: a regular clustering algorithm would be defined as:
// faiss::IndexFlatL2 coarse_quantizer (d);
//
// Use nhash=2 subquantizers to define the product coarse quantizer.
// Number of bits: we will have 2^nbits_subq centroids per subquantizer,
// meaning (2^nbits_subq)^nhash distinct inverted lists.
size_t nhash = 2;
size_t nbits_subq = int(log2(nb + 1) / 2); // good choice in general
size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
faiss::MultiIndexQuantizer coarse_quantizer(d, nhash, nbits_subq);
printf("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
nhash,
nbits_subq,
ncentroids,
nb);
// the coarse quantizer should not be dealloced before the index
faiss::MetricType metric = faiss::METRIC_L2; // can be METRIC_INNER_PRODUCT
faiss::IndexIVFFlat index(&coarse_quantizer, d, ncentroids, metric);
index.quantizer_trains_alone = true;
// define the number of probes. 2048 is for high-dim, overkill in practice
// Use 4-1024 depending on the trade-off speed accuracy that you want
index.nprobe = 2048;
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
{ // training
printf("[%.3f s] Generating %ld vectors in %dD for training\n",
elapsed() - t0,
nt,
d);
std::vector<float> trainvecs(nt * d);
for (size_t i = 0; i < nt * d; i++) {
trainvecs[i] = distrib(rng);
}
printf("[%.3f s] Training the index\n", elapsed() - t0);
index.verbose = true;
index.train(nt, trainvecs.data());
}
size_t nq;
std::vector<float> queries;
{ // populating the database
printf("[%.3f s] Building a dataset of %ld vectors to index\n",
elapsed() - t0,
nb);
std::vector<float> database(nb * d);
for (size_t i = 0; i < nb * d; i++) {
database[i] = distrib(rng);
}
printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
index.add(nb, database.data());
// remember a few elements from the database as queries
int i0 = 1234;
int i1 = 1244;
nq = i1 - i0;
queries.resize(nq * d);
for (int i = i0; i < i1; i++) {
for (int j = 0; j < d; j++) {
queries[(i - i0) * d + j] = database[i * d + j];
}
}
}
{ // searching the database
int k = 5;
printf("[%.3f s] Searching the %d nearest neighbors "
"of %ld vectors in the index\n",
elapsed() - t0,
k,
nq);
std::vector<faiss::idx_t> nns(k * nq);
std::vector<float> dis(k * nq);
index.search(nq, queries.data(), k, dis.data(), nns.data());
printf("[%.3f s] Query results (vector ids, then distances):\n",
elapsed() - t0);
for (int i = 0; i < nq; i++) {
printf("query %2d: ", i);
for (int j = 0; j < k; j++) {
printf("%7ld ", nns[j + i * k]);
}
printf("\n dis: ");
for (int j = 0; j < k; j++) {
printf("%7g ", dis[j + i * k]);
}
printf("\n");
}
}
return 0;
}
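For readers who prefer the Python API, a rough equivalent of the same IMI + IVFFlat pipeline is sketched below (not part of this commit; sizes are reduced compared to the C++ demo, and the `IMI2x8,Flat` factory string sets up the MultiIndexQuantizer coarse quantizer):

```python
import numpy as np
import faiss

d = 128
nb, nt = 100_000, 50_000         # smaller than the C++ demo to keep the sketch quick

rng = np.random.default_rng(123)
xt = rng.random((nt, d), dtype='float32')
xb = rng.random((nb, d), dtype='float32')

# "IMI2x8" = multi-index quantizer with nhash=2, nbits_subq=8 -> 2^16 inverted lists
index = faiss.index_factory(d, "IMI2x8,Flat")
index.nprobe = 64

index.train(xt)
index.add(xb)
D, I = index.search(xb[1234:1244], 5)    # queries taken from the database
print(I)
```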

View File

@@ -0,0 +1,207 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <sys/time.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexPQ.h>
#include <faiss/index_io.h>
double elapsed() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return tv.tv_sec + tv.tv_usec * 1e-6;
}
int main() {
double t0 = elapsed();
// dimension of the vectors to index
int d = 64;
// size of the database we plan to index
size_t nb = 1000 * 1000;
size_t add_bs = 10000; // size of the blocks to add
// make a set of nt training vectors in the unit cube
// (could be the database)
size_t nt = 100 * 1000;
//---------------------------------------------------------------
// Define the core quantizer
// We choose a multiple inverted index for faster training with less data
// and because it usually offers the best accuracy/speed trade-offs
//
// We assume here that the lifespan of this coarse quantizer will cover the
// lifespan of the inverted-file index IndexIVFPQ below.
// With dynamic allocation, one may give the responsibility to free the
// quantizer to the inverted-file index (with attribute do_delete_quantizer)
//
// Note: a regular clustering algorithm would be defined as:
// faiss::IndexFlatL2 coarse_quantizer (d);
//
// Use nhash=2 subquantizers to define the product coarse quantizer.
// Number of bits: we will have 2^nbits_subq centroids per subquantizer,
// meaning (2^nbits_subq)^nhash distinct inverted lists.
//
// The parameter bytes_per_code is determined by the memory
// constraint; the dataset will use nb * (bytes_per_code + 8)
// bytes.
//
// The parameter nbits_subq is determined by the size of the dataset to
// index.
//
size_t nhash = 2;
size_t nbits_subq = 9;
size_t ncentroids = 1 << (nhash * nbits_subq); // total # of centroids
int bytes_per_code = 16;
faiss::MultiIndexQuantizer coarse_quantizer(d, nhash, nbits_subq);
printf("IMI (%ld,%ld): %ld virtual centroids (target: %ld base vectors)",
nhash,
nbits_subq,
ncentroids,
nb);
// the coarse quantizer should not be dealloced before the index
// bytes_per_code = nb of bytes per PQ code (d must be a multiple of this)
// 8 = nb of bits per sub-code (almost always 8)
faiss::IndexIVFPQ index(
&coarse_quantizer, d, ncentroids, bytes_per_code, 8);
index.quantizer_trains_alone = true;
// define the number of probes. 2048 is for high-dim, overkill in practice
// Use 4-1024 depending on the trade-off speed accuracy that you want
index.nprobe = 2048;
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
{ // training.
// The distribution of the training vectors should be the same
// as the database vectors. It could be a sub-sample of the
// database vectors, if sampling is not biased. Here we just
// randomly generate the vectors.
printf("[%.3f s] Generating %ld vectors in %dD for training\n",
elapsed() - t0,
nt,
d);
std::vector<float> trainvecs(nt * d);
for (size_t i = 0; i < nt; i++) {
for (size_t j = 0; j < d; j++) {
trainvecs[i * d + j] = distrib(rng);
}
}
printf("[%.3f s] Training the index\n", elapsed() - t0);
index.verbose = true;
index.train(nt, trainvecs.data());
}
// the index can be re-loaded later with
// faiss::Index * idx = faiss::read_index("/tmp/trained_index.faissindex");
faiss::write_index(&index, "/tmp/trained_index.faissindex");
size_t nq;
std::vector<float> queries;
{ // populating the database
printf("[%.3f s] Building a dataset of %ld vectors to index\n",
elapsed() - t0,
nb);
std::vector<float> database(nb * d);
std::vector<faiss::idx_t> ids(nb);
for (size_t i = 0; i < nb; i++) {
for (size_t j = 0; j < d; j++) {
database[i * d + j] = distrib(rng);
}
ids[i] = 8760000000L + i;
}
printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
for (size_t begin = 0; begin < nb; begin += add_bs) {
size_t end = std::min(begin + add_bs, nb);
index.add_with_ids(
end - begin,
database.data() + d * begin,
ids.data() + begin);
}
// remember a few elements from the database as queries
int i0 = 1234;
int i1 = 1244;
nq = i1 - i0;
queries.resize(nq * d);
for (int i = i0; i < i1; i++) {
for (int j = 0; j < d; j++) {
queries[(i - i0) * d + j] = database[i * d + j];
}
}
}
// A few notes on the internal format of the index:
//
// - the posting lists for PQ codes are index.codes, which is a
// std::vector < std::vector<uint8_t> >
// if n is the length of posting list #i, codes[i] has length
// bytes_per_code * n
//
// - the corresponding ids are stored in index.ids
//
// - given a vector float *x, finding which k centroids are
//   closest to it (i.e. to find the nearest neighbors) can be done with
//
//   faiss::idx_t *centroid_ids = new faiss::idx_t[k];
//   float *distances = new float[k];
//   index.quantizer->search(1, x, k, distances, centroid_ids);
//
faiss::write_index(&index, "/tmp/populated_index.faissindex");
{ // searching the database
int k = 5;
printf("[%.3f s] Searching the %d nearest neighbors "
"of %ld vectors in the index\n",
elapsed() - t0,
k,
nq);
std::vector<faiss::idx_t> nns(k * nq);
std::vector<float> dis(k * nq);
index.search(nq, queries.data(), k, dis.data(), nns.data());
printf("[%.3f s] Query results (vector ids, then distances):\n",
elapsed() - t0);
for (int i = 0; i < nq; i++) {
printf("query %2d: ", i);
for (int j = 0; j < k; j++) {
printf("%7ld ", nns[j + i * k]);
}
printf("\n dis: ");
for (int j = 0; j < k; j++) {
printf("%7g ", dis[j + i * k]);
}
printf("\n");
}
}
return 0;
}

View File

@@ -0,0 +1,146 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <sys/time.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/index_io.h>
double elapsed() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return tv.tv_sec + tv.tv_usec * 1e-6;
}
int main() {
double t0 = elapsed();
// dimension of the vectors to index
int d = 128;
// size of the database we plan to index
size_t nb = 200 * 1000;
// make a set of nt training vectors in the unit cube
// (could be the database)
size_t nt = 100 * 1000;
// make the index object and train it
faiss::IndexFlatL2 coarse_quantizer(d);
// a reasonable number of centroids to index nb vectors
int ncentroids = int(4 * sqrt(nb));
// the coarse quantizer should not be dealloced before the index
// 4 = nb of bytes per code (d must be a multiple of this)
// 8 = nb of bits per sub-code (almost always 8)
faiss::IndexIVFPQ index(&coarse_quantizer, d, ncentroids, 4, 8);
std::mt19937 rng;
{ // training
printf("[%.3f s] Generating %ld vectors in %dD for training\n",
elapsed() - t0,
nt,
d);
std::vector<float> trainvecs(nt * d);
std::uniform_real_distribution<> distrib;
for (size_t i = 0; i < nt * d; i++) {
trainvecs[i] = distrib(rng);
}
printf("[%.3f s] Training the index\n", elapsed() - t0);
index.verbose = true;
index.train(nt, trainvecs.data());
}
{ // I/O demo
const char* outfilename = "/tmp/index_trained.faissindex";
printf("[%.3f s] storing the pre-trained index to %s\n",
elapsed() - t0,
outfilename);
write_index(&index, outfilename);
}
size_t nq;
std::vector<float> queries;
{ // populating the database
printf("[%.3f s] Building a dataset of %ld vectors to index\n",
elapsed() - t0,
nb);
std::vector<float> database(nb * d);
std::uniform_real_distribution<> distrib;
for (size_t i = 0; i < nb * d; i++) {
database[i] = distrib(rng);
}
printf("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);
index.add(nb, database.data());
printf("[%.3f s] imbalance factor: %g\n",
elapsed() - t0,
index.invlists->imbalance_factor());
// remember a few elements from the database as queries
int i0 = 1234;
int i1 = 1243;
nq = i1 - i0;
queries.resize(nq * d);
for (int i = i0; i < i1; i++) {
for (int j = 0; j < d; j++) {
queries[(i - i0) * d + j] = database[i * d + j];
}
}
}
{ // searching the database
int k = 5;
printf("[%.3f s] Searching the %d nearest neighbors "
"of %ld vectors in the index\n",
elapsed() - t0,
k,
nq);
std::vector<faiss::idx_t> nns(k * nq);
std::vector<float> dis(k * nq);
index.search(nq, queries.data(), k, dis.data(), nns.data());
printf("[%.3f s] Query results (vector ids, then distances):\n",
elapsed() - t0);
for (int i = 0; i < nq; i++) {
printf("query %2d: ", i);
for (int j = 0; j < k; j++) {
printf("%7ld ", nns[j + i * k]);
}
printf("\n dis: ");
for (int j = 0; j < k; j++) {
printf("%7g ", dis[j + i * k]);
}
printf("\n");
}
printf("note that the nearest neighbor is not at "
"distance 0 due to quantization errors\n");
}
return 0;
}
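The same IVFPQ indexing flow can be written compactly with the Python bindings; the sketch below is illustrative and not part of this commit (it mirrors the parameters above: 4-byte PQ codes, 8 bits per sub-code):

```python
import numpy as np
import faiss

d = 128
nb, nt = 200_000, 100_000
nlist = int(4 * np.sqrt(nb))     # same rule of thumb as the C++ demo

rng = np.random.default_rng(1234)
xt = rng.random((nt, d), dtype='float32')
xb = rng.random((nb, d), dtype='float32')

quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFPQ(quantizer, d, nlist, 4, 8)  # 4-byte PQ codes, 8 bits per sub-code
index.train(xt)
faiss.write_index(index, "/tmp/index_trained_py.faissindex")

index.add(xb)
index.nprobe = 16
D, I = index.search(xb[1234:1243], 5)
print(I)
print(D)   # the nearest neighbor is not at distance 0 because of quantization errors
```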

View File

@@ -0,0 +1,88 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexFlat.h>
#include <faiss/IndexNNDescent.h>
using namespace std::chrono;
int main(void) {
// dimension of the vectors to index
int d = 64;
int K = 64;
// size of the database we plan to index
size_t nb = 10000;
std::mt19937 rng(12345);
// make the index object and train it
faiss::IndexNNDescentFlat index(d, K, faiss::METRIC_L2);
index.nndescent.S = 10;
index.nndescent.R = 32;
index.nndescent.L = K;
index.nndescent.iter = 10;
index.verbose = true;
// brute-force IndexFlat to generate ground-truth labels
faiss::IndexFlat bruteforce(d, faiss::METRIC_L2);
std::vector<float> database(nb * d);
for (size_t i = 0; i < nb * d; i++) {
database[i] = rng() % 1024;
}
{ // populating the database
index.add(nb, database.data());
bruteforce.add(nb, database.data());
}
size_t nq = 1000;
{ // searching the database
printf("Searching ...\n");
index.nndescent.search_L = 50;
std::vector<float> queries(nq * d);
for (size_t i = 0; i < nq * d; i++) {
queries[i] = rng() % 1024;
}
int k = 5;
std::vector<faiss::idx_t> nns(k * nq);
std::vector<faiss::idx_t> gt_nns(k * nq);
std::vector<float> dis(k * nq);
auto start = high_resolution_clock::now();
index.search(nq, queries.data(), k, dis.data(), nns.data());
auto end = high_resolution_clock::now();
// find exact kNNs by brute force search
bruteforce.search(nq, queries.data(), k, dis.data(), gt_nns.data());
int recalls = 0;
for (size_t i = 0; i < nq; ++i) {
for (int n = 0; n < k; n++) {
for (int m = 0; m < k; m++) {
if (nns[i * k + n] == gt_nns[i * k + m]) {
recalls += 1;
}
}
}
}
float recall = 1.0f * recalls / (k * nq);
auto t = duration_cast<microseconds>(end - start).count();
int qps = nq * 1.0f * 1000 * 1000 / t;
printf("Recall@%d: %f, QPS: %d\n", k, recall, qps);
}
}
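A Python version of the same experiment could look like the sketch below (illustrative only, not part of this commit; it assumes the NNDescent parameters are reachable through `index.nndescent` as in the C++ API):

```python
import numpy as np
import faiss

d, K = 64, 64
nb, nq, k = 10_000, 1_000, 5

rng = np.random.default_rng(12345)
xb = rng.integers(0, 1024, size=(nb, d)).astype('float32')
xq = rng.integers(0, 1024, size=(nq, d)).astype('float32')

index = faiss.IndexNNDescentFlat(d, K, faiss.METRIC_L2)
index.nndescent.S = 10
index.nndescent.R = 32
index.nndescent.L = K
index.nndescent.iter = 10
index.add(xb)

index.nndescent.search_L = 50
_, I = index.search(xq, k)

# exact neighbors with a brute-force index, for the recall computation
bf = faiss.IndexFlatL2(d)
bf.add(xb)
_, gt = bf.search(xq, k)

recall = np.mean([len(set(I[i]) & set(gt[i])) / k for i in range(nq)])
print(f"recall@{k}: {recall:.3f}")
```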

View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys
import numpy as np
import faiss
from faiss.contrib.ondisk import merge_ondisk
#################################################################
# Small I/O functions
#################################################################
def ivecs_read(fname):
a = np.fromfile(fname, dtype='int32')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
#################################################################
# Main program
#################################################################
stage = int(sys.argv[1])
tmpdir = '/tmp/'
if stage == 0:
# train the index
xt = fvecs_read("sift1M/sift_learn.fvecs")
index = faiss.index_factory(xt.shape[1], "IVF4096,Flat")
print("training index")
index.train(xt)
print("write " + tmpdir + "trained.index")
faiss.write_index(index, tmpdir + "trained.index")
if 1 <= stage <= 4:
# add 1/4 of the database to 4 independent indexes
bno = stage - 1
xb = fvecs_read("sift1M/sift_base.fvecs")
i0, i1 = int(bno * xb.shape[0] / 4), int((bno + 1) * xb.shape[0] / 4)
index = faiss.read_index(tmpdir + "trained.index")
print("adding vectors %d:%d" % (i0, i1))
index.add_with_ids(xb[i0:i1], np.arange(i0, i1))
print("write " + tmpdir + "block_%d.index" % bno)
faiss.write_index(index, tmpdir + "block_%d.index" % bno)
if stage == 5:
print('loading trained index')
# construct the output index
index = faiss.read_index(tmpdir + "trained.index")
block_fnames = [
tmpdir + "block_%d.index" % bno
for bno in range(4)
]
merge_ondisk(index, block_fnames, tmpdir + "merged_index.ivfdata")
print("write " + tmpdir + "populated.index")
faiss.write_index(index, tmpdir + "populated.index")
if stage == 6:
# perform a search from disk
print("read " + tmpdir + "populated.index")
index = faiss.read_index(tmpdir + "populated.index")
index.nprobe = 16
# load query vectors and ground-truth
xq = fvecs_read("sift1M/sift_query.fvecs")
gt = ivecs_read("sift1M/sift_groundtruth.ivecs")
D, I = index.search(xq, 5)
recall_at_1 = (I[:, :1] == gt[:, :1]).sum() / float(xq.shape[0])
print("recall@1: %.3f" % recall_at_1)

View File

@@ -0,0 +1,77 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
This demonstrates how to reproduce the QINCo paper results using the Faiss
QINCo implementation. The code loads the reference model because training
is not implemented in Faiss.
Prepare the data with
cd /tmp
# get the reference qinco code
git clone https://github.com/facebookresearch/Qinco.git
# get the data
wget https://dl.fbaipublicfiles.com/QINCo/datasets/bigann/bigann1M.bvecs
# get the model
wget https://dl.fbaipublicfiles.com/QINCo/models/bigann_8x8_L2.pt
"""
import numpy as np
from faiss.contrib.vecs_io import bvecs_mmap
import sys
import time
import torch
import faiss
# make sure pickle deserialization will work
sys.path.append("/tmp/Qinco")
import model_qinco
with torch.no_grad():
qinco = torch.load("/tmp/bigann_8x8_L2.pt", weights_only=False)
qinco.eval()
# print(qinco)
if True:
torch.set_num_threads(1)
faiss.omp_set_num_threads(1)
x_base = bvecs_mmap("/tmp/bigann1M.bvecs")[:1000].astype('float32')
x_scaled = torch.from_numpy(x_base) / qinco.db_scale
t0 = time.time()
codes, _ = qinco.encode(x_scaled)
x_decoded_scaled = qinco.decode(codes)
print(f"Pytorch encode {time.time() - t0:.3f} s")
# multi-thread: 1.13s, single-thread: 7.744
x_decoded = x_decoded_scaled.numpy() * qinco.db_scale
err = ((x_decoded - x_base) ** 2).sum(1).mean()
print("MSE=", err) # = 14211.956, near the L=2 result in Fig 4 of the paper
qinco2 = faiss.QINCo(qinco)
t0 = time.time()
codes2 = qinco2.encode(faiss.Tensor2D(x_scaled))
x_decoded2 = qinco2.decode(codes2).numpy() * qinco.db_scale
print(f"Faiss encode {time.time() - t0:.3f} s")
# multi-thread: 3.2s, single thread: 7.019
# these tests don't work because there are outlier encodings
# np.testing.assert_array_equal(codes.numpy(), codes2.numpy())
# np.testing.assert_allclose(x_decoded, x_decoded2)
ndiff = (codes.numpy() != codes2.numpy()).sum() / codes.numel()
assert ndiff < 0.01
ndiff = (((x_decoded - x_decoded2) ** 2).sum(1) > 1e-5).sum()
assert ndiff / len(x_base) < 0.01
err = ((x_decoded2 - x_base) ** 2).sum(1).mean()
print("MSE=", err) # = 14213.551

View File

@@ -0,0 +1,297 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <climits>
#include <cstdio>
#include <memory>
#include <faiss/IVFlib.h>
#include <faiss/IndexAdditiveQuantizer.h>
#include <faiss/IndexIVFAdditiveQuantizer.h>
#include <faiss/MetricType.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/hamming.h>
#include <faiss/utils/random.h>
#include <faiss/utils/utils.h>
/* This demo file shows how to:
* - use a DistanceComputer to compute distances with encoded vectors
* - in the context of an IVF, how to split an additive quantizer into an
* AdditiveCoarseQuantizer and a ResidualQuantizer, in two different ways, with
* and without storing the prefix.
*/
int main() {
/******************************************
* Generate a test dataset
******************************************/
using idx_t = faiss::idx_t;
size_t d = 128;
size_t nt = 10000;
size_t nb = 10000;
size_t nq = 100;
double t0 = faiss::getmillisecs();
auto tic = [t0]() {
printf("[%.3f s] ", (faiss::getmillisecs() - t0) / 1000);
};
tic();
printf("samping dataset of %zd dim vectors, Q %zd B %zd T %zd\n",
d,
nq,
nb,
nt);
std::vector<float> buf(d * (nq + nt + nb));
faiss::rand_smooth_vectors(nq + nt + nb, d, buf.data(), 1234);
const float* xt = buf.data();
const float* xb = buf.data() + nt * d;
const float* xq = buf.data() + (nt + nb) * d;
idx_t k = 10;
std::vector<idx_t> gt(k * nq);
std::vector<float> unused(k * nq);
tic();
printf("compute ground truth, k=%zd\n", k);
faiss::knn_L2sqr(xq, xb, d, nq, nb, k, unused.data(), gt.data());
// a function to compute the accuracy
auto accuracy = [&](const idx_t* I) {
idx_t accu = 0;
for (idx_t q = 0; q < nq; q++) {
accu += faiss::ranklist_intersection_size(
k, gt.data() + q * k, k, I + q * k);
}
return double(accu) / (k * nq);
};
/******************************************
* Prepare the residual quantizer
******************************************/
faiss::ResidualQuantizer rq(
d, 7, 6, faiss::AdditiveQuantizer::ST_norm_qint8);
// do cheap and inaccurate training
rq.cp.niter = 5;
rq.max_beam_size = 5;
rq.train_type = 0;
tic();
printf("training the residual quantizer beam_size=%d\n", rq.max_beam_size);
rq.train(nt, xt);
tic();
printf("encoding the database, code_size=%zd\n", rq.code_size);
size_t code_size = rq.code_size;
std::vector<uint8_t> raw_codes(nb * code_size);
rq.compute_codes(xb, raw_codes.data(), nb);
/****************************************************************
* Make an index that uses that residual quantizer
* Verify that a distance computer gives the same distances
****************************************************************/
{
faiss::IndexResidualQuantizer index(
rq.d, rq.nbits, faiss::METRIC_L2, rq.search_type);
// override trained index
index.rq = rq;
index.is_trained = true;
// override vectors
index.codes = faiss::MaybeOwnedVector<uint8_t>(raw_codes);
index.ntotal = nb;
tic();
printf("IndexResidualQuantizer ready, searching\n");
std::vector<float> D(k * nq);
std::vector<idx_t> I(k * nq);
index.search(nq, xq, k, D.data(), I.data());
tic();
printf("Accuracy (intersection @ %zd): %.3f\n", k, accuracy(I.data()));
std::unique_ptr<faiss::FlatCodesDistanceComputer> dc(
index.get_FlatCodesDistanceComputer());
float max_diff12 = 0, max_diff13 = 0;
for (idx_t q = 0; q < nq; q++) {
const float* query = xq + q * d;
dc->set_query(query);
for (int i = 0; i < k; i++) {
// 3 ways of computing the same distance
// distance returned by the index
float dis1 = D[q * k + i];
// distance returned by the DistanceComputer that accesses the
// index
idx_t db_index = I[q * k + i];
float dis2 = (*dc)(db_index);
// distance computer from a code that does not belong to the
// index
const uint8_t* code = raw_codes.data() + code_size * db_index;
float dis3 = dc->distance_to_code(code);
max_diff12 = std::max(std::abs(dis1 - dis2), max_diff12);
max_diff13 = std::max(std::abs(dis1 - dis3), max_diff13);
}
}
tic();
printf("Max DistanceComputer discrepancy 1-2: %g 1-3: %g\n",
max_diff12,
max_diff13);
}
/****************************************************************
* Make an IVF index that uses the first 2 levels as a coarse quantizer
* The IVF codes contain the full code (i.e. redundant with the coarse
* quantizer code)
****************************************************************/
{
// build a coarse quantizer from the 2 first levels of the RQ
std::vector<size_t> nbits(2);
std::copy(rq.nbits.begin(), rq.nbits.begin() + 2, nbits.begin());
faiss::ResidualCoarseQuantizer rcq(rq.d, nbits);
// set the coarse quantizer from the 2 first quantizers
rcq.rq.initialize_from(rq);
rcq.is_trained = true;
rcq.ntotal = (idx_t)1 << rcq.rq.tot_bits;
// settings for exhaustive search in RCQ
rcq.centroid_norms.resize(rcq.ntotal);
rcq.aq->compute_centroid_norms(rcq.centroid_norms.data());
rcq.beam_factor = -1.0; // use exact search
size_t nlist = rcq.ntotal;
tic();
printf("RCQ nlist = %zd tot_bits=%zd\n", nlist, rcq.rq.tot_bits);
// build a IVFResidualQuantizer from that
faiss::IndexIVFResidualQuantizer index(
&rcq, rcq.d, nlist, rq.nbits, faiss::METRIC_L2, rq.search_type);
index.by_residual = false;
index.rq = rq;
index.is_trained = true;
// there are 3 ways of filling up the index...
for (std::string filled_with : {"add", "manual", "derived"}) {
tic();
printf("filling up the index with %s, code_size=%zd\n",
filled_with.c_str(),
index.code_size);
index.reset();
if (filled_with == "add") {
// standard add method
index.add(nb, xb);
} else if (filled_with == "manual") {
// compute inverted lists and add elements manually
// fill in the inverted index manually
faiss::InvertedLists& invlists = *index.invlists;
// assign vectors to inverted lists
std::vector<idx_t> listnos(nb);
std::vector<float> unused(nb);
rcq.search(nb, xb, 1, unused.data(), listnos.data());
// populate inverted lists
for (idx_t i = 0; i < nb; i++) {
invlists.add_entry(
listnos[i], i, &raw_codes[i * code_size]);
}
index.ntotal = nb;
} else if (filled_with == "derived") {
// Since we have the raw codes precomputed, their prefix is the
// inverted list index, so let's use that.
faiss::InvertedLists& invlists = *index.invlists;
// populate inverted lists
for (idx_t i = 0; i < nb; i++) {
const uint8_t* code = &raw_codes[i * code_size];
faiss::BitstringReader rd(code, code_size);
idx_t list_no =
rd.read(rcq.rq.tot_bits); // read the list number
invlists.add_entry(list_no, i, code);
}
index.ntotal = nb;
}
tic();
printf("Index filled in\n");
for (int nprobe : {1, 4, 16, 64, int(nlist)}) {
printf("setting nprobe=%-4d", nprobe);
index.nprobe = nprobe;
std::vector<float> D(k * nq);
std::vector<idx_t> I(k * nq);
index.search(nq, xq, k, D.data(), I.data());
tic();
printf("Accuracy (intersection @ %zd): %.3f\n",
k,
accuracy(I.data()));
}
}
}
/****************************************************************
* Make an IVF index that uses the first 2 levels as a coarse
* quantizer, but this time does not store the code prefix from the index
****************************************************************/
{
// build a coarse quantizer from the 2 first levels of the RQ
int nlevel = 2;
std::unique_ptr<faiss::IndexIVFResidualQuantizer> index(
faiss::ivflib::ivf_residual_from_quantizer(rq, nlevel));
// there are 2 ways of filling up the index...
for (std::string filled_with : {"add", "derived"}) {
tic();
printf("filling up the IVF index with %s, code_size=%zd\n",
filled_with.c_str(),
index->code_size);
index->reset();
if (filled_with == "add") {
// standard add method
index->add(nb, xb);
} else if (filled_with == "derived") {
faiss::ivflib::ivf_residual_add_from_flat_codes(
index.get(), nb, raw_codes.data(), rq.code_size);
}
tic();
printf("Index filled in\n");
for (int nprobe : {1, 4, 16, 64, int(index->nlist)}) {
printf("setting nprobe=%-4d", nprobe);
index->nprobe = nprobe;
std::vector<float> D(k * nq);
std::vector<idx_t> I(k * nq);
index->search(nq, xq, k, D.data(), I.data());
tic();
printf("Accuracy (intersection @ %zd): %.3f\n",
k,
accuracy(I.data()));
}
}
}
return 0;
}

View File

@@ -0,0 +1,254 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sys/stat.h>
#include <sys/time.h>
#include <faiss/AutoTune.h>
#include <faiss/index_factory.h>
/**
* To run this demo, please download the ANN_SIFT1M dataset from
*
* http://corpus-texmex.irisa.fr/
*
* and unzip it to the subdirectory sift1M.
**/
/*****************************************************
* I/O functions for fvecs and ivecs
*****************************************************/
float* fvecs_read(const char* fname, size_t* d_out, size_t* n_out) {
FILE* f = fopen(fname, "r");
if (!f) {
fprintf(stderr, "could not open %s\n", fname);
perror("");
abort();
}
int d;
fread(&d, 1, sizeof(int), f);
assert((d > 0 && d < 1000000) || !"unreasonable dimension");
fseek(f, 0, SEEK_SET);
struct stat st;
fstat(fileno(f), &st);
size_t sz = st.st_size;
assert(sz % ((d + 1) * 4) == 0 || !"weird file size");
size_t n = sz / ((d + 1) * 4);
*d_out = d;
*n_out = n;
float* x = new float[n * (d + 1)];
size_t nr __attribute__((unused)) = fread(x, sizeof(float), n * (d + 1), f);
assert(nr == n * (d + 1) || !"could not read whole file");
// shift array to remove row headers
for (size_t i = 0; i < n; i++)
memmove(x + i * d, x + 1 + i * (d + 1), d * sizeof(*x));
fclose(f);
return x;
}
// not very clean, but works as long as sizeof(int) == sizeof(float)
int* ivecs_read(const char* fname, size_t* d_out, size_t* n_out) {
return (int*)fvecs_read(fname, d_out, n_out);
}
double elapsed() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return tv.tv_sec + tv.tv_usec * 1e-6;
}
int main() {
double t0 = elapsed();
// this is typically the fastest one.
const char* index_key = "IVF4096,Flat";
// these ones have better memory usage
// const char *index_key = "Flat";
// const char *index_key = "PQ32";
// const char *index_key = "PCA80,Flat";
// const char *index_key = "IVF4096,PQ8+16";
// const char *index_key = "IVF4096,PQ32";
// const char *index_key = "IMI2x8,PQ32";
// const char *index_key = "IMI2x8,PQ8+16";
// const char *index_key = "OPQ16_64,IMI2x8,PQ8+16";
faiss::Index* index;
size_t d;
{
printf("[%.3f s] Loading train set\n", elapsed() - t0);
size_t nt;
float* xt = fvecs_read("sift1M/sift_learn.fvecs", &d, &nt);
printf("[%.3f s] Preparing index \"%s\" d=%ld\n",
elapsed() - t0,
index_key,
d);
index = faiss::index_factory(d, index_key);
printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nt);
index->train(nt, xt);
delete[] xt;
}
{
printf("[%.3f s] Loading database\n", elapsed() - t0);
size_t nb, d2;
float* xb = fvecs_read("sift1M/sift_base.fvecs", &d2, &nb);
assert(d == d2 || !"dataset does not have same dimension as train set");
printf("[%.3f s] Indexing database, size %ld*%ld\n",
elapsed() - t0,
nb,
d);
index->add(nb, xb);
delete[] xb;
}
size_t nq;
float* xq;
{
printf("[%.3f s] Loading queries\n", elapsed() - t0);
size_t d2;
xq = fvecs_read("sift1M/sift_query.fvecs", &d2, &nq);
assert(d == d2 || !"query does not have same dimension as train set");
}
size_t k; // nb of results per query in the GT
faiss::idx_t* gt; // nq * k matrix of ground-truth nearest-neighbors
{
printf("[%.3f s] Loading ground truth for %ld queries\n",
elapsed() - t0,
nq);
// load ground-truth and convert int to long
size_t nq2;
int* gt_int = ivecs_read("sift1M/sift_groundtruth.ivecs", &k, &nq2);
assert(nq2 == nq || !"incorrect nb of ground truth entries");
gt = new faiss::idx_t[k * nq];
for (int i = 0; i < k * nq; i++) {
gt[i] = gt_int[i];
}
delete[] gt_int;
}
// Result of the auto-tuning
std::string selected_params;
{ // run auto-tuning
printf("[%.3f s] Preparing auto-tune criterion 1-recall at 1 "
"criterion, with k=%ld nq=%ld\n",
elapsed() - t0,
k,
nq);
faiss::OneRecallAtRCriterion crit(nq, 1);
crit.set_groundtruth(k, nullptr, gt);
crit.nnn = k; // by default, the criterion will request only 1 NN
printf("[%.3f s] Preparing auto-tune parameters\n", elapsed() - t0);
faiss::ParameterSpace params;
params.initialize(index);
printf("[%.3f s] Auto-tuning over %ld parameters (%ld combinations)\n",
elapsed() - t0,
params.parameter_ranges.size(),
params.n_combinations());
faiss::OperatingPoints ops;
params.explore(index, nq, xq, crit, &ops);
printf("[%.3f s] Found the following operating points: \n",
elapsed() - t0);
ops.display();
// keep the first parameter that obtains > 0.5 1-recall@1
for (int i = 0; i < ops.optimal_pts.size(); i++) {
if (ops.optimal_pts[i].perf > 0.5) {
selected_params = ops.optimal_pts[i].key;
break;
}
}
assert(selected_params.size() > 0 ||
!"could not find good enough op point");
}
{ // Use the found configuration to perform a search
faiss::ParameterSpace params;
printf("[%.3f s] Setting parameter configuration \"%s\" on index\n",
elapsed() - t0,
selected_params.c_str());
params.set_index_parameters(index, selected_params.c_str());
printf("[%.3f s] Perform a search on %ld queries\n",
elapsed() - t0,
nq);
// output buffers
faiss::idx_t* I = new faiss::idx_t[nq * k];
float* D = new float[nq * k];
index->search(nq, xq, k, D, I);
printf("[%.3f s] Compute recalls\n", elapsed() - t0);
// evaluate result by hand.
int n_1 = 0, n_10 = 0, n_100 = 0;
for (int i = 0; i < nq; i++) {
int gt_nn = gt[i * k];
for (int j = 0; j < k; j++) {
if (I[i * k + j] == gt_nn) {
if (j < 1)
n_1++;
if (j < 10)
n_10++;
if (j < 100)
n_100++;
}
}
}
printf("R@1 = %.4f\n", n_1 / float(nq));
printf("R@10 = %.4f\n", n_10 / float(nq));
printf("R@100 = %.4f\n", n_100 / float(nq));
delete[] I;
delete[] D;
}
delete[] xq;
delete[] gt;
delete index;
return 0;
}

View File

@@ -0,0 +1,181 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <faiss/Clustering.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexHNSW.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/random.h>
namespace {
enum WeightedKMeansType {
WKMT_FlatL2,
WKMT_FlatIP,
WKMT_FlatIP_spherical,
WKMT_HNSW,
};
float weighted_kmeans_clustering(
size_t d,
size_t n,
size_t k,
const float* input,
const float* weights,
float* centroids,
WeightedKMeansType index_num) {
using namespace faiss;
Clustering clus(d, k);
clus.verbose = true;
std::unique_ptr<Index> index;
switch (index_num) {
case WKMT_FlatL2:
index = std::make_unique<IndexFlatL2>(d);
break;
case WKMT_FlatIP:
index = std::make_unique<IndexFlatIP>(d);
break;
case WKMT_FlatIP_spherical:
index = std::make_unique<IndexFlatIP>(d);
clus.spherical = true;
break;
case WKMT_HNSW:
IndexHNSWFlat* ihnsw = new IndexHNSWFlat(d, 32);
ihnsw->hnsw.efSearch = 128;
index.reset(ihnsw);
break;
}
clus.train(n, input, *index.get(), weights);
// on output the index contains the centroids.
memcpy(centroids, clus.centroids.data(), sizeof(*centroids) * d * k);
return clus.iteration_stats.back().obj;
}
int d = 32;
float sigma = 0.1;
#define BIGTEST
#ifdef BIGTEST
// the production setup = setting of https://fb.quip.com/CWgnAAYbwtgs
int nc = 200000;
int n_big = 4;
int n_small = 2;
#else
int nc = 5;
int n_big = 100;
int n_small = 10;
#endif
int n; // number of training points
void generate_trainset(
std::vector<float>& ccent,
std::vector<float>& x,
std::vector<float>& weights) {
// same sampling as test_build_blocks.py test_weighted
ccent.resize(d * 2 * nc);
faiss::float_randn(ccent.data(), d * 2 * nc, 123);
faiss::fvec_renorm_L2(d, 2 * nc, ccent.data());
n = nc * n_big + nc * n_small;
x.resize(d * n);
weights.resize(n);
faiss::float_randn(x.data(), x.size(), 1234);
float* xi = x.data();
float* w = weights.data();
for (int ci = 0; ci < nc * 2; ci++) { // loop over centroids
int np = ci < nc ? n_big : n_small; // nb of points around this centroid
for (int i = 0; i < np; i++) {
for (int j = 0; j < d; j++) {
xi[j] = xi[j] * sigma + ccent[ci * d + j];
}
*w++ = ci < nc ? 0.1 : 10;
xi += d;
}
}
}
} // namespace
int main(int argc, char** argv) {
std::vector<float> ccent;
std::vector<float> x;
std::vector<float> weights;
printf("generate training set\n");
generate_trainset(ccent, x, weights);
std::vector<float> centroids;
centroids.resize(nc * d);
int the_index_num = -1;
int the_with_weights = -1;
if (argc == 3) {
the_index_num = atoi(argv[1]);
the_with_weights = atoi(argv[2]);
}
for (int index_num = WKMT_FlatL2; index_num <= WKMT_HNSW; index_num++) {
if (the_index_num >= 0 && index_num != the_index_num) {
continue;
}
for (int with_weights = 0; with_weights <= 1; with_weights++) {
if (the_with_weights >= 0 && with_weights != the_with_weights) {
continue;
}
printf("=================== index_num=%d Run %s weights\n",
index_num,
with_weights ? "with" : "without");
weighted_kmeans_clustering(
d,
n,
nc,
x.data(),
with_weights ? weights.data() : nullptr,
centroids.data(),
(WeightedKMeansType)index_num);
{ // compute distance of the true cluster centers to the nearest found centroid
faiss::IndexFlatL2 cent_index(d);
cent_index.add(nc, centroids.data());
std::vector<float> dis(n);
std::vector<faiss::idx_t> idx(n);
cent_index.search(
nc * 2, ccent.data(), 1, dis.data(), idx.data());
float dis1 = 0, dis2 = 0;
for (int i = 0; i < nc; i++) {
dis1 += dis[i];
}
printf("average distance of points from big clusters: %g\n",
dis1 / nc);
for (int i = 0; i < nc; i++) {
dis2 += dis[i + nc];
}
printf("average distance of points from small clusters: %g\n",
dis2 / nc);
}
}
}
return 0;
}
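In Python, the simplest entry point to weighted k-means is the `Kmeans` wrapper; the sketch below is illustrative, not part of this commit, and assumes the wrapper's optional `weights` argument:

```python
import numpy as np
import faiss

d, n, k = 32, 10_000, 100
rng = np.random.default_rng(0)
x = rng.standard_normal((n, d)).astype('float32')

# up-weight the second half of the points by 100x
weights = np.ones(n, dtype='float32')
weights[n // 2:] = 100.0

km = faiss.Kmeans(d, k, niter=20, verbose=True)
km.train(x, weights=weights)
print(km.centroids.shape)   # (k, d); centroids are pulled towards the heavily weighted points
```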

View File

@@ -0,0 +1,303 @@
#!/usr/bin/env -S grimaldi --kernel bento_kernel_faiss
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# fmt: off
# flake8: noqa
""":md
# Serializing codes separately, with IndexLSH and IndexPQ
Let's say, for example, you have a few vector embeddings per user
and want to shard a flat index by user so you can re-use the same LSH or PQ method
for all users but store each user's codes independently.
"""
""":py"""
import faiss
import numpy as np
""":py"""
d = 768
n = 1_000
ids = np.arange(n).astype('int64')
training_data = np.random.rand(n, d).astype('float32')
""":py"""
def read_ids_codes():
try:
return np.load("/tmp/ids.npy"), np.load("/tmp/codes.npy")
except FileNotFoundError:
return None, None
def write_ids_codes(ids, codes):
np.save("/tmp/ids.npy", ids)
np.save("/tmp/codes.npy", codes.reshape(len(ids), -1))
def write_template_index(template_index):
faiss.write_index(template_index, "/tmp/template.index")
def read_template_index_instance():
return faiss.read_index("/tmp/template.index")
""":md
## IndexLSH: separate codes
The first half of this notebook demonstrates how to store LSH codes. Unlike PQ, LSH does not require training. In fact, its compression method, a random projection matrix, is deterministic on construction, based on a random seed value that's [hardcoded](https://github.com/facebookresearch/faiss/blob/2c961cc308ade8a85b3aa10a550728ce3387f625/faiss/IndexLSH.cpp#L35).
"""
""":py"""
nbits = 1536
""":py"""
# demonstrating encoding is deterministic
codes = []
database_vector_float32 = np.random.rand(1, d).astype(np.float32)
for i in range(10):
index = faiss.IndexIDMap2(faiss.IndexLSH(d, nbits))
code = index.index.sa_encode(database_vector_float32)
codes.append(code)
for i in range(1, 10):
assert np.array_equal(codes[0], codes[i])
""":py"""
# new database vector
ids, codes = read_ids_codes()
database_vector_id, database_vector_float32 = max(ids) + 1 if ids is not None else 1, np.random.rand(1, d).astype(np.float32)
index = faiss.IndexIDMap2(faiss.IndexLSH(d, nbits))
code = index.index.sa_encode(database_vector_float32)
if ids is not None and codes is not None:
ids = np.concatenate((ids, [database_vector_id]))
codes = np.vstack((codes, code))
else:
ids = np.array([database_vector_id])
codes = np.array([code])
write_ids_codes(ids, codes)
""":py '2840581589434841'"""
# then at query time
query_vector_float32 = np.random.rand(1, d).astype(np.float32)
index = faiss.IndexIDMap2(faiss.IndexLSH(d, nbits))
ids, codes = read_ids_codes()
index.add_sa_codes(codes, ids)
index.search(query_vector_float32, k=5)
""":py"""
!rm /tmp/ids.npy /tmp/codes.npy
""":md
## IndexPQ: separate codes from codebook
The second half of this notebook demonstrates how to separate serializing and deserializing the PQ codebook
(via faiss.write_index for IndexPQ) independently of the vector codes. For example, in the case
where you have a few vector embeddings per user and want to shard the flat index by user you
can re-use the same PQ method for all users but store each user's codes independently.
"""
""":py"""
M = d//8
nbits = 8
""":py"""
# at train time
template_index = faiss.index_factory(d, f"IDMap2,PQ{M}x{nbits}")
template_index.train(training_data)
write_template_index(template_index)
""":py"""
# New database vector
index = read_template_index_instance()
ids, codes = read_ids_codes()
database_vector_id, database_vector_float32 = max(ids) + 1 if ids is not None else 1, np.random.rand(1, d).astype(np.float32)
code = index.index.sa_encode(database_vector_float32)
if ids is not None and codes is not None:
ids = np.concatenate((ids, [database_vector_id]))
codes = np.vstack((codes, code))
else:
ids = np.array([database_vector_id])
codes = np.array([code])
write_ids_codes(ids, codes)
""":py '1858280061369209'"""
# then at query time
query_vector_float32 = np.random.rand(1, d).astype(np.float32)
id_wrapper_index = read_template_index_instance()
ids, codes = read_ids_codes()
id_wrapper_index.add_sa_codes(codes, ids)
id_wrapper_index.search(query_vector_float32, k=5)
""":py"""
!rm /tmp/ids.npy /tmp/codes.npy /tmp/template.index
""":md
## Comparing these methods
- methods: Flat, LSH, PQ
- vary cost: nbits, M for 1x, 2x, 4x, 8x, 16x, 32x compression
- measure: recall@1
We don't measure latency as the number of vectors per user shard is insignificant.
"""
""":py '2898032417027201'"""
n, d
""":py"""
database_vector_ids, database_vector_float32s = np.arange(n), np.random.rand(n, d).astype(np.float32)
query_vector_float32s = np.random.rand(n, d).astype(np.float32)
""":py"""
index = faiss.index_factory(d, "IDMap2,Flat")
index.add_with_ids(database_vector_float32s, database_vector_ids)
_, ground_truth_result_ids = index.search(query_vector_float32s, k=1)
""":py '857475336204238'"""
from dataclasses import dataclass
pq_m_nbits = (
# 96 bytes
(96, 8),
(192, 4),
# 192 bytes
(192, 8),
(384, 4),
# 384 bytes
(384, 8),
(768, 4),
)
lsh_nbits = (768, 1536, 3072, 6144, 12288, 24576)
@dataclass
class Record:
type_: str
index: faiss.Index
args: tuple
recall: float
results = []
for m, nbits in pq_m_nbits:
print("pq", m, nbits)
index = faiss.index_factory(d, f"IDMap2,PQ{m}x{nbits}")
index.train(training_data)
index.add_with_ids(database_vector_float32s, database_vector_ids)
_, result_ids = index.search(query_vector_float32s, k=1)
recall = sum(result_ids == ground_truth_result_ids)
results.append(Record("pq", index, (m, nbits), recall))
for nbits in lsh_nbits:
print("lsh", nbits)
index = faiss.IndexIDMap2(faiss.IndexLSH(d, nbits))
index.add_with_ids(database_vector_float32s, database_vector_ids)
_, result_ids = index.search(query_vector_float32s, k=1)
recall = sum(result_ids == ground_truth_result_ids)
results.append(Record("lsh", index, (nbits,), recall))
""":py '556918346720794'"""
import matplotlib.pyplot as plt
import numpy as np
def create_grouped_bar_chart(x_values, y_values_list, labels_list, xlabel, ylabel, title):
num_bars_per_group = len(x_values)
plt.figure(figsize=(12, 6))
for x, y_values, labels in zip(x_values, y_values_list, labels_list):
num_bars = len(y_values)
bar_width = 0.08 * x
bar_positions = np.arange(num_bars) * bar_width - (num_bars - 1) * bar_width / 2 + x
bars = plt.bar(bar_positions, y_values, width=bar_width)
for bar, label in zip(bars, labels):
height = bar.get_height()
plt.annotate(
label,
xy=(bar.get_x() + bar.get_width() / 2, height),
xytext=(0, 3),
textcoords="offset points",
ha='center', va='bottom'
)
plt.xscale('log')
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.title(title)
plt.xticks(x_values, labels=[str(x) for x in x_values])
plt.tight_layout()
plt.show()
# # Example usage:
# x_values = [1, 2, 4, 8, 16, 32]
# y_values_list = [
# [2.5, 3.6, 1.8],
# [3.0, 2.8],
# [2.5, 3.5, 4.0, 1.0],
# [4.2],
# [3.0, 5.5, 2.2],
# [6.0, 4.5]
# ]
# labels_list = [
# ['A1', 'B1', 'C1'],
# ['A2', 'B2'],
# ['A3', 'B3', 'C3', 'D3'],
# ['A4'],
# ['A5', 'B5', 'C5'],
# ['A6', 'B6']
# ]
# create_grouped_bar_chart(x_values, y_values_list, labels_list, "x axis", "y axis", "title")
""":py '1630106834206134'"""
# x-axis: compression ratio
# y-axis: recall@1
from collections import defaultdict
x = defaultdict(list)
x[1].append(("flat", 1.00))
for r in results:
y_value = r.recall[0] / n
x_value = int(d * 4 / r.index.sa_code_size())
label = None
if r.type_ == "pq":
label = f"PQ{r.args[0]}x{r.args[1]}"
if r.type_ == "lsh":
label = f"LSH{r.args[0]}"
x[x_value].append((label, y_value))
x_values = sorted(list(x.keys()))
create_grouped_bar_chart(
x_values,
[[e[1] for e in x[x_value]] for x_value in x_values],
[[e[0] for e in x[x_value]] for x_value in x_values],
"compression ratio",
"recall@1 q=1,000 queries",
"recall@1 for a database of n=1,000 d=768 vectors",
)

View File

@@ -0,0 +1,52 @@
# Offline IVF
This folder contains the code for the offline ivf algorithm powered by faiss big batch search.
Create a conda env:
`conda create --name oivf python=3.10`
`conda activate oivf`
`conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.7.4`
`conda install tqdm`
`conda install pyyaml`
`conda install -c conda-forge submitit`
## Run book
1. Optionally shard your dataset (see create_sharded_dataset.py) and create the corresponding yaml file `config_ssnpp.yaml` (a minimal sketch of the expected layout is shown after this run book). You can use `generate_config.py` by specifying the root directory of your dataset and the files with the data shards
`python generate_config.py`
2. Run the train index command
`python run.py --command train_index --config config_ssnpp.yaml --xb ssnpp_1B`
3. Run the index-shard command so it produces sharded indexes, required for the search step
`python run.py --command index_shard --config config_ssnpp.yaml --xb ssnpp_1B`
4. Send jobs to the cluster to run search
`python run.py --command search --config config_ssnpp.yaml --xb ssnpp_1B --cluster_run --partition <PARTITION-NAME>`
Remarks about the `search` command: by default, the database vectors are also used as the query vectors in the search step.
a. If the query vectors are different from the database vectors, they should be passed via the `xq` argument
b. A new dataset needs to be prepared (step 1) before passing it to the query vectors argument `xq`
`python run.py --command search --config config_ssnpp.yaml --xb ssnpp_1B --xq <QUERIES_DATASET_NAME>`
5. We can always run the consistency-check for sanity checks!
`python run.py --command consistency_check --config config_ssnpp.yaml --xb ssnpp_1B`
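For reference, here is a minimal sketch of the config layout that `run.py` expects, written as the Python dict that `generate_config.py` would dump to YAML (the dataset name, paths and file list are placeholders; see `config_ssnpp.yaml` for a complete example):
```python
import yaml

cfg = {
    "d": 256,                              # vector dimensionality
    "output": "/path/to/output",           # where indexes and results are written
    "index": {"prod": ["IVF8192,PQ128"]},  # index factory string(s)
    "nprobe": {"prod": [512]},             # nprobe value(s) for search
    "k": 50,
    "index_shard_size": 50_000_000,
    "query_batch_size": 50_000_000,
    "evaluation_sample": 10_000,
    "training_sample": 1_572_864,
    "datasets": {
        "my_dataset": {
            "root": "/path/to/shards",
            "size": 50_000_000,
            "files": [
                {"name": "my_data_0000000000.npy", "format": "npy",
                 "dtype": "uint8", "size": 50_000_000},
            ],
        },
    },
}
print(yaml.dump(cfg))
```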

View File

View File

@@ -0,0 +1,110 @@
d: 256
output: /checkpoint/marialomeli/offline_faiss/ssnpp
index:
prod:
- 'IVF8192,PQ128'
non-prod:
- 'IVF16384,PQ128'
- 'IVF32768,PQ128'
- 'OPQ64_128,IVF4096,PQ64'
nprobe:
prod:
- 512
non-prod:
- 256
- 128
- 1024
- 2048
- 4096
- 8192
k: 50
index_shard_size: 50000000
query_batch_size: 50000000
evaluation_sample: 10000
training_sample: 1572864
datasets:
ssnpp_1B:
root: /checkpoint/marialomeli/ssnpp_data
size: 1000000000
files:
- dtype: uint8
format: npy
name: ssnpp_0000000000.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000001.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000002.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000003.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000004.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000005.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000006.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000007.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000008.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000009.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000010.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000011.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000012.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000013.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000014.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000015.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000016.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000017.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000018.npy
size: 50000000
- dtype: uint8
format: npy
name: ssnpp_0000000019.npy
size: 50000000

View File

@@ -0,0 +1,64 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import argparse
import os
def xbin_mmap(fname, dtype, maxn=-1):
"""
Code from
https://github.com/harsha-simhadri/big-ann-benchmarks/blob/main/benchmark/dataset_io.py#L94
mmap the competition file format for a given type of items
"""
n, d = map(int, np.fromfile(fname, dtype="uint32", count=2))
assert os.stat(fname).st_size == 8 + n * d * np.dtype(dtype).itemsize
if maxn > 0:
n = min(n, maxn)
return np.memmap(fname, dtype=dtype, mode="r", offset=8, shape=(n, d))
def main(args: argparse.Namespace):
ssnpp_data = xbin_mmap(fname=args.filepath, dtype="uint8")
num_batches = ssnpp_data.shape[0] // args.data_batch
assert (
ssnpp_data.shape[0] % args.data_batch == 0
), "num of embeddings per file should divide total num of embeddings"
for i in range(num_batches):
xb_batch = ssnpp_data[
i * args.data_batch:(i + 1) * args.data_batch, :
]
filename = args.output_dir + f"/ssnpp_{(i):010}.npy"
np.save(filename, xb_batch)
print(f"File {filename} is saved!")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--data_batch",
dest="data_batch",
type=int,
default=50000000,
help="Number of embeddings per file, should be a divisor of 1B",
)
parser.add_argument(
"--filepath",
dest="filepath",
type=str,
default="/datasets01/big-ann-challenge-data/FB_ssnpp/FB_ssnpp_database.u8bin",
help="path of 1B ssnpp database vectors' original file",
)
parser.add_argument(
"--filepath",
dest="output_dir",
type=str,
default="/checkpoint/marialomeli/ssnpp_data",
help="path to put sharded files",
)
args = parser.parse_args()
main(args)

View File

@@ -0,0 +1,174 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import numpy as np
import faiss
from typing import List
import random
import logging
from functools import lru_cache
def create_dataset_from_oivf_config(cfg, ds_name):
normalise = cfg["normalise"] if "normalise" in cfg else False
return MultiFileVectorDataset(
cfg["datasets"][ds_name]["root"],
[
FileDescriptor(
f["name"], f["format"], np.dtype(f["dtype"]), f["size"]
)
for f in cfg["datasets"][ds_name]["files"]
],
cfg["d"],
normalise,
cfg["datasets"][ds_name]["size"],
)
@lru_cache(maxsize=100)
def _memmap_vecs(
file_name: str, format: str, dtype: np.dtype, size: int, d: int
) -> np.array:
"""
If the file is in raw format, the file size will
be divisible by the dimensionality and by the size
of the data type.
Otherwise, the file contains a header and we assume
it is of .npy type. It then returns the memmapped file.
"""
assert os.path.exists(file_name), f"file does not exist {file_name}"
if format == "raw":
fl = os.path.getsize(file_name)
nb = fl // d // dtype.itemsize
assert nb == size, f"{nb} is different than config's {size}"
assert fl == d * dtype.itemsize * nb # no header
return np.memmap(file_name, shape=(nb, d), dtype=dtype, mode="r")
elif format == "npy":
vecs = np.load(file_name, mmap_mode="r")
assert vecs.shape[0] == size, f"size:{size},shape {vecs.shape[0]}"
assert vecs.shape[1] == d
assert vecs.dtype == dtype
return vecs
else:
ValueError("The file cannot be loaded in the current format.")
class FileDescriptor:
def __init__(self, name: str, format: str, dtype: np.dtype, size: int):
self.name = name
self.format = format
self.dtype = dtype
self.size = size
class MultiFileVectorDataset:
def __init__(
self,
root: str,
file_descriptors: List[FileDescriptor],
d: int,
normalize: bool,
size: int,
):
assert os.path.exists(root)
self.root = root
self.file_descriptors = file_descriptors
self.d = d
self.normalize = normalize
self.size = size
self.file_offsets = [0]
t = 0
for f in self.file_descriptors:
xb = _memmap_vecs(
f"{self.root}/{f.name}", f.format, f.dtype, f.size, self.d
)
t += xb.shape[0]
self.file_offsets.append(t)
assert (
t == self.size
), "the sum of num of embeddings per file!=total num of embeddings"
def iterate(self, start: int, batch_size: int, dt: np.dtype):
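# Streams batches of `batch_size` vectors starting at global offset `start`, spanning file
# boundaries: partial reads are accumulated in `buffer`, full batches are yielded (optionally
# L2-normalized), and a final short batch is yielded at the end if one remains.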
buffer = np.empty(shape=(batch_size, self.d), dtype=dt)
rem = 0
for f in self.file_descriptors:
if start >= f.size:
start -= f.size
continue
logging.info(f"processing: {f.name}...")
xb = _memmap_vecs(
f"{self.root}/{f.name}",
f.format,
f.dtype,
f.size,
self.d,
)
if start > 0:
xb = xb[start:]
start = 0
req = min(batch_size - rem, xb.shape[0])
buffer[rem:rem + req] = xb[:req]
rem += req
if rem == batch_size:
if self.normalize:
faiss.normalize_L2(buffer)
yield buffer.copy()
rem = 0
for i in range(req, xb.shape[0], batch_size):
j = i + batch_size
if j <= xb.shape[0]:
tmp = xb[i:j].astype(dt)
if self.normalize:
faiss.normalize_L2(tmp)
yield tmp
else:
rem = xb.shape[0] - i
buffer[:rem] = xb[i:j]
if rem > 0:
tmp = buffer[:rem]
if self.normalize:
faiss.normalize_L2(tmp)
yield tmp
def get(self, idx: List[int]):
n = len(idx)
fidx = np.searchsorted(self.file_offsets, idx, "right")
res = np.empty(shape=(len(idx), self.d), dtype=np.float32)
for r, id, fid in zip(range(n), idx, fidx):
assert fid > 0 and fid <= len(self.file_descriptors), f"{fid}"
f = self.file_descriptors[fid - 1]
# deferring normalization until after reading the vec
vecs = _memmap_vecs(
f"{self.root}/{f.name}", f.format, f.dtype, f.size, self.d
)
i = id - self.file_offsets[fid - 1]
assert i >= 0 and i < vecs.shape[0]
res[r, :] = vecs[i] # TODO: find a faster way
if self.normalize:
faiss.normalize_L2(res)
return res
def sample(self, n, idx_fn, vecs_fn):
if vecs_fn and os.path.exists(vecs_fn):
vecs = np.load(vecs_fn)
assert vecs.shape == (n, self.d)
return vecs
if idx_fn and os.path.exists(idx_fn):
idx = np.load(idx_fn)
assert idx.size == n
else:
idx = np.array(sorted(random.sample(range(self.size), n)))
if idx_fn:
np.save(idx_fn, idx)
vecs = self.get(idx)
if vecs_fn:
np.save(vecs_fn, vecs)
return vecs
def get_first_n(self, n, dt):
assert n <= self.size
return next(self.iterate(0, n, dt))

View File

@@ -0,0 +1,46 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import os
import yaml
# with ssnpp sharded data
root = "/checkpoint/marialomeli/ssnpp_data"
file_names = [f"ssnpp_{i:010}.npy" for i in range(20)]
d = 256
dt = np.dtype(np.uint8)
def read_embeddings(fp):
fl = os.path.getsize(fp)
nb = fl // d // dt.itemsize
print(nb)
if fl == d * dt.itemsize * nb: # no header
return ("raw", np.memmap(fp, shape=(nb, d), dtype=dt, mode="r"))
else: # assume npy
vecs = np.load(fp, mmap_mode="r")
assert vecs.shape[1] == d
assert vecs.dtype == dt
return ("npy", vecs)
cfg = {}
files = []
size = 0
for fn in file_names:
fp = f"{root}/{fn}"
assert os.path.exists(fp), f"{fp} is missing"
ft, xb = read_embeddings(fp)
files.append(
{"name": fn, "size": xb.shape[0], "dtype": dt.name, "format": ft}
)
size += xb.shape[0]
cfg["size"] = size
cfg["root"] = root
cfg["d"] = d
cfg["files"] = files
print(yaml.dump(cfg))

View File

@@ -0,0 +1,891 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import faiss
import numpy as np
import os
from tqdm import tqdm, trange
import sys
import logging
from faiss.contrib.ondisk import merge_ondisk
from faiss.contrib.big_batch_search import big_batch_search
from faiss.contrib.exhaustive_search import knn_ground_truth
from faiss.contrib.evaluation import knn_intersection_measure
from utils import (
get_intersection_cardinality_frequencies,
margin,
is_pretransform_index,
)
from dataset import create_dataset_from_oivf_config
logging.basicConfig(
format=(
"%(asctime)s.%(msecs)03d %(levelname)-8s %(threadName)-12s %(message)s"
),
level=logging.INFO,
datefmt="%Y-%m-%d %H:%M:%S",
force=True,
)
EMBEDDINGS_BATCH_SIZE: int = 100_000
NUM_SUBSAMPLES: int = 100
SMALL_DATA_SAMPLE: int = 10000
class OfflineIVF:
def __init__(self, cfg, args, nprobe, index_factory_str):
self.input_d = cfg["d"]
self.dt = cfg["datasets"][args.xb]["files"][0]["dtype"]
assert self.input_d > 0
output_dir = cfg["output"]
assert os.path.exists(output_dir)
self.index_factory = index_factory_str
assert self.index_factory is not None
self.index_factory_fn = self.index_factory.replace(",", "_")
self.index_template_file = (
f"{output_dir}/{args.xb}/{self.index_factory_fn}.empty.faissindex"
)
logging.info(f"index template: {self.index_template_file}")
if not args.xq:
args.xq = args.xb
self.by_residual = True
if args.no_residuals:
self.by_residual = False
xb_output_dir = f"{output_dir}/{args.xb}"
if not os.path.exists(xb_output_dir):
os.makedirs(xb_output_dir)
xq_output_dir = f"{output_dir}/{args.xq}"
if not os.path.exists(xq_output_dir):
os.makedirs(xq_output_dir)
search_output_dir = f"{output_dir}/{args.xq}_in_{args.xb}"
if not os.path.exists(search_output_dir):
os.makedirs(search_output_dir)
self.knn_dir = f"{search_output_dir}/knn"
if not os.path.exists(self.knn_dir):
os.makedirs(self.knn_dir)
self.eval_dir = f"{search_output_dir}/eval"
if not os.path.exists(self.eval_dir):
os.makedirs(self.eval_dir)
self.index = {} # to keep a reference to opened indices,
self.ivls = {} # hstack inverted lists,
self.index_shards = {} # and index shards
self.index_shard_prefix = (
f"{xb_output_dir}/{self.index_factory_fn}.shard_"
)
self.xq_index_shard_prefix = (
f"{xq_output_dir}/{self.index_factory_fn}.shard_"
)
self.index_file = ( # TODO: added back temporarily for evaluate, handle name of non-sharded index file and remove.
f"{xb_output_dir}/{self.index_factory_fn}.faissindex"
)
self.xq_index_file = (
f"{xq_output_dir}/{self.index_factory_fn}.faissindex"
)
self.training_sample = cfg["training_sample"]
self.evaluation_sample = cfg["evaluation_sample"]
self.xq_ds = create_dataset_from_oivf_config(cfg, args.xq)
self.xb_ds = create_dataset_from_oivf_config(cfg, args.xb)
file_descriptors = self.xq_ds.file_descriptors
self.file_sizes = [fd.size for fd in file_descriptors]
self.shard_size = cfg["index_shard_size"] # ~100GB
self.nshards = self.xb_ds.size // self.shard_size
if self.xb_ds.size % self.shard_size != 0:
self.nshards += 1
self.xq_nshards = self.xq_ds.size // self.shard_size
if self.xq_ds.size % self.shard_size != 0:
self.xq_nshards += 1
self.nprobe = nprobe
assert self.nprobe > 0, "Invalid nprobe parameter."
if "deduper" in cfg:
self.deduper = cfg["deduper"]
self.deduper_codec_fn = [
f"{xb_output_dir}/deduper_codec_{codec.replace(',', '_')}"
for codec in self.deduper
]
self.deduper_idx_fn = [
f"{xb_output_dir}/deduper_idx_{codec.replace(',', '_')}"
for codec in self.deduper
]
else:
self.deduper = None
self.k = cfg["k"]
assert self.k > 0, "Invalid number of neighbours parameter."
self.knn_output_file_suffix = (
f"{self.index_factory_fn}_np{self.nprobe}.npy"
)
fp = 32
if self.dt == "float16":
fp = 16
self.xq_bs = cfg["query_batch_size"]
if "metric" in cfg:
self.metric = eval(f'faiss.{cfg["metric"]}')
else:
self.metric = faiss.METRIC_L2
if "evaluate_by_margin" in cfg:
self.evaluate_by_margin = cfg["evaluate_by_margin"]
else:
self.evaluate_by_margin = False
os.system("grep -m1 'model name' < /proc/cpuinfo")
os.system("grep -E 'MemTotal|MemFree' /proc/meminfo")
os.system("nvidia-smi")
os.system("nvcc --version")
self.knn_queries_memory_limit = 4 * 1024 * 1024 * 1024 # 4 GB
self.knn_vectors_memory_limit = 8 * 1024 * 1024 * 1024 # 8 GB
def input_stats(self):
"""
Computes and logs MatrixStats on a subsample (of size training_sample) of the first chunk of data in the database.
"""
xb_sample = self.xb_ds.get_first_n(self.training_sample, np.float32)
logging.info(f"input shape: {xb_sample.shape}")
logging.info("running MatrixStats on training sample...")
logging.info(faiss.MatrixStats(xb_sample).comments)
logging.info("done")
def dedupe(self):
logging.info(self.deduper)
if self.deduper is None:
logging.info("No deduper configured")
return
codecs = []
codesets = []
idxs = []
for factory, filename in zip(self.deduper, self.deduper_codec_fn):
if os.path.exists(filename):
logging.info(f"loading trained dedupe codec: {filename}")
codec = faiss.read_index(filename)
else:
logging.info(f"training dedupe codec: {factory}")
codec = faiss.index_factory(self.input_d, factory)
xb_sample = np.unique(
self.xb_ds.get_first_n(100_000, np.float32), axis=0
)
faiss.ParameterSpace().set_index_parameter(codec, "verbose", 1)
codec.train(xb_sample)
logging.info(f"writing trained dedupe codec: {filename}")
faiss.write_index(codec, filename)
codecs.append(codec)
codesets.append(faiss.CodeSet(codec.sa_code_size()))
idxs.append(np.empty((0,), dtype=np.uint32))
bs = 1_000_000
i = 0
for buffer in tqdm(self._iterate_transformed(self.xb_ds, 0, bs, np.float32)):
for j in range(len(codecs)):
codec, codeset, idx = codecs[j], codesets[j], idxs[j]
uniq = codeset.insert(codec.sa_encode(buffer))
idxs[j] = np.append(
idx,
np.arange(i, i + buffer.shape[0], dtype=np.uint32)[uniq],
)
i += buffer.shape[0]
for idx, filename in zip(idxs, self.deduper_idx_fn):
logging.info(f"writing {filename}, shape: {idx.shape}")
np.save(filename, idx)
logging.info("done")
def train_index(self):
"""
Trains the index using a subsample of the first chunk of data in the database and saves it in the template file (with no vectors added).
"""
assert not os.path.exists(self.index_template_file), (
"The train command has been ran, the index template file already"
" exists."
)
xb_sample = np.unique(
self.xb_ds.get_first_n(self.training_sample, np.float32), axis=0
)
logging.info(f"input shape: {xb_sample.shape}")
index = faiss.index_factory(
self.input_d, self.index_factory, self.metric
)
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
index_ivf.by_residual = True
faiss.ParameterSpace().set_index_parameter(index, "verbose", 1)
logging.info("running training...")
index.train(xb_sample)
logging.info(f"writing trained index {self.index_template_file}...")
faiss.write_index(index, self.index_template_file)
logging.info("done")
def _iterate_transformed(self, ds, start, batch_size, dt):
assert os.path.exists(self.index_template_file)
index = faiss.read_index(self.index_template_file)
if is_pretransform_index(index):
vt = index.chain.at(0) # fetch pretransform
for buffer in ds.iterate(start, batch_size, dt):
yield vt.apply(buffer)
else:
for buffer in ds.iterate(start, batch_size, dt):
yield buffer
def index_shard(self):
assert os.path.exists(self.index_template_file)
index = faiss.read_index(self.index_template_file)
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
assert self.nprobe <= index_ivf.quantizer.ntotal, (
f"the number of vectors {index_ivf.quantizer.ntotal} is not enough"
f" to retrieve {self.nprobe} neighbours, check."
)
cpu_quantizer = index_ivf.quantizer
gpu_quantizer = faiss.index_cpu_to_all_gpus(cpu_quantizer)
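# The coarse quantizer is temporarily replaced by a GPU clone so that list assignment during
# add_with_ids runs on GPU; the CPU quantizer is swapped back in before each shard is written.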
for i in range(0, self.nshards):
sfn = f"{self.index_shard_prefix}{i}"
try:
index.reset()
index_ivf.quantizer = gpu_quantizer
with open(sfn, "xb"):
start = i * self.shard_size
jj = 0
embeddings_batch_size = min(
EMBEDDINGS_BATCH_SIZE, self.shard_size
)
assert (
self.shard_size % embeddings_batch_size == 0
or EMBEDDINGS_BATCH_SIZE % embeddings_batch_size == 0
), (
f"the shard size {self.shard_size} and embeddings"
f" shard size {EMBEDDINGS_BATCH_SIZE} are not"
" divisible"
)
for xb_j in tqdm(
self._iterate_transformed(
self.xb_ds,
start,
embeddings_batch_size,
np.float32,
),
file=sys.stdout,
):
if is_pretransform_index(index):
assert xb_j.shape[1] == index.chain.at(0).d_out
index_ivf.add_with_ids(
xb_j,
np.arange(start + jj, start + jj + xb_j.shape[0]),
)
else:
assert xb_j.shape[1] == index.d
index.add_with_ids(
xb_j,
np.arange(start + jj, start + jj + xb_j.shape[0]),
)
jj += xb_j.shape[0]
logging.info(jj)
assert (
jj <= self.shard_size
), f"jj {jj} and shard_zide {self.shard_size}"
if jj == self.shard_size:
break
logging.info(f"writing {sfn}...")
index_ivf.quantizer = cpu_quantizer
faiss.write_index(index, sfn)
except FileExistsError:
logging.info(f"skipping shard: {i}")
continue
logging.info("done")
def merge_index(self):
ivf_file = f"{self.index_file}.ivfdata"
assert os.path.exists(self.index_template_file)
assert not os.path.exists(
ivf_file
), f"file with embeddings data {ivf_file} not found, check."
assert not os.path.exists(self.index_file)
index = faiss.read_index(self.index_template_file)
block_fnames = [
f"{self.index_shard_prefix}{i}" for i in range(self.nshards)
]
for fn in block_fnames:
assert os.path.exists(fn)
logging.info(block_fnames)
logging.info("merging...")
merge_ondisk(index, block_fnames, ivf_file)
logging.info("writing index...")
faiss.write_index(index, self.index_file)
logging.info("done")
def _cached_search(
self,
sample,
xq_ds,
xb_ds,
idx_file,
vecs_file,
I_file,
D_file,
index_file=None,
nprobe=None,
):
if not os.path.exists(I_file):
assert not os.path.exists(I_file), f"file {I_file} already exists "
assert not os.path.exists(D_file), f"file {D_file} already exists "
xq = xq_ds.sample(sample, idx_file, vecs_file)
if index_file:
D, I = self._index_nonsharded_search(index_file, xq, nprobe)
else:
logging.info("ground truth computations")
db_iterator = xb_ds.iterate(0, 100_000, np.float32)
D, I = knn_ground_truth(
xq, db_iterator, self.k, metric_type=self.metric
)
assert np.amin(I) >= 0
np.save(I_file, I)
np.save(D_file, D)
else:
assert os.path.exists(idx_file), f"file {idx_file} does not exist "
assert os.path.exists(
vecs_file
), f"file {vecs_file} does not exist "
assert os.path.exists(I_file), f"file {I_file} does not exist "
assert os.path.exists(D_file), f"file {D_file} does not exist "
I = np.load(I_file)
D = np.load(D_file)
assert I.shape == (sample, self.k), f"{I_file} shape mismatch"
assert D.shape == (sample, self.k), f"{D_file} shape mismatch"
return (D, I)
def _index_search(self, index_shard_prefix, xq, nprobe):
assert nprobe is not None
logging.info(
f"open sharded index: {index_shard_prefix}, {self.nshards}"
)
index = self._open_sharded_index(index_shard_prefix)
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
logging.info(f"setting nprobe to {nprobe}")
index_ivf.nprobe = nprobe
return index.search(xq, self.k)
def _index_nonsharded_search(self, index_file, xq, nprobe):
assert nprobe is not None
logging.info(f"index {index_file}")
assert os.path.exists(index_file), f"file {index_file} does not exist "
index = faiss.read_index(index_file, faiss.IO_FLAG_ONDISK_SAME_DIR)
logging.info(f"index size {index.ntotal} ")
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
logging.info(f"setting nprobe to {nprobe}")
index_ivf.nprobe = nprobe
return index.search(xq, self.k)
def _refine_distances(self, xq_ds, idx, xb_ds, I):
xq = xq_ds.get(idx).repeat(self.k, axis=0)
xb = xb_ds.get(I.reshape(-1))
if self.metric == faiss.METRIC_INNER_PRODUCT:
return (xq * xb).sum(axis=1).reshape(I.shape)
elif self.metric == faiss.METRIC_L2:
return ((xq - xb) ** 2).sum(axis=1).reshape(I.shape)
else:
raise ValueError(f"metric not supported {self.metric}")
def evaluate(self):
self._evaluate(
self.index_factory_fn,
self.index_file,
self.xq_index_file,
self.nprobe,
)
def _evaluate(self, index_factory_fn, index_file, xq_index_file, nprobe):
idx_a_file = f"{self.eval_dir}/idx_a.npy"
idx_b_gt_file = f"{self.eval_dir}/idx_b_gt.npy"
idx_b_ann_file = (
f"{self.eval_dir}/idx_b_ann_{index_factory_fn}_np{nprobe}.npy"
)
vecs_a_file = f"{self.eval_dir}/vecs_a.npy"
vecs_b_gt_file = f"{self.eval_dir}/vecs_b_gt.npy"
vecs_b_ann_file = (
f"{self.eval_dir}/vecs_b_ann_{index_factory_fn}_np{nprobe}.npy"
)
D_a_gt_file = f"{self.eval_dir}/D_a_gt.npy"
D_a_ann_file = (
f"{self.eval_dir}/D_a_ann_{index_factory_fn}_np{nprobe}.npy"
)
D_a_ann_refined_file = f"{self.eval_dir}/D_a_ann_refined_{index_factory_fn}_np{nprobe}.npy"
D_b_gt_file = f"{self.eval_dir}/D_b_gt.npy"
D_b_ann_file = (
f"{self.eval_dir}/D_b_ann_{index_factory_fn}_np{nprobe}.npy"
)
D_b_ann_gt_file = (
f"{self.eval_dir}/D_b_ann_gt_{index_factory_fn}_np{nprobe}.npy"
)
I_a_gt_file = f"{self.eval_dir}/I_a_gt.npy"
I_a_ann_file = (
f"{self.eval_dir}/I_a_ann_{index_factory_fn}_np{nprobe}.npy"
)
I_b_gt_file = f"{self.eval_dir}/I_b_gt.npy"
I_b_ann_file = (
f"{self.eval_dir}/I_b_ann_{index_factory_fn}_np{nprobe}.npy"
)
I_b_ann_gt_file = (
f"{self.eval_dir}/I_b_ann_gt_{index_factory_fn}_np{nprobe}.npy"
)
margin_gt_file = f"{self.eval_dir}/margin_gt.npy"
margin_refined_file = (
f"{self.eval_dir}/margin_refined_{index_factory_fn}_np{nprobe}.npy"
)
margin_ann_file = (
f"{self.eval_dir}/margin_ann_{index_factory_fn}_np{nprobe}.npy"
)
logging.info("exact search forward")
# xq -> xb AKA a -> b
D_a_gt, I_a_gt = self._cached_search(
self.evaluation_sample,
self.xq_ds,
self.xb_ds,
idx_a_file,
vecs_a_file,
I_a_gt_file,
D_a_gt_file,
)
idx_a = np.load(idx_a_file)
logging.info("approximate search forward")
D_a_ann, I_a_ann = self._cached_search(
self.evaluation_sample,
self.xq_ds,
self.xb_ds,
idx_a_file,
vecs_a_file,
I_a_ann_file,
D_a_ann_file,
index_file,
nprobe,
)
logging.info(
"calculate refined distances on approximate search forward"
)
if os.path.exists(D_a_ann_refined_file):
D_a_ann_refined = np.load(D_a_ann_refined_file)
assert D_a_ann.shape == D_a_ann_refined.shape
else:
D_a_ann_refined = self._refine_distances(
self.xq_ds, idx_a, self.xb_ds, I_a_ann
)
np.save(D_a_ann_refined_file, D_a_ann_refined)
if self.evaluate_by_margin:
k_extract = self.k
margin_threshold = 1.05
logging.info(
"exact search backward from the k_extract NN results of"
" forward search"
)
# xb -> xq AKA b -> a
D_a_b_gt = D_a_gt[:, :k_extract].ravel()
idx_b_gt = I_a_gt[:, :k_extract].ravel()
assert len(idx_b_gt) == self.evaluation_sample * k_extract
np.save(idx_b_gt_file, idx_b_gt)
# exact search
D_b_gt, _ = self._cached_search(
len(idx_b_gt),
self.xb_ds,
self.xq_ds,
idx_b_gt_file,
vecs_b_gt_file,
I_b_gt_file,
D_b_gt_file,
) # xb and xq ^^^ are inverted
logging.info("margin on exact search")
margin_gt = margin(
self.evaluation_sample,
idx_a,
idx_b_gt,
D_a_b_gt,
D_a_gt,
D_b_gt,
self.k,
k_extract,
margin_threshold,
)
np.save(margin_gt_file, margin_gt)
logging.info(
"exact search backward from the k_extract NN results of"
" approximate forward search"
)
D_a_b_refined = D_a_ann_refined[:, :k_extract].ravel()
idx_b_ann = I_a_ann[:, :k_extract].ravel()
assert len(idx_b_ann) == self.evaluation_sample * k_extract
np.save(idx_b_ann_file, idx_b_ann)
# exact search
D_b_ann_gt, _ = self._cached_search(
len(idx_b_ann),
self.xb_ds,
self.xq_ds,
idx_b_ann_file,
vecs_b_ann_file,
I_b_ann_gt_file,
D_b_ann_gt_file,
) # xb and xq ^^^ are inverted
logging.info("refined margin on approximate search")
margin_refined = margin(
self.evaluation_sample,
idx_a,
idx_b_ann,
D_a_b_refined,
D_a_gt, # not D_a_ann_refined(!)
D_b_ann_gt,
self.k,
k_extract,
margin_threshold,
)
np.save(margin_refined_file, margin_refined)
D_b_ann, I_b_ann = self._cached_search(
len(idx_b_ann),
self.xb_ds,
self.xq_ds,
idx_b_ann_file,
vecs_b_ann_file,
I_b_ann_file,
D_b_ann_file,
xq_index_file,
nprobe,
)
D_a_b_ann = D_a_ann[:, :k_extract].ravel()
logging.info("approximate search margin")
margin_ann = margin(
self.evaluation_sample,
idx_a,
idx_b_ann,
D_a_b_ann,
D_a_ann,
D_b_ann,
self.k,
k_extract,
margin_threshold,
)
np.save(margin_ann_file, margin_ann)
logging.info("intersection")
logging.info(I_a_gt)
logging.info(I_a_ann)
for i in range(1, self.k + 1):
logging.info(
f"{i}: {knn_intersection_measure(I_a_gt[:,:i], I_a_ann[:,:i])}"
)
logging.info(f"mean of gt distances: {D_a_gt.mean()}")
logging.info(f"mean of approx distances: {D_a_ann.mean()}")
logging.info(f"mean of refined distances: {D_a_ann_refined.mean()}")
logging.info("intersection cardinality frequencies")
logging.info(get_intersection_cardinality_frequencies(I_a_ann, I_a_gt))
logging.info("done")
def _knn_function(self, xq, xb, k, metric, thread_id=None):
try:
return faiss.knn_gpu(
self.all_gpu_resources[thread_id],
xq,
xb,
k,
metric=metric,
device=thread_id,
vectorsMemoryLimit=self.knn_vectors_memory_limit,
queriesMemoryLimit=self.knn_queries_memory_limit,
)
except Exception:
logging.info(f"knn_function failed: {xq.shape}, {xb.shape}")
raise
def _coarse_quantize(self, index_ivf, xq, nprobe):
assert nprobe <= index_ivf.quantizer.ntotal
quantizer = faiss.index_cpu_to_all_gpus(index_ivf.quantizer)
bs = 100_000
nq = len(xq)
q_assign = np.empty((nq, nprobe), dtype="int32")
for i0 in trange(0, nq, bs):
i1 = min(nq, i0 + bs)
_, q_assign_i = quantizer.search(xq[i0:i1], nprobe)
q_assign[i0:i1] = q_assign_i
return q_assign
def search(self):
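# For each query batch: read and (if the index has a pretransform) transform the queries,
# coarse-quantize them on GPU to obtain their nprobe list assignments, then run
# big_batch_search over the sharded inverted lists with a GPU knn function, checkpointing
# to CPfn and saving results to Ifn (neighbour ids) and Dfn (distances).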
logging.info(f"search: {self.knn_dir}")
slurm_job_id = os.environ.get("SLURM_JOB_ID")
ngpu = faiss.get_num_gpus()
logging.info(f"number of gpus: {ngpu}")
self.all_gpu_resources = [
faiss.StandardGpuResources() for _ in range(ngpu)
]
self._knn_function(
np.zeros((10, 10), dtype=np.float16),
np.zeros((10, 10), dtype=np.float16),
self.k,
metric=self.metric,
thread_id=0,
)
index = self._open_sharded_index()
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
logging.info(f"setting nprobe to {self.nprobe}")
index_ivf.nprobe = self.nprobe
# quantizer = faiss.index_cpu_to_all_gpus(index_ivf.quantizer)
for i in range(0, self.xq_ds.size, self.xq_bs):
Ifn = f"{self.knn_dir}/I{(i):010}_{self.knn_output_file_suffix}"
Dfn = f"{self.knn_dir}/D_approx{(i):010}_{self.knn_output_file_suffix}"
CPfn = f"{self.knn_dir}/CP{(i):010}_{self.knn_output_file_suffix}"
if slurm_job_id:
worker_record = (
self.knn_dir
+ f"/record_{(i):010}_{self.knn_output_file_suffix}.txt"
)
if not os.path.exists(worker_record):
logging.info(
f"creating record file {worker_record} and saving job"
f" id: {slurm_job_id}"
)
with open(worker_record, "w") as h:
h.write(slurm_job_id)
else:
old_slurm_id = open(worker_record, "r").read()
logging.info(
f"old job slurm id {old_slurm_id} and current job id:"
f" {slurm_job_id}"
)
if old_slurm_id == slurm_job_id:
if os.path.getsize(Ifn) == 0:
logging.info(
f"cleaning up zero length files {Ifn} and"
f" {Dfn}"
)
os.remove(Ifn)
os.remove(Dfn)
try:
if is_pretransform_index(index):
d = index.chain.at(0).d_out
else:
d = self.input_d
with open(Ifn, "xb") as f, open(Dfn, "xb") as g:
xq_i = np.empty(
shape=(self.xq_bs, d), dtype=np.float16
)
q_assign = np.empty(
(self.xq_bs, self.nprobe), dtype=np.int32
)
j = 0
quantizer = faiss.index_cpu_to_all_gpus(
index_ivf.quantizer
)
for xq_i_j in tqdm(
self._iterate_transformed(
self.xq_ds, i, min(100_000, self.xq_bs), np.float16
),
file=sys.stdout,
):
xq_i[j:j + xq_i_j.shape[0]] = xq_i_j
(
_,
q_assign[j:j + xq_i_j.shape[0]],
) = quantizer.search(xq_i_j, self.nprobe)
j += xq_i_j.shape[0]
assert j <= xq_i.shape[0]
if j == xq_i.shape[0]:
break
xq_i = xq_i[:j]
q_assign = q_assign[:j]
assert q_assign.shape == (xq_i.shape[0], index_ivf.nprobe)
del quantizer
logging.info(f"computing: {Ifn}")
logging.info(f"computing: {Dfn}")
prefetch_threads = faiss.get_num_gpus()
D_ann, I = big_batch_search(
index_ivf,
xq_i,
self.k,
verbose=10,
method="knn_function",
knn=self._knn_function,
threaded=faiss.get_num_gpus() * 8,
use_float16=True,
prefetch_threads=prefetch_threads,
computation_threads=faiss.get_num_gpus(),
q_assign=q_assign,
checkpoint=CPfn,
checkpoint_freq=7200, # in seconds
)
assert (
np.amin(I) >= 0
), f"{I}, there exists negative indices, check"
logging.info(f"saving: {Ifn}")
np.save(f, I)
logging.info(f"saving: {Dfn}")
np.save(g, D_ann)
if os.path.exists(CPfn):
logging.info(f"removing: {CPfn}")
os.remove(CPfn)
except FileExistsError:
logging.info(f"skipping {Ifn}, already exists")
logging.info(f"skipping {Dfn}, already exists")
continue
def _open_index_shard(self, fn):
if fn in self.index_shards:
index_shard = self.index_shards[fn]
else:
logging.info(f"open index shard: {fn}")
index_shard = faiss.read_index(
fn, faiss.IO_FLAG_MMAP | faiss.IO_FLAG_READ_ONLY
)
self.index_shards[fn] = index_shard
return index_shard
def _open_sharded_index(self, index_shard_prefix=None):
if index_shard_prefix is None:
index_shard_prefix = self.index_shard_prefix
if index_shard_prefix in self.index:
return self.index[index_shard_prefix]
assert os.path.exists(
self.index_template_file
), f"file {self.index_template_file} does not exist "
logging.info(f"open index template: {self.index_template_file}")
index = faiss.read_index(self.index_template_file)
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
ilv = faiss.InvertedListsPtrVector()
for i in range(self.nshards):
fn = f"{index_shard_prefix}{i}"
assert os.path.exists(fn), f"file {fn} does not exist "
logging.info(fn)
index_shard = self._open_index_shard(fn)
il = faiss.downcast_index(
faiss.extract_index_ivf(index_shard)
).invlists
ilv.push_back(il)
hsil = faiss.HStackInvertedLists(ilv.size(), ilv.data())
index_ivf.replace_invlists(hsil, False)
self.ivls[index_shard_prefix] = hsil
self.index[index_shard_prefix] = index
return index
def index_shard_stats(self):
for i in range(self.nshards):
fn = f"{self.index_shard_prefix}{i}"
assert os.path.exists(fn)
index = faiss.read_index(
fn, faiss.IO_FLAG_MMAP | faiss.IO_FLAG_READ_ONLY
)
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
il = index_ivf.invlists
il.print_stats()
def index_stats(self):
index = self._open_sharded_index()
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
il = index_ivf.invlists
list_sizes = [il.list_size(i) for i in range(il.nlist)]
logging.info(np.max(list_sizes))
logging.info(np.mean(list_sizes))
logging.info(np.argmax(list_sizes))
logging.info("index_stats:")
il.print_stats()
def consistency_check(self):
logging.info("consistency-check")
logging.info("index template...")
assert os.path.exists(self.index_template_file)
index = faiss.read_index(self.index_template_file)
offset = 0 # 2**24
assert self.shard_size > offset + SMALL_DATA_SAMPLE
logging.info("index shards...")
for i in range(self.nshards):
r = i * self.shard_size + offset
xb = next(self.xb_ds.iterate(r, SMALL_DATA_SAMPLE, np.float32))
fn = f"{self.index_shard_prefix}{i}"
assert os.path.exists(fn), f"There is no index shard file {fn}"
index = self._open_index_shard(fn)
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
index_ivf.nprobe = 1
_, I = index.search(xb, 100)
for j in range(SMALL_DATA_SAMPLE):
assert np.where(I[j] == j + r)[0].size > 0, (
f"I[j]: {I[j]}, j: {j}, i: {i}, shard_size:"
f" {self.shard_size}"
)
logging.info("merged index...")
index = self._open_sharded_index()
index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
index_ivf.nprobe = 1
for i in range(self.nshards):
r = i * self.shard_size + offset
xb = next(self.xb_ds.iterate(r, SMALL_DATA_SAMPLE, np.float32))
_, I = index.search(xb, 100)
for j in range(SMALL_DATA_SAMPLE):
assert np.where(I[j] == j + r)[0].size > 0, (
f"I[j]: {I[j]}, j: {j}, i: {i}, shard_size:"
f" {self.shard_size}")
logging.info("search results...")
index_ivf.nprobe = self.nprobe
for i in range(0, self.xq_ds.size, self.xq_bs):
Ifn = f"{self.knn_dir}/I{i:010}_{self.index_factory_fn}_np{self.nprobe}.npy"
assert os.path.exists(Ifn)
assert os.path.getsize(Ifn) > 0, f"The file {Ifn} is empty."
logging.info(Ifn)
I = np.load(Ifn, mmap_mode="r")
assert I.shape[1] == self.k
assert I.shape[0] == min(self.xq_bs, self.xq_ds.size - i)
assert np.all(I[:, 1] >= 0)
Dfn = f"{self.knn_dir}/D_approx{i:010}_{self.index_factory_fn}_np{self.nprobe}.npy"
assert os.path.exists(Dfn)
assert os.path.getsize(Dfn) > 0, f"The file {Dfn} is empty."
logging.info(Dfn)
D = np.load(Dfn, mmap_mode="r")
assert D.shape == I.shape
xq = next(self.xq_ds.iterate(i, SMALL_DATA_SAMPLE, np.float32))
D_online, I_online = index.search(xq, self.k)
assert (
np.where(I[:SMALL_DATA_SAMPLE] == I_online)[0].size
/ (self.k * SMALL_DATA_SAMPLE)
> 0.95
), (
"the ratio is"
f" {np.where(I[:SMALL_DATA_SAMPLE] == I_online)[0].size / (self.k * SMALL_DATA_SAMPLE)}"
)
assert np.allclose(
D[:SMALL_DATA_SAMPLE].sum(axis=1),
D_online.sum(axis=1),
rtol=0.01,
), (
"the difference is"
f" {D[:SMALL_DATA_SAMPLE].sum(axis=1), D_online.sum(axis=1)}"
)
logging.info("done")

View File

@@ -0,0 +1,219 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse
from utils import (
load_config,
add_group_args,
)
from offline_ivf import OfflineIVF
import faiss
from typing import List, Callable, Dict
import submitit
def join_lists_in_dict(poss: Dict[str, List[str]]) -> List[str]:
"""
Joins two lists of prod and non-prod values, checking if the prod value is already included.
If there is no non-prod list, it returns the prod list.
"""
if "non-prod" in poss.keys():
all_poss = poss["non-prod"]
if poss["prod"][-1] not in poss["non-prod"]:
all_poss += poss["prod"]
return all_poss
else:
return poss["prod"]
def main(
args: argparse.Namespace,
cfg: Dict[str, str],
nprobe: int,
index_factory_str: str,
) -> None:
oivf = OfflineIVF(cfg, args, nprobe, index_factory_str)
eval(f"oivf.{args.command}()")
def process_options_and_run_jobs(args: argparse.Namespace) -> None:
"""
If "--cluster_run", it launches an array of jobs to the cluster using the submitit library for all the index strings. In
the case of evaluate, it launches a job for each index string and nprobe pair. Otherwise, it launches a single job
that is run locally with the prod values for index string and nprobe.
"""
cfg = load_config(args.config)
index_strings = cfg["index"]
nprobes = cfg["nprobe"]
if args.command == "evaluate":
if args.cluster_run:
all_nprobes = join_lists_in_dict(nprobes)
all_index_strings = join_lists_in_dict(index_strings)
for index_factory_str in all_index_strings:
for nprobe in all_nprobes:
launch_job(main, args, cfg, nprobe, index_factory_str)
else:
launch_job(
main, args, cfg, nprobes["prod"][-1], index_strings["prod"][-1]
)
else:
if args.cluster_run:
all_index_strings = join_lists_in_dict(index_strings)
for index_factory_str in all_index_strings:
launch_job(
main, args, cfg, nprobes["prod"][-1], index_factory_str
)
else:
launch_job(
main, args, cfg, nprobes["prod"][-1], index_strings["prod"][-1]
)
def launch_job(
func: Callable,
args: argparse.Namespace,
cfg: Dict[str, str],
n_probe: int,
index_str: str,
) -> None:
"""
Launches an array of slurm jobs to the cluster using the submitit library.
"""
if args.cluster_run:
assert args.num_nodes >= 1
executor = submitit.AutoExecutor(folder=args.logs_dir)
executor.update_parameters(
nodes=args.num_nodes,
gpus_per_node=args.gpus_per_node,
cpus_per_task=args.cpus_per_task,
tasks_per_node=args.tasks_per_node,
name=args.job_name,
slurm_partition=args.partition,
slurm_time=70 * 60,
)
if args.slurm_constraint:
executor.update_parameters(slurm_constraint=args.slurm_constraint)
job = executor.submit(func, args, cfg, n_probe, index_str)
print(f"Job id: {job.job_id}")
else:
func(args, cfg, n_probe, index_str)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
group = parser.add_argument_group("general")
add_group_args(group, "--command", required=True, help="command to run")
add_group_args(
group,
"--config",
required=True,
help="config yaml with the dataset specs",
)
add_group_args(
group, "--nt", type=int, default=96, help="nb search threads"
)
add_group_args(
group,
"--no_residuals",
action="store_false",
help="set index.by_residual to False during train index.",
)
group = parser.add_argument_group("slurm_job")
add_group_args(
group,
"--cluster_run",
action="store_true",
help=" if True, runs in cluster",
)
add_group_args(
group,
"--job_name",
type=str,
default="oivf",
help="cluster job name",
)
add_group_args(
group,
"--num_nodes",
type=int,
default=1,
help="num of nodes per job",
)
add_group_args(
group,
"--tasks_per_node",
type=int,
default=1,
help="tasks per job",
)
add_group_args(
group,
"--gpus_per_node",
type=int,
default=8,
help="cluster job name",
)
add_group_args(
group,
"--cpus_per_task",
type=int,
default=80,
help="cluster job name",
)
add_group_args(
group,
"--logs_dir",
type=str,
default="/checkpoint/marialomeli/offline_faiss/logs",
help="cluster job name",
)
add_group_args(
group,
"--slurm_constraint",
type=str,
default=None,
help="can be volta32gb for the fair cluster",
)
add_group_args(
group,
"--partition",
type=str,
default="learnlab",
help="specify which partition to use if ran on cluster with job arrays",
choices=[
"learnfair",
"devlab",
"scavenge",
"learnlab",
"nllb",
"seamless",
"seamless_medium",
"learnaccel",
"onellm_low",
"learn",
"scavenge",
],
)
group = parser.add_argument_group("dataset")
add_group_args(group, "--xb", required=True, help="database vectors")
add_group_args(group, "--xq", help="query vectors")
args = parser.parse_args()
print("args:", args)
faiss.omp_set_num_threads(args.nt)
process_options_and_run_jobs(args=args)

View File

@@ -0,0 +1,181 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import yaml
import numpy as np
from typing import Dict, List, Optional
OIVF_TEST_ARGS: List[str] = [
"--config",
"--xb",
"--xq",
"--command",
"--cluster_run",
"--no_residuals",
]
def get_test_parser(args) -> argparse.ArgumentParser:
parser = argparse.ArgumentParser()
for arg in args:
parser.add_argument(arg)
return parser
class TestDataCreator:
def __init__(
self,
tempdir: str,
dimension: int,
data_type: np.dtype,
index_factory: Optional[List] = ["OPQ4,IVF256,PQ4"],
training_sample: Optional[int] = 9984,
index_shard_size: Optional[int] = 1000,
query_batch_size: Optional[int] = 1000,
evaluation_sample: Optional[int] = 100,
num_files: Optional[int] = None,
file_size: Optional[int] = None,
file_sizes: Optional[List] = None,
nprobe: Optional[int] = 64,
k: Optional[int] = 10,
metric: Optional[str] = "METRIC_L2",
normalise: Optional[bool] = False,
with_queries_ds: Optional[bool] = False,
evaluate_by_margin: Optional[bool] = False,
) -> None:
self.tempdir = tempdir
self.dimension = dimension
self.data_type = np.dtype(data_type).name
self.index_factory = {"prod": index_factory}
if file_size and num_files:
self.file_sizes = [file_size for _ in range(num_files)]
elif file_sizes:
self.file_sizes = file_sizes
else:
raise ValueError("no file sizes provided")
self.num_files = len(self.file_sizes)
self.training_sample = training_sample
self.index_shard_size = index_shard_size
self.query_batch_size = query_batch_size
self.evaluation_sample = evaluation_sample
self.nprobe = {"prod": [nprobe]}
self.k = k
self.metric = metric
self.normalise = normalise
self.config_file = self.tempdir + "/config_test.yaml"
self.ds_name = "my_test_data"
self.qs_name = "my_queries_data"
self.evaluate_by_margin = evaluate_by_margin
self.with_queries_ds = with_queries_ds
def create_test_data(self) -> None:
datafiles = self._create_data_files()
files_info = []
for i, file in enumerate(datafiles):
files_info.append(
{
"dtype": self.data_type,
"format": "npy",
"name": file,
"size": self.file_sizes[i],
}
)
config_for_yaml = {
"d": self.dimension,
"output": self.tempdir,
"index": self.index_factory,
"nprobe": self.nprobe,
"k": self.k,
"normalise": self.normalise,
"metric": self.metric,
"training_sample": self.training_sample,
"evaluation_sample": self.evaluation_sample,
"index_shard_size": self.index_shard_size,
"query_batch_size": self.query_batch_size,
"datasets": {
self.ds_name: {
"root": self.tempdir,
"size": sum(self.file_sizes),
"files": files_info,
}
},
}
if self.evaluate_by_margin:
config_for_yaml["evaluate_by_margin"] = self.evaluate_by_margin
q_datafiles = self._create_data_files("my_q_data")
q_files_info = []
for i, file in enumerate(q_datafiles):
q_files_info.append(
{
"dtype": self.data_type,
"format": "npy",
"name": file,
"size": self.file_sizes[i],
}
)
if self.with_queries_ds:
config_for_yaml["datasets"][self.qs_name] = {
"root": self.tempdir,
"size": sum(self.file_sizes),
"files": q_files_info,
}
self._create_config_yaml(config_for_yaml)
def setup_cli(self, command="consistency_check") -> argparse.Namespace:
parser = get_test_parser(OIVF_TEST_ARGS)
if self.with_queries_ds:
return parser.parse_args(
[
"--xb",
self.ds_name,
"--config",
self.config_file,
"--command",
command,
"--xq",
self.qs_name,
]
)
return parser.parse_args(
[
"--xb",
self.ds_name,
"--config",
self.config_file,
"--command",
command,
]
)
def _create_data_files(self, name_of_file="my_data") -> List[str]:
"""
Creates a dataset "my_test_data" with number of files (num_files), using padding in the files
name. If self.with_queries is True, it adds an extra dataset "my_queries_data" with the same number of files
as the "my_test_data". The default name for embeddings files is "my_data" + <padding>.npy.
"""
filenames = []
for i, file_size in enumerate(self.file_sizes):
# np.random.seed(i)
db_vectors = np.random.random((file_size, self.dimension)).astype(
self.data_type
)
filename = name_of_file + f"{i:02}" + ".npy"
filenames.append(filename)
np.save(self.tempdir + "/" + filename, db_vectors)
return filenames
def _create_config_yaml(self, dict_file: Dict[str, str]) -> None:
"""
Creates a yaml file in dir (can be a temporary dir for tests).
"""
filename = self.tempdir + "/config_test.yaml"
with open(filename, "w") as file:
yaml.dump(dict_file, file, default_flow_style=False)

View File

@@ -0,0 +1,95 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import os
from typing import Dict
import yaml
import faiss
from faiss.contrib.datasets import SyntheticDataset
def load_config(config):
assert os.path.exists(config)
with open(config, "r") as f:
return yaml.safe_load(f)
def faiss_sanity_check():
ds = SyntheticDataset(256, 0, 100, 100)
xq = ds.get_queries()
xb = ds.get_database()
index_cpu = faiss.IndexFlat(ds.d)
index_gpu = faiss.index_cpu_to_all_gpus(index_cpu)
index_cpu.add(xb)
index_gpu.add(xb)
D_cpu, I_cpu = index_cpu.search(xq, 10)
D_gpu, I_gpu = index_gpu.search(xq, 10)
assert np.all(I_cpu == I_gpu), "faiss sanity check failed"
assert np.all(np.isclose(D_cpu, D_gpu)), "faiss sanity check failed"
def margin(sample, idx_a, idx_b, D_a_b, D_a, D_b, k, k_extract, threshold):
"""
two datasets: xa, xb; n = number of pairs
idx_a - (np,) - query vector ids in xa
idx_b - (np,) - query vector ids in xb
D_a_b - (np,) - pairwise distances between xa[idx_a] and xb[idx_b]
D_a - (np, k) - distances between vectors xa[idx_a] and corresponding nearest neighbours in xb
D_b - (np, k) - distances between vectors xb[idx_b] and corresponding nearest neighbours in xa
k - k nearest neighbours used for margin
k_extract - number of nearest neighbours of each query in xb we consider for margin calculation and filtering
threshold - margin threshold
"""
n = sample
nk = n * k_extract
assert idx_a.shape == (n,)
idx_a_k = idx_a.repeat(k_extract)
assert idx_a_k.shape == (nk,)
assert idx_b.shape == (nk,)
assert D_a_b.shape == (nk,)
assert D_a.shape == (n, k)
assert D_b.shape == (nk, k)
mean_a = np.mean(D_a, axis=1)
assert mean_a.shape == (n,)
mean_a_k = mean_a.repeat(k_extract)
assert mean_a_k.shape == (nk,)
mean_b = np.mean(D_b, axis=1)
assert mean_b.shape == (nk,)
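# margin score: D_a_b divided by the average of the mean k-NN distances of the two sides,
# i.e. 2 * D_a_b / (mean_a_k + mean_b); pairs scoring above `threshold` are printed below.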
margin = 2 * D_a_b / (mean_a_k + mean_b)
above_threshold = margin > threshold
print(np.count_nonzero(above_threshold))
print(idx_a_k[above_threshold])
print(idx_b[above_threshold])
print(margin[above_threshold])
return margin
def add_group_args(group, *args, **kwargs):
return group.add_argument(*args, **kwargs)
def get_intersection_cardinality_frequencies(
I: np.ndarray, I_gt: np.ndarray
) -> Dict[int, int]:
"""
Computes the frequencies for the cardinalities of the intersection of neighbour indices.
"""
nq = I.shape[0]
res = []
for ell in range(nq):
res.append(len(np.intersect1d(I[ell, :], I_gt[ell, :])))
values, counts = np.unique(res, return_counts=True)
return dict(zip(values, counts))
def is_pretransform_index(index):
if index.__class__ == faiss.IndexPreTransform:
assert hasattr(index, "chain")
return True
else:
assert not hasattr(index, "chain")
return False

View File

@@ -0,0 +1,13 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
project (ROCKSDB_IVF)
set(CMAKE_BUILD_TYPE Debug)
find_package(faiss REQUIRED)
find_package(RocksDB REQUIRED)
add_executable(demo_rocksdb_ivf demo_rocksdb_ivf.cpp RocksDBInvertedLists.cpp)
target_link_libraries(demo_rocksdb_ivf faiss RocksDB::rocksdb)

View File

@@ -0,0 +1,23 @@
# Storing Faiss inverted lists in RocksDB
Demo of storing the inverted lists of any IVF index in RocksDB or any similar key-value store which supports the prefix scan operation.
# How to build
We use conda to create the build environment for simplicity. Only tested on Linux x86.
```
conda create -n rocksdb_ivf
conda activate rocksdb_ivf
conda install pytorch::faiss-cpu conda-forge::rocksdb cmake make gxx_linux-64 sysroot_linux-64
cd ~/faiss/demos/rocksdb_ivf
cmake -B build .
make -C build -j$(nproc)
```
# Run the example
```
cd ~/faiss/demos/rocksdb_ivf/build
./demo_rocksdb_ivf test_db
```

View File

@@ -0,0 +1,114 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "RocksDBInvertedLists.h"
#include <faiss/impl/FaissAssert.h>
using namespace faiss;
namespace faiss_rocksdb {
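// Key layout in RocksDB: [size_t list_no][idx_t id] -> value: the code_size bytes of the
// vector's code. Iterating over one inverted list is a prefix scan over keys that start
// with list_no.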
RocksDBInvertedListsIterator::RocksDBInvertedListsIterator(
rocksdb::DB* db,
size_t list_no,
size_t code_size)
: InvertedListsIterator(),
it(db->NewIterator(rocksdb::ReadOptions())),
list_no(list_no),
code_size(code_size),
codes(code_size) {
it->Seek(rocksdb::Slice(
reinterpret_cast<const char*>(&list_no), sizeof(size_t)));
}
bool RocksDBInvertedListsIterator::is_available() const {
return it->Valid() &&
it->key().starts_with(rocksdb::Slice(
reinterpret_cast<const char*>(&list_no), sizeof(size_t)));
}
void RocksDBInvertedListsIterator::next() {
it->Next();
}
std::pair<idx_t, const uint8_t*> RocksDBInvertedListsIterator::
get_id_and_codes() {
idx_t id =
*reinterpret_cast<const idx_t*>(&it->key().data()[sizeof(size_t)]);
assert(code_size == it->value().size());
return {id, reinterpret_cast<const uint8_t*>(it->value().data())};
}
RocksDBInvertedLists::RocksDBInvertedLists(
const char* db_directory,
size_t nlist,
size_t code_size)
: InvertedLists(nlist, code_size) {
use_iterator = true;
rocksdb::Options options;
options.create_if_missing = true;
rocksdb::DB* db;
rocksdb::Status status = rocksdb::DB::Open(options, db_directory, &db);
db_ = std::unique_ptr<rocksdb::DB>(db);
assert(status.ok());
}
size_t RocksDBInvertedLists::list_size(size_t /*list_no*/) const {
FAISS_THROW_MSG("list_size is not supported");
}
const uint8_t* RocksDBInvertedLists::get_codes(size_t /*list_no*/) const {
FAISS_THROW_MSG("get_codes is not supported");
}
const idx_t* RocksDBInvertedLists::get_ids(size_t /*list_no*/) const {
FAISS_THROW_MSG("get_ids is not supported");
}
size_t RocksDBInvertedLists::add_entries(
size_t list_no,
size_t n_entry,
const idx_t* ids,
const uint8_t* code) {
rocksdb::WriteOptions wo;
std::vector<char> key(sizeof(size_t) + sizeof(idx_t));
memcpy(key.data(), &list_no, sizeof(size_t));
for (size_t i = 0; i < n_entry; i++) {
memcpy(key.data() + sizeof(size_t), ids + i, sizeof(idx_t));
rocksdb::Status status = db_->Put(
wo,
rocksdb::Slice(key.data(), key.size()),
rocksdb::Slice(
reinterpret_cast<const char*>(code + i * code_size),
code_size));
assert(status.ok());
}
return 0; // ignored
}
void RocksDBInvertedLists::update_entries(
size_t /*list_no*/,
size_t /*offset*/,
size_t /*n_entry*/,
const idx_t* /*ids*/,
const uint8_t* /*code*/) {
FAISS_THROW_MSG("update_entries is not supported");
}
void RocksDBInvertedLists::resize(size_t /*list_no*/, size_t /*new_size*/) {
FAISS_THROW_MSG("resize is not supported");
}
InvertedListsIterator* RocksDBInvertedLists::get_iterator(
size_t list_no,
void* inverted_list_context) const {
return new RocksDBInvertedListsIterator(db_.get(), list_no, code_size);
}
} // namespace faiss_rocksdb

View File

@@ -0,0 +1,67 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <faiss/invlists/InvertedLists.h>
#include <rocksdb/db.h>
namespace faiss_rocksdb {
struct RocksDBInvertedListsIterator : faiss::InvertedListsIterator {
RocksDBInvertedListsIterator(
rocksdb::DB* db,
size_t list_no,
size_t code_size);
virtual bool is_available() const override;
virtual void next() override;
virtual std::pair<faiss::idx_t, const uint8_t*> get_id_and_codes() override;
private:
std::unique_ptr<rocksdb::Iterator> it;
size_t list_no;
size_t code_size;
std::vector<uint8_t> codes; // buffer for returning codes in next()
};
struct RocksDBInvertedLists : faiss::InvertedLists {
RocksDBInvertedLists(
const char* db_directory,
size_t nlist,
size_t code_size);
size_t list_size(size_t list_no) const override;
const uint8_t* get_codes(size_t list_no) const override;
const faiss::idx_t* get_ids(size_t list_no) const override;
size_t add_entries(
size_t list_no,
size_t n_entry,
const faiss::idx_t* ids,
const uint8_t* code) override;
void update_entries(
size_t list_no,
size_t offset,
size_t n_entry,
const faiss::idx_t* ids,
const uint8_t* code) override;
void resize(size_t list_no, size_t new_size) override;
faiss::InvertedListsIterator* get_iterator(
size_t list_no,
void* inverted_list_context) const override;
private:
std::unique_ptr<rocksdb::DB> db_;
};
} // namespace faiss_rocksdb

View File

@@ -0,0 +1,88 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <exception>
#include <iostream>
#include <memory>
#include "RocksDBInvertedLists.h"
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissException.h>
#include <faiss/utils/random.h>
using namespace faiss;
int main(int argc, char* argv[]) {
try {
if (argc != 2) {
std::cerr << "missing db directory argument" << std::endl;
return -1;
}
size_t d = 128;
size_t nlist = 100;
IndexFlatL2 quantizer(d);
IndexIVFFlat index(&quantizer, d, nlist);
faiss_rocksdb::RocksDBInvertedLists ril(
argv[1], nlist, index.code_size);
index.replace_invlists(&ril, false);
idx_t nb = 10000;
std::vector<float> xb(d * nb);
float_rand(xb.data(), d * nb, 12345);
std::vector<idx_t> xids(nb);
std::iota(xids.begin(), xids.end(), 0);
index.train(nb, xb.data());
index.add_with_ids(nb, xb.data(), xids.data());
idx_t nq = 20; // nb;
index.nprobe = 2;
std::cout << "search" << std::endl;
idx_t k = 5;
std::vector<float> distances(nq * k);
std::vector<idx_t> labels(nq * k, -1);
index.search(
nq, xb.data(), k, distances.data(), labels.data(), nullptr);
for (idx_t iq = 0; iq < nq; iq++) {
std::cout << iq << ": ";
for (auto j = 0; j < k; j++) {
std::cout << labels[iq * k + j] << " " << distances[iq * k + j]
<< " | ";
}
std::cout << std::endl;
}
std::cout << std::endl << "range search" << std::endl;
float range = 15.0f;
RangeSearchResult result(nq);
index.range_search(nq, xb.data(), range, &result);
for (idx_t iq = 0; iq < nq; iq++) {
std::cout << iq << ": ";
for (auto j = result.lims[iq]; j < result.lims[iq + 1]; j++) {
std::cout << result.labels[j] << " " << result.distances[j]
<< " | ";
}
std::cout << std::endl;
}
} catch (FaissException& e) {
std::cerr << e.what() << '\n';
} catch (std::exception& e) {
std::cerr << e.what() << '\n';
} catch (...) {
std::cerr << "Unrecognized exception!\n";
}
return 0;
}