Initial commit

This commit is contained in:
yichuan520030910320
2025-06-30 09:05:05 +00:00
commit 46f6cc100b
1231 changed files with 278432 additions and 0 deletions

View File

@@ -0,0 +1,92 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Unit-test sources compiled into the single faiss_test binary.
# Listed explicitly (no file(GLOB)) so that adding a test shows up in diffs
# and reliably triggers a reconfigure.
set(FAISS_TEST_SRC
  test_binary_flat.cpp
  test_dealloc_invlists.cpp
  test_ivfpq_codec.cpp
  test_ivfpq_indexing.cpp
  test_lowlevel_ivf.cpp
  test_ivf_index.cpp
  test_merge.cpp
  test_omp_threads.cpp
  test_ondisk_ivf.cpp
  test_pairs_decoding.cpp
  test_params_override.cpp
  test_pq_encoding.cpp
  test_sliding_ivf.cpp
  test_threaded_index.cpp
  test_transfer_invlists.cpp
  test_mem_leak.cpp
  test_cppcontrib_sa_decode.cpp
  test_cppcontrib_uintreader.cpp
  test_simdlib.cpp
  test_approx_topk.cpp
  test_RCQ_cropping.cpp
  test_distances_simd.cpp
  test_heap.cpp
  test_code_distance.cpp
  test_hnsw.cpp
  test_partitioning.cpp
  test_fastscan_perf.cpp
  test_disable_pq_sdc_tables.cpp
  test_common_ivf_empty_index.cpp
  test_callback.cpp
  test_utils.cpp
  test_hamming.cpp
  test_mmap.cpp
  test_zerocopy.cpp
)
# Build one executable from all test sources.
add_executable(faiss_test ${FAISS_TEST_SRC})
# Helper provided by the repository; links the appropriate faiss library
# target into faiss_test.
include(../cmake/link_to_faiss_lib.cmake)
link_to_faiss_lib(faiss_test)
# Some tests exercise the example external SWIG module, which only exists
# when the Python bindings are built.
if (FAISS_ENABLE_PYTHON)
  target_link_libraries(faiss_test PUBLIC faiss_example_external_module)
endif()
# Fetch googletest at a pinned commit; OVERRIDE_FIND_PACKAGE makes the later
# find_package(GTest) resolve to this copy instead of a system install.
include(FetchContent)
FetchContent_Declare(
  googletest
  GIT_REPOSITORY https://github.com/google/googletest.git
  GIT_TAG 58d77fa8070e8cec2dc1ed015d66b454c8d78850 # release-1.12.1
  OVERRIDE_FIND_PACKAGE)
# Fix: the cache signature is set(<var> <value> CACHE <type> <docstring>).
# The previous `set(BUILD_GMOCK CACHE BOOL OFF)` cached an EMPTY value and
# used "OFF" as the docstring — falsy only by accident.
set(BUILD_GMOCK OFF CACHE BOOL "gmock is not needed by the faiss tests")
set(INSTALL_GTEST OFF CACHE BOOL "do not install googletest alongside faiss")
FetchContent_MakeAvailable(googletest)
# The fetched googletest may not provide a package-config redirect file.
# If neither spelling exists, write a shim gtest-config.cmake that maps the
# legacy GTest::GTest / GTest::Main targets onto the modern
# GTest::gtest / GTest::gtest_main targets.
if(NOT EXISTS ${CMAKE_FIND_PACKAGE_REDIRECTS_DIR}/gtest-config.cmake
   AND NOT EXISTS ${CMAKE_FIND_PACKAGE_REDIRECTS_DIR}/GTestConfig.cmake)
  file(
    WRITE ${CMAKE_FIND_PACKAGE_REDIRECTS_DIR}/gtest-config.cmake
    [=[
include(CMakeFindDependencyMacro)
find_dependency(googletest)
if(NOT TARGET GTest::GTest)
add_library(GTest::GTest INTERFACE IMPORTED)
target_link_libraries(GTest::GTest INTERFACE GTest::gtest)
endif()
if(NOT TARGET GTest::Main)
add_library(GTest::Main INTERFACE IMPORTED)
target_link_libraries(GTest::Main INTERFACE GTest::gtest_main)
endif()
]=])
endif()
# Resolve dependencies via imported targets and register the tests with CTest.
find_package(OpenMP REQUIRED)
find_package(GTest CONFIG REQUIRED)
target_link_libraries(faiss_test PRIVATE
  OpenMP::OpenMP_CXX
  GTest::gtest_main
  # hip::host is only required when the ROCm backend is enabled
  $<$<BOOL:${FAISS_ENABLE_ROCM}>:hip::host>
)
# Defines `gtest_discover_tests()`.
include(GoogleTest)
gtest_discover_tests(faiss_test)

View File

@@ -0,0 +1,127 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# a few common functions for the tests
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import faiss
# reduce number of threads to avoid excessive nb of threads in opt
# mode (reduces runtime from 100s to 4s!)
faiss.omp_set_num_threads(4)
def random_unitary(n, d, seed):
    """Return an (n, d) float32 array of seeded random vectors,
    L2-normalized in place so every row has unit norm."""
    vecs = faiss.randn(n * d, seed).reshape(n, d)
    faiss.normalize_L2(vecs)
    return vecs
class Randu10k:
    """Small synthetic fixture: 10k unit-norm database vectors, 1k queries.

    All vectors are L2-normalized, so the inner-product nearest neighbor
    (argmax of the dot products) is used as ground truth.
    """

    def __init__(self):
        self.nb = 10000   # database size
        self.nq = 1000    # number of queries
        self.nt = 10000   # number of training vectors
        self.d = 128      # dimensionality
        self.xb = random_unitary(self.nb, self.d, 1)
        self.xt = random_unitary(self.nt, self.d, 2)
        self.xq = random_unitary(self.nq, self.d, 3)
        dotprods = np.dot(self.xq, self.xb.T)
        # ground truth: index of the best (max dot product) match per query
        self.gt = dotprods.argmax(1)
        self.k = 100      # results requested per query

    def launch(self, name, index):
        # Train (if needed), populate, and query the index.
        # `name` is unused here; kept for backward compatibility with callers.
        if not index.is_trained:
            index.train(self.xt)
        index.add(self.xb)
        return index.search(self.xq, self.k)

    def evalres(self, DI):
        """Given (D, I) search results, return {rank: recall@rank} for
        ranks 1, 10 and 100 — the fraction of queries whose ground-truth
        neighbor appears in the top `rank` results."""
        D, I = DI
        e = {}
        for rank in 1, 10, 100:
            e[rank] = ((I[:, :rank] == self.gt.reshape(-1, 1)).sum() /
                       float(self.nq))
        return e
class Randu10kUnbalanced(Randu10k):
    """Variant of Randu10k with anisotropic data: dimensions are scaled by a
    shuffled geometric decay and the vectors re-normalized, which makes the
    neighbor distribution less uniform."""

    def __init__(self):
        Randu10k.__init__(self)
        # geometric weights, assigned to dimensions in random order
        weights = 0.95 ** np.arange(self.d)
        rs = np.random.RandomState(123)
        weights = weights[rs.permutation(self.d)]
        self.xb *= weights
        self.xb /= np.linalg.norm(self.xb, axis=1)[:, np.newaxis]
        self.xq *= weights
        self.xq /= np.linalg.norm(self.xq, axis=1)[:, np.newaxis]
        self.xt *= weights
        self.xt /= np.linalg.norm(self.xt, axis=1)[:, np.newaxis]
        # recompute ground truth on the reweighted vectors
        dotprods = np.dot(self.xq, self.xb.T)
        self.gt = dotprods.argmax(1)
        self.k = 100
def get_dataset(d, nb, nt, nq):
    """Uniform random float32 dataset; returns (train, database, queries)."""
    gen = np.random.RandomState(123)

    def sample(n):
        return gen.rand(n, d).astype('float32')

    # generation order (xb, xt, xq) is part of the fixture's determinism
    xb = sample(nb)
    xt = sample(nt)
    xq = sample(nq)
    return (xt, xb, xq)
def get_dataset_2(d, nt, nb, nq):
    """A dataset that is not completely random but still challenging to
    index: points on a low-dimensional ellipsoid embedded in d dimensions,
    passed through a sinusoid to break linearity.

    Returns (train, database, queries) as float32 arrays.
    """
    intrinsic_dim = 10  # intrinsic dimension (more or less)
    total = nb + nt + nq
    gen = np.random.RandomState(1338)
    pts = gen.normal(size=(total, intrinsic_dim))
    # embed the intrinsic_dim-dim ellipsoid into d-dimensional space
    pts = np.dot(pts, gen.rand(intrinsic_dim, d))
    # higher factor (>4) -> higher frequency -> less linear
    pts = np.sin(pts * (gen.rand(d) * 4 + 0.1))
    pts = pts.astype('float32')
    return pts[:nt], pts[nt:nt + nb], pts[nt + nb:]
def make_binary_dataset(d, nt, nb, nq):
    """Random packed binary dataset of dimension d bits.

    Returns (train, database, queries) as uint8 arrays of shape (n, d // 8).
    Uses explicit slice bounds so that nq == 0 works (the previous
    x[nt:-nq] / x[-nq:] slices returned wrong rows when nq was 0).
    """
    assert d % 8 == 0
    rs = np.random.RandomState(123)
    x = rs.randint(256, size=(nb + nq + nt, d // 8)).astype('uint8')
    return x[:nt], x[nt:nt + nb], x[nt + nb:nt + nb + nq]
def compare_binary_result_lists(D1, I1, D2, I2):
    """comparing result lists is difficult because there are many
    ties. Here we sort by (distance, index) pairs and ignore the largest
    distance of each result. Compatible result lists should pass this."""
    assert D1.shape == I1.shape == D2.shape == I2.shape
    n, k = D1.shape
    ndiff = (D1 != D2).sum()
    assert ndiff == 0, '%d differences in distance matrix %s' % (
        ndiff, D1.shape)

    def normalize_DI(D, I):
        # fold indices into distances so that ties sort deterministically
        norm = I.max() + 1.0
        Dr = D.astype('float64') + I / norm
        # ignore -1s and elements on last column
        # (bug fix: mask with this call's I, not the enclosing I1)
        Dr[I == -1] = 1e20
        Dr[D == D[:, -1:]] = 1e20
        Dr.sort(axis=1)
        return Dr

    ndiff = (normalize_DI(D1, I1) != normalize_DI(D2, I2)).sum()
    assert ndiff == 0, '%d differences in normalized D matrix' % ndiff

View File

@@ -0,0 +1,66 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import unittest
import faiss
import faiss.faiss_example_external_module as external_module
import numpy as np
class TestCustomIDSelector(unittest.TestCase):
    """test if we can construct a custom IDSelector"""

    def test_IDSelector(self):
        # presumably IDSelectorModulo(3) selects ids divisible by 3; the two
        # checks below are consistent with that — confirm in the module source
        ids = external_module.IDSelectorModulo(3)
        self.assertFalse(ids.is_member(1))
        self.assertTrue(ids.is_member(3))
class TestArrayConversions(unittest.TestCase):
    """Check that numpy arrays of every supported dtype can be passed to the
    example external module through faiss.swig_ptr and summed correctly."""

    def test_idx_array(self):
        # idx_t (int64) arrays have a dedicated accessor in the module
        tab = np.arange(10).astype("int64")
        new_sum = external_module.sum_of_idx(len(tab), faiss.swig_ptr(tab))
        self.assertEqual(new_sum, tab.sum())

    def do_array_test(self, ty):
        # generic helper: sum an array of dtype `ty` on the C++ side via
        # the module's sum_of_<dtype> function and compare with numpy's sum
        tab = np.arange(10).astype(ty)
        func = getattr(external_module, "sum_of_" + ty)
        print("perceived type", faiss.swig_ptr(tab))
        new_sum = func(len(tab), faiss.swig_ptr(tab))
        self.assertEqual(new_sum, tab.sum())

    def test_sum_uint8(self):
        self.do_array_test("uint8")

    def test_sum_uint16(self):
        self.do_array_test("uint16")

    def test_sum_uint32(self):
        self.do_array_test("uint32")

    def test_sum_uint64(self):
        self.do_array_test("uint64")

    def test_sum_int8(self):
        self.do_array_test("int8")

    def test_sum_int16(self):
        self.do_array_test("int16")

    def test_sum_int32(self):
        self.do_array_test("int32")

    def test_sum_int64(self):
        self.do_array_test("int64")

    def test_sum_float32(self):
        self.do_array_test("float32")

    def test_sum_float64(self):
        self.do_array_test("float64")

View File

@@ -0,0 +1,85 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexNSG.h>
#include <faiss/utils/hamming.h>
#include <faiss/utils/random.h>
#include <gtest/gtest.h>
using namespace faiss;
using FinalNSGGraph = nsg::Graph<int32_t>;
/** NSG graph whose neighbor lists are re-encoded with `bits` bits per id.
 *
 * The value K + 1 is written as an end-of-list sentinel in place of the -1
 * padding of the dense representation.
 * NOTE(review): a node whose id equals K + 1 would be indistinguishable from
 * the sentinel, and the capacity check bounds 1 << bits by K + 1 rather than
 * by N — apparently fine for this test's parameters, but confirm before
 * reusing this class elsewhere.
 */
struct CompressedNSGGraph : FinalNSGGraph {
    int bits;                            // bits stored per neighbor id
    size_t stride;                       // bytes per compressed neighbor list
    std::vector<uint8_t> compressed_data;

    CompressedNSGGraph(const FinalNSGGraph& graph, int bits)
            : FinalNSGGraph(graph.data, graph.N, graph.K), bits(bits) {
        FAISS_THROW_IF_NOT((1 << bits) >= K + 1);
        stride = (K * bits + 7) / 8;
        compressed_data.resize(N * stride);
        for (size_t i = 0; i < N; i++) {
            BitstringWriter writer(compressed_data.data() + i * stride, stride);
            for (size_t j = 0; j < K; j++) {
                int32_t v = graph.data[i * K + j];
                if (v == -1) {
                    // end of the neighbor list: write the sentinel and stop
                    writer.write(K + 1, bits);
                    break;
                } else {
                    writer.write(v, bits);
                }
            }
        }
        // detach from the uncompressed storage of the base class
        data = nullptr;
    }

    size_t get_neighbors(int i, int32_t* neighbors) const override {
        BitstringReader reader(compressed_data.data() + i * stride, stride);
        for (int j = 0; j < K; j++) {
            int32_t v = reader.read(bits);
            if (v == K + 1) {
                return j; // sentinel reached: j neighbors decoded
            }
            neighbors[j] = v;
        }
        return K;
    }
};
// Build an IndexNSGFlat, search it, then swap in a bit-packed copy of its
// graph and check that search results are bit-for-bit unchanged.
TEST(NSGCompressed, test_compressed) {
    size_t nq = 10, nt = 0, nb = 5000, d = 32, k = 10;
    using idx_t = faiss::idx_t;

    std::vector<float> buf((nq + nb + nt) * d);
    faiss::rand_smooth_vectors(nq + nb + nt, d, buf.data(), 1234);
    const float* xt = buf.data();
    const float* xb = xt + nt * d; // nt == 0, so xb starts at the buffer head
    const float* xq = xb + nb * d;

    faiss::IndexNSGFlat index(d, 32);
    index.add(nb, xb);

    // reference search on the uncompressed graph
    std::vector<faiss::idx_t> Iref(nq * k);
    std::vector<float> Dref(nq * k);
    index.search(nq, xq, k, Dref.data(), Iref.data());

    // replace the shared ptr
    index.nsg.final_graph.reset(
            new CompressedNSGGraph(*index.nsg.final_graph, 13));

    std::vector<idx_t> I(nq * k);
    std::vector<float> D(nq * k);
    index.search(nq, xq, k, D.data(), I.data());

    // make sure we find back the original results
    EXPECT_EQ(Iref, I);
    EXPECT_EQ(Dref, D);
}

View File

@@ -0,0 +1,129 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexAdditiveQuantizer.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/utils/random.h>
#include <gtest/gtest.h>
/* This test creates a 3-level RCQ and performs a search on it.
* Then it crops the RCQ to just the 2 first levels and verifies that
* the 3-level vectors are in a subtree that was visited in the 2-level RCQ. */
TEST(RCQCropping, test_cropping) {
    size_t nq = 10, nt = 2000, nb = 1000, d = 32;
    using idx_t = faiss::idx_t;
    std::vector<float> buf((nq + nb + nt) * d);
    faiss::rand_smooth_vectors(nq + nb + nt, d, buf.data(), 1234);
    const float* xt = buf.data();
    const float* xb = xt + nt * d;
    const float* xq = xb + nb * d;

    // 3-level residual quantizer: 5 + 4 + 4 bits
    std::vector<size_t> nbits = {5, 4, 4};
    faiss::ResidualCoarseQuantizer rcq(d, nbits);
    rcq.train(nt, xt);

    // the test below works only for beam size == nprobe
    rcq.set_beam_factor(1.0);

    // perform search
    int nprobe = 15;
    std::vector<faiss::idx_t> Iref(nq * nprobe);
    std::vector<float> Dref(nq * nprobe);
    rcq.search(nq, xq, nprobe, Dref.data(), Iref.data());

    // crop to the first 2 quantization levels
    int last_nbits = nbits.back();
    nbits.pop_back();
    faiss::ResidualCoarseQuantizer rcq_cropped(d, nbits);
    rcq_cropped.initialize_from(rcq);

    // the cropped quantizer has 2^last_nbits times fewer centroids
    EXPECT_EQ(rcq_cropped.ntotal, rcq.ntotal >> last_nbits);

    // perform search
    std::vector<faiss::idx_t> Inew(nq * nprobe);
    std::vector<float> Dnew(nq * nprobe);
    rcq_cropped.search(nq, xq, nprobe, Dnew.data(), Inew.data());

    // these bits are in common between the two RCQs
    idx_t mask = ((idx_t)1 << rcq_cropped.rq.tot_bits) - 1;
    for (int q = 0; q < nq; q++) {
        for (int i = 0; i < nprobe; i++) {
            idx_t fine = Iref[q * nprobe + i];
            EXPECT_GE(fine, 0);
            bool found = false;

            // fine should be generated from a path that passes through coarse
            for (int j = 0; j < nprobe; j++) {
                idx_t coarse = Inew[q * nprobe + j];
                if ((fine & mask) == coarse) {
                    found = true;
                    break;
                }
            }
            EXPECT_TRUE(found);
        }
    }
}
// Check that the coarse quantizer's beam factor can be overridden per-query
// via SearchParametersIVF::quantizer_params, reproducing the results obtained
// when the quantizer itself is configured with that beam factor.
TEST(RCQCropping, search_params) {
    size_t nq = 10, nt = 2000, nb = 1000, d = 32;
    using idx_t = faiss::idx_t;
    std::vector<float> buf((nq + nb + nt) * d);
    faiss::rand_smooth_vectors(nq + nb + nt, d, buf.data(), 1234);
    const float* xt = buf.data();
    const float* xb = xt + nt * d;
    const float* xq = xb + nb * d;

    std::vector<size_t> nbits = {3, 6, 3};
    faiss::ResidualCoarseQuantizer quantizer(d, nbits);
    // one IVF list per coarse centroid (2^tot_bits lists)
    size_t ntotal = (size_t)1 << quantizer.rq.tot_bits;
    faiss::IndexIVFScalarQuantizer index(
            &quantizer, d, ntotal, faiss::ScalarQuantizer::QT_8bit);
    index.quantizer_trains_alone = true;
    index.train(nt, xt);
    index.add(nb, xb);
    index.nprobe = 10;

    int k = 4;
    float beam_factor_1 = 8.0;
    quantizer.set_beam_factor(beam_factor_1);
    std::vector<idx_t> I1(nq * k);
    std::vector<float> D1(nq * k);
    index.search(nq, xq, k, D1.data(), I1.data());

    // change from 8 to 1
    quantizer.set_beam_factor(1.0f);
    std::vector<idx_t> I2(nq * k);
    std::vector<float> D2(nq * k);
    index.search(nq, xq, k, D2.data(), I2.data());

    // make sure it changes the result
    EXPECT_NE(I1, I2);
    EXPECT_NE(D1, D2);

    // override the class level beam factor
    faiss::SearchParametersResidualCoarseQuantizer params1;
    params1.beam_factor = beam_factor_1;
    faiss::SearchParametersIVF params;
    params.nprobe = index.nprobe;
    params.quantizer_params = &params1;
    std::vector<idx_t> I3(nq * k);
    std::vector<float> D3(nq * k);
    index.search(nq, xq, k, D3.data(), I3.data(), &params);

    // make sure we find back the original results
    EXPECT_EQ(I1, I3);
    EXPECT_EQ(D1, D3);
}

View File

@@ -0,0 +1,224 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <chrono>
#include <cstdint>
#include <random>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>
#include <faiss/utils/approx_topk/approx_topk.h>
#include <faiss/impl/FaissException.h>
#include <faiss/utils/Heap.h>
//
using namespace faiss;
//
// Compare HeapWithBuckets<C, NBUCKETS, N>::bs_addn (approximate top-k) against
// the exact heap_addn baseline on uniform random data.
//
// For each of nDatasetsToTest datasets, beamSize * nPerBeam distances are
// generated; both implementations extract the k smallest (CMax comparator).
// Results are bucketed into exact matches, index-only mismatches, distance
// mismatches, and elements missed entirely; timings and statistics are
// printed when `verbose` is set. The only hard expectation: when the
// configuration can cover k results, the approximate search must find more
// of the baseline's results than it misses.
template <uint32_t NBUCKETS, uint32_t N>
void test_approx_topk(
        const uint32_t beamSize,
        const uint32_t nPerBeam,
        const uint32_t k,
        const uint32_t nDatasetsToTest,
        const bool verbose) {
    if (verbose) {
        printf("-----------\n");
    }

    // generate random data
    std::default_random_engine rng(123);
    std::uniform_real_distribution<float> u(0, 1);

    // matches
    size_t nMatches = 0;
    // the element was completely missed in approx version.
    size_t nMissed = 0;
    // the element is available
    size_t nAvailable = 0;
    // the distance is the same, but the index is different.
    size_t nSoftMismatches = 0;
    // the distances are different
    size_t nHardMismatches = 0;
    // error of distances
    double sqrError = 0.0;
    // accumulated wall-clock time for each implementation
    double timeBaseline = 0.0;
    double timeApprox = 0.0;

    for (size_t iDataset = 0; iDataset < nDatasetsToTest; iDataset++) {
        const size_t n = (size_t)(nPerBeam)*beamSize;

        std::vector<float> distances(n, 0);
        for (size_t i = 0; i < n; i++) {
            distances[i] = u(rng);
        }

        // smallest-k selection
        using C = CMax<float, int>;

        // do a regular beam search
        std::vector<float> baselineDistances(k, C::neutral());
        std::vector<int> baselineIndices(k, -1);

        auto startBaseline = std::chrono::high_resolution_clock::now();
        heap_addn<C>(
                k,
                baselineDistances.data(),
                baselineIndices.data(),
                distances.data(),
                nullptr,
                nPerBeam * beamSize);
        auto endBaseline = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> diffBaseline =
                endBaseline - startBaseline;
        timeBaseline += diffBaseline.count();

        heap_reorder<C>(k, baselineDistances.data(), baselineIndices.data());

        // do an approximate beam search
        std::vector<float> approxDistances(k, C::neutral());
        std::vector<int> approxIndices(k, -1);

        auto startApprox = std::chrono::high_resolution_clock::now();
        try {
            HeapWithBuckets<C, NBUCKETS, N>::bs_addn(
                    beamSize,
                    nPerBeam,
                    distances.data(),
                    k,
                    approxDistances.data(),
                    approxIndices.data());
        } catch (const faiss::FaissException&) {
            // configuration unsupported by this template instantiation
            if (verbose) {
                printf("Skipping the case.\n");
            }
            return;
        }
        auto endApprox = std::chrono::high_resolution_clock::now();

        std::chrono::duration<double> diffApprox = endApprox - startApprox;
        timeApprox += diffApprox.count();

        heap_reorder<C>(k, approxDistances.data(), approxIndices.data());

        bool bGotMismatches = false;

        // the error
        for (uint32_t i = 0; i < k; i++) {
            if (baselineDistances[i] != approxDistances[i]) {
                nHardMismatches += 1;

                double diff = baselineDistances[i] - approxDistances[i];
                sqrError += diff * diff;

                bGotMismatches = true;

                if (verbose) {
                    printf("i=%d, bs.d=%f, bs.i=%d, app.d=%f, app.i=%d\n",
                           i,
                           baselineDistances[i],
                           baselineIndices[i],
                           approxDistances[i],
                           approxIndices[i]);
                }
            } else {
                if (baselineIndices[i] != approxIndices[i]) {
                    nSoftMismatches += 1;
                } else {
                    nMatches += 1;
                }
            }
        }

        if (bGotMismatches) {
            if (verbose) {
                printf("\n");
            }
        }

        // count baseline results that the approximate search found at all
        std::unordered_set<int> bsIndicesHS(
                baselineIndices.cbegin(), baselineIndices.cend());
        for (uint32_t i = 0; i < k; i++) {
            auto itr = bsIndicesHS.find(approxIndices[i]);
            if (itr != bsIndicesHS.cend()) {
                nAvailable += 1;
            } else {
                nMissed += 1;
            }
        }
    }

    if (verbose) {
        printf("%d, %d, %d, %d, %d, %d: %ld, %ld, %ld, %f, %ld, %ld, %f, %f\n",
               NBUCKETS,
               N,
               beamSize,
               nPerBeam,
               k,
               nDatasetsToTest,
               nMatches,
               nSoftMismatches,
               nHardMismatches,
               sqrError,
               nAvailable,
               nMissed,
               timeBaseline,
               timeApprox);
    }

    // just confirm that the error is not crazy
    if (NBUCKETS * N * beamSize >= k) {
        EXPECT_TRUE(nAvailable > nMissed);
    } else {
        // it is possible that the results are crazy here. Skip it.
    }
}
//
// Sweep codebook sizes 2^8..2^10 and a ladder of k values over several
// (NBUCKETS, N) template configurations, with beam sizes 1 and k.
TEST(testApproxTopk, COMMON) {
    constexpr bool verbose = false;

    const uint32_t nDifferentDatasets = 8;

    uint32_t kValues[] = {1, 2, 3, 5, 8, 13, 21, 34};
    for (size_t codebookBitSize = 8; codebookBitSize <= 10; codebookBitSize++) {
        const uint32_t codebookSize = 1 << codebookBitSize;
        for (const auto k : kValues) {
            test_approx_topk<1 * 8, 3>(
                    1, codebookSize, k, nDifferentDatasets, verbose);
            test_approx_topk<1 * 8, 3>(
                    k, codebookSize, k, nDifferentDatasets, verbose);
            test_approx_topk<1 * 8, 2>(
                    1, codebookSize, k, nDifferentDatasets, verbose);
            test_approx_topk<1 * 8, 2>(
                    k, codebookSize, k, nDifferentDatasets, verbose);
            test_approx_topk<2 * 8, 2>(
                    1, codebookSize, k, nDifferentDatasets, verbose);
            test_approx_topk<2 * 8, 2>(
                    k, codebookSize, k, nDifferentDatasets, verbose);
            test_approx_topk<4 * 8, 2>(
                    1, codebookSize, k, nDifferentDatasets, verbose);
            test_approx_topk<4 * 8, 2>(
                    k, codebookSize, k, nDifferentDatasets, verbose);
        }
    }
}
//

View File

@@ -0,0 +1,62 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <gtest/gtest.h>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/utils/hamming.h>
// Brute-force check of IndexBinaryFlat: for every query, the best reported
// Hamming distance must equal the minimum found by an exhaustive scan.
TEST(BinaryFlat, accuracy) {
    // dimension of the vectors to index
    int d = 64;

    // size of the database we plan to index
    size_t nb = 1000;

    // make the index object and train it
    faiss::IndexBinaryFlat index(d);

    std::vector<uint8_t> database(nb * (d / 8));
    for (size_t i = 0; i < nb * (d / 8); i++) {
        // rand() is never seeded here, so the run is deterministic
        database[i] = rand() % 0x100;
    }

    { // populating the database
        index.add(nb, database.data());
    }

    size_t nq = 200;
    { // searching the database
        std::vector<uint8_t> queries(nq * (d / 8));
        for (size_t i = 0; i < nq * (d / 8); i++) {
            queries[i] = rand() % 0x100;
        }

        int k = 5;
        std::vector<faiss::idx_t> nns(k * nq);
        std::vector<int> dis(k * nq);
        index.search(nq, queries.data(), k, dis.data(), nns.data());

        for (size_t i = 0; i < nq; ++i) {
            // exhaustive scan with the 8-byte Hamming computer (d == 64 bits)
            faiss::HammingComputer8 hc(queries.data() + i * (d / 8), d / 8);
            hamdis_t dist_min = hc.hamming(database.data());
            for (size_t j = 1; j < nb; ++j) {
                hamdis_t dist = hc.hamming(database.data() + j * (d / 8));
                if (dist < dist_min) {
                    dist_min = dist;
                }
            }
            // dis[k * i] is the best (smallest) distance reported for query i
            EXPECT_EQ(dist_min, dis[k * i]);
        }
    }
}

View File

@@ -0,0 +1,37 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <faiss/Clustering.h>
#include <faiss/IndexFlat.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissException.h>
#include <faiss/utils/random.h>
// Check that TimeoutCallback aborts long-running clustering: with a 10 ms
// timeout, training kmeans for a huge niter must throw FaissException.
TEST(TestCallback, timeout) {
    int n = 1000;
    int k = 100;
    int d = 128;
    int niter = 1000000000; // effectively infinite without the timeout
    int seed = 42;

    std::vector<float> vecs(n * d);
    faiss::float_rand(vecs.data(), vecs.size(), seed);

    // Stack allocation instead of raw new/delete: no leak is possible even
    // if a gtest assertion aborts the test body early.
    faiss::IndexFlat index(d);

    faiss::ClusteringParameters cp;
    cp.niter = niter;
    cp.verbose = false;
    faiss::Clustering kmeans(d, k, cp);

    faiss::TimeoutCallback::reset(0.010);
    EXPECT_THROW(kmeans.train(n, vecs.data(), index), faiss::FaissException);
}

View File

@@ -0,0 +1,240 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <omp.h>
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iostream>
#include <memory>
#include <random>
#include <thread>
#include <tuple>
#include <vector>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/ProductQuantizer.h>
#include <faiss/impl/code_distance/code_distance.h>
// Count the positions where `candidate` deviates from `ref` by at least 1e-5.
// The two vectors are assumed to have the same length.
size_t nMismatches(
        const std::vector<float>& ref,
        const std::vector<float>& candidate) {
    size_t count = 0;
    // Fix: the loop previously iterated while `i < count` with count starting
    // at 0, so it never ran and the function always returned 0, making the
    // ASSERT_EQ(..., 0) checks below vacuous.
    for (size_t i = 0; i < ref.size(); i++) {
        double abs = std::abs(ref[i] - candidate[i]);
        if (abs >= 1e-5) {
            count += 1;
        }
    }
    return count;
}
// Compare the reference generic PQ code-distance kernel against the 1x/4x
// generic and specialized kernels on random lookup tables and codes, timing
// each variant. All four must agree exactly with the reference results.
void test(
        // dimensionality of the data
        const size_t dim,
        // number of subquantizers
        const size_t subq,
        // bits per subquantizer
        const size_t nbits,
        // number of codes to process
        const size_t n) {
    // only the 8-bit decoder path is exercised here
    FAISS_THROW_IF_NOT(nbits == 8);

    // remove if benchmarking is needed
    omp_set_num_threads(1);

    // rng (an unused uint8_t distribution was removed here;
    // NOTE(review): uniform_int_distribution<uint8_t> below is not one of the
    // standard-sanctioned types, though it works on common implementations)
    std::minstd_rand rng(123);
    std::uniform_real_distribution<float> uf(0, 1);

    // initialize lookup
    std::vector<float> lookup(256 * subq, 0);
    for (size_t i = 0; i < lookup.size(); i++) {
        lookup[i] = uf(rng);
    }

    // initialize codes
    std::vector<uint8_t> codes(n * subq);
#pragma omp parallel
    {
        std::minstd_rand rng0(123);
        std::uniform_int_distribution<uint8_t> u1(0, 255);
#pragma omp for schedule(guided)
        for (size_t i = 0; i < codes.size(); i++) {
            codes[i] = u1(rng0);
        }
    }

    // warmup. compute reference results
    std::vector<float> resultsRef(n, 0);
    for (size_t k = 0; k < 10; k++) {
#pragma omp parallel for schedule(guided)
        for (size_t i = 0; i < n; i++) {
            resultsRef[i] =
                    faiss::distance_single_code_generic<faiss::PQDecoder8>(
                            subq, 8, lookup.data(), codes.data() + subq * i);
        }
    }

    // generic, 1 code per step
    std::vector<float> resultsNewGeneric1x(n, 0);
    double generic1xMsec = 0;
    {
        const auto startingTimepoint = std::chrono::steady_clock::now();
        for (size_t k = 0; k < 1000; k++) {
#pragma omp parallel for schedule(guided)
            for (size_t i = 0; i < n; i++) {
                resultsNewGeneric1x[i] =
                        faiss::distance_single_code_generic<faiss::PQDecoder8>(
                                subq,
                                8,
                                lookup.data(),
                                codes.data() + subq * i);
            }
        }
        const auto endingTimepoint = std::chrono::steady_clock::now();
        std::chrono::duration<double> duration =
                endingTimepoint - startingTimepoint;
        generic1xMsec = (duration.count() * 1000.0);
    }

    // generic, 4 codes per step
    std::vector<float> resultsNewGeneric4x(n, 0);
    double generic4xMsec = 0;
    {
        const auto startingTimepoint = std::chrono::steady_clock::now();
        for (size_t k = 0; k < 1000; k++) {
#pragma omp parallel for schedule(guided)
            for (size_t i = 0; i < n; i += 4) {
                faiss::distance_four_codes_generic<faiss::PQDecoder8>(
                        subq,
                        8,
                        lookup.data(),
                        codes.data() + subq * (i + 0),
                        codes.data() + subq * (i + 1),
                        codes.data() + subq * (i + 2),
                        codes.data() + subq * (i + 3),
                        resultsNewGeneric4x[i + 0],
                        resultsNewGeneric4x[i + 1],
                        resultsNewGeneric4x[i + 2],
                        resultsNewGeneric4x[i + 3]);
            }
        }
        const auto endingTimepoint = std::chrono::steady_clock::now();
        std::chrono::duration<double> duration =
                endingTimepoint - startingTimepoint;
        generic4xMsec = (duration.count() * 1000.0);
    }

    // specialized, 1 code per step
    std::vector<float> resultsNewCustom1x(n, 0);
    double custom1xMsec = 0;
    {
        const auto startingTimepoint = std::chrono::steady_clock::now();
        for (size_t k = 0; k < 1000; k++) {
#pragma omp parallel for schedule(guided)
            for (size_t i = 0; i < n; i++) {
                resultsNewCustom1x[i] =
                        faiss::distance_single_code<faiss::PQDecoder8>(
                                subq,
                                8,
                                lookup.data(),
                                codes.data() + subq * i);
            }
        }
        const auto endingTimepoint = std::chrono::steady_clock::now();
        std::chrono::duration<double> duration =
                endingTimepoint - startingTimepoint;
        custom1xMsec = (duration.count() * 1000.0);
    }

    // specialized, 4 codes per step
    std::vector<float> resultsNewCustom4x(n, 0);
    double custom4xMsec = 0;
    {
        const auto startingTimepoint = std::chrono::steady_clock::now();
        for (size_t k = 0; k < 1000; k++) {
#pragma omp parallel for schedule(guided)
            for (size_t i = 0; i < n; i += 4) {
                faiss::distance_four_codes<faiss::PQDecoder8>(
                        subq,
                        8,
                        lookup.data(),
                        codes.data() + subq * (i + 0),
                        codes.data() + subq * (i + 1),
                        codes.data() + subq * (i + 2),
                        codes.data() + subq * (i + 3),
                        resultsNewCustom4x[i + 0],
                        resultsNewCustom4x[i + 1],
                        resultsNewCustom4x[i + 2],
                        resultsNewCustom4x[i + 3]);
            }
        }
        const auto endingTimepoint = std::chrono::steady_clock::now();
        std::chrono::duration<double> duration =
                endingTimepoint - startingTimepoint;
        custom4xMsec = (duration.count() * 1000.0);
    }

    const size_t nMismatchesG1 = nMismatches(resultsRef, resultsNewGeneric1x);
    const size_t nMismatchesG4 = nMismatches(resultsRef, resultsNewGeneric4x);
    const size_t nMismatchesCustom1 =
            nMismatches(resultsRef, resultsNewCustom1x);
    const size_t nMismatchesCustom4 =
            nMismatches(resultsRef, resultsNewCustom4x);

    std::cout << "Dim = " << dim << ", subq = " << subq << ", nbits = " << nbits
              << ", n = " << n << std::endl;
    std::cout << "Generic 1x code: " << generic1xMsec << " msec, "
              << nMismatchesG1 << " mismatches" << std::endl;
    std::cout << "Generic 4x code: " << generic4xMsec << " msec, "
              << nMismatchesG4 << " mismatches" << std::endl;
    std::cout << "custom 1x code: " << custom1xMsec << " msec, "
              << nMismatchesCustom1 << " mismatches" << std::endl;
    std::cout << "custom 4x code: " << custom4xMsec << " msec, "
              << nMismatchesCustom4 << " mismatches" << std::endl;
    std::cout << std::endl;

    ASSERT_EQ(nMismatchesG1, 0);
    ASSERT_EQ(nMismatchesG4, 0);
    ASSERT_EQ(nMismatchesCustom1, 0);
    ASSERT_EQ(nMismatchesCustom4, 0);
}
// this test can be used as a benchmark.
// 1. Increase the value of NELEMENTS
// 2. Remove omp_set_num_threads()
// number of PQ codes evaluated per configuration
constexpr size_t NELEMENTS = 10000;

TEST(TestCodeDistance, SUBQ4_NBITS8) {
    test(256, 4, 8, NELEMENTS);
}

TEST(TestCodeDistance, SUBQ8_NBITS8) {
    test(256, 8, 8, NELEMENTS);
}

TEST(TestCodeDistance, SUBQ16_NBITS8) {
    test(256, 16, 8, NELEMENTS);
}

TEST(TestCodeDistance, SUBQ32_NBITS8) {
    test(256, 32, 8, NELEMENTS);
}

View File

@@ -0,0 +1,148 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <cstddef>
#include <memory>
#include <vector>
#include <faiss/IndexIVF.h>
#include <faiss/clone_index.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/index_factory.h>
#include <faiss/invlists/InvertedLists.h>
#include <faiss/utils/random.h>
/* This demonstrates how to query several independent IVF indexes with a
 * trained index in common. This avoids duplicating the coarse quantizer
 * and metadata in memory.
 */
namespace {
// dimensionality shared by all vectors in this test file
int d = 64;
} // namespace
// Return n smooth random d-dimensional vectors derived from `seed`.
std::vector<float> get_random_vectors(size_t n, int seed) {
    std::vector<float> x(n * d);
    faiss::rand_smooth_vectors(n, d, x.data(), seed);
    // (a dead `seed++` on the by-value parameter was removed — it had no
    // effect on the caller or on the generated data)
    return x;
}
/** InvertedLists implementation that dispatches the search to an
 * InvertedLists object that is passed in at query time */
struct DispatchingInvertedLists : faiss::ReadOnlyInvertedLists {
    DispatchingInvertedLists(size_t nlist, size_t code_size)
            : faiss::ReadOnlyInvertedLists(nlist, code_size) {
        // force callers to go through get_iterator() rather than the
        // random-access accessors below
        use_iterator = true;
    }

    // Dispatch to the InvertedLists object supplied per-query through
    // inverted_list_context (must be non-null).
    faiss::InvertedListsIterator* get_iterator(
            size_t list_no,
            void* inverted_list_context = nullptr) const override {
        assert(inverted_list_context);
        auto il =
                static_cast<const faiss::InvertedLists*>(inverted_list_context);
        return il->get_iterator(list_no);
    }

    using idx_t = faiss::idx_t;

    // Random access is unsupported: only the iterator interface works.
    size_t list_size(size_t list_no) const override {
        FAISS_THROW_MSG("use iterator interface");
    }
    const uint8_t* get_codes(size_t list_no) const override {
        FAISS_THROW_MSG("use iterator interface");
    }
    const idx_t* get_ids(size_t list_no) const override {
        FAISS_THROW_MSG("use iterator interface");
    }
};
TEST(COMMON, test_common_trained_index) {
    int N = 3; // number of independent indexes
    int nt = 500; // training vectors
    int nb = 200; // nb database vectors per index
    int nq = 10; // nb queries performed on each index
    int k = 4; // results requested per query

    // construct and build an "empty index": a trained index that does not
    // itself hold any data
    std::unique_ptr<faiss::IndexIVF> empty_index(dynamic_cast<faiss::IndexIVF*>(
            faiss::index_factory(d, "IVF32,PQ8np")));
    auto xt = get_random_vectors(nt, 123);
    empty_index->train(nt, xt.data());
    empty_index->nprobe = 4;

    // reference run: build one index for each set of db / queries and record
    // results
    std::vector<std::vector<faiss::idx_t>> ref_I(N);
    for (int i = 0; i < N; i++) {
        // clone the empty index
        std::unique_ptr<faiss::Index> index(
                faiss::clone_index(empty_index.get()));
        auto xb = get_random_vectors(nb, 1234 + i);
        auto xq = get_random_vectors(nq, 12345 + i);
        // add vectors and perform a search
        index->add(nb, xb.data());
        std::vector<float> D(k * nq);
        std::vector<faiss::idx_t> I(k * nq);
        index->search(nq, xq.data(), k, D.data(), I.data());
        // record result as reference
        ref_I[i] = I;
    }

    // build a set of inverted lists for each independent index
    std::vector<faiss::ArrayInvertedLists> sub_invlists;
    for (int i = 0; i < N; i++) {
        // swap in other inverted lists
        sub_invlists.emplace_back(empty_index->nlist, empty_index->code_size);
        faiss::InvertedLists* invlists = &sub_invlists.back();
        // replace_invlists swaps in a new InvertedLists for an existing index
        empty_index->replace_invlists(invlists, false);
        empty_index->reset(); // reset id counter to 0
        // populate inverted lists
        auto xb = get_random_vectors(nb, 1234 + i);
        empty_index->add(nb, xb.data());
    }

    // perform search dispatching to the sub-invlists. At search time, we don't
    // use replace_invlists because that would wreak havoc in a multithreaded
    // context
    DispatchingInvertedLists di(empty_index->nlist, empty_index->code_size);
    empty_index->replace_invlists(&di, false);
    std::vector<std::vector<faiss::idx_t>> new_I(N);

    // run searches in the independent indexes but with a common empty_index
#pragma omp parallel for
    for (int i = 0; i < N; i++) {
        auto xq = get_random_vectors(nq, 12345 + i);
        std::vector<float> D(k * nq);
        std::vector<faiss::idx_t> I(k * nq);
        // here we set to what sub-index the queries should be directed
        faiss::SearchParametersIVF params;
        params.nprobe = empty_index->nprobe;
        params.inverted_list_context = &sub_invlists[i];
        empty_index->search(nq, xq.data(), k, D.data(), I.data(), &params);
        new_I[i] = I;
    }

    // compare with reference result
    for (int i = 0; i < N; i++) {
        ASSERT_EQ(ref_I[i], new_I[i]);
    }
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,114 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// This test was designed to be run using valgrind or ASAN to test the
// correctness of memory accesses.
#include <gtest/gtest.h>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <random>
#include <faiss/utils/hamming.h>
#include <faiss/cppcontrib/detail/UintReader.h>
// Compile-time recursion over element positions: for each CPOS in
// [0, N_ELEMENTS), check that UintReaderRaw decodes the same value as the
// sequential BitstringReader.
template <intptr_t N_ELEMENTS, intptr_t CODE_BITS, intptr_t CPOS>
struct TestLoop {
    static void test(
            const uint8_t* const container,
            faiss::BitstringReader& br) {
        // validate
        const intptr_t uintreader_data = faiss::cppcontrib::detail::
                UintReaderRaw<N_ELEMENTS, CODE_BITS, CPOS>::get(container);
        const intptr_t bitstringreader_data = br.read(CODE_BITS);
        ASSERT_EQ(uintreader_data, bitstringreader_data)
                << "Mismatch between BitstringReader (" << bitstringreader_data
                << ") and UintReader (" << uintreader_data
                << ") for N_ELEMENTS=" << N_ELEMENTS
                << ", CODE_BITS=" << CODE_BITS << ", CPOS=" << CPOS;
        // recurse to the next position
        TestLoop<N_ELEMENTS, CODE_BITS, CPOS + 1>::test(container, br);
    }
};
// Base case: CPOS == N_ELEMENTS terminates the recursion.
template <intptr_t N_ELEMENTS, intptr_t CODE_BITS>
struct TestLoop<N_ELEMENTS, CODE_BITS, N_ELEMENTS> {
    static void test(
            const uint8_t* const container,
            faiss::BitstringReader& br) {}
};
// Writes N_ELEMENTS random CODE_BITS-wide values with BitstringWriter, then
// verifies that UintReaderRaw and BitstringReader decode identical values
// from the same buffer.
template <intptr_t N_ELEMENTS, intptr_t CODE_BITS>
void TestUintReader() {
    // number of bytes needed to hold N_ELEMENTS codes of CODE_BITS each
    constexpr intptr_t CODE_BYTES = (CODE_BITS * N_ELEMENTS + 7) / 8;

    std::default_random_engine rng;
    // uniform_int_distribution bounds are inclusive, so the largest value
    // representable in CODE_BITS bits is (1 << CODE_BITS) - 1. The previous
    // upper bound of (1 << CODE_BITS) produced a value needing CODE_BITS + 1
    // bits and only worked because BitstringWriter truncates the high bits.
    std::uniform_int_distribution<uint64_t> u(
            0, (uint64_t(1) << CODE_BITS) - 1);

    // do several attempts
    for (size_t attempt = 0; attempt < 10; attempt++) {
        // allocate a raw buffer (not std::vector) so that valgrind/ASAN can
        // pinpoint out-of-bounds accesses precisely
        std::unique_ptr<uint8_t[]> container(new uint8_t[CODE_BYTES]);
        // zero-initialize it
        for (intptr_t i = 0; i < CODE_BYTES; i++) {
            container.get()[i] = 0;
        }

        // populate it (signed loop index matches intptr_t N_ELEMENTS)
        faiss::BitstringWriter bw(container.get(), CODE_BYTES);
        for (intptr_t i = 0; i < N_ELEMENTS; i++) {
            bw.write(u(rng), CODE_BITS);
        }

        // read it back and verify against the bitstring reader
        faiss::BitstringReader br(container.get(), CODE_BYTES);
        TestLoop<N_ELEMENTS, CODE_BITS, 0>::test(container.get(), br);
    }
}
// Runs TestUintReader for every element count from 1 through 17 at a fixed
// CODE_BITS width, so all bit/byte alignments of the packed codes are hit.
template <intptr_t CODE_BITS>
void TestUintReaderBits() {
    TestUintReader<1, CODE_BITS>();
    TestUintReader<2, CODE_BITS>();
    TestUintReader<3, CODE_BITS>();
    TestUintReader<4, CODE_BITS>();
    TestUintReader<5, CODE_BITS>();
    TestUintReader<6, CODE_BITS>();
    TestUintReader<7, CODE_BITS>();
    TestUintReader<8, CODE_BITS>();
    TestUintReader<9, CODE_BITS>();
    TestUintReader<10, CODE_BITS>();
    TestUintReader<11, CODE_BITS>();
    TestUintReader<12, CODE_BITS>();
    TestUintReader<13, CODE_BITS>();
    TestUintReader<14, CODE_BITS>();
    TestUintReader<15, CODE_BITS>();
    TestUintReader<16, CODE_BITS>();
    TestUintReader<17, CODE_BITS>();
}
// Entry points: run the cross-checks for each code width of interest.
TEST(testCppcontribUintreader, Test8bit) {
    TestUintReaderBits<8>();
}
TEST(testCppcontribUintreader, Test10bit) {
    TestUintReaderBits<10>();
}
TEST(testCppcontribUintreader, Test12bit) {
    TestUintReaderBits<12>();
}
TEST(testCppcontribUintreader, Test16bit) {
    TestUintReaderBits<16>();
}

View File

@@ -0,0 +1,170 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <random>
#include <vector>
#include <gtest/gtest.h>
#include <faiss/AutoTune.h>
#include <faiss/IVFlib.h>
#include <faiss/IndexIVF.h>
#include <faiss/index_factory.h>
using namespace faiss;
namespace {
// dimension of the vectors to index
int d = 32;
// number of training vectors
size_t nt = 5000;
// number of database points added per window step
size_t nb = 1000;
// number of queries
size_t nq = 200;
// shared RNG; default-seeded so runs are deterministic
std::mt19937 rng;
// Generates n random d-dimensional vectors (uniform in [0, 1)) using the
// shared RNG, returned as a flat array of n * d floats.
std::vector<float> make_data(size_t n) {
    std::uniform_real_distribution<> distrib;
    std::vector<float> database(n * d);
    for (auto& value : database) {
        value = distrib(rng);
    }
    return database;
}
std::unique_ptr<Index> make_trained_index(const char* index_type) {
auto index = std::unique_ptr<Index>(index_factory(d, index_type));
auto xt = make_data(nt * d);
index->train(nt, xt.data());
ParameterSpace().set_index_parameter(index.get(), "nprobe", 4);
return index;
}
// Runs a k=10 search for the nq queries in xq and returns the result ids.
std::vector<idx_t> search_index(Index* index, const float* xq) {
    const int k = 10;
    std::vector<float> D(k * nq);
    std::vector<idx_t> I(k * nq);
    index->search(nq, xq, k, D.data(), I.data());
    return I;
}
/*************************************************************
* Test functions for a given index type
*************************************************************/
// Read-only InvertedLists wrapper whose accessors return freshly malloc'ed
// copies of the wrapped lists' data. Every get_* must therefore be paired
// with the matching release_* (which frees the copy) -- running a search
// through this wrapper under valgrind/ASAN detects missing releases.
struct EncapsulateInvertedLists : InvertedLists {
    // wrapped lists; NOT owned (the test deletes them explicitly)
    const InvertedLists* il;

    EncapsulateInvertedLists(const InvertedLists* il)
            : InvertedLists(il->nlist, il->code_size), il(il) {}

    // malloc'ed copy of [m, m + size); nullptr for size == 0
    static void* memdup(const void* m, size_t size) {
        if (size == 0)
            return nullptr;
        return memcpy(malloc(size), m, size);
    }

    size_t list_size(size_t list_no) const override {
        return il->list_size(list_no);
    }

    // returns a heap copy; caller must trigger release_codes
    const uint8_t* get_codes(size_t list_no) const override {
        return (uint8_t*)memdup(
                il->get_codes(list_no), list_size(list_no) * code_size);
    }

    // returns a heap copy; caller must trigger release_ids
    const idx_t* get_ids(size_t list_no) const override {
        return (idx_t*)memdup(
                il->get_ids(list_no), list_size(list_no) * sizeof(idx_t));
    }

    void release_codes(size_t, const uint8_t* codes) const override {
        free((void*)codes);
    }

    void release_ids(size_t, const idx_t* ids) const override {
        free((void*)ids);
    }

    const uint8_t* get_single_code(size_t list_no, size_t offset)
            const override {
        return (uint8_t*)memdup(
                il->get_single_code(list_no, offset), code_size);
    }

    // the wrapper is read-only: mutating entry points are not supported
    size_t add_entries(size_t, size_t, const idx_t*, const uint8_t*) override {
        assert(!"not implemented");
        return 0;
    }

    void update_entries(size_t, size_t, size_t, const idx_t*, const uint8_t*)
            override {
        assert(!"not implemented");
    }

    void resize(size_t, size_t) override {
        assert(!"not implemented");
    }

    ~EncapsulateInvertedLists() override {}
};
// Builds an index of type index_key, swaps its inverted lists for an
// EncapsulateInvertedLists wrapper, and re-runs the same search. Under
// valgrind/ASAN this verifies that the search code releases every
// codes/ids pointer it acquires, and that results are unchanged.
int test_dealloc_invlists(const char* index_key) {
    std::unique_ptr<Index> index = make_trained_index(index_key);
    IndexIVF* index_ivf = ivflib::extract_index_ivf(index.get());
    auto xb = make_data(nb * d);
    index->add(nb, xb.data());
    auto xq = make_data(nq * d);
    // reference results with the original inverted lists
    auto ref_res = search_index(index.get(), xq.data());
    // wrap the original lists; own_invlists = false so the index destructor
    // won't delete them -- the explicit delete below does
    EncapsulateInvertedLists eil(index_ivf->invlists);
    index_ivf->own_invlists = false;
    index_ivf->replace_invlists(&eil, false);
    // TEST: this could crash or leak mem
    auto new_res = search_index(index.get(), xq.data());
    // delete the original (wrapped) lists explicitly
    delete eil.il;
    // just to make sure
    EXPECT_EQ(ref_res, new_res);
    return 0;
}
} // anonymous namespace
/*************************************************************
* Test entry points
*************************************************************/
// Entry points: one IVF encoding per test case.
TEST(TestIvlistDealloc, IVFFlat) {
    test_dealloc_invlists("IVF32,Flat");
}
TEST(TestIvlistDealloc, IVFSQ) {
    test_dealloc_invlists("IVF32,SQ8");
}
TEST(TestIvlistDealloc, IVFPQ) {
    test_dealloc_invlists("IVF32,PQ4np");
}

View File

@@ -0,0 +1,69 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <random>
#include "faiss/Index.h"
#include "faiss/IndexHNSW.h"
#include "faiss/index_factory.h"
#include "faiss/index_io.h"
#include "test_util.h"
pthread_mutex_t temp_file_mutex = PTHREAD_MUTEX_INITIALIZER;
// Verifies IO_FLAG_PQ_SKIP_SDC_TABLE: reading an HNSWPQ index back with the
// flag must leave the PQ's sdc_table empty, while a plain read populates it.
TEST(IO, TestReadHNSWPQ_whenSDCDisabledFlagPassed_thenDisableSDCTable) {
    // Create a temp file name with a randomized component for stress runs
    std::random_device rd;
    std::mt19937 mt(rd());
    std::uniform_real_distribution<float> dist(0, 9999999);
    std::string temp_file_name =
            "/tmp/faiss_TestReadHNSWPQ" + std::to_string(int(dist(mt)));
    Tempfilename index_filename(&temp_file_mutex, temp_file_name);

    // Random training/database data for a HNSW index with PQ encoding
    int d = 32, n = 256;
    std::default_random_engine rng(123);
    std::uniform_real_distribution<float> u(0, 100);
    std::vector<float> vectors(n * d);
    for (size_t i = 0; i < n * d; i++) {
        vectors[i] = u(rng);
    }

    // Build the index and write it to the temp file
    {
        std::unique_ptr<faiss::Index> index_writer(
                faiss::index_factory(d, "HNSW8,PQ4np", faiss::METRIC_L2));
        index_writer->train(n, vectors.data());
        index_writer->add(n, vectors.data());
        faiss::write_index(index_writer.get(), index_filename.c_str());
    }

    // Load index from disk. Confirm that the sdc table is equal to 0 when
    // disable sdc is set
    {
        std::unique_ptr<faiss::IndexHNSWPQ> index_reader_read_write(
                dynamic_cast<faiss::IndexHNSWPQ*>(
                        faiss::read_index(index_filename.c_str())));
        std::unique_ptr<faiss::IndexHNSWPQ> index_reader_sdc_disabled(
                dynamic_cast<faiss::IndexHNSWPQ*>(faiss::read_index(
                        index_filename.c_str(),
                        faiss::IO_FLAG_PQ_SKIP_SDC_TABLE)));
        // default read: SDC table is present
        ASSERT_NE(
                dynamic_cast<faiss::IndexPQ*>(index_reader_read_write->storage)
                        ->pq.sdc_table.size(),
                0);
        // flagged read: SDC table was skipped
        ASSERT_EQ(
                dynamic_cast<faiss::IndexPQ*>(
                        index_reader_sdc_disabled->storage)
                        ->pq.sdc_table.size(),
                0);
    }
}

View File

@@ -0,0 +1,334 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>
#include <faiss/utils/distances.h>
// reference implementations
// Scalar reference: ip[i] = <x, y_i> for each of the ny d-dimensional
// vectors stored contiguously in y.
void fvec_inner_products_ny_ref(
        float* ip,
        const float* x,
        const float* y,
        size_t d,
        size_t ny) {
    for (size_t row = 0; row < ny; row++) {
        ip[row] = faiss::fvec_inner_product(x, y + row * d, d);
    }
}
// Scalar reference: dis[i] = ||x - y_i||^2 for each of the ny d-dimensional
// vectors stored contiguously in y.
void fvec_L2sqr_ny_ref(
        float* dis,
        const float* x,
        const float* y,
        size_t d,
        size_t ny) {
    for (size_t row = 0; row < ny; row++) {
        dis[row] = faiss::fvec_L2sqr(x, y + row * d, d);
    }
}
// test templated versions of fvec_L2sqr_ny
// Validates the optimized faiss::fvec_L2sqr_ny against the scalar reference
// over several dimensions and row counts. Integer-valued floats keep the
// float comparisons exact.
TEST(TestFvecL2sqrNy, D2) {
    std::default_random_engine gen(123);
    std::uniform_int_distribution<int32_t> value_dist(0, 32);

    for (const auto dim : {2, 4, 8, 12}) {
        // random query vector
        std::vector<float> x(dim, 0);
        for (auto& value : x) {
            value = value_dist(gen);
        }

        for (const auto nrows : {1, 2, 5, 10, 15, 20, 25}) {
            // random database rows
            std::vector<float> y(nrows * dim);
            for (auto& value : y) {
                value = value_dist(gen);
            }

            std::vector<float> distances(nrows, 0);
            faiss::fvec_L2sqr_ny(
                    distances.data(), x.data(), y.data(), dim, nrows);

            std::vector<float> distances_ref(nrows, 0);
            fvec_L2sqr_ny_ref(
                    distances_ref.data(), x.data(), y.data(), dim, nrows);

            ASSERT_EQ(distances, distances_ref)
                    << "Mismatching results for dim = " << dim
                    << ", nrows = " << nrows;
        }
    }
}
// fvec_inner_products_ny
// Validates the optimized faiss::fvec_inner_products_ny against the scalar
// reference over several dimensions and row counts. Integer-valued floats
// keep the float comparisons exact.
TEST(TestFvecInnerProductsNy, D2) {
    std::default_random_engine gen(123);
    std::uniform_int_distribution<int32_t> value_dist(0, 32);

    for (const auto dim : {2, 4, 8, 12}) {
        // random query vector
        std::vector<float> x(dim, 0);
        for (auto& value : x) {
            value = value_dist(gen);
        }

        for (const auto nrows : {1, 2, 5, 10, 15, 20, 25}) {
            // random database rows
            std::vector<float> y(nrows * dim);
            for (auto& value : y) {
                value = value_dist(gen);
            }

            std::vector<float> distances(nrows, 0);
            faiss::fvec_inner_products_ny(
                    distances.data(), x.data(), y.data(), dim, nrows);

            std::vector<float> distances_ref(nrows, 0);
            fvec_inner_products_ny_ref(
                    distances_ref.data(), x.data(), y.data(), dim, nrows);

            ASSERT_EQ(distances, distances_ref)
                    << "Mismatching results for dim = " << dim
                    << ", nrows = " << nrows;
        }
    }
}
// Checks fvec_L2sqr_ny_transposed against a scalar reference. The database
// y is stored transposed: element j of vector i lives at y[i + j * ny].
// Integer-valued floats keep the comparison exact.
TEST(TestFvecL2sqr, distances_L2_squared_y_transposed) {
    // ints instead of floats for 100% accuracy
    std::default_random_engine rng(123);
    std::uniform_int_distribution<int32_t> uniform(0, 32);

    // modulo 8 results - 16 is to repeat the loop in the function
    int ny = 11; // this value will hit all the codepaths
    for (const auto d : {1, 2, 3, 4, 5, 6, 7, 8, 16}) {
        // initialize the query vector and its squared norm
        std::vector<float> x(d);
        float x_sqlen = 0;
        for (size_t i = 0; i < x.size(); i++) {
            x[i] = uniform(rng);
            x_sqlen += x[i] * x[i];
        }

        // Initialize the transposed database once, then compute per-vector
        // squared norms. The previous version regenerated the whole y array
        // once per vector and summed y_sqlens[i] over all of y, so y_sqlens
        // did not hold per-vector norms (the check still passed only because
        // both sides consumed the same passed-in y_sqlens values).
        std::vector<float> y(d * ny);
        for (size_t j = 0; j < y.size(); j++) {
            y[j] = uniform(rng);
        }
        std::vector<float> y_sqlens(ny, 0);
        for (size_t i = 0; i < ny; i++) {
            for (size_t j = 0; j < d; j++) {
                y_sqlens[i] += y[i + j * ny] * y[i + j * ny];
            }
        }

        // reference distances
        std::vector<float> true_distances(ny, 0);
        for (size_t i = 0; i < ny; i++) {
            float dp = 0;
            for (size_t j = 0; j < d; j++) {
                dp += x[j] * y[i + j * ny];
            }
            true_distances[i] = x_sqlen + y_sqlens[i] - 2 * dp;
        }

        std::vector<float> distances(ny);
        faiss::fvec_L2sqr_ny_transposed(
                distances.data(),
                x.data(),
                y.data(),
                y_sqlens.data(),
                d,
                ny, // no need for special offset to test all lines of code
                ny);

        ASSERT_EQ(distances, true_distances)
                << "Mismatching fvec_L2sqr_ny_transposed results for d = " << d;
    }
}
// Checks fvec_L2sqr_ny_nearest_y_transposed (argmin over a transposed
// database) against a scalar reference. y is stored transposed: element j
// of vector i lives at y[i + j * ny].
TEST(TestFvecL2sqr, nearest_L2_squared_y_transposed) {
    // ints instead of floats for 100% accuracy
    std::default_random_engine rng(123);
    std::uniform_int_distribution<int32_t> uniform(0, 32);

    // modulo 8 results - 16 is to repeat the loop in the function
    int ny = 11; // this value will hit all the codepaths
    for (const auto d : {1, 2, 3, 4, 5, 6, 7, 8, 16}) {
        // initialize the query vector and its squared norm
        std::vector<float> x(d);
        float x_sqlen = 0;
        for (size_t i = 0; i < x.size(); i++) {
            x[i] = uniform(rng);
            x_sqlen += x[i] * x[i];
        }

        // Initialize the transposed database once, then compute per-vector
        // squared norms. The previous version regenerated the whole y array
        // once per vector and summed y_sqlens[i] over all of y, so y_sqlens
        // did not hold per-vector norms (the argmin check still passed only
        // because both sides consumed the same passed-in y_sqlens values).
        std::vector<float> y(d * ny);
        for (size_t j = 0; j < y.size(); j++) {
            y[j] = uniform(rng);
        }
        std::vector<float> y_sqlens(ny, 0);
        for (size_t i = 0; i < ny; i++) {
            for (size_t j = 0; j < d; j++) {
                y_sqlens[i] += y[i + j * ny] * y[i + j * ny];
            }
        }

        // reference distances
        std::vector<float> distances(ny, 0);
        for (size_t i = 0; i < ny; i++) {
            float dp = 0;
            for (size_t j = 0; j < d; j++) {
                dp += x[j] * y[i + j * ny];
            }
            distances[i] = x_sqlen + y_sqlens[i] - 2 * dp;
        }

        // reference argmin (first minimum wins)
        size_t true_nearest_idx = 0;
        float min_dis = HUGE_VALF;
        for (size_t i = 0; i < ny; i++) {
            if (distances[i] < min_dis) {
                min_dis = distances[i];
                true_nearest_idx = i;
            }
        }

        std::vector<float> buffer(ny);
        size_t nearest_idx = faiss::fvec_L2sqr_ny_nearest_y_transposed(
                buffer.data(),
                x.data(),
                y.data(),
                y_sqlens.data(),
                d,
                ny, // no need for special offset to test all lines of code
                ny);

        ASSERT_EQ(nearest_idx, true_nearest_idx)
                << "Mismatching fvec_L2sqr_ny_nearest_y_transposed results for d = "
                << d;
    }
}
// Compares faiss::fvec_L1 (Manhattan distance) against a scalar reference.
// Integer-valued floats keep the comparison exact.
TEST(TestFvecL1, manhattan_distance) {
    // ints instead of floats for 100% accuracy
    std::default_random_engine rng(123);
    std::uniform_int_distribution<int32_t> uniform(0, 32);

    // sizes 8..16 cover the vectorized main loop and its scalar remainder
    for (const auto nrows : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
        std::vector<float> x(nrows);
        std::vector<float> y(nrows);
        float true_distance = 0;
        for (size_t i = 0; i < x.size(); i++) {
            x[i] = uniform(rng);
            y[i] = uniform(rng);
            true_distance += std::abs(x[i] - y[i]);
        }

        auto distance = faiss::fvec_L1(x.data(), y.data(), x.size());
        // BUGFIX: the failure message used to say "fvec_Linf" (copy-paste
        // from the Chebyshev test); it now names the function under test.
        ASSERT_EQ(distance, true_distance)
                << "Mismatching fvec_L1 results for nrows = " << nrows;
    }
}
// Compares faiss::fvec_Linf (Chebyshev distance) against a scalar
// reference. Integer-valued floats keep the comparison exact.
TEST(TestFvecLinf, chebyshev_distance) {
    std::default_random_engine gen(123);
    std::uniform_int_distribution<int32_t> value_dist(0, 32);

    // sizes 8..16 cover the vectorized main loop and its scalar remainder
    for (const auto nrows : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
        std::vector<float> x(nrows);
        std::vector<float> y(nrows);
        float expected = 0;
        for (size_t i = 0; i < x.size(); i++) {
            x[i] = value_dist(gen);
            y[i] = value_dist(gen);
            expected = std::max(expected, std::abs(x[i] - y[i]));
        }

        auto actual = faiss::fvec_Linf(x.data(), y.data(), x.size());
        ASSERT_EQ(actual, expected)
                << "Mismatching fvec_Linf results for nrows = " << nrows;
    }
}
// Validates faiss::fvec_madd (out[i] = a[i] + bf * b[i]) against a scalar
// reference. Integer-valued floats keep the comparison exact.
TEST(TestFvecMadd, multiple_add) {
    std::default_random_engine gen(123);
    std::uniform_int_distribution<int32_t> value_dist(0, 32);

    // sizes 8..16 cover the vectorized main loop and its scalar remainder
    for (const auto nrows : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
        std::vector<float> a(nrows);
        std::vector<float> b(nrows);
        const float bf = value_dist(gen);

        std::vector<float> expected(nrows);
        for (size_t i = 0; i < a.size(); i++) {
            a[i] = value_dist(gen);
            b[i] = value_dist(gen);
            expected[i] = a[i] + bf * b[i];
        }

        std::vector<float> actual(nrows);
        faiss::fvec_madd(a.size(), a.data(), bf, b.data(), actual.data());

        ASSERT_EQ(actual, expected)
                << "Mismatching fvec_madd results for nrows = " << nrows;
    }
}
// Validates the array-array overload of faiss::fvec_add against a scalar
// reference. Integer-valued floats keep the comparison exact.
TEST(TestFvecAdd, add_array) {
    std::default_random_engine gen(123);
    std::uniform_int_distribution<int32_t> value_dist(0, 32);

    for (const auto nrows : {1, 2, 5, 10, 15, 20, 25}) {
        std::vector<float> a(nrows);
        std::vector<float> b(nrows);

        std::vector<float> expected(nrows);
        for (size_t i = 0; i < a.size(); i++) {
            a[i] = value_dist(gen);
            b[i] = value_dist(gen);
            expected[i] = a[i] + b[i];
        }

        std::vector<float> actual(nrows);
        faiss::fvec_add(a.size(), a.data(), b.data(), actual.data());

        ASSERT_EQ(actual, expected)
                << "Mismatching array-array fvec_add results for nrows = "
                << nrows;
    }
}
// Validates the array-scalar overload of faiss::fvec_add against a scalar
// reference. Integer-valued floats keep the comparison exact.
TEST(TestFvecAdd, add_value) {
    std::default_random_engine gen(123);
    std::uniform_int_distribution<int32_t> value_dist(0, 32);

    for (const auto nrows : {1, 2, 5, 10, 15, 20, 25}) {
        std::vector<float> a(nrows);
        const float b = value_dist(gen); // value to add

        std::vector<float> expected(nrows);
        for (size_t i = 0; i < a.size(); i++) {
            a[i] = value_dist(gen);
            expected[i] = a[i] + b;
        }

        std::vector<float> actual(nrows);
        faiss::fvec_add(a.size(), a.data(), b, actual.data());

        ASSERT_EQ(actual, expected)
                << "Mismatching array-value fvec_add results for nrows = "
                << nrows;
    }
}

View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/cppcontrib/factory_tools.h>
#include <faiss/index_factory.h>
#include <gtest/gtest.h>
namespace faiss {
// Round-trips factory strings through index_factory + reverse_index_factory.
// The first table lists strings expected to come back verbatim; the second
// lists strings whose canonical (reversed) form differs from the input.
TEST(TestFactoryTools, TestReverseIndexFactory) {
    for (const char* factory : {
                 "Flat",
                 "IMI2x5,PQ8x8",
                 "IVF32_HNSW32,SQ8",
                 "IVF8,Flat",
                 "IVF8,SQ4",
                 "IVF8,PQ4x8",
                 "LSHrt",
                 "PQ4x8",
                 "HNSW32",
                 "SQ8",
                 "SQfp16",
                 "NSG24,Flat",
                 "NSG16,SQ8",
         }) {
        std::unique_ptr<Index> index{index_factory(64, factory)};
        ASSERT_TRUE(index);
        EXPECT_EQ(factory, reverse_index_factory(index.get()));
    }
    // (input, expected canonical form) pairs
    using Case = std::pair<const char*, const char*>;
    for (auto [src, dst] : {
                 Case{"SQ8,RFlat", "SQ8,Refine(Flat)"},
                 Case{"NSG", "NSG32,Flat"},
                 Case{"NSG,PQ8", "NSG32,PQ8x8"},
         }) {
        std::unique_ptr<Index> index{index_factory(64, src)};
        ASSERT_TRUE(index);
        EXPECT_EQ(dst, reverse_index_factory(index.get()));
    }
}
} // namespace faiss

View File

@@ -0,0 +1,66 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <random>
#include <vector>

#include <omp.h>

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQFastScan.h>
#include <faiss/impl/AuxIndexStructures.h>
// Sanity-checks that IndexIVFPQFastScan's range_search and knn search have
// comparable running time (within a factor of 4 of each other) on the same
// queries, with several OpenMP threads active.
TEST(TestFastScan, knnVSrange) {
    // small vectors and database
    int d = 64;
    size_t nb = 4000;
    // ivf centroids
    size_t nlist = 4;
    // more than 2 threads to surface
    // problems related to multi-threading
    omp_set_num_threads(8);
    // random database, also used as queries
    std::vector<float> database(nb * d);
    std::mt19937 rng;
    std::uniform_real_distribution<> distrib;
    for (size_t i = 0; i < nb * d; i++) {
        database[i] = distrib(rng);
    }
    // build index
    faiss::IndexFlatL2 coarse_quantizer(d);
    faiss::IndexIVFPQFastScan index(
            &coarse_quantizer, d, nlist, d / 2, 4, faiss::METRIC_L2, 32);
    index.pq.cp.niter = 10; // speed up train
    index.nprobe = nlist;
    index.train(nb, database.data());
    index.add(nb, database.data());
    // time a k=1 knn search over the whole database
    std::vector<float> distances(nb);
    std::vector<faiss::idx_t> labels(nb);
    auto t = std::chrono::high_resolution_clock::now();
    index.search(nb, database.data(), 1, distances.data(), labels.data());
    auto knn_time = std::chrono::high_resolution_clock::now() - t;
    // time the equivalent range search (radius 1.0)
    faiss::RangeSearchResult rsr(nb);
    t = std::chrono::high_resolution_clock::now();
    index.range_search(nb, database.data(), 1.0, &rsr);
    auto range_time = std::chrono::high_resolution_clock::now() - t;
    // we expect the perf of knn and range search
    // to be similar, at least within a factor of 4
    ASSERT_LE(range_time, knn_time * 4);
    ASSERT_LE(knn_time, range_time * 4);
}

View File

@@ -0,0 +1,335 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/hamming.h>
#include <random>
using namespace ::testing;
// Renders the contents of `data` as a string, inserting '|' before every
// group of `divider` elements (and a trailing '|') with spaces between
// elements within a group, e.g. {1,2,3,4} with divider 2 -> "|1 2|3 4|".
template <typename T>
std::string print_data(
        std::shared_ptr<std::vector<T>> data,
        const size_t divider) {
    std::string out;
    for (size_t pos = 0; pos < data->size(); ++pos) {
        out += (pos % divider) ? " " : "|";
        out += std::to_string((*data)[pos]);
    }
    out += "|";
    return out;
}
// Builds a deterministic Hamming search example.
// Queries (a) are na vectors of all-1 bytes; the database (b) holds nb
// vectors of all-2 bytes except for k "planted" vectors that share a
// growing prefix of 1-bytes with the queries. Outputs (k entries per
// query, best first):
//   true_ids            - expected result ids
//   true_bit_distances  - expected regular (bit-level) Hamming distances
//   true_byte_distances - expected generalized (byte-level) distances
// Returns a human-readable dump of the example for assertion messages.
std::stringstream get_correct_hamming_example(
        const size_t na, // number of queries
        const size_t nb, // number of candidates
        const size_t k,
        const size_t code_size,
        std::shared_ptr<std::vector<uint8_t>> a,
        std::shared_ptr<std::vector<uint8_t>> b,
        std::shared_ptr<std::vector<long>> true_ids,
        // regular Hamming (bit-level distances)
        std::shared_ptr<std::vector<int>> true_bit_distances,
        // generalized Hamming (byte-level distances)
        std::shared_ptr<std::vector<int>> true_byte_distances) {
    assert(nb >= k);

    // Initialization
    std::default_random_engine rng(123);
    std::uniform_int_distribution<int32_t> uniform(0, nb - 1);
    const size_t nresults = na * k;
    a->clear();
    a->resize(na * code_size, 1); // query vectors are all 1
    b->clear();
    b->resize(nb * code_size, 2); // database vectors are all 2
    true_ids->clear();
    true_ids->reserve(nresults);
    true_bit_distances->clear();
    true_bit_distances->reserve(nresults);
    true_byte_distances->clear();
    true_byte_distances->reserve(nresults);

    // draw k unique ids to plant matches at
    std::set<long> correct_ids;
    do {
        correct_ids.insert(uniform(rng));
    } while (correct_ids.size() < k);

    // replace database vector at id with a vector more similar to the query;
    // ordered, so earlier (smaller) ids must be more similar
    for (size_t nmatches = k; nmatches > 0; --nmatches) {
        // take the smallest remaining id.
        // BUGFIX: the previous code dereferenced the iterator returned by
        // erase(), which is undefined behavior on the last iteration (erase
        // of the final element returns end()).
        const size_t id = *correct_ids.begin();
        correct_ids.erase(correct_ids.begin());
        // record the expected id and distances for this planted match
        true_ids->push_back(id);
        true_bit_distances->push_back(
                (code_size > nmatches ? code_size - nmatches : 0) *
                /* per-byte distance between 1 and 2 (0b01 and 0b10) */
                2);
        true_byte_distances->push_back(
                (code_size > nmatches ? code_size - nmatches : 0));
        // overwrite the first nmatches bytes with the query byte value
        for (size_t i = 0; i < nmatches; ++i) {
            b->begin()[id * code_size + i] = 1; // query byte value
        }
    }

    // true_ids, true_bit_distances, true_byte_distances so far only contain
    // results for the first query. Query vectors are identical (all 1s), so
    // replicate the first k results na-1 times. reserve(nresults) above
    // guarantees no reallocation happens while self-inserting.
    for (size_t i = 1; i < na; ++i) {
        true_ids->insert(
                true_ids->end(), true_ids->begin(), true_ids->begin() + k);
        true_bit_distances->insert(
                true_bit_distances->end(),
                true_bit_distances->begin(),
                true_bit_distances->begin() + k);
        true_byte_distances->insert(
                true_byte_distances->end(),
                true_byte_distances->begin(),
                true_byte_distances->begin() + k);
    }

    // assemble a debug string describing the example
    std::stringstream ret;
    ret << "na: " << na << std::endl
        << "nb: " << nb << std::endl
        << "k: " << k << std::endl
        << "code_size: " << code_size << std::endl
        << "a: " << print_data(a, code_size) << std::endl
        << "b: " << print_data(b, code_size) << std::endl
        << "true_ids: " << print_data(true_ids, k) << std::endl
        << "true_bit_distances: " << print_data(true_bit_distances, k)
        << std::endl
        << "true_byte_distances: " << print_data(true_byte_distances, k)
        << std::endl;
    return ret;
}
// Checks faiss::crosshamming_count_thres (count of code pairs with Hamming
// distance below a threshold) against a scalar reference count, and that
// an unsupported code size (65) throws.
TEST(TestHamming, test_crosshamming_count_thres) {
    // Initialize randomizer
    std::default_random_engine rng(123);
    std::uniform_int_distribution<int32_t> uniform(0, 255);
    // Initialize inputs
    const size_t n = 10; // number of codes
    const hamdis_t hamming_threshold = 20;
    // one per supported code size - 65 hits the unsupported default case
    for (auto ncodes : {8, 16, 32, 64, 65}) {
        // initialize inputs
        const int nbits = ncodes * 8;
        const size_t nwords = nbits / 64;
        // x8 bytes per 64-bit word (read below through a uint64_t*),
        // x2 as extra buffer space
        std::vector<uint8_t> dbs(nwords * n * 8 * 2);
        for (int i = 0; i < dbs.size(); ++i) {
            dbs[i] = uniform(rng);
        }
        // compute the reference count
        size_t true_count = 0;
        uint64_t* bs1 = (uint64_t*)dbs.data();
        for (int i = 0; i < n; ++i) {
            // NOTE(review): bs2 starts 2 words past bs1, so the reference
            // compares code i against data at word offset 2 + j * nwords
            // rather than code j itself; presumably this only agrees with
            // crosshamming_count_thres because the threshold keeps both
            // counts identical on this random data -- verify intent.
            uint64_t* bs2 = bs1 + 2;
            for (int j = i + 1; j < n; ++j) {
                if (faiss::hamming(bs1 + i * nwords, bs2 + j * nwords, nwords) <
                    hamming_threshold) {
                    ++true_count;
                }
            }
        }
        // run test and check correctness
        size_t count;
        if (ncodes == 65) {
            // unsupported size must raise
            ASSERT_THROW(
                    faiss::crosshamming_count_thres(
                            dbs.data(), n, hamming_threshold, ncodes, &count),
                    faiss::FaissException);
            continue;
        }
        faiss::crosshamming_count_thres(
                dbs.data(), n, hamming_threshold, ncodes, &count);
        ASSERT_EQ(count, true_count) << "ncodes = " << ncodes;
    }
}
// Checks faiss::match_hamming_thres (collect pairs below a threshold) and
// faiss::hamming_count_thres (count them) against a scalar reference, and
// that an unsupported code size (65) throws from both.
TEST(TestHamming, test_hamming_thres) {
    // Initialize randomizer
    std::default_random_engine rng(123);
    std::uniform_int_distribution<int32_t> uniform(0, 255);
    // Initialize inputs
    const size_t n1 = 10;
    const size_t n2 = 15;
    const hamdis_t hamming_threshold = 100;
    // one per supported code size - 65 hits the unsupported default case
    for (auto ncodes : {8, 16, 32, 64, 65}) {
        // initialize random codes (x8 bytes per 64-bit word)
        const int nbits = ncodes * 8;
        const size_t nwords = nbits / 64;
        std::vector<uint8_t> bs1(nwords * n1 * 8);
        std::vector<uint8_t> bs2(nwords * n2 * 8);
        for (int i = 0; i < bs1.size(); ++i) {
            bs1[i] = uniform(rng);
        }
        for (int i = 0; i < bs2.size(); ++i) {
            bs2[i] = uniform(rng);
        }
        // reference: count all (i, j) pairs below the threshold and record
        // their indices and distances
        size_t true_count = 0;
        std::vector<int64_t> true_idx;
        std::vector<hamdis_t> true_dis;
        uint64_t* bs1_64 = (uint64_t*)bs1.data();
        uint64_t* bs2_64 = (uint64_t*)bs2.data();
        for (int i = 0; i < n1; ++i) {
            for (int j = 0; j < n2; ++j) {
                hamdis_t ham_dist = faiss::hamming(
                        bs1_64 + i * nwords, bs2_64 + j * nwords, nwords);
                if (ham_dist < hamming_threshold) {
                    ++true_count;
                    true_idx.push_back(i);
                    true_idx.push_back(j);
                    true_dis.push_back(ham_dist);
                }
            }
        }
        // run test and check correctness for both
        // match_hamming_thres and hamming_count_thres
        std::vector<int64_t> idx(true_idx.size());
        std::vector<hamdis_t> dis(true_dis.size());
        if (ncodes == 65) {
            // unsupported size must raise from both entry points
            ASSERT_THROW(
                    faiss::match_hamming_thres(
                            bs1.data(),
                            bs2.data(),
                            n1,
                            n2,
                            hamming_threshold,
                            ncodes,
                            idx.data(),
                            dis.data()),
                    faiss::FaissException);
            ASSERT_THROW(
                    faiss::hamming_count_thres(
                            bs1.data(),
                            bs2.data(),
                            n1,
                            n2,
                            hamming_threshold,
                            ncodes,
                            nullptr),
                    faiss::FaissException);
            continue;
        }
        size_t match_count = faiss::match_hamming_thres(
                bs1.data(),
                bs2.data(),
                n1,
                n2,
                hamming_threshold,
                ncodes,
                idx.data(),
                dis.data());
        size_t count_count;
        faiss::hamming_count_thres(
                bs1.data(),
                bs2.data(),
                n1,
                n2,
                hamming_threshold,
                ncodes,
                &count_count);
        ASSERT_EQ(match_count, true_count) << "ncodes = " << ncodes;
        ASSERT_EQ(count_count, true_count) << "ncodes = " << ncodes;
        ASSERT_EQ(idx, true_idx) << "ncodes = " << ncodes;
        ASSERT_EQ(dis, true_dis) << "ncodes = " << ncodes;
    }
}
// Checks generalized_hammings_knn_hc (byte-level distances), hammings_knn
// (bit-level distances) and hammings (full distance matrix) against the
// planted example built by get_correct_hamming_example.
TEST(TestHamming, test_hamming_knn) {
    // Initialize randomizer
    std::default_random_engine rng(123);
    std::uniform_int_distribution<int32_t> uniform(0, 32);
    // Initialize inputs
    const size_t na = 4;
    const size_t nb = 12; // number of candidates
    const size_t k = 6;
    auto a = std::make_shared<std::vector<uint8_t>>();
    auto b = std::make_shared<std::vector<uint8_t>>();
    auto true_ids = std::make_shared<std::vector<long>>();
    auto true_bit_distances = std::make_shared<std::vector<int>>();
    auto true_byte_distances = std::make_shared<std::vector<int>>();
    // 8, 16, 32 are specialized cases - 24 will hit the default case;
    // all should be multiples of 8
    for (auto code_size : {8, 16, 24, 32}) {
        // build the planted example
        std::stringstream assert_str = get_correct_hamming_example(
                na,
                nb,
                k,
                code_size,
                a,
                b,
                true_ids,
                true_bit_distances,
                true_byte_distances);
        // run test on generalized_hammings_knn_hc (byte-level distances)
        std::vector<long> ids_gen(na * k);
        std::vector<int> dist_gen(na * k);
        faiss::int_maxheap_array_t res = {
                na, k, ids_gen.data(), dist_gen.data()};
        faiss::generalized_hammings_knn_hc(
                &res, a->data(), b->data(), nb, code_size, true);
        ASSERT_EQ(ids_gen, *true_ids) << assert_str.str();
        ASSERT_EQ(dist_gen, *true_byte_distances) << assert_str.str();
        // run test on hammings_knn (bit-level distances)
        std::vector<long> ids_ham_knn(na * k, 0);
        std::vector<int> dist_ham_knn(na * k, 0);
        res = {na, k, ids_ham_knn.data(), dist_ham_knn.data()};
        faiss::hammings_knn(&res, a->data(), b->data(), nb, code_size, true);
        ASSERT_EQ(ids_ham_knn, *true_ids) << assert_str.str();
        ASSERT_EQ(dist_ham_knn, *true_bit_distances) << assert_str.str();
    }
    // run test on hammings, which computes the full na x nb distance matrix
    for (auto code_size : {8, 16, 24, 32}) {
        std::stringstream assert_str = get_correct_hamming_example(
                na,
                nb,
                /* k */ nb, // faiss::hammings computes all distances
                code_size,
                a,
                b,
                true_ids,
                true_bit_distances,
                true_byte_distances);
        std::vector<hamdis_t> dist_gen(na * nb);
        faiss::hammings(
                a->data(), b->data(), na, nb, code_size, dist_gen.data());
        EXPECT_EQ(dist_gen, *true_bit_distances) << assert_str.str();
    }
}

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/utils/Heap.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <numeric>
using namespace faiss;
// Checks heap_array_t::addn_with_ids: after heapify'ing n k=1 heaps and
// pushing one (distance 0, label 1) entry per heap, every heap's label
// must be 1.
TEST(Heap, addn_with_ids) {
    size_t n = 1000;
    size_t k = 1;
    std::vector<int64_t> heap_labels(n, -1);
    std::vector<float> heap_distances(n, 0);
    float_minheap_array_t heaps = {
            n, k, heap_labels.data(), heap_distances.data()};
    heaps.heapify();
    std::vector<int64_t> labels(n, 1);
    std::vector<float> distances(n, 0.0f);
    // (an unused `subset` vector present in the original was removed;
    // it was never passed to any call)
    heaps.addn_with_ids(1, distances.data(), labels.data(), 1);
    heaps.reorder();
    EXPECT_TRUE(
            std::all_of(heap_labels.begin(), heap_labels.end(), [](int64_t i) {
                return i == 1;
            }));
}
// Exercises addn_query_subset_with_ids over n > 2^24 heaps (to catch
// narrow-index overflows); every heap must end up with label 1.
TEST(Heap, addn_query_subset_with_ids) {
    size_t n = 20000000; // more than 2^24
    size_t k = 1;
    std::vector<int64_t> heap_labels(n, -1);
    std::vector<float> heap_distances(n, 0);
    float_minheap_array_t heaps = {
            n, k, heap_labels.data(), heap_distances.data()};
    heaps.heapify();
    std::vector<int64_t> labels(n, 1);
    std::vector<float> distances(n, 0.0f);
    // the query subset covers every heap: 0, 1, ..., n-1
    std::vector<int64_t> subset(n);
    std::iota(subset.begin(), subset.end(), 0);
    heaps.addn_query_subset_with_ids(
            n, subset.data(), 1, distances.data(), labels.data(), 1);
    heaps.reorder();
    EXPECT_TRUE(
            std::all_of(heap_labels.begin(), heap_labels.end(), [](int64_t i) {
                return i == 1;
            }));
}

View File

@@ -0,0 +1,657 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <cstddef>
#include <limits>
#include <random>
#include <unordered_set>
#include <vector>
#include <faiss/IndexHNSW.h>
#include <faiss/impl/HNSW.h>
#include <faiss/impl/ResultHandler.h>
#include <faiss/utils/random.h>
// Reference O(n) implementation of MinimaxHeap::pop_min: scans the whole
// storage for the smallest valid entry (id != -1, ties broken in favor of
// the highest index), invalidates it and returns its id, or -1 if no valid
// entry remains. Writes the minimum distance to *vmin_out when non-null.
int reference_pop_min(faiss::HNSW::MinimaxHeap& heap, float* vmin_out) {
    assert(heap.k > 0);
    int imin = -1;
    float vmin = 0;
    // single downward scan; the first valid slot seeds the minimum, later
    // (lower-index) slots replace it only on a strictly smaller distance
    for (int i = heap.k - 1; i >= 0; i--) {
        if (heap.ids[i] == -1) {
            continue;
        }
        if (imin == -1 || heap.dis[i] < vmin) {
            vmin = heap.dis[i];
            imin = i;
        }
    }
    if (imin == -1) {
        return -1;
    }
    if (vmin_out) {
        *vmin_out = vmin;
    }
    int ret = heap.ids[imin];
    heap.ids[imin] = -1;
    --heap.nvalid;
    return ret;
}
// Fills a MinimaxHeap with random unique ids and random distances (some of
// them +inf), clones it, then drains the optimized pop_min and the O(n)
// reference implementation in lockstep, asserting identical internal state
// and identical return values at every step.
void test_popmin(int heap_size, int amount_to_put) {
    // create a heap
    faiss::HNSW::MinimaxHeap mm_heap(heap_size);
    using storage_idx_t = faiss::HNSW::storage_idx_t;
    // seed depends on the parameters so each combination gets its own data
    std::default_random_engine rng(123 + heap_size * amount_to_put);
    std::uniform_int_distribution<storage_idx_t> u(0, 65536);
    std::uniform_real_distribution<float> uf(0, 1);
    // generate random unique indices
    std::unordered_set<storage_idx_t> indices;
    while (indices.size() < amount_to_put) {
        const storage_idx_t index = u(rng);
        indices.insert(index);
    }
    // put them into the heap
    for (const auto index : indices) {
        float distance = uf(rng);
        if (distance >= 0.7f) {
            // add infinity values from time to time
            distance = std::numeric_limits<float>::infinity();
        }
        mm_heap.push(index, distance);
    }
    // clone the heap
    faiss::HNSW::MinimaxHeap cloned_mm_heap = mm_heap;
    // take entries out one by one
    while (mm_heap.size() > 0) {
        // both heaps must stay identical throughout
        ASSERT_EQ(mm_heap.n, cloned_mm_heap.n);
        ASSERT_EQ(mm_heap.k, cloned_mm_heap.k);
        ASSERT_EQ(mm_heap.nvalid, cloned_mm_heap.nvalid);
        ASSERT_EQ(mm_heap.ids, cloned_mm_heap.ids);
        ASSERT_EQ(mm_heap.dis, cloned_mm_heap.dis);
        // use the reference pop_min for the cloned heap
        float cloned_vmin_dis = std::numeric_limits<float>::quiet_NaN();
        storage_idx_t cloned_vmin_idx =
                reference_pop_min(cloned_mm_heap, &cloned_vmin_dis);
        float vmin_dis = std::numeric_limits<float>::quiet_NaN();
        storage_idx_t vmin_idx = mm_heap.pop_min(&vmin_dis);
        // compare returns
        ASSERT_EQ(vmin_dis, cloned_vmin_dis);
        ASSERT_EQ(vmin_idx, cloned_vmin_idx);
    }
    // compare heaps again after both are drained
    ASSERT_EQ(mm_heap.n, cloned_mm_heap.n);
    ASSERT_EQ(mm_heap.k, cloned_mm_heap.k);
    ASSERT_EQ(mm_heap.nvalid, cloned_mm_heap.nvalid);
    ASSERT_EQ(mm_heap.ids, cloned_mm_heap.ids);
    ASSERT_EQ(mm_heap.dis, cloned_mm_heap.dis);
}
// Same as test_popmin, but every element is pushed with the SAME distance,
// to verify that pop_min's tie-breaking matches the reference exactly.
void test_popmin_identical_distances(
        int heap_size,
        int amount_to_put,
        const float distance) {
    // create a heap
    faiss::HNSW::MinimaxHeap mm_heap(heap_size);
    using storage_idx_t = faiss::HNSW::storage_idx_t;
    // deterministic seed derived from the parameters for reproducibility
    std::default_random_engine rng(123 + heap_size * amount_to_put);
    std::uniform_int_distribution<storage_idx_t> u(0, 65536);
    // generate random unique indices
    std::unordered_set<storage_idx_t> indices;
    while (indices.size() < amount_to_put) {
        const storage_idx_t index = u(rng);
        indices.insert(index);
    }
    // put ones into the heap, all with the same distance
    for (const auto index : indices) {
        mm_heap.push(index, distance);
    }
    // clone the heap
    faiss::HNSW::MinimaxHeap cloned_mm_heap = mm_heap;
    // takes ones out one by one
    while (mm_heap.size() > 0) {
        // compare heaps: internal storage must stay in lockstep
        ASSERT_EQ(mm_heap.n, cloned_mm_heap.n);
        ASSERT_EQ(mm_heap.k, cloned_mm_heap.k);
        ASSERT_EQ(mm_heap.nvalid, cloned_mm_heap.nvalid);
        ASSERT_EQ(mm_heap.ids, cloned_mm_heap.ids);
        ASSERT_EQ(mm_heap.dis, cloned_mm_heap.dis);
        // use the reference pop_min for the cloned heap
        float cloned_vmin_dis = std::numeric_limits<float>::quiet_NaN();
        storage_idx_t cloned_vmin_idx =
                reference_pop_min(cloned_mm_heap, &cloned_vmin_dis);
        float vmin_dis = std::numeric_limits<float>::quiet_NaN();
        storage_idx_t vmin_idx = mm_heap.pop_min(&vmin_dis);
        // compare returns
        ASSERT_EQ(vmin_dis, cloned_vmin_dis);
        ASSERT_EQ(vmin_idx, cloned_vmin_idx);
    }
    // compare heaps again, after both are fully drained
    ASSERT_EQ(mm_heap.n, cloned_mm_heap.n);
    ASSERT_EQ(mm_heap.k, cloned_mm_heap.k);
    ASSERT_EQ(mm_heap.nvalid, cloned_mm_heap.nvalid);
    ASSERT_EQ(mm_heap.ids, cloned_mm_heap.ids);
    ASSERT_EQ(mm_heap.dis, cloned_mm_heap.dis);
}
// Exercise pop_min for a range of heap capacities and fill levels
// (fill halved down to 1 for each capacity).
TEST(HNSW, Test_popmin) {
    const std::vector<size_t> capacities = {
            1, 2, 3, 4, 5, 7, 9, 11, 16, 27, 32, 64, 128};
    for (const size_t capacity : capacities) {
        size_t fill = capacity;
        while (fill > 0) {
            test_popmin(capacity, fill);
            fill /= 2;
        }
    }
}
// Exercise pop_min tie-breaking: all elements share distance 1.0.
TEST(HNSW, Test_popmin_identical_distances) {
    const std::vector<size_t> capacities = {
            1, 2, 3, 4, 5, 7, 9, 11, 16, 27, 32};
    for (const size_t capacity : capacities) {
        size_t fill = capacity;
        while (fill > 0) {
            test_popmin_identical_distances(capacity, fill, 1.0f);
            fill /= 2;
        }
    }
}
// Exercise pop_min when every element's distance is +infinity.
TEST(HNSW, Test_popmin_infinite_distances) {
    const std::vector<size_t> capacities = {
            1, 2, 3, 4, 5, 7, 9, 11, 16, 27, 32};
    for (const size_t capacity : capacities) {
        size_t fill = capacity;
        while (fill > 0) {
            test_popmin_identical_distances(
                    capacity, fill, std::numeric_limits<float>::infinity());
            fill /= 2;
        }
    }
}
// HNSW search must honor the storage index's Minkowski metric argument:
// with METRIC_Lp and metric_arg = 3, d(0, 2) = |2 - 0|^3 = 8.
TEST(HNSW, Test_IndexHNSW_METRIC_Lp) {
    faiss::IndexFlat storage_index(1, faiss::METRIC_Lp);
    storage_index.metric_arg = 3;
    faiss::IndexHNSW index(&storage_index, 32);
    // single 1-d database point at the origin
    const float point[1] = {0.0f};
    index.add(1, point);
    // query at x = 2
    const float query[1] = {2.0f};
    float distance = 0.0f;
    faiss::idx_t label = -1;
    index.search(1, query, 1, &distance, &label);
    EXPECT_NEAR(distance, 8.0, 1e-5); // |2|^3 = 8
    EXPECT_EQ(label, 0);              // the only stored point
}
// Shared fixture: builds a small random HNSW index over nb vectors of
// dimension d, generates nq query vectors, and primes a DistanceComputer
// with the first query.
// Fixes vs. original: the reserve() calls after constructing the vectors
// at full size were no-ops and are removed; xq now uses std::make_unique
// for consistency with xb.
class HNSWTest : public testing::Test {
   protected:
    HNSWTest() {
        // database vectors (constructed at full size, then filled)
        xb = std::make_unique<std::vector<float>>(d * nb);
        faiss::float_rand(xb->data(), d * nb, 12345);
        index = std::make_unique<faiss::IndexHNSWFlat>(d, M);
        index->add(nb, xb->data());
        // query vectors
        xq = std::make_unique<std::vector<float>>(d * nq);
        faiss::float_rand(xq->data(), d * nq, 12345);
        // distance computer bound to the first query vector
        dis = std::unique_ptr<faiss::DistanceComputer>(
                index->storage->get_distance_computer());
        dis->set_query(xq->data() + 0 * index->d);
    }
    const int d = 64;    // vector dimensionality
    const int nb = 2000; // number of database vectors
    const int M = 4;     // HNSW connectivity parameter
    const int nq = 10;   // number of query vectors
    const int k = 10;    // number of results per query
    std::unique_ptr<std::vector<float>> xb;
    std::unique_ptr<std::vector<float>> xq;
    std::unique_ptr<faiss::DistanceComputer> dis;
    std::unique_ptr<faiss::IndexHNSWFlat> index;
};
/** Do a BFS on the candidates list */
/** Do a BFS on the candidates list.
 *
 * Straightforward reference implementation of HNSW::search_from_candidates:
 * seeds the result handler and visited table from the initial candidates,
 * then repeatedly pops the closest candidate and expands its neighbors on
 * the given level. Returns the updated number of results; level-0 calls
 * also accumulate into `stats`. Must match the optimized version exactly,
 * including traversal order and stopping conditions.
 */
int reference_search_from_candidates(
        const faiss::HNSW& hnsw,
        faiss::DistanceComputer& qdis,
        faiss::ResultHandler<faiss::HNSW::C>& res,
        faiss::HNSW::MinimaxHeap& candidates,
        faiss::VisitedTable& vt,
        faiss::HNSWStats& stats,
        int level,
        int nres_in,
        const faiss::SearchParametersHNSW* params) {
    int nres = nres_in;
    int ndis = 0;
    // can be overridden by search params
    bool do_dis_check = params ? params->check_relative_distance
                               : hnsw.check_relative_distance;
    int efSearch = params ? params->efSearch : hnsw.efSearch;
    const faiss::IDSelector* sel = params ? params->sel : nullptr;
    faiss::HNSW::C::T threshold = res.threshold;
    // seed the result handler with the initial candidates and mark them
    // as visited
    for (int i = 0; i < candidates.size(); i++) {
        faiss::idx_t v1 = candidates.ids[i];
        float d = candidates.dis[i];
        FAISS_ASSERT(v1 >= 0);
        if (!sel || sel->is_member(v1)) {
            if (d < threshold) {
                if (res.add_result(d, v1)) {
                    threshold = res.threshold;
                }
            }
        }
        vt.set(v1);
    }
    int nstep = 0;
    while (candidates.size() > 0) {
        float d0 = 0;
        int v0 = candidates.pop_min(&d0);
        if (do_dis_check) {
            // tricky stopping condition: there are more that ef
            // distances that are processed already that are smaller
            // than d0
            int n_dis_below = candidates.count_below(d0);
            if (n_dis_below >= efSearch) {
                break;
            }
        }
        size_t begin, end;
        hnsw.neighbor_range(v0, level, &begin, &end);
        // a reference version
        for (size_t j = begin; j < end; j++) {
            int v1 = hnsw.neighbors[j];
            // neighbor lists are -1-terminated
            if (v1 < 0)
                break;
            if (vt.get(v1)) {
                continue;
            }
            vt.set(v1);
            ndis++;
            float d = qdis(v1);
            // the selector filters results but NOT traversal: v1 is
            // still pushed as a candidate below
            if (!sel || sel->is_member(v1)) {
                if (d < threshold) {
                    if (res.add_result(d, v1)) {
                        threshold = res.threshold;
                        nres += 1;
                    }
                }
            }
            candidates.push(v1, d);
        }
        nstep++;
        if (!do_dis_check && nstep > efSearch) {
            break;
        }
    }
    // stats are only collected for the base level
    if (level == 0) {
        stats.n1++;
        if (candidates.size() == 0) {
            stats.n2++;
        }
        stats.ndis += ndis;
        stats.nhops += nstep;
    }
    return nres;
}
// Reference implementation of greedy_update_nearest: hill-climb on one
// level, repeatedly moving `nearest` to the closest neighbor until no
// neighbor improves `d_nearest`. Updates `nearest`/`d_nearest` in place
// and returns the traversal stats (ndis, nhops).
faiss::HNSWStats reference_greedy_update_nearest(
        const faiss::HNSW& hnsw,
        faiss::DistanceComputer& qdis,
        int level,
        faiss::HNSW::storage_idx_t& nearest,
        float& d_nearest) {
    faiss::HNSWStats stats;
    for (;;) {
        faiss::HNSW::storage_idx_t prev_nearest = nearest;
        size_t begin, end;
        hnsw.neighbor_range(nearest, level, &begin, &end);
        size_t ndis = 0;
        for (size_t i = begin; i < end; i++) {
            faiss::HNSW::storage_idx_t v = hnsw.neighbors[i];
            // neighbor lists are -1-terminated
            if (v < 0)
                break;
            ndis += 1;
            float dis = qdis(v);
            if (dis < d_nearest) {
                nearest = v;
                d_nearest = dis;
            }
        }
        // update stats
        stats.ndis += ndis;
        stats.nhops += 1;
        // local minimum reached: no neighbor was closer
        if (nearest == prev_nearest) {
            return stats;
        }
    }
}
// Reference implementation of search_from_candidate_unbounded: classic
// HNSW level-0 search with an unbounded candidate frontier (min-heap on
// distance) and a bounded result set of at most `ef` entries (max-heap).
// Returns the max-heap of up to `ef` best (distance, id) nodes.
std::priority_queue<faiss::HNSW::Node> reference_search_from_candidate_unbounded(
        const faiss::HNSW& hnsw,
        const faiss::HNSW::Node& node,
        faiss::DistanceComputer& qdis,
        int ef,
        faiss::VisitedTable* vt,
        faiss::HNSWStats& stats) {
    int ndis = 0;
    // best results so far: max-heap so .top() is the WORST kept result
    std::priority_queue<faiss::HNSW::Node> top_candidates;
    // frontier: min-heap so .top() is the closest unexpanded node
    std::priority_queue<
            faiss::HNSW::Node,
            std::vector<faiss::HNSW::Node>,
            std::greater<faiss::HNSW::Node>>
            candidates;
    top_candidates.push(node);
    candidates.push(node);
    vt->set(node.second);
    while (!candidates.empty()) {
        float d0;
        faiss::HNSW::storage_idx_t v0;
        std::tie(d0, v0) = candidates.top();
        // closest frontier node is worse than the worst kept result:
        // nothing left can improve the result set
        if (d0 > top_candidates.top().first) {
            break;
        }
        candidates.pop();
        size_t begin, end;
        hnsw.neighbor_range(v0, 0, &begin, &end);
        for (size_t j = begin; j < end; ++j) {
            int v1 = hnsw.neighbors[j];
            // neighbor lists are -1-terminated
            if (v1 < 0) {
                break;
            }
            if (vt->get(v1)) {
                continue;
            }
            vt->set(v1);
            float d1 = qdis(v1);
            ++ndis;
            // keep v1 if it improves the results or the result set is
            // not full yet; cap the result set at ef entries
            if (top_candidates.top().first > d1 || top_candidates.size() < ef) {
                candidates.emplace(d1, v1);
                top_candidates.emplace(d1, v1);
                if (top_candidates.size() > ef) {
                    top_candidates.pop();
                }
            }
        }
        stats.nhops += 1;
    }
    ++stats.n1;
    if (candidates.size() == 0) {
        ++stats.n2;
    }
    stats.ndis += ndis;
    return top_candidates;
}
// Check that faiss::search_from_candidate_unbounded agrees with the
// reference implementation above on stats and result-set size, starting
// both searches from the entry point.
TEST_F(HNSWTest, TEST_search_from_candidate_unbounded) {
    omp_set_num_threads(1);
    auto nearest = index->hnsw.entry_point;
    float d_nearest = (*dis)(nearest);
    auto node = faiss::HNSW::Node(d_nearest, nearest);
    faiss::VisitedTable vt(index->ntotal);
    faiss::HNSWStats stats;
    // actual version
    auto top_candidates = faiss::search_from_candidate_unbounded(
            index->hnsw, node, *dis, k, &vt, stats);
    auto reference_nearest = index->hnsw.entry_point;
    // fix: compute the distance of reference_nearest (the original read
    // `nearest`, which only worked because both hold the entry point)
    float reference_d_nearest = (*dis)(reference_nearest);
    auto reference_node =
            faiss::HNSW::Node(reference_d_nearest, reference_nearest);
    faiss::VisitedTable reference_vt(index->ntotal);
    faiss::HNSWStats reference_stats;
    // reference version
    auto reference_top_candidates = reference_search_from_candidate_unbounded(
            index->hnsw,
            reference_node,
            *dis,
            k,
            &reference_vt,
            reference_stats);
    EXPECT_EQ(stats.ndis, reference_stats.ndis);
    EXPECT_EQ(stats.nhops, reference_stats.nhops);
    EXPECT_EQ(stats.n1, reference_stats.n1);
    EXPECT_EQ(stats.n2, reference_stats.n2);
    EXPECT_EQ(top_candidates.size(), reference_top_candidates.size());
}
// Check that faiss::greedy_update_nearest agrees with the reference
// implementation: same stats, and (within tolerance) the same nearest
// point and distance, starting both climbs from the entry point.
TEST_F(HNSWTest, TEST_greedy_update_nearest) {
    omp_set_num_threads(1);
    auto nearest = index->hnsw.entry_point;
    float d_nearest = (*dis)(nearest);
    auto reference_nearest = index->hnsw.entry_point;
    float reference_d_nearest = (*dis)(reference_nearest);
    // actual version
    auto stats = faiss::greedy_update_nearest(
            index->hnsw, *dis, 0, nearest, d_nearest);
    // reference version
    auto reference_stats = reference_greedy_update_nearest(
            index->hnsw, *dis, 0, reference_nearest, reference_d_nearest);
    EXPECT_EQ(stats.ndis, reference_stats.ndis);
    EXPECT_EQ(stats.nhops, reference_stats.nhops);
    EXPECT_EQ(stats.n1, reference_stats.n1);
    EXPECT_EQ(stats.n2, reference_stats.n2);
    // both hill-climbs must converge to the same point
    EXPECT_NEAR(d_nearest, reference_d_nearest, 0.01);
    EXPECT_EQ(nearest, reference_nearest);
}
// Check that faiss::search_from_candidates agrees with the reference
// implementation when both are seeded with the same 10 candidates
// (ids 0..9, pre-marked visited) for the first query.
TEST_F(HNSWTest, TEST_search_from_candidates) {
    omp_set_num_threads(1);
    std::vector<faiss::idx_t> I(k * nq);
    std::vector<float> D(k * nq);
    std::vector<faiss::idx_t> reference_I(k * nq);
    std::vector<float> reference_D(k * nq);
    using RH = faiss::HeapBlockResultHandler<faiss::HNSW::C>;
    faiss::VisitedTable vt(index->ntotal);
    faiss::VisitedTable reference_vt(index->ntotal);
    int num_candidates = 10;
    faiss::HNSW::MinimaxHeap candidates(num_candidates);
    faiss::HNSW::MinimaxHeap reference_candidates(num_candidates);
    // seed both searches with identical candidates and visited state
    for (int i = 0; i < num_candidates; i++) {
        vt.set(i);
        reference_vt.set(i);
        candidates.push(i, (*dis)(i));
        reference_candidates.push(i, (*dis)(i));
    }
    faiss::HNSWStats stats;
    RH bres(nq, D.data(), I.data(), k);
    faiss::HeapBlockResultHandler<faiss::HNSW::C>::SingleResultHandler res(
            bres);
    res.begin(0);
    faiss::search_from_candidates(
            index->hnsw, *dis, res, candidates, vt, stats, 0, 0, nullptr);
    res.end();
    faiss::HNSWStats reference_stats;
    RH reference_bres(nq, reference_D.data(), reference_I.data(), k);
    faiss::HeapBlockResultHandler<faiss::HNSW::C>::SingleResultHandler
            reference_res(reference_bres);
    reference_res.begin(0);
    reference_search_from_candidates(
            index->hnsw,
            *dis,
            reference_res,
            reference_candidates,
            reference_vt,
            reference_stats,
            0,
            0,
            nullptr);
    reference_res.end();
    // results and stats must match (only query 0 was actually searched;
    // the remaining slots compare the untouched output buffers)
    for (int i = 0; i < nq; i++) {
        for (int j = 0; j < k; j++) {
            EXPECT_NEAR(I[i * k + j], reference_I[i * k + j], 0.1);
            EXPECT_NEAR(D[i * k + j], reference_D[i * k + j], 0.1);
        }
    }
    EXPECT_EQ(reference_stats.ndis, stats.ndis);
    EXPECT_EQ(reference_stats.nhops, stats.nhops);
    EXPECT_EQ(reference_stats.n1, stats.n1);
    EXPECT_EQ(reference_stats.n2, stats.n2);
}
// Check that search_neighbors_to_add produces identical link targets with
// its last argument toggled (false vs. true; presumably selecting the
// reference vs. optimized code path inside faiss -- confirm in HNSW.cpp).
TEST_F(HNSWTest, TEST_search_neighbors_to_add) {
    omp_set_num_threads(1);
    faiss::VisitedTable vt(index->ntotal);
    faiss::VisitedTable reference_vt(index->ntotal);
    std::priority_queue<faiss::HNSW::NodeDistCloser> link_targets;
    std::priority_queue<faiss::HNSW::NodeDistCloser> reference_link_targets;
    // last argument = false
    faiss::search_neighbors_to_add(
            index->hnsw,
            *dis,
            link_targets,
            index->hnsw.entry_point,
            (*dis)(index->hnsw.entry_point),
            index->hnsw.max_level,
            vt,
            false);
    // last argument = true
    faiss::search_neighbors_to_add(
            index->hnsw,
            *dis,
            reference_link_targets,
            index->hnsw.entry_point,
            (*dis)(index->hnsw.entry_point),
            index->hnsw.max_level,
            reference_vt,
            true);
    // both runs must yield the same targets in the same order
    EXPECT_EQ(link_targets.size(), reference_link_targets.size());
    while (!link_targets.empty()) {
        auto val = link_targets.top();
        auto reference_val = reference_link_targets.top();
        EXPECT_EQ(val.d, reference_val.d);
        EXPECT_EQ(val.id, reference_val.id);
        link_targets.pop();
        reference_link_targets.pop();
    }
}
// nb_neighbors: with M = 4, level 0 has 2 * M = 8 neighbors and the upper
// levels have M = 4; an out-of-range level must throw.
TEST_F(HNSWTest, TEST_nb_neighbors_bound) {
    omp_set_num_threads(1);
    const int expected[] = {8, 4, 4, 4};
    for (int level = 0; level < 4; level++) {
        EXPECT_EQ(index->hnsw.nb_neighbors(level), expected[level]);
    }
    // picking a large number to trigger an exception based on checking bounds
    EXPECT_THROW(index->hnsw.nb_neighbors(100), faiss::FaissException);
}
// Compare the two search_level_0 modes on the same fixed starting points:
// type 1 runs search_from_candidates once per probe, type 2 seeds all
// probes into one candidate set and searches once, so type 1 must
// accumulate strictly larger stats.
TEST_F(HNSWTest, TEST_search_level_0) {
    omp_set_num_threads(1);
    std::vector<faiss::idx_t> I(k * nq);
    std::vector<float> D(k * nq);
    using RH = faiss::HeapBlockResultHandler<faiss::HNSW::C>;
    RH bres1(nq, D.data(), I.data(), k);
    faiss::HeapBlockResultHandler<faiss::HNSW::C>::SingleResultHandler res1(
            bres1);
    RH bres2(nq, D.data(), I.data(), k);
    faiss::HeapBlockResultHandler<faiss::HNSW::C>::SingleResultHandler res2(
            bres2);
    faiss::HNSWStats stats1, stats2;
    faiss::VisitedTable vt1(index->ntotal);
    faiss::VisitedTable vt2(index->ntotal);
    auto nprobe = 5;
    // fixed entry points and distances shared by both runs
    const faiss::HNSW::storage_idx_t values[] = {1, 2, 3, 4, 5};
    const faiss::HNSW::storage_idx_t* nearest_i = values;
    const float distances[] = {0.1, 0.2, 0.3, 0.4, 0.5};
    const float* nearest_d = distances;
    // search_type == 1
    res1.begin(0);
    index->hnsw.search_level_0(
            *dis, res1, nprobe, nearest_i, nearest_d, 1, stats1, vt1, nullptr);
    res1.end();
    // search_type == 2
    res2.begin(0);
    index->hnsw.search_level_0(
            *dis, res2, nprobe, nearest_i, nearest_d, 2, stats2, vt2, nullptr);
    res2.end();
    // search_type 1 calls search_from_candidates in a loop nprobe times.
    // search_type 2 pushes the candidates and just calls search_from_candidates
    // once, so those stats will be much less.
    EXPECT_GT(stats1.ndis, stats2.ndis);
    EXPECT_GT(stats1.nhops, stats2.nhops);
    EXPECT_GT(stats1.n1, stats2.n1);
    EXPECT_GT(stats1.n2, stats2.n2);
}

View File

@@ -0,0 +1,254 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <omp.h>

#include <algorithm>
#include <cstddef>
#include <map>
#include <random>
#include <set>
#include <unordered_map>

#include <gtest/gtest.h>

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/impl/FaissAssert.h>
namespace {
// stores all ivf lists, used to verify the context
// object is passed to the iterator
// Mutable state shared between the test and the custom inverted lists:
// records every code added (keyed by a sequentially assigned id), which
// list each id went to, and which lists were probed during search. Used
// to verify that the context pointer is threaded through the IVF API.
class TestContext {
   public:
    TestContext() {}
    // Record one code for the next sequential id in list `list_no`.
    void save_code(size_t list_no, const uint8_t* code, size_t code_size) {
        list_nos.emplace(id, list_no);
        codes.emplace(id, std::vector<uint8_t>(code_size));
        // byte-wise copy of the code into the per-id buffer
        for (size_t i = 0; i < code_size; i++) {
            codes[id][i] = code[i];
        }
        id++;
    }
    // id to codes map
    std::unordered_map<faiss::idx_t, std::vector<uint8_t>> codes;
    // id to list_no map
    std::unordered_map<faiss::idx_t, size_t> list_nos;
    // next id to assign (== number of codes stored so far)
    faiss::idx_t id = 0;
    // set of list numbers requested via get_iterator during search
    std::set<size_t> lists_probed;
};
// the iterator that iterates over the codes stored in context object
// the iterator that iterates over the codes stored in context object,
// yielding only the entries whose recorded list number equals `list_no`.
// Iteration order follows the unordered_map's order, which is fine for
// this test. O(total entries) per full scan of one list.
class TestInvertedListIterator : public faiss::InvertedListsIterator {
   public:
    TestInvertedListIterator(size_t list_no, TestContext* context)
            : list_no{list_no}, context{context} {
        it = context->codes.cbegin();
        seek_next();
    }
    ~TestInvertedListIterator() override {}
    // move the cursor to the first valid entry (one belonging to list_no)
    void seek_next() {
        while (it != context->codes.cend() &&
               context->list_nos[it->first] != list_no) {
            it++;
        }
    }
    virtual bool is_available() const override {
        return it != context->codes.cend();
    }
    virtual void next() override {
        it++;
        seek_next();
    }
    // Returns the (id, code pointer) pair at the cursor; throws if the
    // cursor is exhausted.
    virtual std::pair<faiss::idx_t, const uint8_t*> get_id_and_codes()
            override {
        if (it == context->codes.cend()) {
            FAISS_THROW_MSG("invalid state");
        }
        return std::make_pair(it->first, it->second.data());
    }
   private:
    size_t list_no;
    TestContext* context;
    decltype(context->codes.cbegin()) it;
};
// InvertedLists implementation that stores codes in the TestContext passed
// through the `context` parameter, and serves reads via
// TestInvertedListIterator. Every entry point that should NOT be reached
// in iterator mode throws, so the test fails loudly if faiss bypasses the
// context-aware code paths.
class TestInvertedLists : public faiss::InvertedLists {
   public:
    TestInvertedLists(size_t nlist, size_t code_size)
            : faiss::InvertedLists(nlist, code_size) {
        // force faiss to use get_iterator() instead of get_codes()/get_ids()
        use_iterator = true;
    }
    ~TestInvertedLists() override {}
    size_t list_size(size_t /*list_no*/) const override {
        FAISS_THROW_MSG("unexpected call");
    }
    // Records the probe in the context, then returns an iterator over the
    // context's entries for this list. Caller takes ownership.
    faiss::InvertedListsIterator* get_iterator(size_t list_no, void* context)
            const override {
        auto testContext = (TestContext*)context;
        testContext->lists_probed.insert(list_no);
        return new TestInvertedListIterator(list_no, testContext);
    }
    const uint8_t* get_codes(size_t /* list_no */) const override {
        FAISS_THROW_MSG("unexpected call");
    }
    const faiss::idx_t* get_ids(size_t /* list_no */) const override {
        FAISS_THROW_MSG("unexpected call");
    }
    // store the codes in context object (the passed id is ignored; the
    // context assigns sequential ids itself)
    size_t add_entry(
            size_t list_no,
            faiss::idx_t /*theid*/,
            const uint8_t* code,
            void* context) override {
        auto testContext = (TestContext*)context;
        testContext->save_code(list_no, code, code_size);
        return 0;
    }
    size_t add_entries(
            size_t /*list_no*/,
            size_t /*n_entry*/,
            const faiss::idx_t* /*ids*/,
            const uint8_t* /*code*/) override {
        FAISS_THROW_MSG("unexpected call");
    }
    void update_entries(
            size_t /*list_no*/,
            size_t /*offset*/,
            size_t /*n_entry*/,
            const faiss::idx_t* /*ids*/,
            const uint8_t* /*code*/) override {
        FAISS_THROW_MSG("unexpected call");
    }
    void resize(size_t /*list_no*/, size_t /*new_size*/) override {
        FAISS_THROW_MSG("unexpected call");
    }
};
} // namespace
TEST(IVF, list_context) {
    // this test verifies that the context object is passed
    // to the InvertedListsIterator and InvertedLists::add_entry.
    // the test InvertedLists and InvertedListsIterator reads/writes
    // to the test context object.
    // the test verifies the context object is modified as expected.
    constexpr int d = 32;      // dimension
    constexpr int nb = 100000; // database size
    constexpr int nlist = 100;
    std::mt19937 rng;
    std::uniform_real_distribution<> distrib;
    // disable parallelism, or we would need to make the Context object
    // thread-safe
    omp_set_num_threads(1);
    faiss::IndexFlatL2 quantizer(d); // the other index
    faiss::IndexIVFFlat index(&quantizer, d, nlist);
    TestInvertedLists inverted_lists(nlist, index.code_size);
    // inverted_lists outlives index, so the raw pointer stays valid
    index.replace_invlists(&inverted_lists);
    {
        // training
        constexpr size_t nt = 1500; // nb of training vectors
        std::vector<float> trainvecs(nt * d);
        for (size_t i = 0; i < nt * d; i++) {
            trainvecs[i] = distrib(rng);
        }
        index.verbose = true;
        index.train(nt, trainvecs.data());
    }
    TestContext context;
    std::vector<float> query_vector;
    constexpr faiss::idx_t query_vector_id = 100;
    {
        // populating the database
        std::vector<float> database(nb * d);
        for (size_t i = 0; i < nb * d; i++) {
            database[i] = distrib(rng);
            // populate the query vector (a copy of database vector 100)
            if (i >= query_vector_id * d && i < query_vector_id * d + d) {
                query_vector.push_back(database[i]);
            }
        }
        std::vector<faiss::idx_t> coarse_idx(nb);
        index.quantizer->assign(nb, database.data(), coarse_idx.data());
        // pass dummy ids, the actual ids are assigned in the TestContext
        // object (sequentially, so vector 100 gets id 100)
        std::vector<faiss::idx_t> xids(nb, 42);
        index.add_core(
                nb, database.data(), xids.data(), coarse_idx.data(), &context);
        // check the context object gets updated
        EXPECT_EQ(nb, context.id) << "should have added all ids";
        EXPECT_EQ(nb, context.codes.size())
                << "should have correct number of codes";
        EXPECT_EQ(nb, context.list_nos.size())
                << "should have correct number of list numbers";
    }
    {
        // round-trip through the standalone codec must be lossless for
        // IVFFlat (codes are verbatim float vectors)
        constexpr size_t num_vecs = 5; // number of vectors
        std::vector<float> vecs(num_vecs * d);
        for (size_t i = 0; i < num_vecs * d; i++) {
            vecs[i] = distrib(rng);
        }
        const size_t codeSize = index.sa_code_size();
        std::vector<uint8_t> encodedData(num_vecs * codeSize);
        index.sa_encode(num_vecs, vecs.data(), encodedData.data());
        std::vector<float> decodedVecs(num_vecs * d);
        index.sa_decode(num_vecs, encodedData.data(), decodedVecs.data());
        EXPECT_EQ(vecs, decodedVecs)
                << "decoded vectors should be the same as the original vectors that were encoded";
    }
    {
        // search with the context attached to the search parameters
        constexpr faiss::idx_t k = 100;
        constexpr size_t nprobe = 10;
        std::vector<float> distances(k);
        std::vector<faiss::idx_t> labels(k);
        faiss::SearchParametersIVF params;
        params.inverted_list_context = &context;
        params.nprobe = nprobe;
        index.search(
                1,
                query_vector.data(),
                k,
                distances.data(),
                labels.data(),
                &params);
        EXPECT_EQ(nprobe, context.lists_probed.size())
                << "should probe nprobe lists";
        // check the result contains the query vector, the probability of
        // this failing should be low
        auto query_vector_listno = context.list_nos[query_vector_id];
        auto& lists_probed = context.lists_probed;
        EXPECT_TRUE(
                std::find(
                        lists_probed.cbegin(),
                        lists_probed.cend(),
                        query_vector_listno) != lists_probed.cend())
                << "should probe the list of the query vector";
        EXPECT_TRUE(
                std::find(labels.cbegin(), labels.cend(), query_vector_id) !=
                labels.cend())
                << "should return the query vector";
    }
}

View File

@@ -0,0 +1,85 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <random>
#include <omp.h>
#include <gtest/gtest.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/utils/distances.h>
namespace {
// dimension of the vectors to index
int d = 64;
// size of the database we plan to index (number of vectors)
size_t nb = 8000;
// Train an IVFPQ index (ncentroids coarse centroids, m sub-quantizers of
// 8 bits) on the nb vectors in v, encode then decode them, and return the
// total squared L2 reconstruction error. Lower is more accurate.
double eval_codec_error(long ncentroids, long m, const std::vector<float>& v) {
    faiss::IndexFlatL2 coarse_quantizer(d);
    faiss::IndexIVFPQ index(&coarse_quantizer, d, ncentroids, m, 8);
    index.pq.cp.niter = 10; // speed up train
    index.train(nb, v.data());
    // encode and decode to compute reconstruction error
    std::vector<faiss::idx_t> keys(nb);
    std::vector<uint8_t> codes(nb * m); // m bytes per vector (8-bit codes)
    index.encode_multiple(nb, keys.data(), v.data(), codes.data(), true);
    std::vector<float> v2(nb * d);
    index.decode_multiple(nb, keys.data(), codes.data(), v2.data());
    return faiss::fvec_L2sqr(v.data(), v2.data(), nb * d);
}
} // namespace
// Detect Meta's Sandcastle CI environment from environment variables, so
// the codec test can limit its OpenMP thread count there.
bool runs_on_sandcastle() {
    // see discussion here https://fburl.com/qc5kpdo2
    auto env_equals = [](const char* name, const char* expected) {
        const char* value = getenv(name);
        return value != nullptr && strcmp(value, expected) == 0;
    };
    return env_equals("SANDCASTLE", "1") ||
            env_equals("TW_JOB_USER", "sandcastle");
}
// The IVFPQ codec's reconstruction error must decrease when either the
// number of coarse centroids or the number of PQ sub-quantizers grows.
TEST(IVFPQ, codec) {
    std::vector<float> database(nb * d);
    std::mt19937 rng;
    std::uniform_real_distribution<> distrib;
    for (size_t i = 0; i < nb * d; i++) {
        database[i] = distrib(rng);
    }
    // limit number of threads when running on heavily parallelized test
    // environment
    if (runs_on_sandcastle()) {
        omp_set_num_threads(2);
    }
    // baseline: 16 centroids, 8 sub-quantizers
    double err0 = eval_codec_error(16, 8, database);
    // should be more accurate as there are more coarse centroids
    double err1 = eval_codec_error(128, 8, database);
    EXPECT_GT(err0, err1);
    // should be more accurate as there are more PQ codes
    double err2 = eval_codec_error(16, 16, database);
    EXPECT_GT(err0, err2);
}

View File

@@ -0,0 +1,93 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <random>
#include <gtest/gtest.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
// End-to-end IVFPQ recall test: at least 40% of the queries must have
// their exact (IndexFlatL2) nearest neighbor among the top-5 IVFPQ results.
TEST(IVFPQ, accuracy) {
    // dimension of the vectors to index
    int d = 64;
    // size of the database we plan to index
    size_t nb = 1000;
    // make a set of nt training vectors in the unit cube
    // (could be the database)
    size_t nt = 1500;
    // make the index object and train it
    faiss::IndexFlatL2 coarse_quantizer(d);
    // a reasonable number of centroids to index nb vectors
    int ncentroids = 25;
    faiss::IndexIVFPQ index(&coarse_quantizer, d, ncentroids, 16, 8);
    // index that gives the ground-truth
    faiss::IndexFlatL2 index_gt(d);
    std::mt19937 rng;
    std::uniform_real_distribution<> distrib;
    { // training
        std::vector<float> trainvecs(nt * d);
        for (size_t i = 0; i < nt * d; i++) {
            trainvecs[i] = distrib(rng);
        }
        index.verbose = true;
        index.train(nt, trainvecs.data());
    }
    { // populating the database (same data in both indexes)
        std::vector<float> database(nb * d);
        for (size_t i = 0; i < nb * d; i++) {
            database[i] = distrib(rng);
        }
        index.add(nb, database.data());
        index_gt.add(nb, database.data());
    }
    int nq = 200;
    int n_ok;
    { // searching the database
        std::vector<float> queries(nq * d);
        for (size_t i = 0; i < nq * d; i++) {
            queries[i] = distrib(rng);
        }
        // exact 1-NN ground truth
        std::vector<faiss::idx_t> gt_nns(nq);
        std::vector<float> gt_dis(nq);
        index_gt.search(nq, queries.data(), 1, gt_dis.data(), gt_nns.data());
        index.nprobe = 5;
        int k = 5;
        std::vector<faiss::idx_t> nns(k * nq);
        std::vector<float> dis(k * nq);
        index.search(nq, queries.data(), k, dis.data(), nns.data());
        // count queries whose true NN appears in the top-k results
        n_ok = 0;
        for (int q = 0; q < nq; q++) {
            for (int i = 0; i < k; i++)
                if (nns[q * k + i] == gt_nns[q])
                    n_ok++;
        }
        EXPECT_GT(n_ok, nq * 0.4);
    }
}

View File

@@ -0,0 +1,549 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <random>
#include <thread>
#include <vector>
#include <gtest/gtest.h>
#include <faiss/AutoTune.h>
#include <faiss/IVFlib.h>
#include <faiss/IndexBinaryIVF.h>
#include <faiss/IndexIVF.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/index_factory.h>
using namespace faiss;
namespace {
// dimension of the vectors to index
int d = 32;
// nb of training vectors
size_t nt = 5000;
// size of the database points per window step
size_t nb = 1000;
// nb of queries
size_t nq = 200;
// nb of results requested per query
int k = 10;
// shared RNG, default-seeded: data generation is deterministic per run
std::mt19937 rng;
// Generate n random float vectors of dimension d, uniform in [0, 1),
// drawn sequentially from the shared RNG.
std::vector<float> make_data(size_t n) {
    std::vector<float> data(n * d);
    std::uniform_real_distribution<> distrib;
    for (auto& value : data) {
        value = distrib(rng);
    }
    return data;
}
// Build an index from a factory string, train it on nt random vectors and
// set nprobe to 4 (a no-op for non-IVF components). Caller owns the index.
std::unique_ptr<Index> make_trained_index(
        const char* index_type,
        MetricType metric_type) {
    auto index =
            std::unique_ptr<Index>(index_factory(d, index_type, metric_type));
    auto xt = make_data(nt);
    index->train(nt, xt.data());
    ParameterSpace().set_index_parameter(index.get(), "nprobe", 4);
    return index;
}
// Run the nq queries in xq against the index and return the k result
// labels per query (distances are discarded).
std::vector<idx_t> search_index(Index* index, const float* xq) {
    std::vector<idx_t> labels(k * nq);
    std::vector<float> distances(k * nq);
    index->search(nq, xq, k, distances.data(), labels.data());
    return labels;
}
/*************************************************************
* Test functions for a given index type
*************************************************************/
// Generic low-level IVF access test for one (factory string, metric)
// combination: verifies that (1) encode_vectors produces exactly the codes
// stored by a normal add(), and (2) a manual scan of the inverted lists
// with InvertedListScanner reproduces the reference search results.
void test_lowlevel_access(const char* index_key, MetricType metric) {
    std::unique_ptr<Index> index = make_trained_index(index_key, metric);
    auto xb = make_data(nb);
    index->add(nb, xb.data());
    /** handle the case if we have a preprocessor */
    const IndexPreTransform* index_pt =
            dynamic_cast<const IndexPreTransform*>(index.get());
    int dt = index->d;
    const float* xbt = xb.data();
    std::unique_ptr<float[]> del_xbt;
    if (index_pt) {
        dt = index_pt->index->d;
        xbt = index_pt->apply_chain(nb, xb.data());
        // apply_chain may return its input; only take ownership of a
        // freshly allocated buffer
        if (xbt != xb.data()) {
            del_xbt.reset((float*)xbt);
        }
    }
    IndexIVF* index_ivf = ivflib::extract_index_ivf(index.get());
    /** Test independent encoding
     *
     * Makes it possible to do additions on a custom inverted list
     * implementation. From a set of vectors, computes the inverted
     * list ids + the codes corresponding to each vector.
     */
    std::vector<idx_t> list_nos(nb);
    std::vector<uint8_t> codes(index_ivf->code_size * nb);
    index_ivf->quantizer->assign(nb, xbt, list_nos.data());
    index_ivf->encode_vectors(nb, xbt, list_nos.data(), codes.data());
    // compare with normal IVF addition: the stored id indexes back into
    // our independently computed `codes` array
    const InvertedLists* il = index_ivf->invlists;
    for (int list_no = 0; list_no < index_ivf->nlist; list_no++) {
        InvertedLists::ScopedCodes ivf_codes(il, list_no);
        InvertedLists::ScopedIds ivf_ids(il, list_no);
        size_t list_size = il->list_size(list_no);
        for (int i = 0; i < list_size; i++) {
            const uint8_t* ref_code = ivf_codes.get() + i * il->code_size;
            const uint8_t* new_code = codes.data() + ivf_ids[i] * il->code_size;
            EXPECT_EQ(memcmp(ref_code, new_code, il->code_size), 0);
        }
    }
    /** Test independent search
     *
     * Manually scans through inverted lists, computing distances and
     * ordering results organized in a heap.
     */
    // sample some example queries and get reference search results.
    auto xq = make_data(nq);
    auto ref_I = search_index(index.get(), xq.data());
    // handle preprocessing
    const float* xqt = xq.data();
    std::unique_ptr<float[]> del_xqt;
    if (index_pt) {
        xqt = index_pt->apply_chain(nq, xq.data());
        if (xqt != xq.data()) {
            del_xqt.reset((float*)xqt);
        }
    }
    // quantize the queries to get the inverted list ids to visit.
    int nprobe = index_ivf->nprobe;
    std::vector<idx_t> q_lists(nq * nprobe);
    std::vector<float> q_dis(nq * nprobe);
    index_ivf->quantizer->search(nq, xqt, nprobe, q_dis.data(), q_lists.data());
    // object that does the scanning and distance computations.
    std::unique_ptr<InvertedListScanner> scanner(
            index_ivf->get_InvertedListScanner());
    for (int i = 0; i < nq; i++) {
        std::vector<idx_t> I(k, -1);
        // heap sentinel: worst possible distance for the metric
        float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
        std::vector<float> D(k, default_dis);
        scanner->set_query(xqt + i * dt);
        for (int j = 0; j < nprobe; j++) {
            int list_no = q_lists[i * nprobe + j];
            if (list_no < 0)
                continue;
            scanner->set_list(list_no, q_dis[i * nprobe + j]);
            // here we get the inverted lists from the InvertedLists
            // object but they could come from anywhere
            scanner->scan_codes(
                    il->list_size(list_no),
                    InvertedLists::ScopedCodes(il, list_no).get(),
                    InvertedLists::ScopedIds(il, list_no).get(),
                    D.data(),
                    I.data(),
                    k);
            if (j == 0) {
                // all results so far come from list_no, so let's check if
                // the distance function works
                for (int jj = 0; jj < k; jj++) {
                    int vno = I[jj];
                    if (vno < 0)
                        break; // heap is not full yet
                    // we have the codes from the addition test
                    float computed_D = scanner->distance_to_code(
                            codes.data() + vno * il->code_size);
                    EXPECT_FLOAT_EQ(computed_D, D[jj]);
                }
            }
        }
        // re-order heap (L2 uses a max-heap, inner product a min-heap)
        if (metric == METRIC_L2) {
            maxheap_reorder(k, D.data(), I.data());
        } else {
            minheap_reorder(k, D.data(), I.data());
        }
        // check that we have the same results as the reference search
        for (int j = 0; j < k; j++) {
            EXPECT_EQ(I[j], ref_I[i * k + j]);
        }
    }
}
} // anonymous namespace
/*************************************************************
* Test entry points
*************************************************************/
// Each entry point runs the generic low-level access test on one
// (index factory string, metric) combination.
TEST(TestLowLevelIVF, IVFFlatL2) {
    test_lowlevel_access("IVF32,Flat", METRIC_L2);
}
TEST(TestLowLevelIVF, PCAIVFFlatL2) {
    test_lowlevel_access("PCAR16,IVF32,Flat", METRIC_L2);
}
TEST(TestLowLevelIVF, IVFFlatIP) {
    test_lowlevel_access("IVF32,Flat", METRIC_INNER_PRODUCT);
}
TEST(TestLowLevelIVF, IVFSQL2) {
    test_lowlevel_access("IVF32,SQ8", METRIC_L2);
}
TEST(TestLowLevelIVF, IVFSQIP) {
    test_lowlevel_access("IVF32,SQ8", METRIC_INNER_PRODUCT);
}
TEST(TestLowLevelIVF, IVFPQL2) {
    test_lowlevel_access("IVF32,PQ4np", METRIC_L2);
}
TEST(TestLowLevelIVF, IVFPQIP) {
    test_lowlevel_access("IVF32,PQ4np", METRIC_INNER_PRODUCT);
}
/*************************************************************
* Same for binary (a bit simpler)
*************************************************************/
namespace {
// number of bits per binary vector (nbit / 8 bytes each)
int nbit = 256;
// Generate n random binary vectors of nbit bits each.
std::vector<uint8_t> make_data_binary(size_t n) {
    std::vector<uint8_t> database(n * nbit / 8);
    std::uniform_int_distribution<> distrib;
    // iterate over the actual buffer size: the original loop bound
    // `n * d` only worked because d (32) coincidentally equals nbit / 8,
    // and would overflow the buffer if nbit were reduced
    for (size_t i = 0; i < database.size(); i++) {
        database[i] = distrib(rng);
    }
    return database;
}
// Build a binary index of dimension nbit from a factory string and train
// it on nt random binary vectors. Caller owns the index.
std::unique_ptr<IndexBinary> make_trained_index_binary(const char* index_type) {
    auto index = std::unique_ptr<IndexBinary>(
            index_binary_factory(nbit, index_type));
    auto xt = make_data_binary(nt);
    index->train(nt, xt.data());
    return index;
}
void test_lowlevel_access_binary(const char* index_key) {
std::unique_ptr<IndexBinary> index = make_trained_index_binary(index_key);
IndexBinaryIVF* index_ivf = dynamic_cast<IndexBinaryIVF*>(index.get());
assert(index_ivf);
index_ivf->nprobe = 4;
auto xb = make_data_binary(nb);
index->add(nb, xb.data());
std::vector<idx_t> list_nos(nb);
index_ivf->quantizer->assign(nb, xb.data(), list_nos.data());
/* For binary there is no test for encoding because binary vectors
* are copied verbatim to the inverted lists */
const InvertedLists* il = index_ivf->invlists;
/** Test independent search
*
* Manually scans through inverted lists, computing distances and
* ordering results organized in a heap.
*/
// sample some example queries and get reference search results.
auto xq = make_data_binary(nq);
std::vector<idx_t> I_ref(k * nq);
std::vector<int32_t> D_ref(k * nq);
index->search(nq, xq.data(), k, D_ref.data(), I_ref.data());
// quantize the queries to get the inverted list ids to visit.
int nprobe = index_ivf->nprobe;
std::vector<idx_t> q_lists(nq * nprobe);
std::vector<int32_t> q_dis(nq * nprobe);
// quantize queries
index_ivf->quantizer->search(
nq, xq.data(), nprobe, q_dis.data(), q_lists.data());
// object that does the scanning and distance computations.
std::unique_ptr<BinaryInvertedListScanner> scanner(
index_ivf->get_InvertedListScanner());
for (int i = 0; i < nq; i++) {
std::vector<idx_t> I(k, -1);
uint32_t default_dis = 1 << 30;
std::vector<int32_t> D(k, default_dis);
scanner->set_query(xq.data() + i * index_ivf->code_size);
for (int j = 0; j < nprobe; j++) {
int list_no = q_lists[i * nprobe + j];
if (list_no < 0)
continue;
scanner->set_list(list_no, q_dis[i * nprobe + j]);
// here we get the inverted lists from the InvertedLists
// object but they could come from anywhere
scanner->scan_codes(
il->list_size(list_no),
InvertedLists::ScopedCodes(il, list_no).get(),
InvertedLists::ScopedIds(il, list_no).get(),
D.data(),
I.data(),
k);
if (j == 0) {
// all results so far come from list_no, so let's check if
// the distance function works
for (int jj = 0; jj < k; jj++) {
int vno = I[jj];
if (vno < 0)
break; // heap is not full yet
// we have the codes from the addition test
float computed_D = scanner->distance_to_code(
xb.data() + vno * il->code_size);
EXPECT_EQ(computed_D, D[jj]);
}
}
}
// re-order heap
heap_reorder<CMax<int32_t, idx_t>>(k, D.data(), I.data());
// check that we have the same results as the reference search
for (int j = 0; j < k; j++) {
// here the order is not guaranteed to be the same
// so we scan through ref results
// EXPECT_EQ (I[j], I_ref[i * k + j]);
EXPECT_LE(D[j], D_ref[i * k + k - 1]);
if (D[j] < D_ref[i * k + k - 1]) {
int j2 = 0;
while (j2 < k) {
if (I[j] == I_ref[i * k + j2])
break;
j2++;
}
EXPECT_LT(j2, k); // it was found
if (j2 < k) {
EXPECT_EQ(D[j], D_ref[i * k + j2]);
}
}
}
}
}
} // anonymous namespace
// Binary IVF variant of the low-level access test.
TEST(TestLowLevelIVF, IVFBinary) {
    test_lowlevel_access_binary("BIVF32");
}
namespace {
// Search an IVF index with several threads cooperating on each query:
// thread `rank` scans inverted lists rank, rank+nproc, ... into its
// own result heap, the heaps are merged, and the merged result is
// compared against a regular single-call reference search.
void test_threaded_search(const char* index_key, MetricType metric) {
    std::unique_ptr<Index> index = make_trained_index(index_key, metric);
    auto xb = make_data(nb);
    index->add(nb, xb.data());
    /** handle the case if we have a preprocessor */
    const IndexPreTransform* index_pt =
            dynamic_cast<const IndexPreTransform*>(index.get());
    int dt = index->d;
    const float* xbt = xb.data();
    std::unique_ptr<float[]> del_xbt;
    if (index_pt) {
        // work in the transformed space; del_xbt frees the transformed
        // copy only when apply_chain allocated one
        dt = index_pt->index->d;
        xbt = index_pt->apply_chain(nb, xb.data());
        if (xbt != xb.data()) {
            del_xbt.reset((float*)xbt);
        }
    }
    IndexIVF* index_ivf = ivflib::extract_index_ivf(index.get());
    /** Test independent search
     *
     * Manually scans through inverted lists, computing distances and
     * ordering results organized in a heap.
     */
    // sample some example queries and get reference search results.
    auto xq = make_data(nq);
    auto ref_I = search_index(index.get(), xq.data());
    // handle preprocessing
    const float* xqt = xq.data();
    std::unique_ptr<float[]> del_xqt;
    if (index_pt) {
        xqt = index_pt->apply_chain(nq, xq.data());
        if (xqt != xq.data()) {
            del_xqt.reset((float*)xqt);
        }
    }
    // quantize the queries to get the inverted list ids to visit.
    int nprobe = index_ivf->nprobe;
    std::vector<idx_t> q_lists(nq * nprobe);
    std::vector<float> q_dis(nq * nprobe);
    index_ivf->quantizer->search(nq, xqt, nprobe, q_dis.data(), q_lists.data());
    // now run search in this many threads
    int nproc = 3;
    for (int i = 0; i < nq; i++) {
        // one result table per thread; each thread writes only to its
        // own k-slot slice, so no synchronization is needed
        std::vector<idx_t> I(k * nproc, -1);
        float default_dis = metric == METRIC_L2 ? HUGE_VAL : -HUGE_VAL;
        std::vector<float> D(k * nproc, default_dis);
        auto search_function = [index_ivf,
                                &I,
                                &D,
                                dt,
                                i,
                                nproc,
                                xqt,
                                nprobe,
                                &q_dis,
                                &q_lists](int rank) {
            const InvertedLists* il = index_ivf->invlists;
            // object that does the scanning and distance computations.
            std::unique_ptr<InvertedListScanner> scanner(
                    index_ivf->get_InvertedListScanner());
            idx_t* local_I = I.data() + rank * k;
            float* local_D = D.data() + rank * k;
            scanner->set_query(xqt + i * dt);
            for (int j = rank; j < nprobe; j += nproc) {
                int list_no = q_lists[i * nprobe + j];
                if (list_no < 0)
                    continue;
                scanner->set_list(list_no, q_dis[i * nprobe + j]);
                scanner->scan_codes(
                        il->list_size(list_no),
                        InvertedLists::ScopedCodes(il, list_no).get(),
                        InvertedLists::ScopedIds(il, list_no).get(),
                        local_D,
                        local_I,
                        k);
            }
        };
        // start the threads. Threads are numbered rank=0..nproc-1 (a la MPI)
        // thread rank takes care of inverted lists
        // rank, rank+nproc, rank+2*nproc,...
        std::vector<std::thread> threads;
        for (int rank = 0; rank < nproc; rank++) {
            threads.emplace_back(search_function, rank);
        }
        // join threads, merge heaps
        // (L2 keeps a max-heap of distances, inner product a min-heap)
        for (int rank = 0; rank < nproc; rank++) {
            threads[rank].join();
            if (rank == 0)
                continue; // nothing to merge
            // merge into first result
            if (metric == METRIC_L2) {
                maxheap_addn(
                        k,
                        D.data(),
                        I.data(),
                        D.data() + rank * k,
                        I.data() + rank * k,
                        k);
            } else {
                minheap_addn(
                        k,
                        D.data(),
                        I.data(),
                        D.data() + rank * k,
                        I.data() + rank * k,
                        k);
            }
        }
        // re-order heap
        if (metric == METRIC_L2) {
            maxheap_reorder(k, D.data(), I.data());
        } else {
            minheap_reorder(k, D.data(), I.data());
        }
        // check that we have the same results as the reference search
        for (int j = 0; j < k; j++) {
            EXPECT_EQ(I[j], ref_I[i * k + j]);
        }
    }
}
} // namespace
// Run the multi-threaded scanning test on a plain IVFFlat index.
TEST(TestLowLevelIVF, ThreadedSearch) {
    test_threaded_search("IVF32,Flat", METRIC_L2);
}

View File

@@ -0,0 +1,66 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/utils/random.h>
#include <faiss/utils/utils.h>
#include <gtest/gtest.h>
using namespace faiss;
// Regression test: repeated searches must not leak memory.
// For several batch sizes, runs N2 searches and checks that resident
// memory growth stays below ~50 bytes per iteration (per batch unit).
TEST(TestMemoryLeak, ivfflat) {
    size_t num_tfidf_faiss_cells = 20;
    size_t max_tfidf_features = 500;
    IndexFlatIP quantizer(max_tfidf_features);
    IndexIVFFlat tfidf_faiss_index(
            &quantizer, max_tfidf_features, num_tfidf_faiss_cells);
    std::vector<float> dense_matrix(5000 * max_tfidf_features);
    float_rand(dense_matrix.data(), dense_matrix.size(), 123);
    tfidf_faiss_index.train(5000, dense_matrix.data());
    tfidf_faiss_index.add(5000, dense_matrix.data());
    int N1 = 1000;
    int N2 = 10000;
    std::vector<float> ent_substr_tfidfs_list(N1 * max_tfidf_features);
    float_rand(
            ent_substr_tfidfs_list.data(), ent_substr_tfidfs_list.size(), 1234);
    for (int bs : {1, 4, 16}) {
        size_t m0 = get_mem_usage_kb();
        double t0 = getmillisecs();
        for (int i = 0; i < N2; i++) {
            std::vector<idx_t> I(10 * bs);
            std::vector<float> D(10 * bs);
            tfidf_faiss_index.search(
                    bs,
                    ent_substr_tfidfs_list.data() +
                            (i % (N1 - bs + 1)) * max_tfidf_features,
                    10,
                    D.data(),
                    I.data());
            if (i % 100 == 0) {
                // compute the delta as a signed quantity: memory usage
                // may drop below m0, and size_t subtraction would wrap
                // around to a huge value
                double delta_kb =
                        double(get_mem_usage_kb()) - double(m0);
                // %zu for size_t (the previous %ld is not portable)
                printf("[%.2f s] BS %d %d: %zu kB %.2f bytes/it\r",
                       (getmillisecs() - t0) / 1000,
                       bs,
                       i,
                       get_mem_usage_kb(),
                       delta_kb * 1024.0 / (i + 1));
                fflush(stdout);
            }
        }
        printf("\n");
        // allowed growth: 50 bytes per iteration and batch unit
        double growth_kb = double(get_mem_usage_kb()) - double(m0);
        EXPECT_GE(50 * bs, growth_kb * 1024.0 / N2);
    }
}

View File

@@ -0,0 +1,246 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <random>
#include <gtest/gtest.h>
#include <faiss/IVFlib.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/MetaIndexes.h>
#include <faiss/invlists/OnDiskInvertedLists.h>
#include "test_util.h"
namespace {

pthread_mutex_t temp_file_mutex = PTHREAD_MUTEX_INITIALIZER;

typedef faiss::idx_t idx_t;

// parameters to use for the test
int d = 64;                       // vector dimension
size_t nb = 1000;                 // database size
size_t nq = 100;                  // number of queries
int nindex = 4;                   // number of shards
int k = 10;                       // results per query
int nlist = 40;                   // inverted lists per shard
int shard_size = nb / nindex;     // vectors per shard

// Random database, queries and explicit ids shared by all tests, plus
// a coarse quantizer trained once up front.
struct CommonData {
    std::vector<float> database;
    std::vector<float> queries;
    std::vector<idx_t> ids;
    faiss::IndexFlatL2 quantizer;
    CommonData() : database(nb * d), queries(nq * d), ids(nb), quantizer(d) {
        std::mt19937 rng;
        std::uniform_real_distribution<> distrib;
        for (size_t i = 0; i < nb * d; i++) {
            database[i] = distrib(rng);
        }
        for (size_t i = 0; i < nq * d; i++) {
            queries[i] = distrib(rng);
        }
        for (int i = 0; i < nb; i++) {
            ids[i] = 123 + 456 * i;
        }
        { // just to train the quantizer
            faiss::IndexIVFFlat iflat(&quantizer, d, nlist);
            iflat.train(nb, database.data());
        }
    }
};
CommonData cd;

std::string temp_filename_template = "/tmp/faiss_tmp_XXXXXX";

/// Search on the shards, merge them into the first shard (either via
/// the standard merge_into path or via an on-disk
/// merge_from_multiple), search again on the merged index, and return
/// the number of result entries that differ from the sharded search.
int compare_merged(
        faiss::IndexShards* index_shards,
        bool shift_ids,
        bool standard_merge = true) {
    std::vector<idx_t> refI(k * nq);
    std::vector<float> refD(k * nq);
    index_shards->search(nq, cd.queries.data(), k, refD.data(), refI.data());
    Tempfilename filename(&temp_file_mutex, temp_filename_template);
    std::vector<idx_t> newI(k * nq);
    std::vector<float> newD(k * nq);
    if (standard_merge) {
        for (int i = 1; i < nindex; i++) {
            faiss::ivflib::merge_into(
                    index_shards->at(0), index_shards->at(i), shift_ids);
        }
        index_shards->syncWithSubIndexes();
    } else {
        // merge the inverted lists of all shards into a single on-disk
        // inverted list attached to the first shard
        std::vector<const faiss::InvertedLists*> lists;
        faiss::IndexIVF* index0 = nullptr;
        size_t ntotal = 0;
        for (int i = 0; i < nindex; i++) {
            auto index_ivf =
                    dynamic_cast<faiss::IndexIVF*>(index_shards->at(i));
            assert(index_ivf);
            if (i == 0) {
                index0 = index_ivf;
            }
            lists.push_back(index_ivf->invlists);
            ntotal += index_ivf->ntotal;
        }
        auto il = new faiss::OnDiskInvertedLists(
                index0->nlist, index0->code_size, filename.c_str());
        il->merge_from_multiple(lists.data(), lists.size(), shift_ids);
        index0->replace_invlists(il, true);
        index0->ntotal = ntotal;
    }
    // search only on first index
    index_shards->at(0)->search(
            nq, cd.queries.data(), k, newD.data(), newI.data());
    size_t ndiff = 0;
    // when the on-disk merge shifted the ids, the merged index returns
    // globalized ids; reduce them modulo shard_size to recover the
    // per-shard ids the reference search returned
    bool adjust_ids = shift_ids && !standard_merge;
    for (size_t i = 0; i < k * nq; i++) {
        // fix: derive new_id from the post-merge result (newI); the
        // previous code derived it from refI and never read newI, so
        // the merged results were not actually checked
        idx_t new_id = adjust_ids ? newI[i] % shard_size : newI[i];
        if (refI[i] != new_id) {
            ndiff++;
        }
    }
    return ndiff;
}

} // namespace
// test on IVFFlat with implicit numbering
// IVFFlat shards with implicit (sequential) numbering: merging with
// id shifting must reproduce the sharded search results exactly.
TEST(MERGE, merge_flat_no_ids) {
    faiss::IndexShards index_shards(d);
    index_shards.own_indices = true;
    int shard_count = 0;
    while (shard_count < nindex) {
        auto* shard = new faiss::IndexIVFFlat(&cd.quantizer, d, nlist);
        index_shards.add_shard(shard);
        shard_count++;
    }
    EXPECT_TRUE(index_shards.is_trained);
    index_shards.add(nb, cd.database.data());
    const size_t ntotal_before_merge = index_shards.ntotal;
    const int num_differences = compare_merged(&index_shards, true);
    // the merge must neither lose vectors nor change results
    EXPECT_EQ(ntotal_before_merge, index_shards.ntotal);
    EXPECT_EQ(0, num_differences);
}
// test on IVFFlat, explicit ids
// IVFFlat shards with explicit user ids; ids are not shifted on merge.
TEST(MERGE, merge_flat) {
    faiss::IndexShards index_shards(d, false, false);
    index_shards.own_indices = true;
    for (int shard_no = 0; shard_no != nindex; ++shard_no) {
        index_shards.add_shard(
                new faiss::IndexIVFFlat(&cd.quantizer, d, nlist));
    }
    EXPECT_TRUE(index_shards.is_trained);
    index_shards.add_with_ids(nb, cd.database.data(), cd.ids.data());
    const int num_differences = compare_merged(&index_shards, false);
    // ndiff is a count, so >= comparison with 0 means "no differences"
    EXPECT_GE(0, num_differences);
}
// test on IVFFlat and a VectorTransform
// Shards wrapped in an IndexPreTransform (random rotation) in front of
// the IVF index; merging must still work through the wrapper.
TEST(MERGE, merge_flat_vt) {
    faiss::IndexShards index_shards(d, false, false);
    index_shards.own_indices = true;
    // here we have to retrain because of the vectorTransform
    faiss::RandomRotationMatrix rot(d, d);
    rot.init(1234);
    faiss::IndexFlatL2 quantizer(d);
    { // just to train the quantizer
        faiss::IndexIVFFlat iflat(&quantizer, d, nlist);
        faiss::IndexPreTransform ipt(&rot, &iflat);
        ipt.train(nb, cd.database.data());
    }
    for (int i = 0; i < nindex; i++) {
        // each shard owns a copy of the rotation and its own IVF index
        faiss::IndexPreTransform* ipt = new faiss::IndexPreTransform(
                new faiss::RandomRotationMatrix(rot),
                new faiss::IndexIVFFlat(&quantizer, d, nlist));
        ipt->own_fields = true;
        index_shards.add_shard(ipt);
    }
    EXPECT_TRUE(index_shards.is_trained);
    index_shards.add_with_ids(nb, cd.database.data(), cd.ids.data());
    size_t prev_ntotal = index_shards.ntotal;
    int ndiff = compare_merged(&index_shards, false);
    EXPECT_EQ(prev_ntotal, index_shards.ntotal);
    EXPECT_GE(0, ndiff);
}
// put the merged invfile on disk
// Same as merge_flat, but the first shard keeps its inverted lists
// on disk before the (standard) merge.
TEST(MERGE, merge_flat_ondisk) {
    faiss::IndexShards index_shards(d, false, false);
    index_shards.own_indices = true;
    Tempfilename filename(&temp_file_mutex, temp_filename_template);
    for (int shard_no = 0; shard_no < nindex; ++shard_no) {
        auto* shard = new faiss::IndexIVFFlat(&cd.quantizer, d, nlist);
        if (shard_no == 0) {
            // move the first shard's inverted lists to disk
            auto* on_disk = new faiss::OnDiskInvertedLists(
                    shard->nlist, shard->code_size, filename.c_str());
            shard->replace_invlists(on_disk, true);
        }
        index_shards.add_shard(shard);
    }
    EXPECT_TRUE(index_shards.is_trained);
    index_shards.add_with_ids(nb, cd.database.data(), cd.ids.data());
    const int num_differences = compare_merged(&index_shards, false);
    EXPECT_EQ(num_differences, 0);
}
// now use ondisk specific merge
// Merge via OnDiskInvertedLists::merge_from_multiple instead of the
// standard merge_into path (standard_merge = false).
TEST(MERGE, merge_flat_ondisk_2) {
    faiss::IndexShards index_shards(d, false, false);
    index_shards.own_indices = true;
    int shards_added = 0;
    while (shards_added < nindex) {
        index_shards.add_shard(
                new faiss::IndexIVFFlat(&cd.quantizer, d, nlist));
        ++shards_added;
    }
    EXPECT_TRUE(index_shards.is_trained);
    index_shards.add_with_ids(nb, cd.database.data(), cd.ids.data());
    const int num_differences = compare_merged(&index_shards, false, false);
    EXPECT_GE(0, num_differences);
}
// now use ondisk specific merge and use shift ids
// On-disk merge with shifted ids: every shard is numbered
// 0..shard_size-1 locally, and the merge re-globalizes the ids.
TEST(MERGE, merge_flat_ondisk_3) {
    faiss::IndexShards index_shards(d, false, false);
    index_shards.own_indices = true;
    // per-shard local ids, repeated across shards
    std::vector<idx_t> ids(nb);
    for (size_t vec_no = 0; vec_no < nb; vec_no++) {
        ids[vec_no] = static_cast<idx_t>(vec_no % shard_size);
    }
    for (int shard_no = 0; shard_no < nindex; shard_no++) {
        index_shards.add_shard(
                new faiss::IndexIVFFlat(&cd.quantizer, d, nlist));
    }
    EXPECT_TRUE(index_shards.is_trained);
    index_shards.add_with_ids(nb, cd.database.data(), ids.data());
    const int num_differences = compare_merged(&index_shards, true, false);
    EXPECT_GE(0, num_differences);
}

View File

@@ -0,0 +1,265 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <random>
#include <vector>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/IndexFlat.h>
#include <faiss/impl/io.h>
#include <faiss/index_io.h>
namespace {

// Deterministic random float data: n vectors of dimension d, entries
// uniform in [0, 1), seeded so repeated calls reproduce the same data.
std::vector<float> make_data(const size_t n, const size_t d, size_t seed) {
    std::vector<float> database(n * d);
    std::mt19937 rng(seed);
    std::uniform_real_distribution<float> distrib;
    for (size_t i = 0; i < n * d; i++) {
        database[i] = distrib(rng);
    }
    return database;
}

// Deterministic random bytes: n codes of d bytes each.
// Fix: std::uniform_int_distribution is undefined behavior for 8-bit
// types ([rand.req.genl] requires short or wider), so draw ints in
// [0, 255] and narrow explicitly.
std::vector<uint8_t> make_binary_data(
        const size_t n,
        const size_t d,
        size_t seed) {
    std::vector<uint8_t> database(n * d);
    std::mt19937 rng(seed);
    std::uniform_int_distribution<int> distrib(0, 255);
    for (size_t i = 0; i < n * d; i++) {
        database[i] = static_cast<uint8_t>(distrib(rng));
    }
    return database;
}

} // namespace
// the logic is the following:
// 1. generate two flatcodes-based indices, Index1 and Index2
// 2. serialize both indices into std::vector<> buffers, Buf1 and Buf2
// 3. save Buf1 into a temporary file, File1
// 4. deserialize Index1 using mmap feature on File1 into Index1MM
// 5. ensure that Index1MM acts as Index2 if we write the data from Buf2
// on top of the existing File1
// 6. ensure that Index1MM acts as Index1 if we write the data from Buf1
// on top of the existing File1 again
TEST(TestMmap, mmap_flatcodes) {
    // generate data
    const size_t nt = 1000;
    const size_t nq = 10;
    const size_t d = 32;
    const size_t k = 25;
    std::vector<float> xt1 = make_data(nt, d, 123);
    std::vector<float> xt2 = make_data(nt, d, 456);
    std::vector<float> xq = make_data(nq, d, 789);
    // ensure that the data is different
    ASSERT_NE(xt1, xt2);
    // make index1 and create reference results
    faiss::IndexFlatL2 index1(d);
    index1.train(nt, xt1.data());
    index1.add(nt, xt1.data());
    std::vector<float> ref_dis_1(k * nq);
    std::vector<faiss::idx_t> ref_ids_1(k * nq);
    index1.search(nq, xq.data(), k, ref_dis_1.data(), ref_ids_1.data());
    // make index2 and create reference results
    faiss::IndexFlatL2 index2(d);
    index2.train(nt, xt2.data());
    index2.add(nt, xt2.data());
    std::vector<float> ref_dis_2(k * nq);
    std::vector<faiss::idx_t> ref_ids_2(k * nq);
    index2.search(nq, xq.data(), k, ref_dis_2.data(), ref_ids_2.data());
    // ensure that the results are different
    ASSERT_NE(ref_dis_1, ref_dis_2);
    ASSERT_NE(ref_ids_1, ref_ids_2);
    // serialize both in a form of vectors
    // (presumably the two blobs have identical length, since the two
    // indexes differ only in the stored vectors — TODO confirm; the
    // overwrite trick below depends on it)
    faiss::VectorIOWriter wr1;
    faiss::write_index(&index1, &wr1);
    faiss::VectorIOWriter wr2;
    faiss::write_index(&index2, &wr2);
    // generate a temporary file and write index1 into it
    // NOTE(review): std::tmpnam is race-prone; mkstemp would be safer,
    // though this is acceptable inside a single-process test
    std::string tmpname = std::tmpnam(nullptr);
    {
        std::ofstream ofs(tmpname);
        ofs.write((const char*)wr1.data.data(), wr1.data.size());
    }
    // create a mmap index
    std::unique_ptr<faiss::Index> index1mm(
            faiss::read_index(tmpname.c_str(), faiss::IO_FLAG_MMAP_IFC));
    ASSERT_NE(index1mm, nullptr);
    // perform a search
    std::vector<float> cand_dis_1(k * nq);
    std::vector<faiss::idx_t> cand_ids_1(k * nq);
    index1mm->search(nq, xq.data(), k, cand_dis_1.data(), cand_ids_1.data());
    // match vs ref1
    ASSERT_EQ(ref_ids_1, cand_ids_1);
    ASSERT_EQ(ref_dis_1, cand_dis_1);
    // ok now, overwrite the internals of the file without recreating it
    // (the mmap-ed index should observe the new bytes)
    {
        std::ofstream ofs(tmpname);
        ofs.seekp(0, std::ios::beg);
        ofs.write((const char*)wr2.data.data(), wr2.data.size());
    }
    // perform a search
    std::vector<float> cand_dis_2(k * nq);
    std::vector<faiss::idx_t> cand_ids_2(k * nq);
    index1mm->search(nq, xq.data(), k, cand_dis_2.data(), cand_ids_2.data());
    // now the mmap index must behave like index2
    ASSERT_EQ(ref_ids_2, cand_ids_2);
    ASSERT_EQ(ref_dis_2, cand_dis_2);
    // write back data1
    {
        std::ofstream ofs(tmpname);
        ofs.seekp(0, std::ios::beg);
        ofs.write((const char*)wr1.data.data(), wr1.data.size());
    }
    // perform a search
    std::vector<float> cand_dis_3(k * nq);
    std::vector<faiss::idx_t> cand_ids_3(k * nq);
    index1mm->search(nq, xq.data(), k, cand_dis_3.data(), cand_ids_3.data());
    // match vs ref1
    ASSERT_EQ(ref_ids_1, cand_ids_3);
    ASSERT_EQ(ref_dis_1, cand_dis_3);
}
// Same overwrite-under-mmap scenario as mmap_flatcodes, but with a
// binary flat index.
TEST(TestMmap, mmap_binary_flatcodes) {
    // generate data
    const size_t nt = 1000;
    const size_t nq = 10;
    // in bits
    const size_t d = 64;
    // in bytes
    const size_t d8 = (d + 7) / 8;
    const size_t k = 25;
    std::vector<uint8_t> xt1 = make_binary_data(nt, d8, 123);
    std::vector<uint8_t> xt2 = make_binary_data(nt, d8, 456);
    std::vector<uint8_t> xq = make_binary_data(nq, d8, 789);
    // ensure that the data is different
    ASSERT_NE(xt1, xt2);
    // make index1 and create reference results
    faiss::IndexBinaryFlat index1(d);
    index1.train(nt, xt1.data());
    index1.add(nt, xt1.data());
    std::vector<int32_t> ref_dis_1(k * nq);
    std::vector<faiss::idx_t> ref_ids_1(k * nq);
    index1.search(nq, xq.data(), k, ref_dis_1.data(), ref_ids_1.data());
    // make index2 and create reference results
    faiss::IndexBinaryFlat index2(d);
    index2.train(nt, xt2.data());
    index2.add(nt, xt2.data());
    std::vector<int32_t> ref_dis_2(k * nq);
    std::vector<faiss::idx_t> ref_ids_2(k * nq);
    index2.search(nq, xq.data(), k, ref_dis_2.data(), ref_ids_2.data());
    // ensure that the results are different
    ASSERT_NE(ref_dis_1, ref_dis_2);
    ASSERT_NE(ref_ids_1, ref_ids_2);
    // serialize both in a form of vectors
    faiss::VectorIOWriter wr1;
    faiss::write_index_binary(&index1, &wr1);
    faiss::VectorIOWriter wr2;
    faiss::write_index_binary(&index2, &wr2);
    // generate a temporary file and write index1 into it
    // NOTE(review): std::tmpnam is race-prone; mkstemp would be safer,
    // though this is acceptable inside a single-process test
    std::string tmpname = std::tmpnam(nullptr);
    {
        std::ofstream ofs(tmpname);
        ofs.write((const char*)wr1.data.data(), wr1.data.size());
    }
    // create a mmap index
    std::unique_ptr<faiss::IndexBinary> index1mm(
            faiss::read_index_binary(tmpname.c_str(), faiss::IO_FLAG_MMAP_IFC));
    ASSERT_NE(index1mm, nullptr);
    // perform a search
    std::vector<int32_t> cand_dis_1(k * nq);
    std::vector<faiss::idx_t> cand_ids_1(k * nq);
    index1mm->search(nq, xq.data(), k, cand_dis_1.data(), cand_ids_1.data());
    // match vs ref1
    ASSERT_EQ(ref_ids_1, cand_ids_1);
    ASSERT_EQ(ref_dis_1, cand_dis_1);
    // ok now, overwrite the internals of the file without recreating it
    // (the mmap-ed index should observe the new bytes)
    {
        std::ofstream ofs(tmpname);
        ofs.seekp(0, std::ios::beg);
        ofs.write((const char*)wr2.data.data(), wr2.data.size());
    }
    // perform a search
    std::vector<int32_t> cand_dis_2(k * nq);
    std::vector<faiss::idx_t> cand_ids_2(k * nq);
    index1mm->search(nq, xq.data(), k, cand_dis_2.data(), cand_ids_2.data());
    // now the mmap index must behave like index2
    ASSERT_EQ(ref_ids_2, cand_ids_2);
    ASSERT_EQ(ref_dis_2, cand_dis_2);
    // write back data1
    {
        std::ofstream ofs(tmpname);
        ofs.seekp(0, std::ios::beg);
        ofs.write((const char*)wr1.data.data(), wr1.data.size());
    }
    // perform a search
    std::vector<int32_t> cand_dis_3(k * nq);
    std::vector<faiss::idx_t> cand_ids_3(k * nq);
    index1mm->search(nq, xq.data(), k, cand_dis_3.data(), cand_ids_3.data());
    // match vs ref1
    ASSERT_EQ(ref_ids_1, cand_ids_3);
    ASSERT_EQ(ref_dis_1, cand_dis_3);
}

View File

@@ -0,0 +1,14 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <faiss/utils/utils.h>
// Sanity check that OpenMP support is functional (faiss::check_openmp).
TEST(Threading, openmp) {
    EXPECT_TRUE(faiss::check_openmp());
}

View File

@@ -0,0 +1,206 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <random>
#include <unistd.h>
#include <pthread.h>
#include <unordered_map>
#include <gtest/gtest.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/index_io.h>
#include <faiss/invlists/OnDiskInvertedLists.h>
#include <faiss/utils/random.h>
namespace {

// RAII temporary file: mkstemp creates a unique file on construction,
// the destructor removes it. The mutex serializes mkstemp calls
// because tests may run concurrently in the same process.
struct Tempfilename {
    static pthread_mutex_t mutex;
    std::string filename = "/tmp/faiss_tmp_XXXXXX";
    Tempfilename() {
        pthread_mutex_lock(&mutex);
        int fd = mkstemp(&filename[0]);
        close(fd);
        pthread_mutex_unlock(&mutex);
    }
    ~Tempfilename() {
        // Fix: access() returns 0 when the file exists; the previous
        // condition was inverted and leaked the temp file.
        if (access(filename.c_str(), F_OK) == 0) {
            unlink(filename.c_str());
        }
    }
    const char* c_str() {
        return filename.c_str();
    }
};

pthread_mutex_t Tempfilename::mutex = PTHREAD_MUTEX_INITIALIZER;

} // namespace
// Fill an OnDiskInvertedLists directly via add_entry and check that
// every (id, list_no) pair can be read back from the correct list.
TEST(ONDISK, make_invlists) {
    int nlist = 100;
    int code_size = 32;
    int nadd = 1000000;
    std::unordered_map<int, int> listnos;
    Tempfilename filename;
    faiss::OnDiskInvertedLists ivf(nlist, code_size, filename.c_str());
    {
        std::vector<uint8_t> code(32);
        std::mt19937 rng;
        std::uniform_real_distribution<> distrib;
        for (int i = 0; i < nadd; i++) {
            double d = distrib(rng);
            int list_no = int(nlist * d * d); // skewed distribution
            // stamp the entry id and list number into the code so the
            // read-back loop below can verify placement
            int* ar = (int*)code.data();
            ar[0] = i;
            ar[1] = list_no;
            ivf.add_entry(list_no, i, code.data());
            listnos[i] = list_no;
        }
    }
    int ntot = 0;
    for (int i = 0; i < nlist; i++) {
        int size = ivf.list_size(i);
        const faiss::idx_t* ids = ivf.get_ids(i);
        const uint8_t* codes = ivf.get_codes(i);
        for (int j = 0; j < size; j++) {
            faiss::idx_t id = ids[j];
            const int* ar = (const int*)&codes[code_size * j];
            EXPECT_EQ(ar[0], id);
            EXPECT_EQ(ar[1], i);
            EXPECT_EQ(listnos[id], i);
            ntot++;
        }
    }
    // every added entry was found exactly once
    EXPECT_EQ(ntot, nadd);
}
// Check that an IndexIVFFlat backed by OnDiskInvertedLists produces
// exactly the same add/search results as the in-memory version, and
// that it survives a write_index / read_index round trip.
TEST(ONDISK, test_add) {
    int d = 8;
    int nlist = 30, nq = 200, nb = 1500, k = 10;
    faiss::IndexFlatL2 quantizer(d);
    {
        std::vector<float> x(d * nlist);
        faiss::float_rand(x.data(), d * nlist, 12345);
        quantizer.add(nlist, x.data());
    }
    std::vector<float> xb(d * nb);
    faiss::float_rand(xb.data(), d * nb, 23456);
    faiss::IndexIVFFlat index(&quantizer, d, nlist);
    index.add(nb, xb.data());
    // fix: xq holds nq queries; it was over-allocated to d * nb while
    // only d * nq entries were ever initialized and used
    std::vector<float> xq(d * nq);
    faiss::float_rand(xq.data(), d * nq, 34567);
    std::vector<float> ref_D(nq * k);
    std::vector<faiss::idx_t> ref_I(nq * k);
    index.search(nq, xq.data(), k, ref_D.data(), ref_I.data());
    Tempfilename filename, filename2;
    // test add + search
    {
        faiss::IndexIVFFlat index2(&quantizer, d, nlist);
        faiss::OnDiskInvertedLists ivf(
                index.nlist, index.code_size, filename.c_str());
        index2.replace_invlists(&ivf);
        index2.add(nb, xb.data());
        std::vector<float> new_D(nq * k);
        std::vector<faiss::idx_t> new_I(nq * k);
        index2.search(nq, xq.data(), k, new_D.data(), new_I.data());
        EXPECT_EQ(ref_D, new_D);
        EXPECT_EQ(ref_I, new_I);
        write_index(&index2, filename2.c_str());
    }
    // test io: deserialize and search again
    {
        faiss::Index* index3 = faiss::read_index(filename2.c_str());
        std::vector<float> new_D(nq * k);
        std::vector<faiss::idx_t> new_I(nq * k);
        index3->search(nq, xq.data(), k, new_D.data(), new_I.data());
        EXPECT_EQ(ref_D, new_D);
        EXPECT_EQ(ref_I, new_I);
        delete index3;
    }
}
// WARN this test will run multithreaded only in opt mode
TEST(ONDISK, make_invlists_threaded) {
    int nlist = 100;
    int code_size = 32;
    int nadd = 1000000;
    Tempfilename filename;
    faiss::OnDiskInvertedLists ivf(nlist, code_size, filename.c_str());
    // precompute the (skewed) list assignments up front so the
    // parallel loop does not share the RNG between threads
    std::vector<int> list_nos(nadd);
    std::mt19937 rng;
    std::uniform_real_distribution<> distrib;
    for (int i = 0; i < nadd; i++) {
        double d = distrib(rng);
        list_nos[i] = int(nlist * d * d); // skewed distribution
    }
#pragma omp parallel
    {
        // per-thread code buffer
        std::vector<uint8_t> code(32);
#pragma omp for
        for (int i = 0; i < nadd; i++) {
            int list_no = list_nos[i];
            // stamp id and list number into the code for verification
            int* ar = (int*)code.data();
            ar[0] = i;
            ar[1] = list_no;
            ivf.add_entry(list_no, i, code.data());
        }
    }
    int ntot = 0;
    for (int i = 0; i < nlist; i++) {
        int size = ivf.list_size(i);
        const faiss::idx_t* ids = ivf.get_ids(i);
        const uint8_t* codes = ivf.get_codes(i);
        for (int j = 0; j < size; j++) {
            faiss::idx_t id = ids[j];
            const int* ar = (const int*)&codes[code_size * j];
            EXPECT_EQ(ar[0], id);
            EXPECT_EQ(ar[1], i);
            EXPECT_EQ(list_nos[id], i);
            ntot++;
        }
    }
    // every added entry was found exactly once
    EXPECT_EQ(ntot, nadd);
}

View File

@@ -0,0 +1,194 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <random>
#include <vector>
#include <gtest/gtest.h>
#include <faiss/IVFlib.h>
#include <faiss/IndexIVF.h>
#include <faiss/VectorTransform.h>
#include <faiss/index_factory.h>
namespace {
typedef faiss::idx_t idx_t;
/*************************************************************
 * Test utils
 *************************************************************/
// dimension of the vectors to index
int d = 64;
// size of the database we plan to index
size_t nb = 8000;
// nb of queries
size_t nq = 200;
std::mt19937 rng;
// n random vectors of dimension d, entries uniform in [0, 1)
std::vector<float> make_data(size_t n) {
    std::vector<float> database(n * d);
    std::uniform_real_distribution<> distrib;
    for (size_t i = 0; i < n * d; i++) {
        database[i] = distrib(rng);
    }
    return database;
}
// build an index from a factory string, train and fill it with x
std::unique_ptr<faiss::Index> make_index(
        const char* index_type,
        const std::vector<float>& x) {
    auto index =
            std::unique_ptr<faiss::Index>(faiss::index_factory(d, index_type));
    index->train(nb, x.data());
    index->add(nb, x.data());
    return index;
}
/*************************************************************
 * Test functions for a given index type
 *************************************************************/
// Returns true iff every database vector appears in the inverted list
// of the centroid that search_centroid reports for it.
bool test_search_centroid(const char* index_key) {
    std::vector<float> xb = make_data(nb); // database vectors
    auto index = make_index(index_key, xb);
    /* First test: find the centroids associated to the database
       vectors and make sure that each vector does indeed appear in
       the inverted list corresponding to its centroid */
    std::vector<idx_t> centroid_ids(nb);
    faiss::ivflib::search_centroid(
            index.get(), xb.data(), nb, centroid_ids.data());
    const faiss::IndexIVF* ivf = faiss::ivflib::extract_index_ivf(index.get());
    for (int i = 0; i < nb; i++) {
        bool found = false;
        int list_no = centroid_ids[i];
        int list_size = ivf->invlists->list_size(list_no);
        auto* list = ivf->invlists->get_ids(list_no);
        // linear scan of the inverted list for vector i
        for (int j = 0; j < list_size; j++) {
            if (list[j] == i) {
                found = true;
                break;
            }
        }
        if (!found)
            return false;
    }
    return true;
}
// Returns 0 on success, 1 if search_and_return_centroids disagrees
// with the standard search, 2 if a result id is missing from the
// inverted list it is reported to come from.
int test_search_and_return_centroids(const char* index_key) {
    std::vector<float> xb = make_data(nb); // database vectors
    auto index = make_index(index_key, xb);
    std::vector<idx_t> centroid_ids(nb);
    faiss::ivflib::search_centroid(
            index.get(), xb.data(), nb, centroid_ids.data());
    faiss::IndexIVF* ivf = faiss::ivflib::extract_index_ivf(index.get());
    ivf->nprobe = 4;
    std::vector<float> xq = make_data(nq); // database vectors
    int k = 5;
    // compute a reference search result
    std::vector<idx_t> refI(nq * k);
    std::vector<float> refD(nq * k);
    index->search(nq, xq.data(), k, refD.data(), refI.data());
    // compute search result
    std::vector<idx_t> newI(nq * k);
    std::vector<float> newD(nq * k);
    std::vector<idx_t> query_centroid_ids(nq);
    std::vector<idx_t> result_centroid_ids(nq * k);
    faiss::ivflib::search_and_return_centroids(
            index.get(),
            nq,
            xq.data(),
            k,
            newD.data(),
            newI.data(),
            query_centroid_ids.data(),
            result_centroid_ids.data());
    // first verify that we have the same result as the standard search
    if (newI != refI) {
        return 1;
    }
    // then check if the result ids are indeed in the inverted list
    // they are supposed to be in
    for (int i = 0; i < nq * k; i++) {
        int list_no = result_centroid_ids[i];
        int result_no = newI[i];
        if (result_no < 0)
            continue;
        bool found = false;
        int list_size = ivf->invlists->list_size(list_no);
        auto* list = ivf->invlists->get_ids(list_no);
        for (int j = 0; j < list_size; j++) {
            if (list[j] == result_no) {
                found = true;
                break;
            }
        }
        if (!found)
            return 2;
    }
    return 0;
}
} // namespace
/*************************************************************
* Test entry points
*************************************************************/
TEST(testSearchCentroid, IVFFlat) {
    bool ok = test_search_centroid("IVF32,Flat");
    EXPECT_TRUE(ok);
}
// same, with a PCA pre-transform in front of the IVF index
TEST(testSearchCentroid, PCAIVFFlat) {
    bool ok = test_search_centroid("PCA16,IVF32,Flat");
    EXPECT_TRUE(ok);
}
// error code 1 = results differ from the standard search,
// error code 2 = a result id is missing from its reported list
TEST(testSearchAndReturnCentroids, IVFFlat) {
    int err = test_search_and_return_centroids("IVF32,Flat");
    EXPECT_NE(err, 1);
    EXPECT_NE(err, 2);
}
TEST(testSearchAndReturnCentroids, PCAIVFFlat) {
    int err = test_search_and_return_centroids("PCA16,IVF32,Flat");
    EXPECT_NE(err, 1);
    EXPECT_NE(err, 2);
}

View File

@@ -0,0 +1,287 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <random>
#include <vector>
#include <gtest/gtest.h>
#include <faiss/AutoTune.h>
#include <faiss/IVFlib.h>
#include <faiss/IndexBinaryIVF.h>
#include <faiss/IndexIVF.h>
#include <faiss/clone_index.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/IDSelector.h>
#include <faiss/index_factory.h>
using namespace faiss;
namespace {
// dimension of the vectors to index
int d = 32;
// size of the database we plan to index
size_t nb = 1000;
// nb of queries
size_t nq = 200;
std::mt19937 rng;
std::vector<float> make_data(size_t n) {
std::vector<float> database(n * d);
std::uniform_real_distribution<> distrib;
for (size_t i = 0; i < n * d; i++) {
database[i] = distrib(rng);
}
return database;
}
std::unique_ptr<Index> make_index(
const char* index_type,
MetricType metric,
const std::vector<float>& x) {
assert(x.size() % d == 0);
idx_t nb = x.size() / d;
std::unique_ptr<Index> index(index_factory(d, index_type, metric));
index->train(nb, x.data());
index->add(nb, x.data());
return index;
}
std::vector<idx_t> search_index(Index* index, const float* xq) {
int k = 10;
std::vector<idx_t> I(k * nq);
std::vector<float> D(k * nq);
index->search(nq, xq, k, D.data(), I.data());
return I;
}
std::vector<idx_t> search_index_with_params(
Index* index,
const float* xq,
IVFSearchParameters* params) {
int k = 10;
std::vector<idx_t> I(k * nq);
std::vector<float> D(k * nq);
ivflib::search_with_parameters(
index, nq, xq, k, D.data(), I.data(), params);
return I;
}
/*************************************************************
* Test functions for a given index type
*************************************************************/
// Check that an nprobe passed via IVFSearchParameters produces the same
// results as setting nprobe on the index itself. Returns 0 on success,
// or the nprobe value (2 or 9) whose results mismatched.
int test_params_override(const char* index_key, MetricType metric) {
    std::vector<float> xb = make_data(nb); // database vectors
    auto index = make_index(index_key, metric, xb);
    // make_index already trained and added the vectors:
    // index->train(nb, xb.data());
    // index->add(nb, xb.data());
    std::vector<float> xq = make_data(nq);
    ParameterSpace ps;
    // reference results at index-level nprobe = 2 and 9
    ps.set_index_parameter(index.get(), "nprobe", 2);
    auto res2ref = search_index(index.get(), xq.data());
    ps.set_index_parameter(index.get(), "nprobe", 9);
    auto res9ref = search_index(index.get(), xq.data());
    // set a different index-level value to verify it gets overridden
    ps.set_index_parameter(index.get(), "nprobe", 1);
    IVFSearchParameters params;
    params.max_codes = 0;
    params.nprobe = 2;
    auto res2new = search_index_with_params(index.get(), xq.data(), &params);
    params.nprobe = 9;
    auto res9new = search_index_with_params(index.get(), xq.data(), &params);
    if (res2ref != res2new)
        return 2;
    if (res9ref != res9new)
        return 9;
    return 0;
}
/*************************************************************
* Test subsets
*************************************************************/
// Check that searching the full index with an IDSelector restricted to a
// subset of ids returns the same results as searching an index that
// contains only that subset. Returns 0 on success, 1 on mismatch.
int test_selector(const char* index_key) {
    std::vector<float> xb = make_data(nb); // database vectors
    std::vector<float> xq = make_data(nq);
    ParameterSpace ps;
    // keep every id with i % 10 == 2, collecting the matching vectors
    std::vector<float> sub_xb;
    std::vector<idx_t> kept;
    for (idx_t i = 0; i < nb; i++) {
        if (i % 10 == 2) {
            kept.push_back(i);
            sub_xb.insert(
                    sub_xb.end(), xb.begin() + i * d, xb.begin() + (i + 1) * d);
        }
    }
    // full index
    auto index = make_index(index_key, METRIC_L2, xb);
    ps.set_index_parameter(index.get(), "nprobe", 3);
    // restricted index: same trained state, only the kept vectors added
    std::unique_ptr<Index> sub_index(clone_index(index.get()));
    sub_index->reset();
    sub_index->add_with_ids(kept.size(), sub_xb.data(), kept.data());
    auto ref_result = search_index(sub_index.get(), xq.data());
    IVFSearchParameters params;
    params.max_codes = 0;
    params.nprobe = 3;
    IDSelectorBatch sel(kept.size(), kept.data());
    params.sel = &sel;
    auto new_result = search_index_with_params(index.get(), xq.data(), &params);
    if (ref_result != new_result) {
        return 1;
    }
    return 0;
}
} // namespace
/*************************************************************
* Test entry points
*************************************************************/
// Parameter-override tests across several IVF index flavors; a non-zero
// return from the helper identifies the failing nprobe value.
TEST(TPO, IVFFlat) {
    int err1 = test_params_override("IVF32,Flat", METRIC_L2);
    EXPECT_EQ(err1, 0);
    int err2 = test_params_override("IVF32,Flat", METRIC_INNER_PRODUCT);
    EXPECT_EQ(err2, 0);
}
TEST(TPO, IVFPQ) {
    int err1 = test_params_override("IVF32,PQ8np", METRIC_L2);
    EXPECT_EQ(err1, 0);
    int err2 = test_params_override("IVF32,PQ8np", METRIC_INNER_PRODUCT);
    EXPECT_EQ(err2, 0);
}
TEST(TPO, IVFSQ) {
    int err1 = test_params_override("IVF32,SQ8", METRIC_L2);
    EXPECT_EQ(err1, 0);
    int err2 = test_params_override("IVF32,SQ8", METRIC_INNER_PRODUCT);
    EXPECT_EQ(err2, 0);
}
// NOTE(review): despite the "IVFFlatPP" name this uses SQ8, like the
// IVFSQ test above but with a PCA pre-transform — confirm the factory
// string is intended.
TEST(TPO, IVFFlatPP) {
    int err1 = test_params_override("PCA16,IVF32,SQ8", METRIC_L2);
    EXPECT_EQ(err1, 0);
    int err2 = test_params_override("PCA16,IVF32,SQ8", METRIC_INNER_PRODUCT);
    EXPECT_EQ(err2, 0);
}
// ID-selector subset tests
TEST(TSEL, IVFFlat) {
    int err = test_selector("PCA16,IVF32,Flat");
    EXPECT_EQ(err, 0);
}
TEST(TSEL, IVFFPQ) {
    int err = test_selector("PCA16,IVF32,PQ4x8np");
    EXPECT_EQ(err, 0);
}
TEST(TSEL, IVFFSQ) {
    int err = test_selector("PCA16,IVF32,SQ8");
    EXPECT_EQ(err, 0);
}
/*************************************************************
* Same for binary indexes
*************************************************************/
// Generate n random binary vectors of d bits each (d / 8 bytes per
// vector), from the shared module-level generator.
std::vector<uint8_t> make_data_binary(size_t n) {
    const size_t nbytes = n * d / 8;
    std::vector<uint8_t> database(nbytes);
    std::uniform_int_distribution<> distrib;
    for (auto& byte : database) {
        byte = distrib(rng);
    }
    return database;
}
std::unique_ptr<IndexBinaryIVF> make_index(
const char* index_type,
const std::vector<uint8_t>& x) {
auto index = std::unique_ptr<IndexBinaryIVF>(
dynamic_cast<IndexBinaryIVF*>(index_binary_factory(d, index_type)));
index->train(nb, x.data());
index->add(nb, x.data());
return index;
}
// Binary-index version of search_index(): 10-NN search over the global
// nq queries (distances are int32 Hamming distances).
std::vector<idx_t> search_index(IndexBinaryIVF* index, const uint8_t* xq) {
    constexpr int k = 10;
    std::vector<idx_t> labels(k * nq);
    std::vector<int32_t> distances(k * nq);
    index->search(nq, xq, k, distances.data(), labels.data());
    return labels;
}
// Binary-index version of search_index_with_params(). There is no
// ivflib helper for binary indexes, so quantize the queries explicitly
// and call search_preassigned() with the requested parameters.
std::vector<idx_t> search_index_with_params(
        IndexBinaryIVF* index,
        const uint8_t* xq,
        IVFSearchParameters* params) {
    int k = 10;
    std::vector<idx_t> I(k * nq);
    std::vector<int32_t> D(k * nq);
    // coarse quantization: the params->nprobe closest lists per query
    std::vector<idx_t> Iq(params->nprobe * nq);
    std::vector<int32_t> Dq(params->nprobe * nq);
    index->quantizer->search(nq, xq, params->nprobe, Dq.data(), Iq.data());
    index->search_preassigned(
            nq, xq, k, Iq.data(), Dq.data(), D.data(), I.data(), false, params);
    return I;
}
// Binary-index version of test_params_override(): nprobe set through
// IVFSearchParameters must give the same results as the index-level
// nprobe. Returns 0 on success, else the mismatching nprobe value.
int test_params_override_binary(const char* index_key) {
    std::vector<uint8_t> xb = make_data_binary(nb); // database vectors
    auto index = make_index(index_key, xb);
    // make_index() already trained the index and added xb; the previous
    // explicit train/add here inserted every database vector twice
    // (the float version above has the same lines commented out):
    // index->train(nb, xb.data());
    // index->add(nb, xb.data());
    std::vector<uint8_t> xq = make_data_binary(nq);
    index->nprobe = 2;
    auto res2ref = search_index(index.get(), xq.data());
    index->nprobe = 9;
    auto res9ref = search_index(index.get(), xq.data());
    // set a different index-level value to verify it gets overridden
    index->nprobe = 1;
    IVFSearchParameters params;
    params.max_codes = 0;
    params.nprobe = 2;
    auto res2new = search_index_with_params(index.get(), xq.data(), &params);
    params.nprobe = 9;
    auto res9new = search_index_with_params(index.get(), xq.data(), &params);
    if (res2ref != res2new)
        return 2;
    if (res9ref != res9new)
        return 9;
    return 0;
}
// Parameter-override test for the binary IVF index.
TEST(TPOB, IVF) {
    int err1 = test_params_override_binary("BIVF32");
    EXPECT_EQ(err1, 0);
}

View File

@@ -0,0 +1,33 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <faiss/utils/AlignedTable.h>
#include <faiss/utils/partitioning.h>
using namespace faiss;
typedef AlignedTable<uint16_t> AlignedTableUint16;
// TODO: This test fails when Faiss is compiled with
// GCC 13.2 from conda-forge with AVX2 enabled. This may be
// a GCC bug that needs to be investigated further.
// As of 16-AUG-2023 the Faiss conda packages are built
// with GCC 11.2, so the published binaries are not affected.
// Fill a table with the multiples of 64 below 65536 and check that the
// 16-bucket SIMD histogram (shift 12) counts exactly 64 values per bucket.
TEST(TestPartitioning, TestPartitioningBigRange) {
    const int n = 1024;
    AlignedTableUint16 tab(n);
    for (int i = 0; i < n; i++) {
        tab[i] = i * 64;
    }
    int32_t hist[16]{};
    simd_histogram_16(tab.get(), n, 0, 12, hist);
    for (int bucket = 0; bucket < 16; bucket++) {
        ASSERT_EQ(hist[bucket], 64);
    }
}

View File

@@ -0,0 +1,145 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <iostream>
#include <memory>
#include <vector>
#include <gtest/gtest.h>
#include <faiss/IndexPQFastScan.h>
#include <faiss/impl/ProductQuantizer.h>
#include <faiss/impl/pq4_fast_scan.h>
namespace {
// Produce s pseudo-random 64-bit values drawn from rand().
const std::vector<uint64_t> random_vector(size_t s) {
    std::vector<uint64_t> v(s, 0);
    for (auto& value : v) {
        value = rand();
    }
    return v;
}
// Produce s pseudo-random floats drawn from rand() (integer-valued).
const std::vector<float> random_vector_float(size_t s) {
    std::vector<float> v(s, 0);
    for (auto& value : v) {
        value = rand();
    }
    return v;
}
} // namespace
// Round-trip nsubcodes random values through PQEncoderGeneric /
// PQDecoderGeneric for every code width from 1 to 24 bits, and check
// that the decoded values equal the encoded (masked) ones.
TEST(PQEncoderGeneric, encode) {
    const int nsubcodes = 97;
    const int minbits = 1;
    const int maxbits = 24;
    const std::vector<uint64_t> values = random_vector(nsubcodes);
    for (int nbits = minbits; nbits <= maxbits; ++nbits) {
        std::cerr << "nbits = " << nbits << std::endl;
        // only the low nbits of each value are representable
        const uint64_t mask = (1ull << nbits) - 1;
        std::unique_ptr<uint8_t[]> codes(
                new uint8_t[(nsubcodes * maxbits + 7) / 8]);
        // NOTE(hoss): Necessary scope to ensure trailing bits are flushed to
        // mem.
        {
            faiss::PQEncoderGeneric encoder(codes.get(), nbits);
            for (const auto& v : values) {
                encoder.encode(v & mask);
            }
        }
        faiss::PQDecoderGeneric decoder(codes.get(), nbits);
        for (int i = 0; i < nsubcodes; ++i) {
            uint64_t v = decoder.decode();
            EXPECT_EQ(values[i] & mask, v);
        }
    }
}
// Round-trip random values through the specialized 8-bit encoder/decoder.
TEST(PQEncoder8, encode) {
    const int nsubcodes = 100;
    const std::vector<uint64_t> values = random_vector(nsubcodes);
    const uint64_t mask = 0xFF;
    std::unique_ptr<uint8_t[]> codes(new uint8_t[nsubcodes]);
    faiss::PQEncoder8 encoder(codes.get(), 8);
    for (const auto& v : values) {
        encoder.encode(v & mask);
    }
    faiss::PQDecoder8 decoder(codes.get(), 8);
    for (int i = 0; i < nsubcodes; ++i) {
        uint64_t v = decoder.decode();
        EXPECT_EQ(values[i] & mask, v);
    }
}
// Round-trip random values through the specialized 16-bit encoder/decoder.
TEST(PQEncoder16, encode) {
    const int nsubcodes = 100;
    const std::vector<uint64_t> values = random_vector(nsubcodes);
    const uint64_t mask = 0xFFFF;
    std::unique_ptr<uint8_t[]> codes(new uint8_t[2 * nsubcodes]);
    faiss::PQEncoder16 encoder(codes.get(), 16);
    for (const auto& v : values) {
        encoder.encode(v & mask);
    }
    faiss::PQDecoder16 decoder(codes.get(), 16);
    for (int i = 0; i < nsubcodes; ++i) {
        uint64_t v = decoder.decode();
        EXPECT_EQ(values[i] & mask, v);
    }
}
// Check pq4_set_packed_element: rewriting the codes of a single vector
// in the packed fast-scan layout must leave every other packed code
// untouched.
TEST(PQFastScan, set_packed_element) {
    int d = 20, ntotal = 1000, M = 5, nbits = 4;
    const std::vector<float> ds = random_vector_float(ntotal * d);
    faiss::IndexPQFastScan index(d, M, nbits);
    index.train(ntotal, ds.data());
    index.add(ntotal, ds.data());
    for (int j = 0; j < 10; j++) {
        int vector_id = rand() % ntotal;
        // snapshot of all packed codes before the modification
        std::vector<uint8_t> old(ntotal * M);
        std::vector<uint8_t> code(M);
        for (int i = 0; i < ntotal; i++) {
            for (int sq = 0; sq < M; sq++) {
                old[i * M + sq] = faiss::pq4_get_packed_element(
                        index.codes.data(), index.bbs, M, i, sq);
            }
        }
        // rewrite the M codes of one random vector (4-bit codes, mod 16)
        for (int sq = 0; sq < M; sq++) {
            faiss::pq4_set_packed_element(
                    index.codes.data(),
                    ((old[vector_id * M + sq] + 3) % 16),
                    index.bbs,
                    M,
                    vector_id,
                    sq);
        }
        // only vector_id's codes may have changed
        for (int i = 0; i < ntotal; i++) {
            for (int sq = 0; sq < M; sq++) {
                uint8_t newcode = faiss::pq4_get_packed_element(
                        index.codes.data(), index.bbs, M, i, sq);
                uint8_t oldcode = old[i * M + sq];
                if (i == vector_id) {
                    EXPECT_EQ(newcode, (oldcode + 3) % 16);
                } else {
                    EXPECT_EQ(newcode, oldcode);
                }
            }
        }
    }
}

View File

@@ -0,0 +1,264 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <faiss/utils/simdlib.h>
using namespace faiss;
// cmplt_and_blend_inplace keeps, per lane, the smaller of the
// accumulated value and the candidate value (with its index). The
// expected indices show that ties keep the accumulator (lane 5 keeps
// index 5 even though candidate 0 also has value 5).
TEST(TestSIMDLib, TestCmpltAndBlendInplace) {
    simd8float32 lowestValues(0, 1, 2, 3, 4, 5, 6, 7);
    simd8uint32 lowestIndices(0, 1, 2, 3, 4, 5, 6, 7);
    // candidate 0 improves lanes 6 and 7 (5 < 6, 7)
    simd8float32 candidateValues0(5, 5, 5, 5, 5, 5, 5, 5);
    simd8uint32 candidateIndices0(10, 11, 12, 13, 14, 15, 16, 17);
    cmplt_and_blend_inplace(
            candidateValues0, candidateIndices0, lowestValues, lowestIndices);
    // candidate 1 improves nothing (6 >= everything kept so far)
    simd8float32 candidateValues1(6, 6, 6, 6, 6, 6, 6, 6);
    simd8uint32 candidateIndices1(20, 21, 22, 23, 24, 25, 26, 27);
    cmplt_and_blend_inplace(
            candidateValues1, candidateIndices1, lowestValues, lowestIndices);
    // candidate 2 ties everywhere, so no lane is replaced
    simd8float32 candidateValues2(0, 1, 2, 3, 4, 5, 5, 5);
    simd8uint32 candidateIndices2(30, 31, 32, 33, 34, 35, 36, 37);
    cmplt_and_blend_inplace(
            candidateValues2, candidateIndices2, lowestValues, lowestIndices);
    simd8float32 expectedValues(0, 1, 2, 3, 4, 5, 5, 5);
    simd8uint32 expectedIndices(0, 1, 2, 3, 4, 5, 16, 17);
    ASSERT_TRUE(lowestValues.is_same_as(expectedValues));
    ASSERT_TRUE(lowestIndices.is_same_as(expectedIndices));
}
// cmplt_min_max_fast splits candidate/current pairs into per-lane min
// and max values with their indices (float values, uint32 indices).
TEST(TestSIMDLib, TestCmpltMinMaxFloat) {
    simd8float32 minValues(0, 0, 0, 0, 0, 0, 0, 0);
    simd8uint32 minIndices(0, 0, 0, 0, 0, 0, 0, 0);
    simd8float32 maxValues(0, 0, 0, 0, 0, 0, 0, 0);
    simd8uint32 maxIndices(0, 0, 0, 0, 0, 0, 0, 0);
    simd8float32 candidateValues0(5, 5, 5, 5, 5, 5, 5, 5);
    simd8uint32 candidateIndices0(10, 11, 12, 13, 14, 15, 16, 17);
    simd8float32 currentValues0(0, 1, 2, 3, 4, 5, 6, 7);
    simd8uint32 currentIndices0(0, 1, 2, 3, 4, 5, 6, 7);
    cmplt_min_max_fast(
            candidateValues0,
            candidateIndices0,
            currentValues0,
            currentIndices0,
            minValues,
            minIndices,
            maxValues,
            maxIndices);
    simd8float32 expectedMinValues(0, 1, 2, 3, 4, 5, 5, 5);
    simd8uint32 expectedMinIndices(0, 1, 2, 3, 4, 5, 16, 17);
    ASSERT_TRUE(minValues.is_same_as(expectedMinValues));
    ASSERT_TRUE(minIndices.is_same_as(expectedMinIndices));
    simd8float32 expectedMaxValues(5, 5, 5, 5, 5, 5, 6, 7);
    // the result is not 10,11,12,13,14,5,6,7 because it is _fast version
    simd8uint32 expectedMaxIndices(10, 11, 12, 13, 14, 15, 6, 7);
    ASSERT_TRUE(maxValues.is_same_as(expectedMaxValues));
    ASSERT_TRUE(maxIndices.is_same_as(expectedMaxIndices));
}
// Same scenario as TestCmpltMinMaxFloat, with uint32 values instead of
// floats.
TEST(TestSIMDLib, TestCmpltMinMaxInt) {
    simd8uint32 minValues(0, 0, 0, 0, 0, 0, 0, 0);
    simd8uint32 minIndices(0, 0, 0, 0, 0, 0, 0, 0);
    simd8uint32 maxValues(0, 0, 0, 0, 0, 0, 0, 0);
    simd8uint32 maxIndices(0, 0, 0, 0, 0, 0, 0, 0);
    simd8uint32 candidateValues0(5, 5, 5, 5, 5, 5, 5, 5);
    simd8uint32 candidateIndices0(10, 11, 12, 13, 14, 15, 16, 17);
    simd8uint32 currentValues0(0, 1, 2, 3, 4, 5, 6, 7);
    simd8uint32 currentIndices0(0, 1, 2, 3, 4, 5, 6, 7);
    cmplt_min_max_fast(
            candidateValues0,
            candidateIndices0,
            currentValues0,
            currentIndices0,
            minValues,
            minIndices,
            maxValues,
            maxIndices);
    simd8uint32 expectedMinValues(0, 1, 2, 3, 4, 5, 5, 5);
    simd8uint32 expectedMinIndices(0, 1, 2, 3, 4, 5, 16, 17);
    ASSERT_TRUE(minValues.is_same_as(expectedMinValues));
    ASSERT_TRUE(minIndices.is_same_as(expectedMinIndices));
    simd8uint32 expectedMaxValues(5, 5, 5, 5, 5, 5, 6, 7);
    // the result is not 10,11,12,13,14,5,6,7 because it is _fast version
    simd8uint32 expectedMaxIndices(10, 11, 12, 13, 14, 15, 6, 7);
    ASSERT_TRUE(maxValues.is_same_as(expectedMaxValues));
    ASSERT_TRUE(maxIndices.is_same_as(expectedMaxIndices));
}
// 16-lane uint16 variant of the cmplt_min_max_fast test: the first 8
// lanes replay the 8-lane scenario, the last 8 lanes repeat it with all
// values offset by 1000 to exercise the upper half of the register.
TEST(TestSIMDLib, TestCmpltMinMaxInt16) {
    simd16uint16 minValues(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    simd16uint16 minIndices(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    simd16uint16 maxValues(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    simd16uint16 maxIndices(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    // clang-format off
    simd16uint16 candidateValues0(
            5, 5, 5, 5, 5, 5, 5, 5,
            1005, 1005, 1005, 1005, 1005, 1005, 1005, 1005);
    simd16uint16 candidateIndices0(
            10, 11, 12, 13, 14, 15, 16, 17,
            1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017);
    simd16uint16 currentValues0(
            0, 1, 2, 3, 4, 5, 6, 7,
            1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007);
    simd16uint16 currentIndices0(
            0, 1, 2, 3, 4, 5, 6, 7,
            1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007);
    // clang-format on
    cmplt_min_max_fast(
            candidateValues0,
            candidateIndices0,
            currentValues0,
            currentIndices0,
            minValues,
            minIndices,
            maxValues,
            maxIndices);
    // clang-format off
    simd16uint16 expectedMinValues(
            0, 1, 2, 3, 4, 5, 5, 5,
            1000, 1001, 1002, 1003, 1004, 1005, 1005, 1005);
    simd16uint16 expectedMinIndices(
            0, 1, 2, 3, 4, 5, 16, 17,
            1000, 1001, 1002, 1003, 1004, 1005, 1016, 1017);
    // clang-format on
    ASSERT_TRUE(minValues.is_same_as(expectedMinValues));
    ASSERT_TRUE(minIndices.is_same_as(expectedMinIndices));
    // clang-format off
    simd16uint16 expectedMaxValues(
            5, 5, 5, 5, 5, 5, 6, 7,
            1005, 1005, 1005, 1005, 1005, 1005, 1006, 1007);
    // the result is not 10,11,12,13,14,5,6,7 because it is _fast version
    simd16uint16 expectedMaxIndices(
            10, 11, 12, 13, 14, 15, 6, 7,
            1010, 1011, 1012, 1013, 1014, 1015, 1006, 1007);
    // clang-format on
    ASSERT_TRUE(maxValues.is_same_as(expectedMaxValues));
    ASSERT_TRUE(maxIndices.is_same_as(expectedMaxIndices));
}

View File

@@ -0,0 +1,205 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <random>
#include <vector>
#include <gtest/gtest.h>
#include <faiss/AutoTune.h>
#include <faiss/IVFlib.h>
#include <faiss/IndexIVF.h>
#include <faiss/clone_index.h>
#include <faiss/index_factory.h>
using namespace faiss;
// dimension of the vectors to index
int d = 32;
// nb of training vectors
size_t nt = 5000;
// size of the database points per window step
size_t nb = 1000;
// nb of queries
size_t nq = 200;
int total_size = 40;
int window_size = 10;
// Draw n * d uniform [0, 1) floats from a default-seeded generator, so
// every call returns the same deterministic data.
// NOTE(review): callers in this file pass n already multiplied by d, so
// this allocates d times more data than is consumed — harmless but
// wasteful; confirm intent.
std::vector<float> make_data(size_t n) {
    std::vector<float> database(n * d);
    std::mt19937 rng;
    std::uniform_real_distribution<> distrib;
    for (auto& value : database) {
        value = distrib(rng);
    }
    return database;
}
// Build an index from the factory string, train it on nt random vectors
// and set nprobe = 4. No database vectors are added here.
std::unique_ptr<Index> make_trained_index(const char* index_type) {
    auto index = std::unique_ptr<Index>(index_factory(d, index_type));
    auto xt = make_data(nt * d);
    index->train(nt, xt.data());
    ParameterSpace().set_index_parameter(index.get(), "nprobe", 4);
    return index;
}
// 10-NN search for the global nq queries; returns the label matrix.
std::vector<idx_t> search_index(Index* index, const float* xq) {
    constexpr int k = 10;
    std::vector<idx_t> labels(k * nq);
    std::vector<float> distances(k * nq);
    index->search(nq, xq, k, distances.data(), labels.data());
    return labels;
}
/*************************************************************
* Test functions for a given index type
*************************************************************/
// make a few slices of indexes that can be merged
// Clone the trained index total_size times and fill each clone with nb
// random vectors carrying random ids.
// NOTE(review): both make_data() and the rng below are default-seeded
// inside the loop, so every slice receives identical vectors and the
// exact same id sequence — confirm this is intended.
void make_index_slices(
        const Index* trained_index,
        std::vector<std::unique_ptr<Index>>& sub_indexes) {
    for (int i = 0; i < total_size; i++) {
        sub_indexes.emplace_back(clone_index(trained_index));
        Index* index = sub_indexes.back().get();
        auto xb = make_data(nb * d);
        std::vector<faiss::idx_t> ids(nb);
        std::mt19937 rng;
        std::uniform_int_distribution<> distrib;
        for (int j = 0; j < nb; j++) {
            ids[j] = distrib(rng);
        }
        index->add_with_ids(nb, xb.data(), ids.data());
    }
}
// build merged index explicitly at sliding window position i
// Reference construction: clone the trained index and merge_from() every
// slice inside the window [i - window_size + 1, i], clamped to the range
// of existing slices. Caller owns the returned index.
Index* make_merged_index(
        const Index* trained_index,
        const std::vector<std::unique_ptr<Index>>& sub_indexes,
        int i) {
    Index* merged_index = clone_index(trained_index);
    for (int j = i - window_size + 1; j <= i; j++) {
        if (j < 0 || j >= total_size)
            continue;
        // merge a clone so the original slice stays intact for later steps
        std::unique_ptr<Index> sub_index(clone_index(sub_indexes[j].get()));
        IndexIVF* ivf0 = ivflib::extract_index_ivf(merged_index);
        IndexIVF* ivf1 = ivflib::extract_index_ivf(sub_index.get());
        ivf0->merge_from(*ivf1, 0);
        // keep the (possibly wrapping) outer index's count in sync
        merged_index->ntotal = ivf0->ntotal;
    }
    return merged_index;
}
// Check SlidingIndexWindow: at each step the windowed index must return
// exactly the same results as an index re-merged from scratch over the
// same window of slices.
int test_sliding_window(const char* index_key) {
    std::unique_ptr<Index> trained_index = make_trained_index(index_key);
    // make the index slices
    std::vector<std::unique_ptr<Index>> sub_indexes;
    make_index_slices(trained_index.get(), sub_indexes);
    // now slide over the windows
    std::unique_ptr<Index> index(clone_index(trained_index.get()));
    ivflib::SlidingIndexWindow window(index.get());
    auto xq = make_data(nq * d);
    for (int i = 0; i < total_size + window_size; i++) {
        // add slice i (nullptr once all slices are consumed) and drop the
        // oldest slice once the window is full
        window.step(
                i < total_size ? sub_indexes[i].get() : nullptr,
                i >= window_size);
        auto new_res = search_index(index.get(), xq.data());
        std::unique_ptr<Index> merged_index(
                make_merged_index(trained_index.get(), sub_indexes, i));
        auto ref_res = search_index(merged_index.get(), xq.data());
        EXPECT_EQ(ref_res.size(), new_res.size());
        EXPECT_EQ(ref_res, new_res);
    }
    return 0;
}
// Same check as test_sliding_window, but implemented by swapping in a
// ConcatenatedInvertedLists over the window's inverted lists each step.
int test_sliding_invlists(const char* index_key) {
    std::unique_ptr<Index> trained_index = make_trained_index(index_key);
    // make the index slices
    std::vector<std::unique_ptr<Index>> sub_indexes;
    make_index_slices(trained_index.get(), sub_indexes);
    // now slide over the windows
    std::unique_ptr<Index> index(clone_index(trained_index.get()));
    IndexIVF* index_ivf = ivflib::extract_index_ivf(index.get());
    auto xq = make_data(nq * d);
    for (int i = 0; i < total_size + window_size; i++) {
        // collect the invlists of the slices inside the current window
        std::vector<const InvertedLists*> ils;
        for (int j = i - window_size + 1; j <= i; j++) {
            if (j < 0 || j >= total_size)
                continue;
            ils.push_back(
                    ivflib::extract_index_ivf(sub_indexes[j].get())->invlists);
        }
        if (ils.size() == 0)
            continue;
        ConcatenatedInvertedLists* ci =
                new ConcatenatedInvertedLists(ils.size(), ils.data());
        // will be deleted by the index
        index_ivf->replace_invlists(ci, true);
        auto new_res = search_index(index.get(), xq.data());
        std::unique_ptr<Index> merged_index(
                make_merged_index(trained_index.get(), sub_indexes, i));
        auto ref_res = search_index(merged_index.get(), xq.data());
        EXPECT_EQ(ref_res.size(), new_res.size());
        EXPECT_EQ(ref_res, new_res);
    }
    return 0;
}
/*************************************************************
* Test entry points
*************************************************************/
// The helpers assert internally via EXPECT_*; return values are ignored.
TEST(SlidingWindow, IVFFlat) {
    test_sliding_window("IVF32,Flat");
}
TEST(SlidingWindow, PCAIVFFlat) {
    test_sliding_window("PCA24,IVF32,Flat");
}
TEST(SlidingInvlists, IVFFlat) {
    test_sliding_invlists("IVF32,Flat");
}
TEST(SlidingInvlists, PCAIVFFlat) {
    test_sliding_invlists("PCA24,IVF32,Flat");
}

View File

@@ -0,0 +1,260 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/IndexReplicas.h>
#include <faiss/IndexShards.h>
#include <faiss/impl/ThreadedIndex.h>
#include <gtest/gtest.h>
#include <chrono>
#include <memory>
#include <thread>
#include <vector>
namespace {
struct TestException : public std::exception {};
using idx_t = faiss::idx_t;
// Minimal faiss::Index stub that records the arguments of the most
// recent add()/search() call, so tests can verify how ThreadedIndex
// dispatches work across its sub-indexes.
struct MockIndex : public faiss::Index {
    explicit MockIndex(idx_t d) : faiss::Index(d) {
        resetMock();
    }
    // Clear all recorded call arguments and the processed flag.
    void resetMock() {
        flag = false;
        nCalled = 0;
        xCalled = nullptr;
        kCalled = 0;
        distancesCalled = nullptr;
        labelsCalled = nullptr;
    }
    void add(idx_t n, const float* x) override {
        nCalled = n;
        xCalled = x;
    }
    // Records its arguments only; the members are mutable because
    // search() is const in the Index interface.
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            const faiss::SearchParameters* params) const override {
        FAISS_THROW_IF_NOT(!params); // this mock does not support params
        nCalled = n;
        xCalled = x;
        kCalled = k;
        distancesCalled = distances;
        labelsCalled = labels;
    }
    void reset() override {}
    // set by test lambdas to mark this index as processed
    bool flag;
    mutable idx_t nCalled;
    mutable const float* xCalled;
    mutable idx_t kCalled;
    mutable float* distancesCalled;
    mutable idx_t* labelsCalled;
};
// Concrete ThreadedIndex with no-op overrides: ThreadedIndex is
// abstract, and the runOnIndex() tests only need an instantiable
// container of sub-indexes.
template <typename IndexT>
struct MockThreadedIndex : public faiss::ThreadedIndex<IndexT> {
    using idx_t = faiss::idx_t;
    explicit MockThreadedIndex(bool threaded)
            : faiss::ThreadedIndex<IndexT>(threaded) {}
    void add(idx_t, const float*) override {}
    void search(
            idx_t,
            const float*,
            idx_t,
            float*,
            idx_t*,
            const faiss::SearchParameters*) const override {}
    void reset() override {}
};
} // namespace
// One sub-index's task throws: runOnIndex must propagate the exception
// while still letting the other sub-indexes complete their work.
TEST(ThreadedIndex, SingleException) {
    std::vector<std::unique_ptr<MockIndex>> idxs;
    for (int i = 0; i < 3; ++i) {
        idxs.emplace_back(new MockIndex(1));
    }
    // index 1 throws; the others sleep (staggered) and then set the flag
    auto fn = [](int i, MockIndex* index) {
        if (i == 1) {
            throw TestException();
        } else {
            std::this_thread::sleep_for(std::chrono::milliseconds(i * 250));
            index->flag = true;
        }
    };
    // Try with threading and without
    for (bool threaded : {true, false}) {
        // clear flags
        for (auto& idx : idxs) {
            idx->resetMock();
        }
        MockThreadedIndex<MockIndex> ti(threaded);
        for (auto& idx : idxs) {
            ti.addIndex(idx.get());
        }
        // The second index should throw
        EXPECT_THROW(ti.runOnIndex(fn), TestException);
        // Index 0 and 2 should have processed
        EXPECT_TRUE(idxs[0]->flag);
        EXPECT_TRUE(idxs[2]->flag);
    }
}
// Two sub-indexes throw: the exceptions must be aggregated into a single
// faiss::FaissException, and the remaining sub-index still runs.
TEST(ThreadedIndex, MultipleException) {
    std::vector<std::unique_ptr<MockIndex>> idxs;
    for (int i = 0; i < 3; ++i) {
        idxs.emplace_back(new MockIndex(1));
    }
    // indexes 0 and 1 throw; index 2 sleeps and then sets its flag
    auto fn = [](int i, MockIndex* index) {
        if (i < 2) {
            throw TestException();
        } else {
            std::this_thread::sleep_for(std::chrono::milliseconds(i * 250));
            index->flag = true;
        }
    };
    // Try with threading and without
    for (bool threaded : {true, false}) {
        // clear flags
        for (auto& idx : idxs) {
            idx->resetMock();
        }
        MockThreadedIndex<MockIndex> ti(threaded);
        for (auto& idx : idxs) {
            ti.addIndex(idx.get());
        }
        // Multiple indices threw an exception that was aggregated into a
        // FaissException
        EXPECT_THROW(ti.runOnIndex(fn), faiss::FaissException);
        // Index 2 should have processed
        EXPECT_TRUE(idxs[2]->flag);
    }
}
// IndexReplicas: add() must broadcast all n vectors to every replica,
// while search() splits the queries evenly across replicas.
TEST(ThreadedIndex, TestReplica) {
    int numReplicas = 5;
    int n = 10 * numReplicas; // n divides evenly across the replicas
    int d = 3;
    int k = 6;
    // Try with threading and without
    // NOTE(review): `threaded` is never passed to IndexReplicas, so both
    // iterations exercise the same configuration — confirm intent.
    for ([[maybe_unused]] const bool threaded : {true, false}) {
        std::vector<std::unique_ptr<MockIndex>> idxs;
        faiss::IndexReplicas replica(d);
        for (int i = 0; i < numReplicas; ++i) {
            idxs.emplace_back(new MockIndex(d));
            replica.addIndex(idxs.back().get());
        }
        std::vector<float> x(n * d);
        std::vector<float> distances(n * k);
        std::vector<faiss::idx_t> labels(n * k);
        replica.add(n, x.data());
        // every replica saw the full batch
        for (int i = 0; i < idxs.size(); ++i) {
            EXPECT_EQ(idxs[i]->nCalled, n);
            EXPECT_EQ(idxs[i]->xCalled, x.data());
        }
        for (auto& idx : idxs) {
            idx->resetMock();
        }
        replica.search(n, x.data(), k, distances.data(), labels.data());
        // each replica searched its own contiguous slice of the queries
        for (int i = 0; i < idxs.size(); ++i) {
            auto perReplica = n / idxs.size();
            EXPECT_EQ(idxs[i]->nCalled, perReplica);
            EXPECT_EQ(idxs[i]->xCalled, x.data() + i * perReplica * d);
            EXPECT_EQ(idxs[i]->kCalled, k);
            EXPECT_EQ(
                    idxs[i]->distancesCalled,
                    distances.data() + (i * perReplica) * k);
            EXPECT_EQ(
                    idxs[i]->labelsCalled,
                    labels.data() + (i * perReplica) * k);
        }
    }
}
// IndexShards: add() must split the vectors across shards, while
// search() sends all n queries to every shard (results land in a
// temporary merge buffer).
TEST(ThreadedIndex, TestShards) {
    int numShards = 7;
    int d = 3;
    int n = 10 * numShards; // n divides evenly across the shards
    int k = 6;
    // Try with threading and without
    for (bool threaded : {true, false}) {
        std::vector<std::unique_ptr<MockIndex>> idxs;
        faiss::IndexShards shards(d, threaded);
        for (int i = 0; i < numShards; ++i) {
            idxs.emplace_back(new MockIndex(d));
            shards.addIndex(idxs.back().get());
        }
        std::vector<float> x(n * d);
        std::vector<float> distances(n * k);
        std::vector<faiss::idx_t> labels(n * k);
        shards.add(n, x.data());
        // each shard received its own contiguous slice of the vectors
        for (int i = 0; i < idxs.size(); ++i) {
            auto perShard = n / idxs.size();
            EXPECT_EQ(idxs[i]->nCalled, perShard);
            EXPECT_EQ(idxs[i]->xCalled, x.data() + i * perShard * d);
        }
        for (auto& idx : idxs) {
            idx->resetMock();
        }
        shards.search(n, x.data(), k, distances.data(), labels.data());
        // every shard searched the full query batch
        for (int i = 0; i < idxs.size(); ++i) {
            EXPECT_EQ(idxs[i]->nCalled, n);
            EXPECT_EQ(idxs[i]->xCalled, x.data());
            EXPECT_EQ(idxs[i]->kCalled, k);
            // There is a temporary buffer used for shards
            EXPECT_EQ(
                    idxs[i]->distancesCalled,
                    idxs[0]->distancesCalled + i * k * n);
            EXPECT_EQ(idxs[i]->labelsCalled, idxs[0]->labelsCalled + i * k * n);
        }
    }
}

View File

@@ -0,0 +1,147 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <gtest/gtest.h>
#include <faiss/AutoTune.h>
#include <faiss/IVFlib.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/clone_index.h>
#include <faiss/impl/io.h>
#include <faiss/index_factory.h>
#include <faiss/index_io.h>
#include <faiss/utils/random.h>
namespace {
// parameters to use for the test
int d = 64;
size_t nb = 1000;
size_t nq = 100;
size_t nt = 500;
int k = 10;
int nlist = 40;
using namespace faiss;
typedef faiss::idx_t idx_t;
// nb Gaussian random vectors of dimension d, reproducible via seed.
std::vector<float> get_data(size_t nb, int seed) {
    const size_t total = nb * d;
    std::vector<float> x(total);
    float_randn(x.data(), total, seed);
    return x;
}
// Simulate moving an IVF index between machines: the source index's
// inverted lists are serialized and copied onto a pre-filled destination
// index in nslice slices; once all slices are transferred, the
// destination must reproduce the source's search results exactly.
void test_index_type(const char* factory_string) {
    // transfer inverted lists in nslice slices
    int nslice = 3;
    /****************************************************************
     * trained reference index
     ****************************************************************/
    std::unique_ptr<Index> trained(index_factory(d, factory_string));
    {
        auto xt = get_data(nt, 123);
        trained->train(nt, xt.data());
    }
    // sample nq query vectors to check if results are the same
    auto xq = get_data(nq, 818);
    /****************************************************************
     * source index
     ***************************************************************/
    std::unique_ptr<Index> src_index(clone_index(trained.get()));
    { // add some data to source index
        auto xb = get_data(nb, 245);
        src_index->add(nb, xb.data());
    }
    ParameterSpace().set_index_parameter(src_index.get(), "nprobe", 4);
    // remember reference search result on source index
    std::vector<idx_t> Iref(nq * k);
    std::vector<float> Dref(nq * k);
    src_index->search(nq, xq.data(), k, Dref.data(), Iref.data());
    /****************************************************************
     * destination index -- should be replaced by source index
     ***************************************************************/
    std::unique_ptr<Index> dst_index(clone_index(trained.get()));
    { // initial state: filled in with some garbage
        int nb2 = nb + 10;
        auto xb = get_data(nb2, 366);
        dst_index->add(nb2, xb.data());
    }
    std::vector<idx_t> Inew(nq * k);
    std::vector<float> Dnew(nq * k);
    ParameterSpace().set_index_parameter(dst_index.get(), "nprobe", 4);
    // transfer from source to destination in nslice slices
    for (int sl = 0; sl < nslice; sl++) {
        // so far, the indexes are different
        dst_index->search(nq, xq.data(), k, Dnew.data(), Inew.data());
        EXPECT_TRUE(Iref != Inew);
        EXPECT_TRUE(Dref != Dnew);
        // range of inverted list indices to transfer
        long i0 = sl * nlist / nslice;
        long i1 = (sl + 1) * nlist / nslice;
        std::vector<uint8_t> data_to_transfer;
        {
            std::unique_ptr<ArrayInvertedLists> il(
                    ivflib::get_invlist_range(src_index.get(), i0, i1));
            // serialize inverted lists
            VectorIOWriter wr;
            write_InvertedLists(il.get(), &wr);
            data_to_transfer.swap(wr.data);
        }
        // transfer data here from source machine to dest machine
        {
            VectorIOReader reader;
            reader.data.swap(data_to_transfer);
            // deserialize inverted lists
            std::unique_ptr<ArrayInvertedLists> il(
                    dynamic_cast<ArrayInvertedLists*>(
                            read_InvertedLists(&reader)));
            // swap inverted lists. Block searches here!
            { ivflib::set_invlist_range(dst_index.get(), i0, i1, il.get()); }
        }
    }
    EXPECT_EQ(dst_index->ntotal, src_index->ntotal);
    // now, the indexes are the same
    dst_index->search(nq, xq.data(), k, Dnew.data(), Inew.data());
    EXPECT_TRUE(Iref == Inew);
    EXPECT_TRUE(Dref == Dnew);
}
} // namespace
// Invlist-transfer tests: plain IVF and IVF behind a PCA pre-transform.
TEST(TRANS, IVFFlat) {
    test_index_type("IVF40,Flat");
}
TEST(TRANS, IVFFlatPreproc) {
    test_index_type("PCAR32,IVF40,Flat");
}

View File

@@ -0,0 +1,38 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#ifndef FAISS_TEST_UTIL_H
#define FAISS_TEST_UTIL_H
#include <faiss/IndexIVFPQ.h>
#include <unistd.h>
// RAII wrapper around mkstemp(): creates a unique temporary file on
// construction and removes it on destruction. The caller-provided mutex
// serializes mkstemp() calls across test threads.
struct Tempfilename {
    pthread_mutex_t* mutex;
    std::string filename;
    // filename_template must end in "XXXXXX"; mkstemp replaces it in
    // place with a unique suffix and creates the file.
    Tempfilename(pthread_mutex_t* mutex, std::string filename_template) {
        this->mutex = mutex;
        this->filename = filename_template;
        pthread_mutex_lock(mutex);
        int fd = mkstemp(&this->filename[0]);
        close(fd);
        pthread_mutex_unlock(mutex);
    }
    ~Tempfilename() {
        // access() returns 0 when the file exists. The original test
        // used `if (access(...))`, which is true only when the file does
        // NOT exist, so the temporary file was never deleted.
        if (access(filename.c_str(), F_OK) == 0) {
            unlink(filename.c_str());
        }
    }
    const char* c_str() {
        return filename.c_str();
    }
};
#endif // FAISS_TEST_UTIL_H

View File

@@ -0,0 +1,19 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <faiss/Index.h>
#include <faiss/utils/utils.h>
// faiss::get_version() must match the compile-time version macros.
TEST(TestUtils, get_version) {
    std::string version = std::to_string(FAISS_VERSION_MAJOR) + "." +
            std::to_string(FAISS_VERSION_MINOR) + "." +
            std::to_string(FAISS_VERSION_PATCH);
    EXPECT_EQ(version, faiss::get_version());
}

View File

@@ -0,0 +1,243 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gtest/gtest.h>
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/IndexFlat.h>
#include <faiss/impl/io.h>
#include <faiss/impl/zerocopy_io.h>
#include <faiss/index_io.h>
namespace {
// n vectors of dimension d with uniform [0, 1) components, reproducible
// from the given seed.
std::vector<float> make_data(const size_t n, const size_t d, size_t seed) {
    std::vector<float> database(n * d);
    std::mt19937 rng(seed);
    std::uniform_real_distribution<float> distrib;
    for (auto& value : database) {
        value = distrib(rng);
    }
    return database;
}
// n binary vectors of d bytes each with random byte values, reproducible
// from the given seed.
std::vector<uint8_t> make_binary_data(
        const size_t n,
        const size_t d,
        size_t seed) {
    std::vector<uint8_t> database(n * d);
    std::mt19937 rng(seed);
    // std::uniform_int_distribution<uint8_t> is ill-formed per the C++
    // standard (IntType must be short/int/long/...; MSVC rejects it), so
    // draw wider integers and narrow explicitly.
    std::uniform_int_distribution<uint32_t> distrib(0, 255);
    for (size_t i = 0; i < n * d; i++) {
        database[i] = static_cast<uint8_t>(distrib(rng));
    }
    return database;
}
} // namespace
// Zero-copy deserialization of float flat codes. The scheme:
//   1. build two indices, index1 and index2, over different data
//   2. serialize each one into an in-memory byte vector
//   3. zero-copy-deserialize the first buffer into index1zc
//   4. since index1zc aliases the buffer, rewriting the buffer's bytes in
//      place with the second serialization must make index1zc answer
//      exactly like index2 (and back again)
TEST(TestZeroCopy, zerocopy_flatcodes) {
    const size_t nt = 1000;
    const size_t nq = 10;
    const size_t d = 32;
    const size_t k = 25;

    // three distinct random datasets
    std::vector<float> xt1 = make_data(nt, d, 123);
    std::vector<float> xt2 = make_data(nt, d, 456);
    std::vector<float> xq = make_data(nq, d, 789);
    ASSERT_NE(xt1, xt2);

    // reference index and results #1
    faiss::IndexFlatL2 index1(d);
    index1.train(nt, xt1.data());
    index1.add(nt, xt1.data());
    std::vector<float> ref_dis_1(k * nq);
    std::vector<faiss::idx_t> ref_ids_1(k * nq);
    index1.search(nq, xq.data(), k, ref_dis_1.data(), ref_ids_1.data());

    // reference index and results #2
    faiss::IndexFlatL2 index2(d);
    index2.train(nt, xt2.data());
    index2.add(nt, xt2.data());
    std::vector<float> ref_dis_2(k * nq);
    std::vector<faiss::idx_t> ref_ids_2(k * nq);
    index2.search(nq, xq.data(), k, ref_dis_2.data(), ref_ids_2.data());

    // the two indices must disagree, otherwise the test proves nothing
    ASSERT_NE(ref_dis_1, ref_dis_2);
    ASSERT_NE(ref_ids_1, ref_ids_2);

    // serialize both indices into byte vectors
    faiss::VectorIOWriter writer1;
    faiss::write_index(&index1, &writer1);
    faiss::VectorIOWriter writer2;
    faiss::write_index(&index2, &writer2);
    ASSERT_EQ(writer1.data.size(), writer2.data.size());

    // working buffer, initially a copy of serialization #1
    std::vector<uint8_t> buffer = writer1.data;

    // zero-copy deserialization: the resulting index aliases `buffer`
    faiss::ZeroCopyIOReader reader(buffer.data(), buffer.size());
    std::unique_ptr<faiss::Index> index1zc(faiss::read_index(&reader));
    ASSERT_NE(index1zc, nullptr);

    // byte-wise in-place copy; dst.data() must not move because the
    // zero-copy index points into it
    auto overwrite = [](std::vector<uint8_t>& dst,
                        const std::vector<uint8_t>& src) {
        for (size_t i = 0; i < dst.size(); i++) {
            dst[i] = src[i];
        }
    };

    auto run_search = [&](std::vector<float>& dis,
                          std::vector<faiss::idx_t>& ids) {
        index1zc->search(nq, xq.data(), k, dis.data(), ids.data());
    };

    // initially the zero-copy index behaves like index1
    std::vector<float> got_dis_1(k * nq);
    std::vector<faiss::idx_t> got_ids_1(k * nq);
    run_search(got_dis_1, got_ids_1);
    ASSERT_EQ(ref_ids_1, got_ids_1);
    ASSERT_EQ(ref_dis_1, got_dis_1);

    // rewrite the aliased bytes with serialization #2 -> behaves like index2
    overwrite(buffer, writer2.data);
    std::vector<float> got_dis_2(k * nq);
    std::vector<faiss::idx_t> got_ids_2(k * nq);
    run_search(got_dis_2, got_ids_2);
    ASSERT_EQ(ref_ids_2, got_ids_2);
    ASSERT_EQ(ref_dis_2, got_dis_2);

    // and back to serialization #1
    overwrite(buffer, writer1.data);
    std::vector<float> got_dis_3(k * nq);
    std::vector<faiss::idx_t> got_ids_3(k * nq);
    run_search(got_dis_3, got_ids_3);
    ASSERT_EQ(ref_ids_1, got_ids_3);
    ASSERT_EQ(ref_dis_1, got_dis_3);
}
// Same zero-copy aliasing scheme as zerocopy_flatcodes, but for binary
// flat codes (IndexBinaryFlat, Hamming distances as int32).
TEST(TestZeroCopy, zerocopy_binary_flatcodes) {
    const size_t nt = 1000;
    const size_t nq = 10;
    const size_t d = 64; // dimension in bits
    const size_t d8 = (d + 7) / 8; // dimension in bytes
    const size_t k = 25;

    // three distinct random binary datasets
    std::vector<uint8_t> xt1 = make_binary_data(nt, d8, 123);
    std::vector<uint8_t> xt2 = make_binary_data(nt, d8, 456);
    std::vector<uint8_t> xq = make_binary_data(nq, d8, 789);
    ASSERT_NE(xt1, xt2);

    // reference index and results #1
    faiss::IndexBinaryFlat index1(d);
    index1.train(nt, xt1.data());
    index1.add(nt, xt1.data());
    std::vector<int32_t> ref_dis_1(k * nq);
    std::vector<faiss::idx_t> ref_ids_1(k * nq);
    index1.search(nq, xq.data(), k, ref_dis_1.data(), ref_ids_1.data());

    // reference index and results #2
    faiss::IndexBinaryFlat index2(d);
    index2.train(nt, xt2.data());
    index2.add(nt, xt2.data());
    std::vector<int32_t> ref_dis_2(k * nq);
    std::vector<faiss::idx_t> ref_ids_2(k * nq);
    index2.search(nq, xq.data(), k, ref_dis_2.data(), ref_ids_2.data());

    // the two indices must disagree, otherwise the test proves nothing
    ASSERT_NE(ref_dis_1, ref_dis_2);
    ASSERT_NE(ref_ids_1, ref_ids_2);

    // serialize both indices into byte vectors
    faiss::VectorIOWriter writer1;
    faiss::write_index_binary(&index1, &writer1);
    faiss::VectorIOWriter writer2;
    faiss::write_index_binary(&index2, &writer2);
    ASSERT_EQ(writer1.data.size(), writer2.data.size());

    // working buffer, initially a copy of serialization #1
    std::vector<uint8_t> buffer = writer1.data;

    // zero-copy deserialization: the resulting index aliases `buffer`
    faiss::ZeroCopyIOReader reader(buffer.data(), buffer.size());
    std::unique_ptr<faiss::IndexBinary> index1zc(
            faiss::read_index_binary(&reader));
    ASSERT_NE(index1zc, nullptr);

    // byte-wise in-place copy; dst.data() must not move because the
    // zero-copy index points into it
    auto overwrite = [](std::vector<uint8_t>& dst,
                        const std::vector<uint8_t>& src) {
        for (size_t i = 0; i < dst.size(); i++) {
            dst[i] = src[i];
        }
    };

    auto run_search = [&](std::vector<int32_t>& dis,
                          std::vector<faiss::idx_t>& ids) {
        index1zc->search(nq, xq.data(), k, dis.data(), ids.data());
    };

    // initially the zero-copy index behaves like index1
    std::vector<int32_t> got_dis_1(k * nq);
    std::vector<faiss::idx_t> got_ids_1(k * nq);
    run_search(got_dis_1, got_ids_1);
    ASSERT_EQ(ref_ids_1, got_ids_1);
    ASSERT_EQ(ref_dis_1, got_dis_1);

    // rewrite the aliased bytes with serialization #2 -> behaves like index2
    overwrite(buffer, writer2.data);
    std::vector<int32_t> got_dis_2(k * nq);
    std::vector<faiss::idx_t> got_ids_2(k * nq);
    run_search(got_dis_2, got_ids_2);
    ASSERT_EQ(ref_ids_2, got_ids_2);
    ASSERT_EQ(ref_dis_2, got_dis_2);

    // and back to serialization #1
    overwrite(buffer, writer1.data);
    std::vector<int32_t> got_dis_3(k * nq);
    std::vector<faiss::idx_t> got_ids_3(k * nq);
    run_search(got_dis_3, got_ids_3);
    ASSERT_EQ(ref_ids_1, got_ids_3);
    ASSERT_EQ(ref_dis_1, got_dis_3);
}

View File

@@ -0,0 +1,427 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import torch # usort: skip
import unittest # usort: skip
import numpy as np # usort: skip
import faiss # usort: skip
import faiss.contrib.torch_utils # usort: skip
from faiss.contrib import datasets
from faiss.contrib.torch import clustering, quantization
class TestTorchUtilsCPU(unittest.TestCase):
    """Exercises faiss.contrib.torch_utils on CPU indexes: every wrapped
    method must accept torch CPU tensors and numpy arrays interchangeably,
    both as inputs and as pre-allocated output buffers, and both code
    paths must produce identical results."""

    # tests add, search
    def test_lookup(self):
        d = 128
        index = faiss.IndexFlatL2(d)

        # Add to CPU index with torch CPU tensors
        xb_torch = torch.rand(10000, d)
        index.add(xb_torch)

        # reconstruct() returns a torch tensor equal to the stored row
        y_torch = index.reconstruct(10)
        self.assertTrue(torch.equal(y_torch, xb_torch[10]))

        # Add to CPU index with numpy arrays
        xb_np = torch.rand(500, d).numpy()
        index.add(xb_np)
        self.assertEqual(index.ntotal, 10500)
        y_np = np.zeros(d, dtype=np.float32)
        index.reconstruct(10100, y_np)
        self.assertTrue(np.array_equal(y_np, xb_np[100]))

        # Search with numpy input
        xq_torch = torch.rand(10, d, dtype=torch.float32)
        d_np, I_np = index.search(xq_torch.numpy(), 5)

        # Search with torch input
        d_torch, I_torch = index.search(xq_torch, 5)

        # The two code paths should be equivalent
        self.assertTrue(np.array_equal(d_np, d_torch.numpy()))
        self.assertTrue(np.array_equal(I_np, I_torch.numpy()))

        # Search with numpy pre-allocated output arrays
        d_np_input = np.zeros((10, 5), dtype=np.float32)
        I_np_input = np.zeros((10, 5), dtype=np.int64)
        index.search(xq_torch.numpy(), 5, d_np_input, I_np_input)
        self.assertTrue(np.array_equal(d_np, d_np_input))
        self.assertTrue(np.array_equal(I_np, I_np_input))

        # Search with torch pre-allocated output tensors
        d_torch_input = torch.zeros(10, 5, dtype=torch.float32)
        I_torch_input = torch.zeros(10, 5, dtype=torch.int64)
        index.search(xq_torch, 5, d_torch_input, I_torch_input)
        self.assertTrue(np.array_equal(d_torch_input.numpy(), d_np))
        self.assertTrue(np.array_equal(I_torch_input.numpy(), I_np))

    # tests train, add_with_ids
    def test_train_add_with_ids(self):
        d = 32
        nlist = 5
        quantizer = faiss.IndexFlatL2(d)
        index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
        xb = torch.rand(1000, d, dtype=torch.float32)
        index.train(xb)

        # Test add_with_ids with torch cpu tensors
        ids = torch.arange(1000, 1000 + xb.shape[0], dtype=torch.int64)
        index.add_with_ids(xb, ids)
        # a vector's nearest neighbor is itself, so its custom id comes back
        _, I = index.search(xb[10:20], 1)
        self.assertTrue(torch.equal(I.view(10), ids[10:20]))

        # Test add_with_ids with numpy arrays
        index.reset()
        index.train(xb.numpy())
        index.add_with_ids(xb.numpy(), ids.numpy())
        _, I = index.search(xb.numpy()[10:20], 1)
        self.assertTrue(np.array_equal(I.reshape(10), ids.numpy()[10:20]))

    # tests reconstruct, reconstruct_n
    def test_reconstruct(self):
        d = 32
        index = faiss.IndexFlatL2(d)
        xb = torch.rand(100, d, dtype=torch.float32)
        index.add(xb)

        # Test reconstruct with torch cpu (native return)
        y = index.reconstruct(7)
        self.assertTrue(torch.equal(xb[7], y))

        # Test reconstruct with numpy output provided
        y = np.empty(d, dtype=np.float32)
        index.reconstruct(11, y)
        self.assertTrue(np.array_equal(xb.numpy()[11], y))

        # Test reconstruct with torch cpu output provided
        y = torch.empty(d, dtype=torch.float32)
        index.reconstruct(12, y)
        self.assertTrue(torch.equal(xb[12], y))

        # Test reconstruct_n with torch cpu (native return)
        y = index.reconstruct_n(10, 10)
        self.assertTrue(torch.equal(xb[10:20], y))

        # Test reconstruct_n with numpy output provided
        y = np.empty((10, d), dtype=np.float32)
        index.reconstruct_n(20, 10, y)
        self.assertTrue(np.array_equal(xb.cpu().numpy()[20:30], y))

        # Test reconstruct_n with torch cpu output provided
        y = torch.empty(10, d, dtype=torch.float32)
        index.reconstruct_n(40, 10, y)
        self.assertTrue(torch.equal(xb[40:50].cpu(), y))

    # tests assign
    def test_assign(self):
        d = 32
        index = faiss.IndexFlatL2(d)
        xb = torch.rand(1000, d, dtype=torch.float32)
        index.add(xb)
        # reference index over the same data, fed via numpy
        index_ref = faiss.IndexFlatL2(d)
        index_ref.add(xb.numpy())

        # Test assign with native cpu output
        xq = torch.rand(10, d, dtype=torch.float32)
        labels = index.assign(xq, 5)
        labels_ref = index_ref.assign(xq.cpu(), 5)
        self.assertTrue(torch.equal(labels, labels_ref))

        # Test assign with np input
        labels = index.assign(xq.numpy(), 5)
        labels_ref = index_ref.assign(xq.numpy(), 5)
        self.assertTrue(np.array_equal(labels, labels_ref))

        # Test assign with numpy output provided
        labels = np.empty((xq.shape[0], 5), dtype='int64')
        index.assign(xq.numpy(), 5, labels)
        self.assertTrue(np.array_equal(labels, labels_ref))

        # Test assign with torch cpu output provided
        labels = torch.empty(xq.shape[0], 5, dtype=torch.int64)
        index.assign(xq, 5, labels)
        labels_ref = index_ref.assign(xq, 5)
        self.assertTrue(torch.equal(labels, labels_ref))

    # tests remove_ids
    def test_remove_ids(self):
        # remove_ids is only implemented for cpu index + numpy at the moment
        d = 32
        quantizer = faiss.IndexFlatL2(d)
        index = faiss.IndexIVFFlat(quantizer, d, 5)
        # direct map is needed for reconstruct() on an IVF index
        index.make_direct_map()
        index.set_direct_map_type(faiss.DirectMap.Hashtable)
        xb = torch.rand(1000, d, dtype=torch.float32)
        ids = torch.arange(1000, 1000 + xb.shape[0], dtype=torch.int64)
        index.train(xb)
        index.add_with_ids(xb, ids)

        ids_remove = np.array([1010], dtype=np.int64)
        index.remove_ids(ids_remove)

        # We should find this
        y = index.reconstruct(1011)
        self.assertTrue(np.array_equal(xb[11].numpy(), y))

        # We should not find this (it was removed)
        with self.assertRaises(RuntimeError):
            y = index.reconstruct(1010)

        # Torch input is not yet supported for remove_ids
        ids_remove = torch.tensor([1012], dtype=torch.int64)
        with self.assertRaises(AssertionError):
            index.remove_ids(ids_remove)

    # tests update_vectors
    def test_update_vectors(self):
        d = 32
        # two identical indexes, one driven via numpy, one via torch
        quantizer_np = faiss.IndexFlatL2(d)
        index_np = faiss.IndexIVFFlat(quantizer_np, d, 5)
        index_np.make_direct_map()
        index_np.set_direct_map_type(faiss.DirectMap.Hashtable)
        quantizer_torch = faiss.IndexFlatL2(d)
        index_torch = faiss.IndexIVFFlat(quantizer_torch, d, 5)
        index_torch.make_direct_map()
        index_torch.set_direct_map_type(faiss.DirectMap.Hashtable)

        xb = torch.rand(1000, d, dtype=torch.float32)
        ids = torch.arange(1000, 1000 + xb.shape[0], dtype=torch.int64)
        index_np.train(xb.numpy())
        index_np.add_with_ids(xb.numpy(), ids.numpy())
        index_torch.train(xb)
        index_torch.add_with_ids(xb, ids)

        # update the same 10 vectors on both sides
        xb_up = torch.rand(10, d, dtype=torch.float32)
        ids_up = ids[0:10]
        index_np.update_vectors(ids_up.numpy(), xb_up.numpy())
        index_torch.update_vectors(ids_up, xb_up)

        # both indexes must now agree on search results
        xq = torch.rand(10, d, dtype=torch.float32)
        D_np, I_np = index_np.search(xq.numpy(), 5)
        D_torch, I_torch = index_torch.search(xq, 5)
        self.assertTrue(np.array_equal(D_np, D_torch.numpy()))
        self.assertTrue(np.array_equal(I_np, I_torch.numpy()))

    # tests range_search
    def test_range_search(self):
        torch.manual_seed(10)
        d = 32
        index = faiss.IndexFlatL2(d)
        xb = torch.rand(100, d, dtype=torch.float32)
        index.add(xb)

        # torch cpu as ground truth
        thresh = 2.9
        xq = torch.rand(10, d, dtype=torch.float32)
        lims, D, I = index.range_search(xq, thresh)

        # compare against the numpy code path
        lims_np, D_np, I_np = index.range_search(xq.numpy(), thresh)
        self.assertTrue(np.array_equal(lims.numpy(), lims_np))
        self.assertTrue(np.array_equal(D.numpy(), D_np))
        self.assertTrue(np.array_equal(I.numpy(), I_np))

    # tests search_and_reconstruct
    def test_search_and_reconstruct(self):
        d = 32
        nlist = 10
        M = 4
        k = 5
        quantizer = faiss.IndexFlatL2(d)
        index = faiss.IndexIVFPQ(quantizer, d, nlist, M, 4)
        xb = torch.rand(1000, d, dtype=torch.float32)
        index.train(xb)
        # add a different set than the training one
        xb = torch.rand(500, d, dtype=torch.float32)
        index.add(xb)

        # torch cpu as ground truth
        xq = torch.rand(10, d, dtype=torch.float32)
        D, I, R = index.search_and_reconstruct(xq, k)

        # compare against the numpy code path
        D_np, I_np, R_np = index.search_and_reconstruct(xq.numpy(), k)
        self.assertTrue(np.array_equal(D.numpy(), D_np))
        self.assertTrue(np.array_equal(I.numpy(), I_np))
        self.assertTrue(np.array_equal(R.numpy(), R_np))

        # numpy pre-allocated outputs
        D_input = np.zeros((xq.shape[0], k), dtype=np.float32)
        I_input = np.zeros((xq.shape[0], k), dtype=np.int64)
        R_input = np.zeros((xq.shape[0], k, d), dtype=np.float32)
        index.search_and_reconstruct(xq.numpy(), k, D_input, I_input, R_input)
        self.assertTrue(np.array_equal(D.numpy(), D_input))
        self.assertTrue(np.array_equal(I.numpy(), I_input))
        self.assertTrue(np.array_equal(R.numpy(), R_input))

        # torch pre-allocated outputs
        D_input = torch.zeros(xq.shape[0], k, dtype=torch.float32)
        I_input = torch.zeros(xq.shape[0], k, dtype=torch.int64)
        R_input = torch.zeros(xq.shape[0], k, d, dtype=torch.float32)
        index.search_and_reconstruct(xq, k, D_input, I_input, R_input)
        self.assertTrue(torch.equal(D, D_input))
        self.assertTrue(torch.equal(I, I_input))
        self.assertTrue(torch.equal(R, R_input))

    def test_search_preassigned(self):
        ds = datasets.SyntheticDataset(32, 1000, 100, 10)
        index = faiss.index_factory(32, "IVF20,PQ4np")
        index.train(ds.get_train())
        index.add(ds.get_database())
        index.nprobe = 4
        Dref, Iref = index.search(ds.get_queries(), 10)

        # keep a clone of the quantizer, then mutilate the index's own
        # quantizer so that only the preassigned path can succeed
        quantizer = faiss.clone_index(index.quantizer)
        index.quantizer.reset()
        index.quantizer.add(np.zeros((20, 32), dtype='float32'))

        # test numpy codepath
        Dq, Iq = quantizer.search(ds.get_queries(), 4)
        Dref2, Iref2 = index.search_preassigned(ds.get_queries(), 10, Iq, Dq)
        np.testing.assert_array_equal(Iref, Iref2)
        np.testing.assert_array_equal(Dref, Dref2)

        # test torch codepath
        xq = torch.from_numpy(ds.get_queries())
        Dq, Iq = quantizer.search(xq, 4)
        Dref2, Iref2 = index.search_preassigned(xq, 10, Iq, Dq)
        np.testing.assert_array_equal(Iref, Iref2.numpy())
        np.testing.assert_array_equal(Dref, Dref2.numpy())

    # tests sa_encode, sa_decode
    def test_sa_encode_decode(self):
        d = 16
        index = faiss.IndexScalarQuantizer(d, faiss.ScalarQuantizer.QT_8bit)
        xb = torch.rand(1000, d, dtype=torch.float32)
        index.train(xb)

        # torch cpu as ground truth
        nq = 10
        xq = torch.rand(nq, d, dtype=torch.float32)
        encoded_torch = index.sa_encode(xq)

        # numpy cpu must encode to the same bytes
        encoded_np = index.sa_encode(xq.numpy())
        self.assertTrue(np.array_equal(encoded_torch.numpy(), encoded_np))

        decoded_torch = index.sa_decode(encoded_torch)
        decoded_np = index.sa_decode(encoded_np)
        self.assertTrue(torch.equal(decoded_torch, torch.from_numpy(decoded_np)))

        # torch cpu as output parameter
        encoded_torch_param = torch.zeros(nq, d, dtype=torch.uint8)
        index.sa_encode(xq, encoded_torch_param)
        # BUGFIX: previously compared encoded_torch with itself, which is
        # vacuously true; the output parameter is what must be checked
        self.assertTrue(torch.equal(encoded_torch, encoded_torch_param))

        decoded_torch_param = torch.zeros(nq, d, dtype=torch.float32)
        index.sa_decode(encoded_torch, decoded_torch_param)
        self.assertTrue(torch.equal(decoded_torch, decoded_torch_param))

        # np as output parameter
        encoded_np_param = np.zeros((nq, d), dtype=np.uint8)
        index.sa_encode(xq.numpy(), encoded_np_param)
        self.assertTrue(np.array_equal(encoded_torch.numpy(), encoded_np_param))

        decoded_np_param = np.zeros((nq, d), dtype=np.float32)
        index.sa_decode(encoded_np_param, decoded_np_param)
        self.assertTrue(np.array_equal(decoded_np, decoded_np_param))

    def test_non_contiguous(self):
        d = 128
        index = faiss.IndexFlatL2(d)
        # a transposed tensor is not C-contiguous and must be rejected
        xb = torch.rand(d, 100).transpose(0, 1)
        with self.assertRaises(AssertionError):
            index.add(xb)
        # disabled since we now accept non-contiguous arrays
        # with self.assertRaises(ValueError):
        #     index.add(xb.numpy())
class TestClustering(unittest.TestCase):
    def test_python_kmeans(self):
        """The pure-python kmeans must be roughly as good as faiss.Kmeans."""
        ds = datasets.SyntheticDataset(32, 10000, 0, 0)
        x = ds.get_train()
        # deliberately degenerate distribution to stress-test the split code
        xt = x[:10000].copy()
        xt[:5000] = x[0]
        # C++ reference clustering and its quantization error
        km_ref = faiss.Kmeans(ds.d, 100, niter=10)
        km_ref.train(xt)
        err_ref = faiss.knn(xt, km_ref.centroids, 1)[0].sum()
        # pure pytorch clustering on the same data
        data = clustering.DatasetAssign(torch.from_numpy(xt))
        centroids = clustering.kmeans(100, data, 10).numpy()
        err_new = faiss.knn(xt, centroids, 1)[0].sum()
        # allow 10% slack on the quantization error
        self.assertLess(err_new, err_ref * 1.1)
class TestQuantization(unittest.TestCase):
    def test_python_product_quantization(self):
        """The pure-pytorch PQ must be about as accurate as faiss's."""
        d = 64
        n = 10000
        cs = 4
        nbits = 8
        M = 4
        x = np.random.random(size=(n, d)).astype('float32')
        # C++ reference: train, round-trip, measure reconstruction error
        pq_ref = faiss.ProductQuantizer(d, cs, nbits)
        pq_ref.train(x)
        codes = pq_ref.compute_codes(x)
        ref_err = ((x - pq_ref.decode(codes)) ** 2).sum()
        # pure pytorch implementation on the same data
        xt = torch.from_numpy(x)
        pq_new = quantization.ProductQuantizer(d, M, nbits)
        pq_new.train(xt)
        new_codes = pq_new.encode(xt)
        new_err = ((xt - pq_new.decode(new_codes)) ** 2).sum()
        # the two reconstruction errors should be close
        self.assertLess(abs(ref_err - new_err), 100)

View File

@@ -0,0 +1,374 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import torch # usort: skip
from torch import nn # usort: skip
import unittest # usort: skip
import numpy as np # usort: skip
import faiss # usort: skip
from faiss.contrib import datasets # usort: skip
from faiss.contrib.inspect_tools import get_additive_quantizer_codebooks # usort: skip
class TestLayer(unittest.TestCase):
    @torch.no_grad()
    def test_Embedding(self):
        """faiss.Embedding must reproduce nn.Embedding lookups."""
        torch.manual_seed(123)
        emb_ref = nn.Embedding(40, 50)
        idx = torch.randint(40, (25, ))
        ref_batch = emb_ref(idx)
        # faiss side: indices are passed as an int32 column tensor
        emb_faiss = faiss.Embedding(emb_ref)
        idx_faiss = faiss.Int32Tensor2D(idx[:, None].to(dtype=torch.int32))
        new_batch = emb_faiss(idx_faiss).numpy()
        np.testing.assert_allclose(ref_batch.numpy(), new_batch, atol=2e-6)

    @torch.no_grad()
    def do_test_Linear(self, bias):
        """faiss.Linear must reproduce nn.Linear, with or without bias."""
        torch.manual_seed(123)
        linear_ref = nn.Linear(50, 40, bias=bias)
        x = torch.randn(25, 50)
        ref_y = linear_ref(x)
        linear_faiss = faiss.Linear(linear_ref)
        y = linear_faiss(faiss.Tensor2D(x))
        np.testing.assert_allclose(ref_y.numpy(), y.numpy(), atol=2e-6)

    def test_Linear(self):
        self.do_test_Linear(True)

    def test_Linear_nobias(self):
        self.do_test_Linear(False)
######################################################
# QINCo Pytorch implementation copied from
# https://github.com/facebookresearch/Qinco/blob/main/model_qinco.py
#
# The implementation is copied here to avoid introducing an additional
# dependency.
######################################################
def pairwise_distances(a, b):
    """Squared L2 distance matrix between rows of `a` (n, d) and rows of
    `b` (m, d), using the ||a||^2 + ||b||^2 - 2 a.b expansion."""
    sq_a = (a ** 2).sum(-1)
    sq_b = (b ** 2).sum(-1)
    return sq_a[:, None] + sq_b - 2 * a @ b.T
def compute_batch_distances(a, b):
    """Batched squared L2 distances: `a` is (bs, n, d), `b` is (bs, m, d),
    result is (bs, n, m)."""
    sq_a = (a ** 2).sum(-1)
    sq_b = (b ** 2).sum(-1)
    cross = torch.bmm(a, b.transpose(2, 1))
    return sq_a.unsqueeze(-1) + sq_b.unsqueeze(1) - 2 * cross
def assign_batch_multiple(x, zqs):
    """For each row x[i] of shape (bs, d), pick the nearest of its K
    candidate vectors zqs[i] (zqs has shape (bs, K, d)).
    Returns (indices of shape (bs,), chosen vectors of shape (bs, d))."""
    bs, d = x.shape
    bs, K, d = zqs.shape
    # squared L2 distance from each x[i] to each of its K candidates
    dists = compute_batch_distances(x.unsqueeze(1), zqs).squeeze(1)  # [bs, K]
    best = torch.argmin(dists, dim=1).unsqueeze(1)  # [bs, 1]
    chosen = torch.gather(zqs, dim=1, index=best.unsqueeze(-1).repeat(1, 1, d))
    return best.squeeze(1), chosen.squeeze(1)
def assign_to_codebook(x, c, bs=16384):
    """Nearest-centroid assignment: for each row of `x` return the index of
    the closest row of `c` under squared L2 distance. Large problems are
    processed in bs-sized tiles to bound peak memory."""
    nq, d = x.shape
    nb, d2 = c.shape
    assert d == d2
    if nq * nb < bs * bs:
        # small enough to materialize the full distance table at once
        return pairwise_distances(x, c).argmin(1)
    # otherwise tile the computation to avoid OOM
    res = torch.empty((nq,), dtype=torch.int64, device=x.device)
    cnorms = (c ** 2).sum(1)
    for i in range(0, nq, bs):
        xnorms = (x[i : i + bs] ** 2).sum(1, keepdim=True)
        for j in range(0, nb, bs):
            dis = xnorms + cnorms[j : j + bs] - 2 * x[i : i + bs] @ c[j : j + bs].T
            dmini, imini = dis.min(1)
            if j == 0:
                dmin, imin = dmini, imini
            else:
                # keep the running minimum over centroid tiles
                (better,) = torch.where(dmini < dmin)
                dmin[better] = dmini[better]
                imin[better] = imini[better] + j  # imini is tile-local
        res[i : i + bs] = imin
    return res
class QINCoStep(nn.Module):
    """
    One quantization step for QINCo.
    Contains the codebook, concatenation block, and residual blocks
    """

    def __init__(self, d, K, L, h):
        # d: vector dimension, K: codebook size, L: number of residual
        # blocks, h: hidden dimension inside each residual block
        nn.Module.__init__(self)
        self.d, self.K, self.L, self.h = d, K, L, h
        self.codebook = nn.Embedding(K, d)
        self.MLPconcat = nn.Linear(2 * d, d)
        self.residual_blocks = []
        for l in range(L):
            residual_block = nn.Sequential(
                nn.Linear(d, h, bias=False), nn.ReLU(), nn.Linear(h, d, bias=False)
            )
            # register via add_module so parameters are tracked even though
            # residual_blocks itself is a plain python list
            self.add_module(f"residual_block{l}", residual_block)
            self.residual_blocks.append(residual_block)

    def decode(self, xhat, codes):
        # look up the codebook entries, then condition them on the current
        # reconstruction xhat via the concatenation MLP and residual blocks
        zqs = self.codebook(codes)
        cc = torch.concatenate((zqs, xhat), 1)
        zqs = zqs + self.MLPconcat(cc)
        for residual_block in self.residual_blocks:
            zqs = zqs + residual_block(zqs)
        return zqs

    def encode(self, xhat, x):
        # Exhaustive encoding: decode all K codebook entries for every input
        # row and keep the one closest to x.
        # we are trying out the whole codebook
        zqs = self.codebook.weight
        K, d = zqs.shape
        bs, d = xhat.shape
        # repeat so that they are of size bs * K
        zqs_r = zqs.repeat(bs, 1, 1).reshape(bs * K, d)
        xhat_r = xhat.reshape(bs, 1, d).repeat(1, K, 1).reshape(bs * K, d)
        # pass on batch of size bs * K
        cc = torch.concatenate((zqs_r, xhat_r), 1)
        zqs_r = zqs_r + self.MLPconcat(cc)
        for residual_block in self.residual_blocks:
            zqs_r = zqs_r + residual_block(zqs_r)
        # possible next steps
        zqs_r = zqs_r.reshape(bs, K, d) + xhat.reshape(bs, 1, d)
        codes, xhat_next = assign_batch_multiple(x, zqs_r)
        # return the code and the increment to add to xhat
        return codes, xhat_next - xhat
class QINCo(nn.Module):
    """
    QINCo quantizer, built from a chain of residual quantization steps
    """

    def __init__(self, d, K, L, M, h):
        # d: vector dimension, K: codebook size per step, L: residual blocks
        # per step, M: total number of quantization steps, h: hidden dim
        nn.Module.__init__(self)
        self.d, self.K, self.L, self.M, self.h = d, K, L, M, h
        # first step is a plain codebook; steps 1..M-1 are QINCoSteps
        self.codebook0 = nn.Embedding(K, d)
        self.steps = []
        for m in range(1, M):
            step = QINCoStep(d, K, L, h)
            self.add_module(f"step{m}", step)
            self.steps.append(step)

    def decode(self, codes):
        # codes is (bs, M); accumulate the reconstruction step by step
        xhat = self.codebook0(codes[:, 0])
        for i, step in enumerate(self.steps):
            xhat = xhat + step.decode(xhat, codes[:, i + 1])
        return xhat

    def encode(self, x, code0=None):
        """
        Encode a batch of vectors x to codes of length M.
        If this function is called from IVF-QINCo, codes are 1 index longer,
        due to the first index being the IVF index, and codebook0 is the IVF codebook.
        """
        M = len(self.steps) + 1
        bs, d = x.shape
        codes = torch.zeros(bs, M, dtype=int, device=x.device)
        if code0 is None:
            # at IVF training time, the code0 is fixed (and precomputed)
            code0 = assign_to_codebook(x, self.codebook0.weight)
        codes[:, 0] = code0
        xhat = self.codebook0.weight[code0]
        for i, step in enumerate(self.steps):
            # each step refines xhat and contributes one code
            codes[:, i + 1], toadd = step.encode(xhat, x)
            xhat = xhat + toadd
        return codes, xhat
######################################################
# QINCo tests
######################################################
def copy_QINCoStep(step):
    """Clone a pytorch QINCoStep into its faiss counterpart, copying every
    weight tensor one by one."""
    out = faiss.QINCoStep(step.d, step.K, step.L, step.h)
    out.codebook.from_torch(step.codebook)
    out.MLPconcat.from_torch(step.MLPconcat)
    for i in range(step.L):
        block = step.residual_blocks[i]
        target = out.get_residual_block(i)
        target.linear1.from_torch(block[0])  # first Linear of the Sequential
        target.linear2.from_torch(block[2])  # second Linear (index 1 is ReLU)
    return out
class TestQINCoStep(unittest.TestCase):
    @torch.no_grad()
    def test_decode(self):
        """The faiss C++ QINCoStep.decode must match the pytorch reference."""
        torch.manual_seed(123)
        ref_step = QINCoStep(d=16, K=20, L=2, h=8)
        codes = torch.randint(0, 20, (10, ))
        xhat = torch.randn(10, 16)
        ref_decode = ref_step.decode(xhat, codes)
        # build the faiss step directly from the pytorch module
        faiss_step = faiss.QINCoStep(ref_step)
        codes_faiss = faiss.Int32Tensor2D(codes[:, None].to(dtype=torch.int32))
        # codebook lookups must agree exactly
        np.testing.assert_array_equal(
            ref_step.codebook(codes).numpy(),
            faiss_step.codebook(codes_faiss).numpy()
        )
        new_decode = faiss_step.decode(faiss.Tensor2D(xhat), codes_faiss)
        np.testing.assert_allclose(
            ref_decode.numpy(),
            new_decode.numpy(),
            atol=2e-6
        )

    @torch.no_grad()
    def test_encode(self):
        """The faiss C++ QINCoStep.encode must match the pytorch reference."""
        torch.manual_seed(123)
        ref_step = QINCoStep(d=16, K=20, L=2, h=8)
        # create a plausible input x by decoding random codes
        seed_codes = torch.randint(0, 20, (10, ))
        xhat = torch.zeros(10, 16)
        x = ref_step.decode(xhat, seed_codes)
        del seed_codes
        ref_codes, ref_toadd = ref_step.encode(xhat, x)
        # here the faiss step is assembled weight by weight
        faiss_step = copy_QINCoStep(ref_step)
        toadd_faiss = faiss.Tensor2D(10, 16)
        new_codes = faiss_step.encode(
            faiss.Tensor2D(xhat), faiss.Tensor2D(x), toadd_faiss)
        np.testing.assert_allclose(
            ref_codes.numpy(),
            new_codes.numpy().ravel(),
            atol=2e-6
        )
        np.testing.assert_allclose(
            ref_toadd.numpy(), toadd_faiss.numpy(), atol=2e-6)
class TestQINCo(unittest.TestCase):
    @torch.no_grad()
    def test_decode(self):
        """faiss.QINCo.decode must match the pytorch reference model."""
        torch.manual_seed(123)
        ref_model = QINCo(d=16, K=20, L=2, M=3, h=8)
        codes = torch.randint(0, 20, (10, 3))
        x_ref = ref_model.decode(codes)
        faiss_model = faiss.QINCo(ref_model)
        x_new = faiss_model.decode(
            faiss.Int32Tensor2D(codes.to(dtype=torch.int32)))
        np.testing.assert_allclose(x_ref.numpy(), x_new.numpy(), atol=2e-6)

    @torch.no_grad()
    def test_encode(self):
        """faiss.QINCo.encode must match the pytorch reference model."""
        torch.manual_seed(123)
        ref_model = QINCo(d=16, K=20, L=2, M=3, h=8)
        # decode random codes to obtain vectors that encode cleanly
        seed_codes = torch.randint(0, 20, (10, 3))
        x = ref_model.decode(seed_codes)
        del seed_codes
        ref_codes, _ = ref_model.encode(x)
        faiss_model = faiss.QINCo(ref_model)
        new_codes = faiss_model.encode(faiss.Tensor2D(x))
        np.testing.assert_allclose(
            ref_codes.numpy(), new_codes.numpy(), atol=2e-6)
######################################################
# Test index
######################################################
class TestIndexQINCo(unittest.TestCase):
    def test_search(self):
        """
        We can't train qinco with just Faiss so we just train a RQ and use the
        codebooks in QINCo with L = 0 residual blocks
        """
        ds = datasets.SyntheticDataset(32, 1000, 100, 0)
        # prepare reference quantizer
        M = 5
        index_ref = faiss.index_factory(ds.d, "RQ5x4")
        rq = index_ref.rq
        rq.train_type = faiss.ResidualQuantizer.Train_default
        rq.max_beam_size = 1    # beam search not implemented for QINCo (yet)
        index_ref.train(ds.get_train())
        # extract the trained RQ codebooks
        codebooks = get_additive_quantizer_codebooks(rq)
        # convert to QINCo index: L=0 residual blocks, so each step reduces
        # to a plain codebook lookup, matching the RQ exactly
        qinco_index = faiss.IndexQINCo(ds.d, M, 4, 0, ds.d)
        qinco = qinco_index.qinco
        qinco.codebook0.from_array(codebooks[0])
        for i in range(1, qinco.M):
            step = qinco.get_step(i - 1)
            step.codebook.from_array(codebooks[i])
            # MLPConcat left at zero -- it's added to the backbone
        qinco_index.is_trained = True
        # verify that the encoding gives the same results
        ref_codes = rq.compute_codes(ds.get_database())
        ref_decoded = rq.decode(ref_codes)
        new_decoded = qinco_index.sa_decode(ref_codes)
        np.testing.assert_allclose(ref_decoded, new_decoded, atol=2e-6)
        new_codes = qinco_index.sa_encode(ds.get_database())
        np.testing.assert_array_equal(ref_codes, new_codes)
        # verify that search gives the same results
        Dref, Iref = index_ref.search(ds.get_queries(), 5)
        Dnew, Inew = qinco_index.search(ds.get_queries(), 5)
        np.testing.assert_array_equal(Iref, Inew)
        np.testing.assert_allclose(Dref, Dnew, atol=2e-6)