Initial commit

This commit is contained in:
yichuan520030910320
2025-06-30 09:05:05 +00:00
commit 46f6cc100b
1231 changed files with 278432 additions and 0 deletions

View File

@@ -0,0 +1,82 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

# Build configuration for the _diskannpy Python extension module.
cmake_minimum_required(VERSION 3.18...3.22)
set(CMAKE_CXX_STANDARD 17)

# Allow an externally supplied interpreter (e.g. from a pip/scikit-build
# driven build) to override the one find_package(Python3) would locate.
if (PYTHON_EXECUTABLE)
    set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE})
endif()

find_package(Python3 COMPONENTS Interpreter Development.Module NumPy REQUIRED)

# Ask the interpreter where pybind11's CMake package lives, so find_package
# works even when pybind11 was installed via pip rather than system-wide.
execute_process(COMMAND ${Python3_EXECUTABLE} -c "import pybind11; print(pybind11.get_cmake_dir())"
                OUTPUT_VARIABLE _tmp_dir
                OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT)
list(APPEND CMAKE_PREFIX_PATH "${_tmp_dir}")
# Now we can find pybind11
find_package(pybind11 CONFIG REQUIRED)

# NOTE(review): _numpy_include is computed here but never referenced in this
# file — presumably a leftover; confirm it is unused elsewhere before removing.
execute_process(COMMAND ${Python3_EXECUTABLE} -c "import numpy; print(numpy.get_include())"
                OUTPUT_VARIABLE _numpy_include
                OUTPUT_STRIP_TRAILING_WHITESPACE COMMAND_ECHO STDOUT)
# The module is declared with add_library rather than pybind11_add_module so
# that we control target_link_libraries ourselves; pybind11_extension() and
# pybind11_strip() below apply the equivalent per-target settings.
# see https://pybind11.readthedocs.io/en/latest/compiling.html#advanced-interface-library-targets for more details
add_library(_diskannpy MODULE
    src/module.cpp
    src/builder.cpp
    src/dynamic_memory_index.cpp
    src/static_memory_index.cpp
    src/static_disk_index.cpp
)

# Anchor the include path to this directory so the file still works when it is
# added from a parent project whose current source dir differs.
target_include_directories(_diskannpy AFTER PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)

if (MSVC)
    # MSVC pre-defines _WINDLL for DLL builds; undefine it so the extension is
    # not compiled as a plain Windows DLL.
    target_compile_options(_diskannpy PRIVATE /U_WINDLL)
endif()

target_link_libraries(
    _diskannpy
    PRIVATE
        pybind11::module
        pybind11::lto
        pybind11::windows_extras
        ${PROJECT_NAME}
        ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS}
        ${DISKANN_ASYNC_LIB}
)
pybind11_extension(_diskannpy)

# Bug fix: CMAKE_BUILD_TYPE must be quoted. When no build type is set the
# unquoted expansion collapsed to `if(NOT MSVC AND NOT MATCHES ...)`, which is
# a configure-time error.
if(NOT MSVC AND NOT "${CMAKE_BUILD_TYPE}" MATCHES "Debug|RelWithDebInfo")
    # Strip unnecessary sections of the binary on Linux/macOS
    pybind11_strip(_diskannpy)
endif()

set_target_properties(_diskannpy PROPERTIES CXX_VISIBILITY_PRESET "hidden"
                                            CUDA_VISIBILITY_PRESET "hidden")

# generally, the VERSION_INFO flag is set by pyproject.toml, by way of setup.py.
# attempts to locate the version within CMake fail because the version has to be available
# to pyproject.toml for the sdist to work after we build it.
if(NOT VERSION_INFO)
    set(VERSION_INFO "0.0.0dev")
endif()
target_compile_definitions(_diskannpy PRIVATE VERSION_INFO="${VERSION_INFO}")
# Add a post-build command to automatically copy the compiled Python module
# into the editable-install source tree.
if(UPDATE_EDITABLE_INSTALL)
    add_custom_command(
        TARGET _diskannpy
        POST_BUILD
        # $<TARGET_FILE:...> names the actual built artifact. The previous
        # wildcard (_diskannpy.cpython-*.so) relied on shell globbing, which
        # `cmake -E copy` does not perform, and was Linux-only.
        COMMAND ${CMAKE_COMMAND} -E copy
                $<TARGET_FILE:_diskannpy>
                ${CMAKE_SOURCE_DIR}/python/src/
        COMMENT "Copying Python module to python/src directory"
        VERBATIM
    )
endif()

View File

@@ -0,0 +1,55 @@
# diskannpy
[![DiskANN Paper](https://img.shields.io/badge/Paper-NeurIPS%3A_DiskANN-blue)](https://papers.nips.cc/paper/9527-rand-nsg-fast-accurate-billion-point-nearest-neighbor-search-on-a-single-node.pdf)
[![DiskANN Paper](https://img.shields.io/badge/Paper-Arxiv%3A_Fresh--DiskANN-blue)](https://arxiv.org/abs/2105.09613)
[![DiskANN Paper](https://img.shields.io/badge/Paper-Filtered--DiskANN-blue)](https://harsha-simhadri.org/pubs/Filtered-DiskANN23.pdf)
[![DiskANN Main](https://github.com/microsoft/DiskANN/actions/workflows/push-test.yml/badge.svg?branch=main)](https://github.com/microsoft/DiskANN/actions/workflows/push-test.yml)
[![PyPI version](https://img.shields.io/pypi/v/diskannpy.svg)](https://pypi.org/project/diskannpy/)
[![Downloads shield](https://pepy.tech/badge/diskannpy)](https://pepy.tech/project/diskannpy)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
## Installation
Packages published to PyPI will always be built using the latest numpy major.minor release (at this time, 1.25).
Conda distributions for versions 1.19-1.25 will be completed as a future effort. In the meantime, feel free to
clone this repository and build it yourself.
## Local Build Instructions
Please see the [Project README](https://github.com/microsoft/DiskANN/blob/main/README.md) for system dependencies and requirements.
After ensuring you've followed the directions to build the project library and executables, you will be ready to also
build `diskannpy` with these additional instructions.
### Changing Numpy Version
In the root folder of DiskANN, there is a file `pyproject.toml`. You will need to edit the version of numpy in both the
`[build-system.requires]` section, as well as the `[project.dependencies]` section. The version numbers must match.
#### Linux
```bash
python3.11 -m venv venv # versions from python3.9 and up should work
source venv/bin/activate
pip install build
python -m build
```
#### Windows
```powershell
py -3.11 -m venv venv # versions from python3.9 and up should work
venv\Scripts\Activate.ps1
pip install build
python -m build
```
The built wheel will be placed in the `dist` directory in your DiskANN root. Install it using `pip install dist/<wheel name>.whl`
## Citations
Please cite this software in your work as:
```
@misc{diskann-github,
author = {Simhadri, Harsha Vardhan and Krishnaswamy, Ravishankar and Srinivasa, Gopal and Subramanya, Suhas Jayaram and Antonijevic, Andrija and Pryce, Dax and Kaczynski, David and Williams, Shane and Gollapudi, Siddarth and Sivashankar, Varun and Karia, Neel and Singh, Aditi and Jaiswal, Shikhar and Mahapatro, Neelam and Adams, Philip and Tower, Bryan and Patel, Yash},
title = {{DiskANN: Graph-structured Indices for Scalable, Fast, Fresh and Filtered Approximate Nearest Neighbor Search}},
url = {https://github.com/Microsoft/DiskANN},
version = {0.6.1},
year = {2023}
}
```

View File

@@ -0,0 +1,152 @@
import diskannpy as dap
import numpy as np
import numpy.typing as npt
import fire
from contextlib import contextmanager
from time import perf_counter
from typing import Tuple
def _basic_setup(
    dtype: str,
    query_vectors_file: str
) -> Tuple[dap.VectorDType, npt.NDArray[dap.VectorDType]]:
    """Validate the requested dtype and load the query vectors.

    Returns the validated diskannpy dtype together with the query vector
    array read from ``query_vectors_file``.
    """
    validated_dtype = dap.valid_dtype(dtype)
    queries = dap.vectors_from_binary(query_vectors_file, dtype=validated_dtype)
    return validated_dtype, queries
def dynamic(
    dtype: str,
    index_vectors_file: str,
    query_vectors_file: str,
    build_complexity: int,
    graph_degree: int,
    K: int,
    search_complexity: int,
    num_insert_threads: int,
    num_search_threads: int,
    gt_file: str = "",
):
    """End-to-end timing benchmark for a DynamicMemoryIndex.

    Inserts all vectors, deletes a random half, consolidates, re-inserts the
    deleted half, then batch-searches — timing every phase via Timer.

    :param dtype: vector element type name, validated via dap.valid_dtype
    :param index_vectors_file: binary file of vectors to index
    :param query_vectors_file: binary file of query vectors
    :param build_complexity: build-time candidate list size (L_build)
    :param graph_degree: maximum graph out-degree (R)
    :param K: neighbors to retrieve per query
    :param search_complexity: search-time candidate list size (L_search)
    :param num_insert_threads: threads used for the insert phases
    :param num_search_threads: threads used for batch_search
    :param gt_file: ground-truth file; currently unused (recall code below is
        commented out)
    """
    _dtype, vectors_to_query = _basic_setup(dtype, query_vectors_file)
    vectors_to_index = dap.vectors_from_binary(index_vectors_file, dtype=_dtype)
    npts, ndims = vectors_to_index.shape
    index = dap.DynamicMemoryIndex(
        "l2", _dtype, ndims, npts, build_complexity, graph_degree
    )
    # Tags are 1-based here: row i gets tag i + 1.
    tags = np.arange(1, npts+1, dtype=np.uintc)
    timer = Timer()
    with timer.time("batch insert"):
        index.batch_insert(vectors_to_index, tags, num_insert_threads)
    # Randomly pick half of the tags for deletion (no duplicates).
    delete_tags = np.random.choice(
        np.array(range(1, npts + 1, 1), dtype=np.uintc),
        size=int(0.5 * npts),
        replace=False
    )
    with timer.time("mark deletion"):
        for tag in delete_tags:
            index.mark_deleted(tag)
    with timer.time("consolidation"):
        index.consolidate_delete()
    # Tags are 1-based while rows are 0-based; shift back to recover vectors.
    deleted_data = vectors_to_index[delete_tags - 1, :]
    with timer.time("re-insertion"):
        index.batch_insert(deleted_data, delete_tags, num_insert_threads)
    with timer.time("batch searched"):
        tags, dists = index.batch_search(vectors_to_query, K, search_complexity, num_search_threads)
    # res_ids = tags - 1
    # if gt_file != "":
    #     recall = utils.calculate_recall_from_gt_file(K, res_ids, gt_file)
    #     print(f"recall@{K} is {recall}")
def static(
    dtype: str,
    index_directory: str,
    index_vectors_file: str,
    query_vectors_file: str,
    build_complexity: int,
    graph_degree: int,
    K: int,
    search_complexity: int,
    num_threads: int,
    gt_file: str = "",
    index_prefix: str = "ann"
):
    """Build, load, and batch-search a static in-memory index, timing phases.

    :param dtype: vector element type name, validated via dap.valid_dtype
    :param index_directory: directory receiving/holding the index artifacts
    :param index_vectors_file: binary file of vectors to index
    :param query_vectors_file: binary file of query vectors
    :param build_complexity: build-time candidate list size (L_build)
    :param graph_degree: maximum graph out-degree (R)
    :param K: neighbors to retrieve per query
    :param search_complexity: search-time candidate list size (L_search)
    :param num_threads: threads used for build and search
    :param gt_file: ground-truth file; currently unused (recall code below is
        commented out)
    :param index_prefix: filename prefix for the index artifacts
    """
    _dtype, vectors_to_query = _basic_setup(dtype, query_vectors_file)
    timer = Timer()
    with timer.time("build static index"):
        # build index
        # NOTE(review): this passes metric=/data_path= keywords, while the
        # sibling app scripts call the same APIs with distance_metric= and no
        # data_path — confirm which keyword set matches the installed diskannpy.
        dap.build_memory_index(
            data=index_vectors_file,
            metric="l2",
            vector_dtype=_dtype,
            index_directory=index_directory,
            complexity=build_complexity,
            graph_degree=graph_degree,
            num_threads=num_threads,
            index_prefix=index_prefix,
            alpha=1.2,
            use_pq_build=False,
            num_pq_bytes=8,
            use_opq=False,
        )
    with timer.time("load static index"):
        # ready search object
        index = dap.StaticMemoryIndex(
            metric="l2",
            vector_dtype=_dtype,
            data_path=index_vectors_file,
            index_directory=index_directory,
            num_threads=num_threads,  # this can be different at search time if you would like
            initial_search_complexity=search_complexity,
            index_prefix=index_prefix
        )
    ids, dists = index.batch_search(vectors_to_query, K, search_complexity, num_threads)
    # if gt_file != "":
    #     recall = utils.calculate_recall_from_gt_file(K, ids, gt_file)
    #     print(f"recall@{K} is {recall}")
def dynamic_clustered():
    """Placeholder for the clustered dynamic benchmark; not implemented yet."""
def generate_clusters():
    """Placeholder for the cluster-generation command; not implemented yet."""
class Timer:
    """Context-manager based stopwatch reporting per-operation and cumulative
    wall-clock time."""

    def __init__(self):
        # Sentinel: the overall clock starts on the first timed operation.
        self._start = -1

    @contextmanager
    def time(self, message: str):
        """Time the enclosed block; print its duration and the running total."""
        began = perf_counter()
        if self._start == -1:
            self._start = began
        yield
        finished = perf_counter()
        per_op = finished - began
        total = finished - self._start
        print(f"Operation {message} completed in {per_op:.3f}s, total: {total:.3f}s")
# CLI dispatch: each subcommand maps to one of the benchmark entry points above.
if __name__ == "__main__":
    fire.Fire({
        "in-mem-dynamic": dynamic,
        "in-mem-static": static,
        "in-mem-dynamic-clustered": dynamic_clustered,
        "generate-clusters": generate_clusters
    }, name="cli")

View File

@@ -0,0 +1,28 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import argparse
import utils
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="cluster", description="kmeans cluster points in a file"
    )
    parser.add_argument("-d", "--data_type", required=True)
    parser.add_argument("-i", "--indexdata_file", required=True)
    parser.add_argument("-k", "--num_clusters", type=int, required=True)
    args = parser.parse_args()

    # Bug fix: the original called get_bin_metadata(indexdata_file) — neither
    # name exists in this module's namespace (NameError at runtime). The helper
    # lives in utils and the path comes from the parsed arguments.
    npts, ndims = utils.get_bin_metadata(args.indexdata_file)
    data = utils.bin_to_numpy(args.data_type, args.indexdata_file)
    offsets, permutation = utils.cluster_and_permute(
        args.data_type, npts, ndims, data, args.num_clusters
    )
    # Write the rows regrouped by cluster to a sibling ".cluster" file.
    permuted_data = data[permutation]
    utils.numpy_to_bin(permuted_data, args.indexdata_file + ".cluster")

View File

@@ -0,0 +1,161 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import argparse
import diskannpy
import numpy as np
import utils
def insert_and_search(
    dtype_str,
    indexdata_file,
    querydata_file,
    Lb,
    graph_degree,
    K,
    Ls,
    num_insert_threads,
    num_search_threads,
    gt_file,
) -> dict[str, float]:
    """
    Insert every vector, delete a random half, consolidate, re-insert the
    deleted half, then batch-search — timing every phase.

    :param dtype_str: "float", "int8" or "uint8"
    :param indexdata_file: binary file of vectors to insert
    :param querydata_file: binary file of query vectors
    :param Lb: build-time candidate list size (L_build)
    :param graph_degree: maximum graph out-degree (R)
    :param K: neighbors to retrieve per query
    :param Ls: search-time candidate list size (L_search)
    :param num_insert_threads: threads used for the insert phases
    :param num_search_threads: threads used for batch_search
    :param gt_file: optional ground-truth file; recall is computed when non-empty
    :return: Dictionary of timings. Key is the event and value is the number of seconds the event took
    """
    timer_results: dict[str, float] = {}
    method_timer: utils.Timer = utils.Timer()

    npts, ndims = utils.get_bin_metadata(indexdata_file)

    if dtype_str == "float":
        dtype = np.float32
    elif dtype_str == "int8":
        dtype = np.int8
    elif dtype_str == "uint8":
        dtype = np.uint8
    else:
        raise ValueError("data_type must be float, int8 or uint8")

    index = diskannpy.DynamicMemoryIndex(
        distance_metric="l2",
        vector_dtype=dtype,
        dimensions=ndims,
        max_vectors=npts,
        complexity=Lb,
        graph_degree=graph_degree
    )

    queries = diskannpy.vectors_from_file(querydata_file, dtype)
    data = diskannpy.vectors_from_file(indexdata_file, dtype)

    # Tags are 1-based (row i receives tag i + 1). np.arange replaces the
    # original element-by-element fill loop over a zeros array.
    tags = np.arange(1, npts + 1, dtype=np.uintc)
    timer = utils.Timer()
    index.batch_insert(data, tags, num_insert_threads)
    compute_seconds = timer.elapsed()
    print('batch_insert complete in', compute_seconds, 's')
    timer_results["batch_insert_seconds"] = compute_seconds

    # Sample half of the tags (without replacement) for deletion.
    delete_tags = np.random.choice(
        np.arange(1, npts + 1, dtype=np.uintc),
        size=int(0.5 * npts),
        replace=False
    )

    timer.reset()
    for tag in delete_tags:
        index.mark_deleted(tag)
    compute_seconds = timer.elapsed()
    timer_results['mark_deletion_seconds'] = compute_seconds
    print('mark deletion completed in', compute_seconds, 's')

    timer.reset()
    index.consolidate_delete()
    compute_seconds = timer.elapsed()
    print('consolidation completed in', compute_seconds, 's')
    timer_results['consolidation_completed_seconds'] = compute_seconds

    # Tags are 1-based, rows 0-based: shift back to recover the deleted rows.
    deleted_data = data[delete_tags - 1, :]

    timer.reset()
    index.batch_insert(deleted_data, delete_tags, num_insert_threads)
    compute_seconds = timer.elapsed()
    print('re-insertion completed in', compute_seconds, 's')
    timer_results['re-insertion_seconds'] = compute_seconds

    timer.reset()
    tags, dists = index.batch_search(queries, K, Ls, num_search_threads)
    compute_seconds = timer.elapsed()
    print('Batch searched', queries.shape[0], ' queries in ', compute_seconds, 's')
    timer_results['batch_searched_seconds'] = compute_seconds

    res_ids = tags - 1
    if gt_file != "":
        timer.reset()
        recall = utils.calculate_recall_from_gt_file(K, res_ids, gt_file)
        print(f"recall@{K} is {recall}")
        timer_results['recall_computed_seconds'] = timer.elapsed()

    timer_results['total_time_seconds'] = method_timer.elapsed()
    return timer_results
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="in-mem-dynamic",
        description="Inserts points dynamically in a clustered order and search from vectors in a file.",
    )
    # Flag names mirror DiskANN's conventional hyperparameter abbreviations.
    parser.add_argument("-d", "--data_type", required=True)
    parser.add_argument("-i", "--indexdata_file", required=True)
    parser.add_argument("-q", "--querydata_file", required=True)
    parser.add_argument("-Lb", "--Lbuild", default=50, type=int)
    parser.add_argument("-Ls", "--Lsearch", default=50, type=int)
    parser.add_argument("-R", "--graph_degree", default=32, type=int)
    parser.add_argument("-TI", "--num_insert_threads", default=8, type=int)
    parser.add_argument("-TS", "--num_search_threads", default=8, type=int)
    parser.add_argument("-K", default=10, type=int)
    parser.add_argument("--gt_file", default="")
    parser.add_argument("--json_timings_output", required=False, default=None, help="File to write out timings to as JSON. If not specified, timings will not be written out.")
    args = parser.parse_args()

    timings = insert_and_search(
        args.data_type,
        args.indexdata_file,
        args.querydata_file,
        args.Lbuild,
        args.graph_degree,  # Build args
        args.K,
        args.Lsearch,
        args.num_insert_threads,
        args.num_search_threads,  # search args
        args.gt_file,
    )
    if args.json_timings_output is not None:
        import json
        # Record the output path inside the payload so runs are self-describing.
        timings['log_file'] = args.json_timings_output
        with open(args.json_timings_output, "w") as f:
            json.dump(timings, f)
"""
An ingest optimized example with SIFT1M
source venv/bin/activate
python python/apps/in-mem-dynamic.py -d float \
-i "$HOME/data/sift/sift_base.fbin" -q "$HOME/data/sift/sift_query.fbin" --gt_file "$HOME/data/sift/gt100_base" \
-Lb 10 -R 30 -Ls 200
"""

View File

@@ -0,0 +1,149 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import argparse
from xml.dom.pulldom import default_bufsize
import diskannpy
import numpy as np
import utils
def build_and_search(
    metric,
    dtype_str,
    index_directory,
    indexdata_file,
    querydata_file,
    Lb,
    graph_degree,
    K,
    Ls,
    num_threads,
    gt_file,
    index_prefix,
    search_only
) -> dict[str, float]:
    """
    Optionally build, then load and batch-query a static in-memory index,
    timing each phase.

    :param metric: distance metric name, e.g. "l2"
    :param dtype_str: "float", "int8" or "uint8"
    :param index_directory: directory holding/receiving the index artifacts
    :param indexdata_file: binary file of vectors to index
    :param querydata_file: binary file of query vectors
    :param Lb: build-time candidate list size (L_build)
    :param graph_degree: maximum graph out-degree (R)
    :param K: neighbors to retrieve per query (also the recall depth)
    :param Ls: search-time candidate list size (L_search)
    :param num_threads: threads used for build and search
    :param gt_file: optional ground-truth file; recall computed when non-empty
    :param index_prefix: filename prefix of the index artifacts
    :param search_only: skip the build phase when truthy
    :return: Dictionary of timings. Key is the event and value is the number of seconds the event took
             in wall-clock-time.
    """
    timer_results: dict[str, float] = {}
    method_timer: utils.Timer = utils.Timer()

    if dtype_str == "float":
        dtype = np.single
    elif dtype_str == "int8":
        dtype = np.byte
    elif dtype_str == "uint8":
        dtype = np.ubyte
    else:
        raise ValueError("data_type must be float, int8 or uint8")

    # build index
    if not search_only:
        build_index_timer = utils.Timer()
        diskannpy.build_memory_index(
            data=indexdata_file,
            distance_metric=metric,
            vector_dtype=dtype,
            index_directory=index_directory,
            complexity=Lb,
            graph_degree=graph_degree,
            num_threads=num_threads,
            index_prefix=index_prefix,
            alpha=1.2,
            use_pq_build=False,
            num_pq_bytes=8,
            use_opq=False,
        )
        timer_results["build_index_seconds"] = build_index_timer.elapsed()

    # ready search object
    load_index_timer = utils.Timer()
    index = diskannpy.StaticMemoryIndex(
        distance_metric=metric,
        vector_dtype=dtype,
        index_directory=index_directory,
        num_threads=num_threads,  # this can be different at search time if you would like
        initial_search_complexity=Ls,
        index_prefix=index_prefix
    )
    timer_results["load_index_seconds"] = load_index_timer.elapsed()

    queries = utils.bin_to_numpy(dtype, querydata_file)

    query_timer = utils.Timer()
    # Bug fix: the neighbor count was hardcoded to 10, silently ignoring the K
    # parameter — recall below was then computed at a mismatched depth whenever
    # K != 10.
    ids, dists = index.batch_search(queries, K, Ls, num_threads)
    query_time = query_timer.elapsed()
    qps = round(queries.shape[0] / query_time, 1)
    print('Batch searched', queries.shape[0], 'in', query_time, 's @', qps, 'QPS')
    timer_results["query_seconds"] = query_time

    if gt_file != "":
        recall_timer = utils.Timer()
        recall = utils.calculate_recall_from_gt_file(K, ids, gt_file)
        print(f"recall@{K} is {recall}")
        timer_results["recall_seconds"] = recall_timer.elapsed()

    timer_results['total_time_seconds'] = method_timer.elapsed()
    return timer_results
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="in-mem-static",
        description="Static in-memory build and search from vectors in a file",
    )
    parser.add_argument("-m", "--metric", required=False, default="l2")
    parser.add_argument("-d", "--data_type", required=True)
    parser.add_argument("-id", "--index_directory", required=False, default=".")
    parser.add_argument("-i", "--indexdata_file", required=True)
    parser.add_argument("-q", "--querydata_file", required=True)
    parser.add_argument("-Lb", "--Lbuild", default=50, type=int)
    parser.add_argument("-Ls", "--Lsearch", default=50, type=int)
    parser.add_argument("-R", "--graph_degree", default=32, type=int)
    parser.add_argument("-T", "--num_threads", default=8, type=int)
    parser.add_argument("-K", default=10, type=int)
    parser.add_argument("-G", "--gt_file", default="")
    parser.add_argument("-ip", "--index_prefix", required=False, default="ann")
    # NOTE(review): without action="store_true" this flag takes a string value,
    # and any non-empty string (including "False") is truthy. Confirm the
    # intended CLI surface before changing it.
    parser.add_argument("--search_only", required=False, default=False)
    parser.add_argument("--json_timings_output", required=False, default=None, help="File to write out timings to as JSON. If not specified, timings will not be written out.")
    args = parser.parse_args()

    timings: dict[str, float] = build_and_search(
        args.metric,
        args.data_type,
        args.index_directory.strip(),
        args.indexdata_file.strip(),
        args.querydata_file.strip(),
        args.Lbuild,
        args.graph_degree,  # Build args
        args.K,
        args.Lsearch,
        args.num_threads,  # search args
        args.gt_file,
        args.index_prefix,
        args.search_only
    )
    if args.json_timings_output is not None:
        import json
        # Record the output path inside the payload so runs are self-describing.
        timings['log_file'] = args.json_timings_output
        with open(args.json_timings_output, "w") as f:
            json.dump(timings, f)

View File

@@ -0,0 +1,103 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import argparse
import diskannpy
import numpy as np
import utils
def insert_and_search(
    dtype_str,
    indexdata_file,
    querydata_file,
    Lb,
    graph_degree,
    num_clusters,
    num_insert_threads,
    K,
    Ls,
    num_search_threads,
    gt_file,
):
    """Cluster the input vectors, insert them into a DynamicMemoryIndex one
    cluster at a time, then batch-search and optionally report recall."""
    npts, ndims = utils.get_bin_metadata(indexdata_file)

    dtype_map = {"float": np.float32, "int8": np.int8, "uint8": np.uint8}
    if dtype_str not in dtype_map:
        raise ValueError("data_type must be float, int8 or uint8")
    dtype = dtype_map[dtype_str]

    index = diskannpy.DynamicMemoryIndex(
        distance_metric="l2",
        vector_dtype=dtype,
        dimensions=ndims,
        max_vectors=npts,
        complexity=Lb,
        graph_degree=graph_degree
    )

    queries = diskannpy.vectors_from_file(querydata_file, dtype)
    data = diskannpy.vectors_from_file(indexdata_file, dtype)

    offsets, permutation = utils.cluster_and_permute(
        dtype_str, npts, ndims, data, num_clusters
    )

    timer = utils.Timer()
    for cluster_id in range(num_clusters):
        member_range = range(offsets[cluster_id], offsets[cluster_id + 1])
        member_rows = np.array(permutation[member_range], dtype=np.uint32)
        # Tags are 1-based, so shift the 0-based row indices up by one.
        index.batch_insert(data[member_rows, :], member_rows + 1, num_insert_threads)
        print('Inserted cluster', cluster_id, 'in', timer.elapsed(), 's')

    tags, dists = index.batch_search(queries, K, Ls, num_search_threads)
    print('Batch searched', queries.shape[0], 'queries in', timer.elapsed(), 's')

    if gt_file != "":
        # Shift tags back to 0-based row ids before comparing to ground truth.
        recall = utils.calculate_recall_from_gt_file(K, tags - 1, gt_file)
        print(f"recall@{K} is {recall}")
if __name__ == "__main__":
    # NOTE(review): prog name appears copied from in-mem-dynamic.py; consider
    # "insert-in-clustered-order" for accurate --help output.
    parser = argparse.ArgumentParser(
        prog="in-mem-dynamic",
        description="Inserts points dynamically in a clustered order and search from vectors in a file.",
    )
    parser.add_argument("-d", "--data_type", required=True)
    parser.add_argument("-i", "--indexdata_file", required=True)
    parser.add_argument("-q", "--querydata_file", required=True)
    parser.add_argument("-Lb", "--Lbuild", default=50, type=int)
    parser.add_argument("-Ls", "--Lsearch", default=50, type=int)
    parser.add_argument("-R", "--graph_degree", default=32, type=int)
    parser.add_argument("-TI", "--num_insert_threads", default=8, type=int)
    parser.add_argument("-TS", "--num_search_threads", default=8, type=int)
    parser.add_argument("-C", "--num_clusters", default=32, type=int)
    parser.add_argument("-K", default=10, type=int)
    parser.add_argument("--gt_file", default="")
    args = parser.parse_args()

    insert_and_search(
        args.data_type,
        args.indexdata_file,
        args.querydata_file,
        args.Lbuild,
        args.graph_degree,  # Build args
        args.num_clusters,
        args.num_insert_threads,
        args.K,
        args.Lsearch,
        args.num_search_threads,  # search args
        args.gt_file,
    )

# An ingest optimized example with SIFT1M
# python3 ~/DiskANN/python/apps/insert-in-clustered-order.py -d float \
#   -i sift_base.fbin -q sift_query.fbin --gt_file gt100_base \
#   -Lb 10 -R 30 -Ls 200 -C 32

View File

@@ -0,0 +1,120 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import numpy as np
from scipy.cluster.vq import vq, kmeans2
from typing import Tuple
from time import perf_counter
def get_bin_metadata(bin_file) -> Tuple[int, int]:
    """Read the (num_points, num_dimensions) header of a DiskANN binary file.

    The file begins with two uint32 values: point count, then dimensionality.
    """
    header = np.fromfile(file=bin_file, dtype=np.uint32, count=2)
    npts, ndims = header
    return npts, ndims
def bin_to_numpy(dtype, bin_file) -> np.ndarray:
    """Load the vectors of a DiskANN binary file into an (npts, ndims) array.

    :param dtype: numpy dtype (or dtype name) of the stored elements
    :param bin_file: path to the file (8-byte header followed by row-major data)
    """
    # Header: two uint32 values (point count, dimensionality) = 8 bytes.
    npts, ndims = np.fromfile(file=bin_file, dtype=np.uint32, count=2)
    return np.fromfile(file=bin_file, dtype=dtype, offset=8).reshape(npts, ndims)
class Timer:
    """Stopwatch measuring wall-clock time between calls.

    Bug fix: ``last`` was previously a *class* attribute initialized once at
    import time, so every Timer() shared the same reference point and a fresh
    instance measured time since module import rather than since construction.
    It is now a per-instance attribute set in __init__.
    """

    def __init__(self):
        # Per-instance start time (perf_counter reference, in seconds).
        self.last = perf_counter()

    def reset(self):
        """Restart the stopwatch from now."""
        self.last = perf_counter()

    def elapsed(self, round_digit: int = 3):
        """Return seconds since construction/last call (rounded) and restart."""
        now = perf_counter()
        elapsed_time = now - self.last
        self.last = now
        return round(elapsed_time, round_digit)
def numpy_to_bin(array, out_file):
    """Write a 2-D numpy array in DiskANN binary format.

    Layout: uint32 npts, uint32 ndims, then the row-major element data.

    Bug fix: np.shape() returns a tuple of plain Python ints, which have no
    .astype() method — the previous shape[0].astype(np.uint32) raised
    AttributeError. The counts are now converted with np.uint32(). The file is
    also opened via a context manager so the handle closes on write errors.
    """
    npts, ndims = np.shape(array)
    with open(out_file, "wb") as f:
        f.write(np.uint32(npts).tobytes())
        f.write(np.uint32(ndims).tobytes())
        f.write(array.tobytes())
def read_gt_file(gt_file) -> Tuple[np.ndarray[int], np.ndarray[float]]:
    """
    Return ids and distances to queries
    """
    num_queries, K = get_bin_metadata(gt_file)
    # uint32 ids (4 bytes each) precede the float32 distance block.
    id_block_bytes = num_queries * K * 4
    ids = np.fromfile(
        file=gt_file, dtype=np.uint32, offset=8, count=num_queries * K
    ).reshape(num_queries, K)
    dists = np.fromfile(
        file=gt_file, dtype=np.float32, offset=8 + id_block_bytes, count=num_queries * K
    ).reshape(num_queries, K)
    return ids, dists
def calculate_recall(
    result_set_indices: np.ndarray[int],
    truth_set_indices: np.ndarray[int],
    recall_at: int = 5,
) -> float:
    """
    result_set_indices and truth_set_indices correspond by row index. the columns in each row contain the indices of
    the nearest neighbors, with result_set_indices being the approximate nearest neighbor results and truth_set_indices
    being the brute force nearest neighbor calculation via sklearn's NearestNeighbor class.
    :param result_set_indices:
    :param truth_set_indices:
    :param recall_at:
    :return:
    """
    num_rows = result_set_indices.shape[0]
    # Count, per row, how many of the top-`recall_at` approximate neighbors
    # appear in the top-`recall_at` ground-truth neighbors.
    hits = sum(
        len(set(result_set_indices[row][:recall_at]) & set(truth_set_indices[row][:recall_at]))
        for row in range(num_rows)
    )
    return hits / (num_rows * recall_at)
def calculate_recall_from_gt_file(K: int, ids: np.ndarray[int], gt_file: str) -> float:
    """
    Calculate recall from ids returned from search and those read from file
    """
    truth_ids, _ = read_gt_file(gt_file)
    return calculate_recall(ids, truth_ids, K)
def cluster_and_permute(
    dtype_str, npts, ndims, data, num_clusters
) -> Tuple[np.ndarray[int], np.ndarray[int]]:
    """
    Cluster the data and return permutation of row indices
    that would group indices of the same cluster together
    """
    # Fit centroids on a sample of at most 100k rows to bound kmeans cost.
    sample_size = min(100000, npts)
    sample_rows = np.random.choice(range(npts), size=sample_size, replace=False)
    centroids, _ = kmeans2(data[sample_rows, :], num_clusters, minit="++", iter=10)
    # Assign every row to its nearest centroid.
    labels, _ = vq(data, centroids)

    count = np.bincount(labels, minlength=num_clusters).astype(float)
    print("Cluster counts")
    print(count)

    # offsets[c] .. offsets[c+1] delimit cluster c's rows in the permutation.
    offsets = np.zeros(num_clusters + 1, dtype=int)
    offsets[1:] = np.cumsum(count)

    # A stable argsort on labels groups rows by cluster while preserving the
    # original relative order within each cluster — equivalent to the manual
    # bucket-fill loop it replaces.
    permutation = np.argsort(labels, kind="stable")
    return offsets, permutation

View File

@@ -0,0 +1,27 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#pragma once
#include <cstdint>
#include <string>
#include "common.h"
#include "distance.h"
namespace diskannpy
{

// Build a disk-resident index from the vectors in data_file_path, writing
// artifacts under index_prefix_path.
// NOTE(review): the units of final_index_ram_limit / indexing_ram_budget are
// defined by the implementation — confirm in the corresponding .cpp.
template <typename DT>
void build_disk_index(diskann::Metric metric, const std::string &data_file_path, const std::string &index_prefix_path,
                      uint32_t complexity, uint32_t graph_degree, double final_index_ram_limit,
                      double indexing_ram_budget, uint32_t num_threads, uint32_t pq_disk_bytes,
                      const std::string &codebook_prefix);

// Build a fully in-memory index from vector_bin_path, writing the result to
// index_output_path. The optional filter arguments configure label-filtered
// builds; they default to "filtering disabled".
template <typename DT, typename TagT = DynamicIdType, typename LabelT = filterT>
void build_memory_index(diskann::Metric metric, const std::string &vector_bin_path,
                        const std::string &index_output_path, uint32_t graph_degree, uint32_t complexity, float alpha,
                        uint32_t num_threads, bool use_pq_build, size_t num_pq_bytes, bool use_opq,
                        bool use_tags = false, const std::string &filter_labels_file = "",
                        const std::string &universal_label = "", uint32_t filter_complexity = 0);

} // namespace diskannpy

View File

@@ -0,0 +1,24 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#pragma once
#include <stdint.h>
#include <utility>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
namespace py = pybind11;
namespace diskannpy
{

// Label type used for filtered search.
typedef uint32_t filterT;

// External id types used by the static and dynamic index wrappers.
typedef uint32_t StaticIdType;
typedef uint32_t DynamicIdType;

// (ids, distances) result pair returned by the search methods.
template <class IdType> using NeighborsAndDistances = std::pair<py::array_t<IdType>, py::array_t<float>>;

}; // namespace diskannpy

View File

@@ -0,0 +1,53 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#pragma once
#include <cstdint>
#include <string>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include "common.h"
#include "index.h"
#include "parameters.h"
namespace py = pybind11;
namespace diskannpy
{

// Python-facing wrapper around diskann::Index supporting insert, delete and
// search on a mutable in-memory index.
template <typename DT>
class DynamicMemoryIndex
{
  public:
    // Construction parameters feed _write_parameters (diskann::IndexWriteParameters);
    // see the corresponding .cpp for how they are forwarded.
    DynamicMemoryIndex(diskann::Metric m, size_t dimensions, size_t max_vectors, uint32_t complexity,
                       uint32_t graph_degree, bool saturate_graph, uint32_t max_occlusion_size, float alpha,
                       uint32_t num_threads, uint32_t filter_complexity, uint32_t num_frozen_points,
                       uint32_t initial_search_complexity, uint32_t initial_search_threads,
                       bool concurrent_consolidation);

    // Load a previously saved index from index_path.
    void load(const std::string &index_path);

    // Insert a single vector under external id `id`; returns a status code
    // (presumably 0 on success — confirm in the .cpp).
    int insert(const py::array_t<DT, py::array::c_style | py::array::forcecast> &vector, DynamicIdType id);

    // Insert `num_inserts` vectors in parallel; returns one status per insert.
    py::array_t<int> batch_insert(py::array_t<DT, py::array::c_style | py::array::forcecast> &vectors,
                                  py::array_t<DynamicIdType, py::array::c_style | py::array::forcecast> &ids, int32_t num_inserts,
                                  int num_threads = 0);

    // Mark `id` as deleted; see consolidate_delete() below.
    int mark_deleted(DynamicIdType id);

    void save(const std::string &save_path, bool compact_before_save = false);

    // Single-query and batched k-NN search.
    NeighborsAndDistances<DynamicIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query, uint64_t knn,
                                                uint64_t complexity);
    NeighborsAndDistances<DynamicIdType> batch_search(py::array_t<DT, py::array::c_style | py::array::forcecast> &queries,
                                                      uint64_t num_queries, uint64_t knn, uint64_t complexity,
                                                      uint32_t num_threads);

    // Apply pending deletions (from mark_deleted) to the index structure.
    void consolidate_delete();
    size_t num_points();

  private:
    const uint32_t _initial_search_complexity;
    const diskann::IndexWriteParameters _write_parameters;
    diskann::Index<DT, DynamicIdType, filterT> _index;
};

}; // namespace diskannpy

View File

@@ -0,0 +1,65 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#pragma once
#include <cstdint>
#include <string>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#ifdef _WINDOWS
#include "windows_aligned_file_reader.h"
#elif __APPLE__
#include "apple_aligned_file_reader.h"
#else
#include "linux_aligned_file_reader.h"
#endif
#include "common.h"
#include "pq_flash_index.h"
namespace py = pybind11;
namespace diskannpy
{

// Select the aligned-file-reader implementation for the host platform.
#ifdef _WINDOWS
typedef WindowsAlignedFileReader PlatformSpecificAlignedFileReader;
#elif __APPLE__
typedef AppleAlignedFileReader PlatformSpecificAlignedFileReader;
#else
typedef LinuxAlignedFileReader PlatformSpecificAlignedFileReader;
#endif

// Python-facing wrapper around diskann::PQFlashIndex, a read-only index
// accessed through AlignedFileReader instances.
template <typename DT> class StaticDiskIndex
{
  public:
    StaticDiskIndex(diskann::Metric metric, const std::string &index_path_prefix, uint32_t num_threads,
                    size_t num_nodes_to_cache, uint32_t cache_mechanism, const std::string &pq_prefix,
                    const std::string &partition_prefix);

    // Warm the node cache breadth-first from the entry point, or from the
    // access paths of a sample query workload.
    void cache_bfs_levels(size_t num_nodes_to_cache);
    void cache_sample_paths(size_t num_nodes_to_cache, const std::string &warmup_query_file, uint32_t num_threads);

    // Single-query k-NN search.
    // NOTE(review): "recompute_beighbor_embeddings" is a typo for "neighbor",
    // but the name is part of the public binding surface — renaming it here
    // would break callers; fix it in a coordinated change if at all.
    NeighborsAndDistances<StaticIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query,
                                               uint64_t knn, uint64_t complexity, uint64_t beam_width,
                                               bool USE_DEFERRED_FETCH = false, bool skip_search_reorder = false,
                                               bool recompute_beighbor_embeddings = false, bool dedup_node_dis = false,
                                               float prune_ratio = 0, bool batch_recompute = false,
                                               bool global_pruning = false);

    // Batched k-NN search across num_threads workers.
    NeighborsAndDistances<StaticIdType> batch_search(
        py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, uint64_t num_queries, uint64_t knn,
        uint64_t complexity, uint64_t beam_width, uint32_t num_threads, bool USE_DEFERRED_FETCH = false,
        bool skip_search_reorder = false, bool recompute_beighbor_embeddings = false, bool dedup_node_dis = false,
        float prune_ratio = 0, bool batch_recompute = false, bool global_pruning = false);

  private:
    std::shared_ptr<AlignedFileReader> _reader;
    std::shared_ptr<AlignedFileReader> _graph_reader;
    diskann::PQFlashIndex<DT> _index;
};

} // namespace diskannpy

View File

@@ -0,0 +1,40 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#pragma once
#include <cstdint>
#include <string>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include "common.h"
#include "index.h"
namespace py = pybind11;
namespace diskannpy
{

// Python-facing wrapper around a read-only in-memory diskann::Index.
template <typename DT> class StaticMemoryIndex
{
  public:
    StaticMemoryIndex(diskann::Metric m, const std::string &index_prefix, size_t num_points, size_t dimensions,
                      uint32_t num_threads, uint32_t initial_search_complexity);

    // Single-query k-NN search.
    NeighborsAndDistances<StaticIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query,
                                               uint64_t knn, uint64_t complexity);

    // Single-query search restricted by the given filter label.
    NeighborsAndDistances<StaticIdType> search_with_filter(
        py::array_t<DT, py::array::c_style | py::array::forcecast> &query, uint64_t knn, uint64_t complexity,
        filterT filter);

    // Batched k-NN search across num_threads workers.
    NeighborsAndDistances<StaticIdType> batch_search(
        py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, uint64_t num_queries, uint64_t knn,
        uint64_t complexity, uint32_t num_threads);

  private:
    diskann::Index<DT, StaticIdType, filterT> _index;
};

} // namespace diskannpy

View File

@@ -0,0 +1,138 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
"""
# Documentation Overview
`diskannpy` is mostly structured around 2 distinct processes: [Index Builder Functions](#index-builders) and [Search Classes](#search-classes)
It also includes a few nascent [utilities](#utilities).
And lastly, it makes substantial use of type hints, with various shorthand [type aliases](#parameter-and-response-type-aliases) documented.
When reading the `diskannpy` code we refer to the type aliases, though `pdoc` helpfully expands them.
## Index Builders
- `build_disk_index` - To build an index that cannot fully fit into memory when searching
- `build_memory_index` - To build an index that can fully fit into memory when searching
## Search Classes
- `StaticMemoryIndex` - for indices that can fully fit in memory and won't be changed during the search operations
- `StaticDiskIndex` - for indices that cannot fully fit in memory, thus relying on disk IO to search, and also won't be changed during search operations
- `DynamicMemoryIndex` - for indices that can fully fit in memory and will be mutated via insert/deletion operations as well as search operations
## Parameter Defaults
- `diskannpy.defaults` - Default values exported from the C++ extension for Python users
## Parameter and Response Type Aliases
- `DistanceMetric` - What distance metrics does `diskannpy` support?
- `VectorDType` - What vector datatypes does `diskannpy` support?
- `QueryResponse` - What can I expect as a response to my search?
- `QueryResponseBatch` - What can I expect as a response to my batch search?
- `VectorIdentifier` - What types does `diskannpy` support as vector identifiers?
- `VectorIdentifierBatch` - A batch of identifiers of the exact same type. The type can change, but they must **all** change.
- `VectorLike` - How does a vector look to `diskannpy`, to be inserted or searched with.
- `VectorLikeBatch` - A batch of those vectors, to be inserted or searched with.
- `Metadata` - DiskANN vector binary file metadata (num_points, vector_dim)
## Utilities
- `vectors_to_file` - Turns a 2 dimensional `numpy.typing.NDArray[VectorDType]` with shape `(number_of_points, vector_dim)` into a DiskANN vector bin file.
- `vectors_from_file` - Reads a DiskANN vector bin file representing stored vectors into a numpy ndarray.
- `vectors_metadata_from_file` - Reads metadata stored in a DiskANN vector bin file without reading the entire file
- `tags_to_file` - Turns a 1 dimensional `numpy.typing.NDArray[VectorIdentifier]` into a DiskANN tags bin file.
- `tags_from_file` - Reads a DiskANN tags bin file representing stored tags into a numpy ndarray.
- `valid_dtype` - Checks if a given vector dtype is supported by `diskannpy`
"""
from typing import Any, Literal, NamedTuple, Type, Union
import numpy as np
from numpy import typing as npt
# Public type aliases. The bare strings below each alias are existing pdoc-style
# docstrings rendered in the generated documentation.
DistanceMetric = Literal["l2", "mips", "cosine"]
""" Type alias for one of {"l2", "mips", "cosine"} """

VectorDType = Union[Type[np.float32], Type[np.int8], Type[np.uint8]]
""" Type alias for one of {`numpy.float32`, `numpy.int8`, `numpy.uint8`} """

VectorLike = npt.NDArray[VectorDType]
""" Type alias for something that can be treated as a vector """

VectorLikeBatch = npt.NDArray[VectorDType]
""" Type alias for a batch of VectorLikes """

VectorIdentifier = np.uint32
"""
Type alias for a vector identifier, whether it be an implicit array index identifier from StaticMemoryIndex or
StaticDiskIndex, or an explicit tag identifier from DynamicMemoryIndex
"""

VectorIdentifierBatch = npt.NDArray[np.uint32]
""" Type alias for a batch of VectorIdentifiers """
class QueryResponse(NamedTuple):
    """
    Tuple with two values, identifiers and distances. Both are 1d arrays, positionally correspond, and will contain the
    nearest neighbors from [0..k_neighbors)
    """

    identifiers: npt.NDArray[VectorIdentifier]
    """ A `numpy.typing.NDArray[VectorIdentifier]` array of vector identifiers, 1 dimensional """

    distances: npt.NDArray[np.float32]
    """
    A `numpy.typing.NDArray[numpy.float32]` of distances as calculated by the distance metric function, 1 dimensional
    """
class QueryResponseBatch(NamedTuple):
    """
    Tuple with two values, identifiers and distances. Both are 2d arrays, with dimensionality determined by the
    rows corresponding to the number of queries made, and the columns corresponding to the k neighbors
    requested. The two 2d arrays have an implicit, position-based relationship
    """

    identifiers: npt.NDArray[VectorIdentifier]
    """
    A `numpy.typing.NDArray[VectorIdentifier]` array of vector identifiers, 2 dimensional. The row corresponds to index
    of the query, and the column corresponds to the k neighbors requested
    """

    # annotation normalized to npt.NDArray to match QueryResponse.distances
    distances: npt.NDArray[np.float32]
    """
    A `numpy.typing.NDArray[numpy.float32]` of distances as calculated by the distance metric function, 2 dimensional.
    The row corresponds to the index of the query, and the column corresponds to the distance of the query to the
    *k-th* neighbor
    """
from . import defaults
from ._builder import build_disk_index, build_memory_index
from ._common import valid_dtype
from ._dynamic_memory_index import DynamicMemoryIndex
from ._files import (
Metadata,
tags_from_file,
tags_to_file,
vectors_from_file,
vectors_metadata_from_file,
vectors_to_file,
)
from ._static_disk_index import StaticDiskIndex
from ._static_memory_index import StaticMemoryIndex
__all__ = [
"build_disk_index",
"build_memory_index",
"StaticDiskIndex",
"StaticMemoryIndex",
"DynamicMemoryIndex",
"defaults",
"DistanceMetric",
"VectorDType",
"QueryResponse",
"QueryResponseBatch",
"VectorIdentifier",
"VectorIdentifierBatch",
"VectorLike",
"VectorLikeBatch",
"Metadata",
"vectors_metadata_from_file",
"vectors_to_file",
"vectors_from_file",
"tags_to_file",
"tags_from_file",
"valid_dtype",
]

View File

@@ -0,0 +1,349 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import json
import os
import shutil
from pathlib import Path
from typing import Optional, Tuple, Union
import numpy as np
from . import DistanceMetric, VectorDType, VectorIdentifierBatch, VectorLikeBatch
from . import _diskannpy as _native_dap
from ._common import (
_assert,
_assert_is_nonnegative_uint32,
_assert_is_positive_uint32,
_castable_dtype_or_raise,
_valid_metric,
_write_index_metadata,
valid_dtype,
)
from ._diskannpy import defaults
from ._files import tags_to_file, vectors_metadata_from_file, vectors_to_file
def _valid_path_and_dtype(
    data: Union[str, VectorLikeBatch],
    vector_dtype: VectorDType,
    index_path: str,
    index_prefix: str,
) -> Tuple[str, VectorDType]:
    # Resolve `data` into (path_to_vector_bin_file, canonical_vector_dtype).
    # - str input: treated as a path to an existing DiskANN vector bin file.
    # - ndarray input: the on-disk location is derived from index_path/index_prefix.
    if isinstance(data, str):
        vector_bin_path = data
        _assert(
            Path(data).exists() and Path(data).is_file(),
            "if data is of type `str`, it must both exist and be a file",
        )
        vector_dtype_actual = valid_dtype(vector_dtype)
    else:
        vector_bin_path = os.path.join(index_path, f"{index_prefix}_vectors.bin")
        # if Path(vector_bin_path).exists():
        #     raise ValueError(
        #         f"The path {vector_bin_path} already exists. Remove it and try again."
        #     )
        vector_dtype_actual = valid_dtype(data.dtype)
        # vectors_to_file(vector_file=vector_bin_path, vectors=data)
        # NOTE(review): the overwrite guard and the vectors_to_file() write above are
        # commented out, so ndarray input is never persisted here and the file at
        # vector_bin_path must already exist — confirm this is intentional before
        # relying on ndarray input.
    return vector_bin_path, vector_dtype_actual
def build_disk_index(
    data: Union[str, VectorLikeBatch],
    distance_metric: DistanceMetric,
    index_directory: str,
    complexity: int,
    graph_degree: int,
    search_memory_maximum: float,
    build_memory_maximum: float,
    num_threads: int,
    pq_disk_bytes: int = defaults.PQ_DISK_BYTES,
    vector_dtype: Optional[VectorDType] = None,
    index_prefix: str = "ann",
    codebook_prefix: str = "",
) -> None:
    """
    This function will construct a DiskANN disk index. Disk indices are ideal for very large datasets that
    are too large to fit in memory. Memory is still used, but it is primarily used to provide precise disk
    locations for fast retrieval of smaller subsets of the index without compromising much on recall.

    If you provide a numpy array, it will save this array to disk in a temp location
    in the format DiskANN's PQ Flash Index builder requires. This temp folder is deleted upon index creation completion
    or error.

    NOTE(review): in this fork the `vectors_to_file` write inside `_valid_path_and_dtype` is commented out, so
    numpy-array input is *not* persisted and `{index_prefix}_vectors.bin` must already exist inside
    `index_directory` — confirm before relying on numpy-array input.

    ## Distance Metric and Vector Datatype Restrictions
    | Metric \ Datatype | np.float32 | np.uint8 | np.int8 |
    |-------------------|------------|----------|---------|
    | L2 | ✅ | ✅ | ✅ |
    | MIPS | ✅ | ❌ | ❌ |
    | Cosine [^bug-in-disk-cosine] | ❌ | ❌ | ❌ |

    [^bug-in-disk-cosine]: For StaticDiskIndex, Cosine distances are not currently supported.

    ### Parameters
    - **data**: Either a `str` representing a path to a DiskANN vector bin file, or a numpy.ndarray,
      of a supported dtype, in 2 dimensions. Note that `vector_dtype` must be provided if data is a `str`
    - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3
      vector dtypes, but `mips` is only available for single precision floats.
    - **index_directory**: The index files will be saved to this **existing** directory path
    - **complexity**: The size of the candidate nearest neighbor list to use when building the index. Values between 75
      and 200 are typical. Larger values will take more time to build but result in indices that provide higher recall
      for the same search complexity. Use a value that is at least as large as `graph_degree` unless you are prepared
      to compromise on quality
    - **graph_degree**: The degree of the graph index, typically between 60 and 150. A larger maximum degree will
      result in larger indices and longer indexing times, but better search quality.
    - **search_memory_maximum**: Build index with the expectation that the search will use at most
      `search_memory_maximum`, in gb.
    - **build_memory_maximum**: Build index using at most `build_memory_maximum` in gb. Building processes typically
      require more memory, while search memory can be reduced.
    - **num_threads**: Number of threads to use when creating this index. `0` is used to indicate all available
      logical processors should be used.
    - **pq_disk_bytes**: Use `0` to store uncompressed data on SSD. This allows the index to asymptote to 100%
      recall. If your vectors are too large to store in SSD, this parameter provides the option to compress the
      vectors using PQ for storing on SSD. This will trade off recall. You would also want this to be greater
      than the number of bytes used for the PQ compressed data stored in-memory. Default is `0`.
    - **vector_dtype**: Required if the provided `data` is of type `str`, else we use the `data.dtype` if np array.
    - **index_prefix**: The prefix of the index files. Defaults to "ann".
    - **codebook_prefix**: Prefix of pre-existing PQ codebook files to reuse; empty string (the default) builds
      codebooks from scratch. Passed through to the native builder — exact semantics live in the C++ extension.
    """
    _assert(
        (isinstance(data, str) and vector_dtype is not None)
        or isinstance(data, np.ndarray),
        "vector_dtype is required if data is a str representing a path to the vector bin file",
    )
    dap_metric = _valid_metric(distance_metric)
    _assert_is_positive_uint32(complexity, "complexity")
    _assert_is_positive_uint32(graph_degree, "graph_degree")
    _assert(search_memory_maximum > 0, "search_memory_maximum must be larger than 0")
    _assert(build_memory_maximum > 0, "build_memory_maximum must be larger than 0")
    _assert_is_nonnegative_uint32(num_threads, "num_threads")
    _assert_is_nonnegative_uint32(pq_disk_bytes, "pq_disk_bytes")
    _assert(index_prefix != "", "index_prefix cannot be an empty string")
    index_path = Path(index_directory)
    _assert(
        index_path.exists() and index_path.is_dir(),
        "index_directory must both exist and be a directory",
    )
    vector_bin_path, vector_dtype_actual = _valid_path_and_dtype(
        data, vector_dtype, index_directory, index_prefix
    )
    # Disk index restrictions: no cosine at all; mips only for float32 vectors.
    _assert(dap_metric != _native_dap.COSINE, "Cosine is currently not supported in StaticDiskIndex")
    if dap_metric == _native_dap.INNER_PRODUCT:
        _assert(
            vector_dtype_actual == np.float32,
            "Integral vector dtypes (np.uint8, np.int8) are not supported with distance metric mips"
        )
    num_points, dimensions = vectors_metadata_from_file(vector_bin_path)
    # Dispatch to the dtype-specific native builder.
    if vector_dtype_actual == np.uint8:
        _builder = _native_dap.build_disk_uint8_index
    elif vector_dtype_actual == np.int8:
        _builder = _native_dap.build_disk_int8_index
    else:
        _builder = _native_dap.build_disk_float_index
    index_prefix_path = os.path.join(index_directory, index_prefix)
    _builder(
        distance_metric=dap_metric,
        data_file_path=vector_bin_path,
        index_prefix_path=index_prefix_path,
        complexity=complexity,
        graph_degree=graph_degree,
        final_index_ram_limit=search_memory_maximum,
        indexing_ram_budget=build_memory_maximum,
        num_threads=num_threads,
        pq_disk_bytes=pq_disk_bytes,
        codebook_prefix=codebook_prefix,
    )
    # Sidecar metadata so later loads can recover dtype/metric/shape without user input.
    _write_index_metadata(
        index_prefix_path, vector_dtype_actual, dap_metric, num_points, dimensions
    )
def build_memory_index(
    data: Union[str, VectorLikeBatch],
    distance_metric: DistanceMetric,
    index_directory: str,
    complexity: int,
    graph_degree: int,
    num_threads: int,
    alpha: float = defaults.ALPHA,
    use_pq_build: bool = defaults.USE_PQ_BUILD,
    num_pq_bytes: int = defaults.NUM_PQ_BYTES,
    use_opq: bool = defaults.USE_OPQ,
    vector_dtype: Optional[VectorDType] = None,
    tags: Union[str, VectorIdentifierBatch] = "",
    filter_labels: Optional[list[list[str]]] = None,
    universal_label: str = "",
    filter_complexity: int = defaults.FILTER_COMPLEXITY,
    index_prefix: str = "ann",
) -> None:
    """
    This function will construct a DiskANN memory index. Memory indices are ideal for smaller datasets whose
    indices can fit into memory. Memory indices are faster than disk indices, but usually cannot scale to massive
    sizes in an individual index on an individual machine.

    `diskannpy`'s memory indices take two forms: a `diskannpy.StaticMemoryIndex`, which will not be mutated, only
    searched upon, and a `diskannpy.DynamicMemoryIndex`, which can be mutated AND searched upon in the same process.

    ## Important Note:
    You **must** determine the type of index you are building for. If you are building for a
    `diskannpy.DynamicMemoryIndex`, you **must** supply a valid value for the `tags` parameter. **Do not supply
    tags if the index is intended to be `diskannpy.StaticMemoryIndex`**!

    ## Distance Metric and Vector Datatype Restrictions
    | Metric \ Datatype | np.float32 | np.uint8 | np.int8 |
    |-------------------|------------|----------|---------|
    | L2 | ✅ | ✅ | ✅ |
    | MIPS | ✅ | ❌ | ❌ |
    | Cosine | ✅ | ✅ | ✅ |

    ### Parameters
    - **data**: Either a `str` representing a path to an existing DiskANN vector bin file, or a numpy.ndarray of a
      supported dtype in 2 dimensions. Note that `vector_dtype` must be provided if `data` is a `str`.
    - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3
      vector dtypes, but `mips` is only available for single precision floats.
    - **index_directory**: The index files will be saved to this **existing** directory path
    - **complexity**: The size of the candidate nearest neighbor list to use when building the index. Values between 75
      and 200 are typical. Larger values will take more time to build but result in indices that provide higher recall
      for the same search complexity. Use a value that is at least as large as `graph_degree` unless you are prepared
      to compromise on quality
    - **graph_degree**: The degree of the graph index, typically between 60 and 150. A larger maximum degree will
      result in larger indices and longer indexing times, but better search quality.
    - **num_threads**: Number of threads to use when creating this index. `0` is used to indicate all available
      logical processors should be used.
    - **alpha**: The alpha parameter (>=1) is used to control the nature and number of points that are added to the
      graph. A higher alpha value (e.g., 1.4) will result in fewer hops (and IOs) to convergence, but probably more
      distance comparisons compared to a lower alpha value.
    - **use_pq_build**: Use product quantization during build. Product quantization is a lossy compression technique
      that can reduce the size of the index on disk. This will trade off recall. Default is `True`.
    - **num_pq_bytes**: The number of bytes used to store the PQ compressed data in memory. This will trade off recall.
      Default is `0`.
    - **use_opq**: Use optimized product quantization during build.
    - **vector_dtype**: Required if the provided `data` is of type `str`, else we use the `data.dtype` if np array.
    - **tags**: Tags can be defined either as a path on disk to an existing .tags file, or provided as a np.array of
      the same length as the number of vectors. Tags are used to identify vectors in the index via your *own*
      numbering conventions, and is absolutely required for loading DynamicMemoryIndex indices `from_file`.
    - **filter_labels**: An optional, but exhaustive list of categories for each vector. This is used to filter
      search results by category. If provided, this must be a list of lists, where each inner list is a list of
      categories for the corresponding vector. For example, if you have 3 vectors, and the first vector belongs to
      categories "a" and "b", the second vector belongs to category "b", and the third vector belongs to no categories,
      you would provide `filter_labels=[["a", "b"], ["b"], []]`. If you do not want to provide categories for a
      particular vector, you can provide an empty list. If you do not want to provide categories for any vectors,
      you can provide `None` for this parameter (which is the default)
    - **universal_label**: An optional label that indicates that this vector should be included in *every* search
      in which it also meets the knn search criteria.
    - **filter_complexity**: Complexity to use when using filters. Default is 0. 0 is strictly invalid if you are
      using filters.
    - **index_prefix**: The prefix of the index files. Defaults to "ann".
    """
    _assert(
        (isinstance(data, str) and vector_dtype is not None)
        or isinstance(data, np.ndarray),
        "vector_dtype is required if data is a str representing a path to the vector bin file",
    )
    dap_metric = _valid_metric(distance_metric)
    _assert_is_positive_uint32(complexity, "complexity")
    _assert_is_positive_uint32(graph_degree, "graph_degree")
    _assert(
        alpha >= 1,
        "alpha must be >= 1, and realistically should be kept between [1.0, 2.0)",
    )
    _assert_is_nonnegative_uint32(num_threads, "num_threads")
    _assert_is_nonnegative_uint32(num_pq_bytes, "num_pq_bytes")
    _assert_is_nonnegative_uint32(filter_complexity, "filter_complexity")
    _assert(index_prefix != "", "index_prefix cannot be an empty string")
    _assert(
        filter_labels is None or filter_complexity > 0,
        "if filter_labels is provided, filter_complexity must not be 0"
    )
    index_path = Path(index_directory)
    _assert(
        index_path.exists() and index_path.is_dir(),
        "index_directory must both exist and be a directory",
    )
    vector_bin_path, vector_dtype_actual = _valid_path_and_dtype(
        data, vector_dtype, index_directory, index_prefix
    )
    # mips is only supported for float32 vectors
    if dap_metric == _native_dap.INNER_PRODUCT:
        _assert(
            vector_dtype_actual == np.float32,
            "Integral vector dtypes (np.uint8, np.int8) are not supported with distance metric mips"
        )
    num_points, dimensions = vectors_metadata_from_file(vector_bin_path)
    if filter_labels is not None:
        _assert(
            len(filter_labels) == num_points,
            "filter_labels must be the same length as the number of points"
        )
    # Dispatch to the dtype-specific native builder.
    if vector_dtype_actual == np.uint8:
        _builder = _native_dap.build_memory_uint8_index
    elif vector_dtype_actual == np.int8:
        _builder = _native_dap.build_memory_int8_index
    else:
        _builder = _native_dap.build_memory_float_index
    index_prefix_path = os.path.join(index_directory, index_prefix)
    filter_labels_file = ""
    if filter_labels is not None:
        # Write one line per vector: its comma-joined labels, or "default" for an
        # unlabeled vector. A {label: count} summary is saved alongside as JSON.
        label_counts = {}
        filter_labels_file = f"{index_prefix_path}_pylabels.txt"
        with open(filter_labels_file, "w") as labels_file:
            for labels in filter_labels:
                for label in labels:
                    label_counts[label] = 1 if label not in label_counts else label_counts[label] + 1
                if len(labels) == 0:
                    print("default", file=labels_file)
                else:
                    print(",".join(labels), file=labels_file)
        with open(f"{index_prefix_path}_label_metadata.json", "w") as label_metadata_file:
            json.dump(label_counts, label_metadata_file, indent=True)
    # Tags: either copy an existing tags file, or validate + write the provided array.
    if isinstance(tags, str) and tags != "":
        use_tags = True
        shutil.copy(tags, index_prefix_path + ".tags")
    elif not isinstance(tags, str):
        use_tags = True
        tags_as_array = _castable_dtype_or_raise(tags, expected=np.uint32)
        _assert(len(tags_as_array.shape) == 1, "Provided tags must be 1 dimensional")
        _assert(
            tags_as_array.shape[0] == num_points,
            "Provided tags must contain an identical population to the number of points, "
            f"{tags_as_array.shape[0]=}, {num_points=}",
        )
        tags_to_file(index_prefix_path + ".tags", tags_as_array)
    else:
        use_tags = False
    _builder(
        distance_metric=dap_metric,
        data_file_path=vector_bin_path,
        index_output_path=index_prefix_path,
        complexity=complexity,
        graph_degree=graph_degree,
        alpha=alpha,
        num_threads=num_threads,
        use_pq_build=use_pq_build,
        num_pq_bytes=num_pq_bytes,
        use_opq=use_opq,
        use_tags=use_tags,
        filter_labels_file=filter_labels_file,
        universal_label=universal_label,
        filter_complexity=filter_complexity,
    )
    # Sidecar metadata so later loads can recover dtype/metric/shape without user input.
    _write_index_metadata(
        index_prefix_path, vector_dtype_actual, dap_metric, num_points, dimensions
    )

View File

@@ -0,0 +1,74 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
from typing import BinaryIO, Optional, Union, overload

import numpy as np

from . import DistanceMetric, VectorDType, VectorIdentifierBatch, VectorLikeBatch
# Typing overload stubs for the package-level builder API; implementations live in `_builder.py`.
def numpy_to_diskann_file(vectors: np.ndarray, file_handler: BinaryIO): ...


# Overload: data given as a path to an existing vector bin file (vector_dtype required).
@overload
def build_disk_index(
    data: str,
    distance_metric: DistanceMetric,
    index_directory: str,
    complexity: int,
    graph_degree: int,
    search_memory_maximum: float,
    build_memory_maximum: float,
    num_threads: int,
    pq_disk_bytes: int,
    vector_dtype: VectorDType,
    index_prefix: str,
) -> None: ...


# Overload: data given as an in-memory numpy array (dtype inferred from the array).
@overload
def build_disk_index(
    data: VectorLikeBatch,
    distance_metric: DistanceMetric,
    index_directory: str,
    complexity: int,
    graph_degree: int,
    search_memory_maximum: float,
    build_memory_maximum: float,
    num_threads: int,
    pq_disk_bytes: int,
    index_prefix: str,
) -> None: ...


# Overload: memory index built from an in-memory numpy array.
@overload
def build_memory_index(
    data: VectorLikeBatch,
    distance_metric: DistanceMetric,
    index_directory: str,
    complexity: int,
    graph_degree: int,
    alpha: float,
    num_threads: int,
    use_pq_build: bool,
    num_pq_bytes: int,
    use_opq: bool,
    tags: Union[str, VectorIdentifierBatch],
    filter_labels: Optional[list[list[str]]],
    universal_label: str,
    filter_complexity: int,
    index_prefix: str
) -> None: ...
# Overload: memory index built from a path to an existing vector bin file (vector_dtype required).
@overload
def build_memory_index(
    data: str,
    distance_metric: DistanceMetric,
    index_directory: str,
    complexity: int,
    graph_degree: int,
    alpha: float,
    num_threads: int,
    use_pq_build: bool,
    num_pq_bytes: int,
    use_opq: bool,
    vector_dtype: VectorDType,
    tags: Union[str, VectorIdentifierBatch],
    # renamed from `filter_labels_file`: the implementation's keyword is `filter_labels`,
    # so keyword calls type-checked against this overload would not match the real API
    filter_labels: Optional[list[list[str]]],
    universal_label: str,
    filter_complexity: int,
    index_prefix: str
) -> None: ...

View File

@@ -0,0 +1,251 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import os
import warnings
from enum import Enum
from pathlib import Path
from typing import Literal, NamedTuple, Optional, Tuple, Type, Union
import numpy as np
from . import (
DistanceMetric,
VectorDType,
VectorIdentifierBatch,
VectorLike,
VectorLikeBatch,
)
from . import _diskannpy as _native_dap
# NOTE(review): Python star-imports honor `__all__` (lowercase), not `__ALL__`, so this
# spelling has no effect on `from ... import *` — confirm intent before renaming.
__ALL__ = ["valid_dtype"]
# Canonical vector dtypes; _assert_dtype checks castability to one of these.
_VALID_DTYPES = [np.float32, np.int8, np.uint8]
def valid_dtype(dtype: Type) -> VectorDType:
    """
    Utility method to determine whether the provided dtype is supported by `diskannpy`, and if so, the canonical
    dtype we will use internally (e.g. np.single -> np.float32).

    Raises ValueError if the dtype is not one of the canonical supported types.
    """
    _assert_dtype(dtype)
    if dtype == np.uint8:
        return np.uint8
    if dtype == np.int8:
        return np.int8
    if dtype == np.float32:
        return np.float32
    # _assert_dtype only checks *castability*, so a dtype such as np.bool_ can pass it
    # without equaling any canonical type; previously this fell through and implicitly
    # returned None, which crashed later in non-obvious places.
    raise ValueError(
        f"Vector dtype {dtype} is not one of np.float32, np.int8, or np.uint8"
    )
def _assert(statement_eval: bool, message: str):
if not statement_eval:
raise ValueError(message)
def _valid_metric(metric: str) -> _native_dap.Metric:
    """Translate a metric name ("l2" / "mips" / "cosine", any case) into the native enum."""
    if not isinstance(metric, str):
        raise ValueError("distance_metric must be a string")
    known_metrics = {
        "l2": _native_dap.L2,
        "mips": _native_dap.INNER_PRODUCT,
        "cosine": _native_dap.COSINE,
    }
    try:
        return known_metrics[metric.lower()]
    except KeyError:
        raise ValueError("distance_metric must be one of 'l2', 'mips', or 'cosine'")
def _assert_dtype(dtype: Type):
    """Raise ValueError unless `dtype` can be safely cast to a supported vector dtype."""
    castable = False
    for candidate in _VALID_DTYPES:
        if np.can_cast(dtype, candidate):
            castable = True
            break
    _assert(
        castable,
        f"Vector dtype must be of one of type {{(np.single, np.float32), (np.byte, np.int8), (np.ubyte, np.uint8)}}",
    )
def _castable_dtype_or_raise(
    data: Union[VectorLike, VectorLikeBatch, VectorIdentifierBatch], expected: np.dtype
) -> np.ndarray:
    """
    Return `data` safely cast to the `expected` dtype.

    Raises TypeError if `data` is not a numpy ndarray, or if its dtype cannot be
    safely cast to `expected`.
    """
    if not isinstance(data, np.ndarray):
        raise TypeError(
            f"expecting a numpy ndarray of dtype {expected}, not a {type(data)}"
        )
    if not np.can_cast(data.dtype, expected):
        # Previously this case fell into the same "not a {type}" message, which was
        # misleading when the argument *was* an ndarray of an incompatible dtype.
        raise TypeError(
            f"expecting a numpy ndarray castable to dtype {expected}, but got dtype {data.dtype}"
        )
    return data.astype(expected, casting="safe")
def _assert_2d(vectors: np.ndarray, name: str):
    """Raise ValueError unless `vectors` is a 2d numpy array."""
    _assert(vectors.ndim == 2, f"{name} must be 2d numpy array")
# Largest values representable as unsigned 32- and 64-bit integers.
__MAX_UINT32_VAL = 4_294_967_295
__MAX_UINT64_VAL = 18_446_744_073_709_551_615


def _assert_is_positive_uint32(test_value: int, parameter: str):
    """Raise ValueError unless `test_value` is an integer in [1, 2**32 - 1]."""
    _assert(
        # upper bound is inclusive: 4_294_967_295 itself is a valid uint32 (the
        # original `<` comparison wrongly rejected it)
        test_value is not None and 0 < test_value <= __MAX_UINT32_VAL,
        f"{parameter} must be a positive integer in the uint32 range",
    )


def _assert_is_nonnegative_uint32(test_value: int, parameter: str):
    """Raise ValueError unless `test_value` is an integer in [0, 2**32 - 1]."""
    _assert(
        test_value is not None and 0 <= test_value <= __MAX_UINT32_VAL,
        f"{parameter} must be a non-negative integer in the uint32 range",
    )


def _assert_is_nonnegative_uint64(test_value: int, parameter: str):
    """Raise ValueError unless `test_value` is an integer in [0, 2**64 - 1]."""
    _assert(
        # the original only checked the lower bound despite the message claiming the
        # uint64 range; also reject None explicitly, like the uint32 validators
        test_value is not None and 0 <= test_value <= __MAX_UINT64_VAL,
        f"{parameter} must be a non-negative integer in the uint64 range",
    )
def _assert_existing_directory(path: str, parameter: str):
    """Raise ValueError unless `path` names an existing directory."""
    candidate = Path(path)
    is_directory = candidate.exists() and candidate.is_dir()
    _assert(is_directory, f"{parameter} must be an existing directory")
def _assert_existing_file(path: str, parameter: str):
    """Raise ValueError unless `path` names an existing regular file."""
    candidate = Path(path)
    _assert(
        candidate.exists() and candidate.is_file(),
        f"{parameter} must be an existing file",
    )
class _DataType(Enum):
    """Integer encoding of the supported vector dtypes, stored in the index metadata file."""

    FLOAT32 = 0
    INT8 = 1
    UINT8 = 2

    @classmethod
    def from_type(cls, vector_dtype: VectorDType) -> "_DataType":
        # annotation fixed: the original forward reference named a nonexistent "DataType"
        """Map a numpy vector dtype to its metadata enum value."""
        if vector_dtype == np.float32:
            return cls.FLOAT32
        if vector_dtype == np.int8:
            return cls.INT8
        if vector_dtype == np.uint8:
            return cls.UINT8
        # previously an unsupported dtype silently fell through and returned None
        raise ValueError(f"Unsupported vector dtype: {vector_dtype}")

    def to_type(self) -> VectorDType:
        """Map a metadata enum value back to its numpy dtype."""
        if self is _DataType.FLOAT32:
            return np.float32
        if self is _DataType.INT8:
            return np.int8
        if self is _DataType.UINT8:
            return np.uint8
class _Metric(Enum):
    """Integer encoding of the supported distance metrics, stored in the index metadata file."""

    L2 = 0
    MIPS = 1
    COSINE = 2

    @classmethod
    def from_native(cls, metric: _native_dap.Metric) -> "_Metric":
        """Map a native extension metric constant to its metadata enum value."""
        if metric == _native_dap.L2:
            return cls.L2
        if metric == _native_dap.INNER_PRODUCT:
            return cls.MIPS
        if metric == _native_dap.COSINE:
            return cls.COSINE
        # previously an unknown metric silently fell through and returned None
        raise ValueError(f"Unsupported metric: {metric}")

    def to_native(self) -> _native_dap.Metric:
        """Map a metadata enum value back to the native extension constant."""
        if self is _Metric.L2:
            return _native_dap.L2
        if self is _Metric.MIPS:
            return _native_dap.INNER_PRODUCT
        if self is _Metric.COSINE:
            return _native_dap.COSINE

    def to_str(self) -> str:
        # annotation fixed: the original claimed `_native_dap.Metric` but returns a str
        """Return the user-facing metric name: "l2", "mips", or "cosine"."""
        if self is _Metric.L2:
            return "l2"
        if self is _Metric.MIPS:
            return "mips"
        if self is _Metric.COSINE:
            return "cosine"
def _build_metadata_path(index_path_and_prefix: str) -> str:
return index_path_and_prefix + "_metadata.bin"
def _write_index_metadata(
    index_path_and_prefix: str,
    dtype: VectorDType,
    metric: _native_dap.Metric,
    num_points: int,
    dimensions: int,
):
    """Persist (dtype, metric, num_points, dimensions) as four uint64 values in the metadata file."""
    fields = [
        _DataType.from_type(dtype).value,
        _Metric.from_native(metric).value,
        num_points,
        dimensions,
    ]
    metadata = np.array(fields, dtype=np.uint64)
    metadata.tofile(_build_metadata_path(index_path_and_prefix))
def _read_index_metadata(
    index_path_and_prefix: str,
) -> Optional[Tuple[VectorDType, str, np.uint64, np.uint64]]:
    """
    Read (vector_dtype, metric_name, num_points, dimensions) from an index's metadata
    file, or return None when no metadata file exists next to the index.
    """
    metadata_file = _build_metadata_path(index_path_and_prefix)
    if not Path(metadata_file).exists():
        return None
    raw = np.fromfile(metadata_file, dtype=np.uint64, count=-1)
    return (
        _DataType(int(raw[0])).to_type(),
        _Metric(int(raw[1])).to_str(),
        raw[2],
        raw[3],
    )
def _ensure_index_metadata(
    index_path_and_prefix: str,
    vector_dtype: Optional[VectorDType],
    distance_metric: Optional[DistanceMetric],
    max_vectors: int,
    dimensions: Optional[int],
    warn_size_exceeded: bool = False,
) -> Tuple[VectorDType, str, np.uint64, np.uint64]:
    """
    Return (vector_dtype, distance_metric, num_vectors, dimensions) for an index: read from
    the index's metadata file when it exists, otherwise validated from the caller-supplied
    values. When `warn_size_exceeded` is True, also warn if the stored vector count meets
    or exceeds `max_vectors` (raising `max_vectors` to the stored count if necessary).
    """
    possible_metadata = _read_index_metadata(index_path_and_prefix)
    if possible_metadata is None:
        # No metadata file (e.g. the index was built by the CLI tools), so every
        # descriptive parameter must come from the caller instead.
        _assert(
            all([vector_dtype, distance_metric, dimensions]),
            # grammar fixed: "must provided" -> "must be provided"
            "distance_metric, vector_dtype, and dimensions must be provided if a corresponding metadata file has not "
            "been built for this index, such as when an index was built via the CLI tools or prior to the addition "
            "of a metadata file",
        )
        _assert_dtype(vector_dtype)
        _assert_is_positive_uint32(max_vectors, "max_vectors")
        _assert_is_positive_uint32(dimensions, "dimensions")
        return vector_dtype, distance_metric, max_vectors, dimensions  # type: ignore
    else:
        vector_dtype, distance_metric, num_vectors, dimensions = possible_metadata
        if warn_size_exceeded:
            if max_vectors is not None and num_vectors > max_vectors:
                warnings.warn(
                    "The number of vectors in the saved index exceeds the max_vectors parameter. "
                    "max_vectors is being adjusted to accommodate the dataset, but any insertions will fail."
                )
                max_vectors = num_vectors
            if num_vectors == max_vectors:
                warnings.warn(
                    "The number of vectors in the saved index equals max_vectors parameter. Any insertions will fail."
                )
        return possible_metadata
def _valid_index_prefix(index_directory: str, index_prefix: str) -> str:
    """Validate that index_directory exists and index_prefix is non-empty, then join them."""
    directory_provided = index_directory is not None and index_directory != ""
    _assert(directory_provided, "index_directory cannot be None or empty")
    _assert_existing_directory(index_directory, "index_directory")
    _assert(index_prefix != "", "index_prefix cannot be an empty string")
    return os.path.join(index_directory, index_prefix)

View File

@@ -0,0 +1,511 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import os
import warnings
from pathlib import Path
from typing import Optional
import numpy as np
from . import (
DistanceMetric,
QueryResponse,
QueryResponseBatch,
VectorDType,
VectorIdentifier,
VectorIdentifierBatch,
VectorLike,
VectorLikeBatch,
)
from . import _diskannpy as _native_dap
from ._common import (
_assert,
_assert_2d,
_assert_dtype,
_assert_existing_directory,
_assert_is_nonnegative_uint32,
_assert_is_positive_uint32,
_castable_dtype_or_raise,
_ensure_index_metadata,
_valid_index_prefix,
_valid_metric,
_write_index_metadata,
)
from ._diskannpy import defaults
__ALL__ = ["DynamicMemoryIndex"]
class DynamicMemoryIndex:
    """
    A DynamicMemoryIndex instance is used to both search and mutate a `diskannpy` memory index. This index is unlike
    either `diskannpy.StaticMemoryIndex` or `diskannpy.StaticDiskIndex` in the following ways:

    - It requires an explicit vector identifier for each vector added to it.
    - Insert and (lazy) deletion operations are provided for a flexible, living index

    The mutable aspect of this index will absolutely impact search time performance as new vectors are added and
    old deleted. `DynamicMemoryIndex.consolidate_deletes()` should be called periodically to restructure the index
    to remove deleted vectors and improve per-search performance, at the cost of an expensive index consolidation to
    occur.
    """

    @classmethod
    def from_file(
        cls,
        index_directory: str,
        max_vectors: int,
        complexity: int,
        graph_degree: int,
        saturate_graph: bool = defaults.SATURATE_GRAPH,
        max_occlusion_size: int = defaults.MAX_OCCLUSION_SIZE,
        alpha: float = defaults.ALPHA,
        num_threads: int = defaults.NUM_THREADS,
        filter_complexity: int = defaults.FILTER_COMPLEXITY,
        num_frozen_points: int = defaults.NUM_FROZEN_POINTS_DYNAMIC,
        initial_search_complexity: int = 0,
        search_threads: int = 0,
        concurrent_consolidation: bool = True,
        index_prefix: str = "ann",
        distance_metric: Optional[DistanceMetric] = None,
        vector_dtype: Optional[VectorDType] = None,
        dimensions: Optional[int] = None,
    ) -> "DynamicMemoryIndex":
        """
        The `from_file` classmethod is used to load a previously saved index from disk. This index *must* have been
        created with a valid `tags` file or `tags` np.ndarray of `diskannpy.VectorIdentifier`s. It is *strongly*
        recommended that you use the same parameters as the `diskannpy.build_memory_index()` function that created
        the index.

        ### Parameters
        - **index_directory**: The directory containing the index files. This directory must contain the following
          files:
            - `{index_prefix}.data`
            - `{index_prefix}.tags`
            - `{index_prefix}`

          It may also include the following optional files:
            - `{index_prefix}_vectors.bin`: Optional. `diskannpy` builder functions may create this file in the
              `index_directory` if the index was created from a numpy array
            - `{index_prefix}_metadata.bin`: Optional. `diskannpy` builder functions create this file to store metadata
              about the index, such as vector dtype, distance metric, number of vectors and vector dimensionality.
              If an index is built from the `diskann` cli tools, this file will not exist.
        - **max_vectors**: Capacity of the memory index including space for future insertions.
        - **complexity**: Complexity (a.k.a `L`) references the size of the list we store candidate approximate
          neighbors in. It's used during save (which is an index rebuild), and it's used as an initial search size to
          warm up our index and lower the latency for initial real searches.
        - **graph_degree**: Graph degree (a.k.a. `R`) is the maximum degree allowed for a node in the index's graph
          structure. This degree will be pruned throughout the course of the index build, but it will never grow beyond
          this value. Higher R values require longer index build times, but may result in an index showing excellent
          recall and latency characteristics.
        - **saturate_graph**: If True, the adjacency list of each node will be saturated with neighbors to have exactly
          `graph_degree` neighbors. If False, each node will have between 1 and `graph_degree` neighbors.
        - **max_occlusion_size**: The maximum number of points that can be considered by occlude_list function.
        - **alpha**: The alpha parameter (>=1) is used to control the nature and number of points that are added to the
          graph. A higher alpha value (e.g., 1.4) will result in fewer hops (and IOs) to convergence, but probably
          more distance comparisons compared to a lower alpha value.
        - **num_threads**: Number of threads to use when creating this index. `0` indicates we should use all available
          logical processors.
        - **filter_complexity**: Complexity to use when using filters. Default is 0.
        - **num_frozen_points**: Number of points to freeze. Default is 1.
        - **initial_search_complexity**: Should be set to the most common `complexity` expected to be used during the
          life of this `diskannpy.DynamicMemoryIndex` object. The working scratch memory allocated is based off of
          `initial_search_complexity` * `search_threads`. Note that it may be resized if a `search` or `batch_search`
          operation requests a space larger than can be accommodated by these values.
        - **search_threads**: Should be set to the most common `num_threads` expected to be used during the
          life of this `diskannpy.DynamicMemoryIndex` object. The working scratch memory allocated is based off of
          `initial_search_complexity` * `search_threads`. Note that it may be resized if a `batch_search`
          operation requests a space larger than can be accommodated by these values.
        - **concurrent_consolidation**: This flag dictates whether consolidation can be run alongside inserts and
          deletes, or whether the index is locked down to changes while consolidation is ongoing.
        - **index_prefix**: The prefix of the index files. Defaults to "ann".
        - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3
          vector dtypes, but `mips` is only available for single precision floats. Default is `None`. **This
          value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it does not exist,
          you are required to provide it.
        - **vector_dtype**: The vector dtype this index has been built with. **This value is only used if a
          `{index_prefix}_metadata.bin` file does not exist.** If it does not exist, you are required to provide it.
        - **dimensions**: The vector dimensionality of this index. All new vectors inserted must be the same
          dimensionality. **This value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it
          does not exist, you are required to provide it.

        ### Returns
        A `diskannpy.DynamicMemoryIndex` object, with the index loaded from disk and ready to use for insertions,
        deletions, and searches.
        """
        index_prefix_path = _valid_index_prefix(index_directory, index_prefix)

        # A dynamic index cannot be reloaded without its tags file; fail fast with a clear message.
        tags_file = index_prefix_path + ".tags"
        _assert(
            Path(tags_file).exists(),
            f"The file {tags_file} does not exist in {index_directory}",
        )
        vector_dtype, dap_metric, num_vectors, dimensions = _ensure_index_metadata(
            index_prefix_path, vector_dtype, distance_metric, max_vectors, dimensions, warn_size_exceeded=True
        )

        index = cls(
            distance_metric=dap_metric,  # type: ignore
            vector_dtype=vector_dtype,
            dimensions=dimensions,
            max_vectors=max_vectors,
            complexity=complexity,
            graph_degree=graph_degree,
            saturate_graph=saturate_graph,
            max_occlusion_size=max_occlusion_size,
            alpha=alpha,
            num_threads=num_threads,
            filter_complexity=filter_complexity,
            num_frozen_points=num_frozen_points,
            initial_search_complexity=initial_search_complexity,
            search_threads=search_threads,
            concurrent_consolidation=concurrent_consolidation,
        )
        index._index.load(index_prefix_path)
        index._num_vectors = num_vectors  # current number of vectors loaded
        return index

    def __init__(
        self,
        distance_metric: DistanceMetric,
        vector_dtype: VectorDType,
        dimensions: int,
        max_vectors: int,
        complexity: int,
        graph_degree: int,
        saturate_graph: bool = defaults.SATURATE_GRAPH,
        max_occlusion_size: int = defaults.MAX_OCCLUSION_SIZE,
        alpha: float = defaults.ALPHA,
        num_threads: int = defaults.NUM_THREADS,
        filter_complexity: int = defaults.FILTER_COMPLEXITY,
        num_frozen_points: int = defaults.NUM_FROZEN_POINTS_DYNAMIC,
        initial_search_complexity: int = 0,
        search_threads: int = 0,
        concurrent_consolidation: bool = True,
    ):
        """
        The `diskannpy.DynamicMemoryIndex` represents our python API into a mutable DiskANN memory index.

        This constructor is used to create a new, empty index. If you wish to load a previously saved index from disk,
        please use the `diskannpy.DynamicMemoryIndex.from_file` classmethod instead.

        ### Parameters
        - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3
          vector dtypes, but `mips` is only available for single precision floats.
        - **vector_dtype**: One of {`np.float32`, `np.int8`, `np.uint8`}. The dtype of the vectors this index will
          be storing.
        - **dimensions**: The vector dimensionality of this index. All new vectors inserted must be the same
          dimensionality.
        - **max_vectors**: Capacity of the data store including space for future insertions
        - **graph_degree**: Graph degree (a.k.a. `R`) is the maximum degree allowed for a node in the index's graph
          structure. This degree will be pruned throughout the course of the index build, but it will never grow beyond
          this value. Higher `graph_degree` values require longer index build times, but may result in an index showing
          excellent recall and latency characteristics.
        - **saturate_graph**: If True, the adjacency list of each node will be saturated with neighbors to have exactly
          `graph_degree` neighbors. If False, each node will have between 1 and `graph_degree` neighbors.
        - **max_occlusion_size**: The maximum number of points that can be considered by occlude_list function.
        - **alpha**: The alpha parameter (>=1) is used to control the nature and number of points that are added to the
          graph. A higher alpha value (e.g., 1.4) will result in fewer hops (and IOs) to convergence, but probably
          more distance comparisons compared to a lower alpha value.
        - **num_threads**: Number of threads to use when creating this index. `0` indicates we should use all available
          logical processors.
        - **filter_complexity**: Complexity to use when using filters. Default is 0.
        - **num_frozen_points**: Number of points to freeze. Default is 1.
        - **initial_search_complexity**: Should be set to the most common `complexity` expected to be used during the
          life of this `diskannpy.DynamicMemoryIndex` object. The working scratch memory allocated is based off of
          `initial_search_complexity` * `search_threads`. Note that it may be resized if a `search` or `batch_search`
          operation requests a space larger than can be accommodated by these values.
        - **search_threads**: Should be set to the most common `num_threads` expected to be used during the
          life of this `diskannpy.DynamicMemoryIndex` object. The working scratch memory allocated is based off of
          `initial_search_complexity` * `search_threads`. Note that it may be resized if a `batch_search`
          operation requests a space larger than can be accommodated by these values.
        - **concurrent_consolidation**: This flag dictates whether consolidation can be run alongside inserts and
          deletes, or whether the index is locked down to changes while consolidation is ongoing.
        """
        self._num_vectors = 0          # vectors currently live in the index
        self._removed_num_vectors = 0  # vectors marked deleted but not yet consolidated
        dap_metric = _valid_metric(distance_metric)
        self._dap_metric = dap_metric
        _assert_dtype(vector_dtype)
        _assert_is_positive_uint32(dimensions, "dimensions")

        self._vector_dtype = vector_dtype
        self._dimensions = dimensions

        _assert_is_positive_uint32(max_vectors, "max_vectors")
        _assert_is_positive_uint32(complexity, "complexity")
        _assert_is_positive_uint32(graph_degree, "graph_degree")
        _assert(
            alpha >= 1,
            "alpha must be >= 1, and realistically should be kept between [1.0, 2.0)",
        )
        _assert_is_nonnegative_uint32(max_occlusion_size, "max_occlusion_size")
        _assert_is_nonnegative_uint32(num_threads, "num_threads")
        _assert_is_nonnegative_uint32(filter_complexity, "filter_complexity")
        _assert_is_nonnegative_uint32(num_frozen_points, "num_frozen_points")
        _assert_is_nonnegative_uint32(
            initial_search_complexity, "initial_search_complexity"
        )
        _assert_is_nonnegative_uint32(search_threads, "search_threads")

        self._max_vectors = max_vectors
        self._complexity = complexity
        self._graph_degree = graph_degree

        # Select the native index class matching the vector dtype.
        if vector_dtype == np.uint8:
            _index = _native_dap.DynamicMemoryUInt8Index
        elif vector_dtype == np.int8:
            _index = _native_dap.DynamicMemoryInt8Index
        else:
            _index = _native_dap.DynamicMemoryFloatIndex

        self._index = _index(
            distance_metric=dap_metric,
            dimensions=dimensions,
            max_vectors=max_vectors,
            complexity=complexity,
            graph_degree=graph_degree,
            saturate_graph=saturate_graph,
            max_occlusion_size=max_occlusion_size,
            alpha=alpha,
            num_threads=num_threads,
            filter_complexity=filter_complexity,
            num_frozen_points=num_frozen_points,
            initial_search_complexity=initial_search_complexity,
            search_threads=search_threads,
            concurrent_consolidation=concurrent_consolidation,
        )
        self._points_deleted = False

    def search(
        self, query: VectorLike, k_neighbors: int, complexity: int
    ) -> QueryResponse:
        """
        Searches the index by a single query vector.

        ### Parameters
        - **query**: 1d numpy array of the same dimensionality and dtype of the index.
        - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely
          will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0.
        - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size
          increases accuracy at the cost of latency. Must be at least k_neighbors in size.
        """
        _query = _castable_dtype_or_raise(query, expected=self._vector_dtype)
        _assert(len(_query.shape) == 1, "query vector must be 1-d")
        _assert(
            _query.shape[0] == self._dimensions,
            f"query vector must have the same dimensionality as the index; index dimensionality: {self._dimensions}, "
            f"query dimensionality: {_query.shape[0]}",
        )
        _assert_is_positive_uint32(k_neighbors, "k_neighbors")
        _assert_is_nonnegative_uint32(complexity, "complexity")

        if k_neighbors > complexity:
            warnings.warn(
                f"k_neighbors={k_neighbors} asked for, but list_size={complexity} was smaller. Increasing {complexity} to {k_neighbors}"
            )
            complexity = k_neighbors
        neighbors, distances = self._index.search(query=_query, knn=k_neighbors, complexity=complexity)
        return QueryResponse(identifiers=neighbors, distances=distances)

    def batch_search(
        self,
        queries: VectorLikeBatch,
        k_neighbors: int,
        complexity: int,
        num_threads: int,
    ) -> QueryResponseBatch:
        """
        Searches the index by a batch of query vectors.

        This search is parallelized and far more efficient than searching for each vector individually.

        ### Parameters
        - **queries**: 2d numpy array, with column dimensionality matching the index and row dimensionality being the
          number of queries intended to search for in parallel. Dtype must match dtype of the index.
        - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely
          will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0.
        - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size
          increases accuracy at the cost of latency. Must be at least k_neighbors in size.
        - **num_threads**: Number of threads to use when searching this index. (>= 0), 0 = num_threads in system
        """
        _queries = _castable_dtype_or_raise(queries, expected=self._vector_dtype)
        _assert_2d(_queries, "queries")
        _assert(
            _queries.shape[1] == self._dimensions,
            f"query vectors must have the same dimensionality as the index; index dimensionality: {self._dimensions}, "
            f"query dimensionality: {_queries.shape[1]}",
        )
        _assert_is_positive_uint32(k_neighbors, "k_neighbors")
        _assert_is_positive_uint32(complexity, "complexity")
        _assert_is_nonnegative_uint32(num_threads, "num_threads")

        if k_neighbors > complexity:
            warnings.warn(
                f"k_neighbors={k_neighbors} asked for, but list_size={complexity} was smaller. Increasing {complexity} to {k_neighbors}"
            )
            complexity = k_neighbors

        # Fix: read the shape off the validated/cast `_queries`; the raw `queries` argument may
        # be a plain sequence without a `.shape` attribute.
        num_queries, dim = _queries.shape
        neighbors, distances = self._index.batch_search(
            queries=_queries,
            num_queries=num_queries,
            knn=k_neighbors,
            complexity=complexity,
            num_threads=num_threads,
        )
        return QueryResponseBatch(identifiers=neighbors, distances=distances)

    def save(self, save_path: str, index_prefix: str = "ann"):
        """
        Saves this index to file.

        ### Parameters
        - **save_path**: The path to save these index files to.
        - **index_prefix**: The prefix of the index files. Defaults to "ann".
        """
        if save_path == "":
            raise ValueError("save_path cannot be empty")
        if index_prefix == "":
            raise ValueError("index_prefix cannot be empty")

        # Allow the prefix to interpolate the build parameters, e.g. "ann_L{complexity}_R{graph_degree}".
        index_prefix = index_prefix.format(complexity=self._complexity, graph_degree=self._graph_degree)
        _assert_existing_directory(save_path, "save_path")
        save_path = os.path.join(save_path, index_prefix)
        if self._points_deleted is True:
            warnings.warn(
                "DynamicMemoryIndex.save() currently requires DynamicMemoryIndex.consolidate_delete() to be called "
                "prior to save when items have been marked for deletion. This is being done automatically now, though "
                "it will increase the time it takes to save; on large sets of data it can take a substantial amount of "
                "time. In the future, we will implement a faster save with unconsolidated deletes, but for now this is "
                "required."
            )
            self._index.consolidate_delete()
        self._index.save(
            save_path=save_path, compact_before_save=True
        )  # we do not yet support uncompacted saves
        _write_index_metadata(
            save_path,
            self._vector_dtype,
            self._dap_metric,
            self._index.num_points(),
            self._dimensions,
        )

    def insert(self, vector: VectorLike, vector_id: VectorIdentifier):
        """
        Inserts a single vector into the index with the provided vector_id.

        If this insertion will overrun the `max_vectors` count boundaries of this index, `consolidate_delete()` will
        be executed automatically.

        ### Parameters
        - **vector**: The vector to insert. Note that dtype must match.
        - **vector_id**: The vector_id to use for this vector.
        """
        _vector = _castable_dtype_or_raise(vector, expected=self._vector_dtype)
        # Fix: validate the cast array, not the raw argument — a plain sequence has no `.shape`.
        _assert(len(_vector.shape) == 1, "insert vector must be 1-d")
        _assert_is_positive_uint32(vector_id, "vector_id")
        if self._num_vectors + 1 > self._max_vectors:
            if self._removed_num_vectors > 0:
                warnings.warn(f"Inserting this vector would overrun the max_vectors={self._max_vectors} specified at index "
                              f"construction. We are attempting to consolidate_delete() to make space.")
                self.consolidate_delete()
            else:
                raise RuntimeError(f"Inserting this vector would overrun the max_vectors={self._max_vectors} specified "
                                   f"at index construction. Unable to make space by consolidating deletions. The insert "
                                   f"operation has failed.")
        status = self._index.insert(_vector, np.uint32(vector_id))
        if status == 0:
            self._num_vectors += 1
        else:
            raise RuntimeError(
                f"Insert was unable to complete successfully; error code returned from diskann C++ lib: {status}"
            )

    def batch_insert(
        self,
        vectors: VectorLikeBatch,
        vector_ids: VectorIdentifierBatch,
        num_threads: int = 0,
    ):
        """
        Inserts a batch of vectors into the index with the provided vector_ids.

        If this batch insertion will overrun the `max_vectors` count boundaries of this index, `consolidate_delete()`
        will be executed automatically.

        ### Parameters
        - **vectors**: The 2d numpy array of vectors to insert.
        - **vector_ids**: The 1d array of vector ids to use. This array must have the same number of elements as
          the vectors array has rows. The dtype of vector_ids must be `np.uint32`
        - **num_threads**: Number of threads to use when inserting into this index. (>= 0), 0 = num_threads in system
        """
        # Fix: actually use the validated/cast result (previously it was assigned to an unused
        # `_query` variable and the raw arguments were validated/cast a second time).
        _vectors = _castable_dtype_or_raise(vectors, expected=self._vector_dtype)
        _assert(len(_vectors.shape) == 2, "vectors must be a 2-d array")
        _vector_ids = np.asarray(vector_ids).astype(dtype=np.uint32, casting="safe", copy=False)
        _assert(
            _vectors.shape[0] == _vector_ids.shape[0],
            "Number of vectors must be equal to number of ids",
        )

        if self._num_vectors + _vector_ids.shape[0] > self._max_vectors:
            # Consolidation only helps if the pending deletions would free enough slots for this
            # batch to fit. (Fix: the previous check compared max_vectors + removed count against
            # the batch size, which could trigger a futile consolidation or a spurious failure.)
            if self._num_vectors - self._removed_num_vectors + _vector_ids.shape[0] <= self._max_vectors:
                warnings.warn(f"Inserting these vectors, count={_vector_ids.shape[0]} would overrun the "
                              f"max_vectors={self._max_vectors} specified at index construction. We are attempting to "
                              f"consolidate_delete() to make space.")
                self.consolidate_delete()
            else:
                raise RuntimeError(f"Inserting these vectors count={_vector_ids.shape[0]} would overrun the "
                                   f"max_vectors={self._max_vectors} specified at index construction. Unable to make "
                                   f"space by consolidating deletions. The batch insert operation has failed.")

        statuses = self._index.batch_insert(
            _vectors, _vector_ids, _vector_ids.shape[0], num_threads
        )
        successes = []
        failures = []
        for i in range(0, len(statuses)):
            if statuses[i] == 0:
                successes.append(i)
            else:
                failures.append(i)
        self._num_vectors += len(successes)
        if len(failures) == 0:
            return
        failed_ids = _vector_ids[failures]
        raise RuntimeError(
            f"During batch insert, the following vector_ids were unable to be inserted into the index: {failed_ids}. "
            f"{len(successes)} were successfully inserted"
        )

    def mark_deleted(self, vector_id: VectorIdentifier):
        """
        Mark vector for deletion. This is a soft delete that won't return the vector id in any results, but does not
        remove it from the underlying index files or memory structure. To execute a hard delete, call this method and
        then call the much more expensive `consolidate_delete` method on this index.

        ### Parameters
        - **vector_id**: The vector id to delete. Must be a uint32.
        """
        _assert_is_positive_uint32(vector_id, "vector_id")
        self._points_deleted = True
        self._removed_num_vectors += 1
        # we do not decrement self._num_vectors until consolidate_delete
        self._index.mark_deleted(np.uint32(vector_id))

    def consolidate_delete(self):
        """
        This method actually restructures the DiskANN index to remove the items that have been marked for deletion.
        """
        self._index.consolidate_delete()
        self._points_deleted = False
        self._num_vectors -= self._removed_num_vectors
        self._removed_num_vectors = 0

View File

@@ -0,0 +1,122 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import warnings
from typing import BinaryIO, Literal, NamedTuple
import numpy as np
import numpy.typing as npt
from . import VectorDType, VectorIdentifierBatch, VectorLikeBatch
from ._common import _assert, _assert_2d, _assert_dtype, _assert_existing_file
class Metadata(NamedTuple):
    """
    DiskANN binary vector files contain a small stanza containing some metadata about them:
    two int32 values (vector count, then dimensionality) at the start of the file, as read
    and written by the helpers in this module.
    """

    num_vectors: int
    """ The number of vectors in the file. """
    dimensions: int
    """ The dimensionality of the vectors in the file. """
def vectors_metadata_from_file(vector_file: str) -> Metadata:
    """
    Read the metadata from a DiskANN binary vector file.

    ### Parameters
    - **vector_file**: The path to the vector file to read the metadata from.

    ### Returns
    `diskannpy.Metadata`
    """
    _assert_existing_file(vector_file, "vector_file")
    # The first two int32 values in the file are the vector count and the dimensionality.
    header = np.fromfile(file=vector_file, dtype=np.int32, count=2)
    return Metadata(header[0], header[1])
def _write_bin(data: np.ndarray, file_handler: BinaryIO):
if len(data.shape) == 1:
_ = file_handler.write(np.array([data.shape[0], 1], dtype=np.int32).tobytes())
else:
_ = file_handler.write(np.array(data.shape, dtype=np.int32).tobytes())
_ = file_handler.write(data.tobytes())
def vectors_to_file(vector_file: str, vectors: VectorLikeBatch) -> None:
    """
    Utility function that writes a DiskANN binary vector formatted file to the location of your choosing.

    ### Parameters
    - **vector_file**: The path to the vector file to write the vectors to.
    - **vectors**: A 2d array of dtype `numpy.float32`, `numpy.uint8`, or `numpy.int8`
    """
    _assert_dtype(vectors.dtype)
    _assert_2d(vectors, "vectors")
    with open(vector_file, "wb") as out_handle:
        _write_bin(vectors, out_handle)
def vectors_from_file(
    vector_file: str,
    dtype: VectorDType,
    use_memmap: bool = False,
    mode: Literal["r", "r+"] = "r"
) -> npt.NDArray[VectorDType]:
    """
    Read vectors from a DiskANN binary vector file.

    ### Parameters
    - **vector_file**: The path to the vector file to read the vectors from.
    - **dtype**: The data type of the vectors in the file. Ensure you match the data types exactly
    - **use_memmap**: If True, return a np.memmap, else a standard np.ndarray will be returned
    - **mode**: Read-only (r) or read-write (r+) (memmap only). Unlike np.memmap, default is read-only (r)

    ### Returns
    `numpy.typing.NDArray[dtype] | numpy.memmap`
    """
    assert mode in ("r", "r+")
    num_points, num_dims = vectors_metadata_from_file(vector_file)
    # offset=8 skips the two int32 metadata values at the head of the file.
    if use_memmap:
        return np.memmap(
            vector_file, dtype=dtype, mode=mode, offset=8, shape=(num_points, num_dims), order="C"
        )
    return np.fromfile(file=vector_file, dtype=dtype, offset=8).reshape(num_points, num_dims)
def tags_to_file(tags_file: str, tags: VectorIdentifierBatch) -> None:
    """
    Write tags to a DiskANN binary tag file.

    ### Parameters
    - **tags_file**: The path to the tag file to write the tags to.
    - **tags**: A 1d array of dtype `numpy.uint32` containing the tags to write. If you have a 2d array of tags with
      one column, you can pass it here and it will be reshaped and copied to a new array. It is more efficient for you
      to reshape on your own without copying it first, as it should be a constant time operation vs. linear time
    """
    _assert(np.can_cast(tags.dtype, np.uint32), "valid tags must be uint32")
    _assert(
        len(tags.shape) == 1 or tags.shape[1] == 1,
        "tags must be 1d or 2d with 1 column",
    )
    if len(tags.shape) == 2:
        warnings.warn(
            "Tags in 2d with one column will be reshaped and copied to a new array. "
            "It is more efficient for you to reshape without copying first."
        )
        # Fix: `ndarray.reshape(..., copy=True)` only exists in NumPy >= 2.1 and raises a
        # TypeError on earlier versions; flatten first and copy explicitly instead.
        tags = tags.reshape(tags.shape[0]).copy()
    with open(tags_file, "wb") as fh:
        _write_bin(tags.astype(np.uint32), fh)
def tags_from_file(tags_file: str) -> VectorIdentifierBatch:
    """
    Read tags from a DiskANN binary tag file and return them as a 1d array of dtype `numpy.uint32`.

    ### Parameters
    - **tags_file**: The path to the tag file to read the tags from.
    """
    _assert_existing_file(tags_file, "tags_file")
    # Tag files share the same (count, dims) metadata stanza as vector files.
    num_tags, _ = vectors_metadata_from_file(tags_file)
    return np.fromfile(file=tags_file, dtype=np.uint32, offset=8).reshape(num_tags)

View File

@@ -0,0 +1,244 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import os
import warnings
from typing import Optional
import numpy as np
from . import (
DistanceMetric,
QueryResponse,
QueryResponseBatch,
VectorDType,
VectorLike,
VectorLikeBatch,
)
from . import _diskannpy as _native_dap
from ._common import (
_assert,
_assert_2d,
_assert_is_nonnegative_uint32,
_assert_is_positive_uint32,
_castable_dtype_or_raise,
_ensure_index_metadata,
_valid_index_prefix,
_valid_metric,
)
__ALL__ = ["StaticDiskIndex"]
class StaticDiskIndex:
"""
A StaticDiskIndex is a disk-backed index that is not mutable.
"""
def __init__(
self,
index_directory: str,
num_threads: int,
num_nodes_to_cache: int,
cache_mechanism: int = 1,
distance_metric: Optional[DistanceMetric] = None,
vector_dtype: Optional[VectorDType] = None,
dimensions: Optional[int] = None,
index_prefix: str = "ann",
pq_prefix: str = "",
partition_prefix: str = "",
):
"""
### Parameters
- **index_directory**: The directory containing the index files. This directory must contain the following
files:
- `{index_prefix}_sample_data.bin`
- `{index_prefix}_mem.index.data`
- `{index_prefix}_pq_compressed.bin`
- `{index_prefix}_pq_pivots.bin`
- `{index_prefix}_sample_ids.bin`
- `{index_prefix}_disk.index`
It may also include the following optional files:
- `{index_prefix}_vectors.bin`: Optional. `diskannpy` builder functions may create this file in the
`index_directory` if the index was created from a numpy array
- `{index_prefix}_metadata.bin`: Optional. `diskannpy` builder functions create this file to store metadata
about the index, such as vector dtype, distance metric, number of vectors and vector dimensionality.
If an index is built from the `diskann` cli tools, this file will not exist.
- **num_threads**: Number of threads to use when searching this index. (>= 0), 0 = num_threads in system
- **num_nodes_to_cache**: Number of nodes to cache in memory (> -1)
- **cache_mechanism**: 1 -> use the generated sample_data.bin file for
the index to initialize a set of cached nodes, up to `num_nodes_to_cache`, 2 -> ready the cache for up to
`num_nodes_to_cache`, but do not initialize it with any nodes. Any other value disables node caching.
- **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3
vector dtypes, but `mips` is only available for single precision floats. Default is `None`. **This
value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it does not exist,
you are required to provide it.
- **vector_dtype**: The vector dtype this index has been built with. **This value is only used if a
`{index_prefix}_metadata.bin` file does not exist.** If it does not exist, you are required to provide it.
- **dimensions**: The vector dimensionality of this index. All new vectors inserted must be the same
dimensionality. **This value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it
does not exist, you are required to provide it.
- **index_prefix**: The prefix of the index files. Defaults to "ann".
"""
index_prefix_path = _valid_index_prefix(index_directory, index_prefix)
vector_dtype, metric, _, _ = _ensure_index_metadata(
index_prefix_path,
vector_dtype,
distance_metric,
1, # it doesn't matter because we don't need it in this context anyway
dimensions,
)
dap_metric = _valid_metric(metric)
_assert_is_nonnegative_uint32(num_threads, "num_threads")
_assert_is_nonnegative_uint32(num_nodes_to_cache, "num_nodes_to_cache")
self._vector_dtype = vector_dtype
if vector_dtype == np.uint8:
_index = _native_dap.StaticDiskUInt8Index
elif vector_dtype == np.int8:
_index = _native_dap.StaticDiskInt8Index
else:
_index = _native_dap.StaticDiskFloatIndex
self._index = _index(
distance_metric=dap_metric,
index_path_prefix=index_prefix_path,
num_threads=num_threads,
num_nodes_to_cache=num_nodes_to_cache,
cache_mechanism=cache_mechanism,
pq_prefix=pq_prefix,
partition_prefix=partition_prefix,
)
print("After index init")
def search(
self,
query: VectorLike,
k_neighbors: int,
complexity: int,
beam_width: int = 2,
USE_DEFERRED_FETCH: bool = False,
skip_search_reorder: bool = False,
recompute_beighbor_embeddings: bool = False,
dedup_node_dis: bool = False,
prune_ratio: float = 0,
batch_recompute: bool = False,
global_pruning: bool = False,
) -> QueryResponse:
"""
Searches the index by a single query vector.
### Parameters
- **query**: 1d numpy array of the same dimensionality and dtype of the index.
- **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely
will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0.
- **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size
increases accuracy at the cost of latency. Must be at least k_neighbors in size.
- **beam_width**: The beamwidth to be used for search. This is the maximum number of IO requests each query
will issue per iteration of search code. Larger beamwidth will result in fewer IO round-trips per query,
but might result in slightly higher total number of IO requests to SSD per query. For the highest query
throughput with a fixed SSD IOps rating, use W=1. For best latency, use W=4,8 or higher complexity search.
Specifying 0 will optimize the beamwidth depending on the number of threads performing search, but will
involve some tuning overhead.
- **skip_search_reorder**: Whether to skip search reorder for diskann search.
- **recompute_beighbor_embeddings**: Whether to recompute the neighbor embeddings.
- **dedup_node_dis**: Whether to dedup node distances.
- **batch_recompute**: Whether to batch recompute.
"""
_query = _castable_dtype_or_raise(query, expected=self._vector_dtype)
_assert(len(_query.shape) == 1, "query vector must be 1-d")
_assert_is_positive_uint32(k_neighbors, "k_neighbors")
_assert_is_positive_uint32(complexity, "complexity")
_assert_is_positive_uint32(beam_width, "beam_width")
if k_neighbors > complexity:
warnings.warn(
f"{k_neighbors=} asked for, but {complexity=} was smaller. Increasing {complexity} to {k_neighbors}"
)
complexity = k_neighbors
neighbors, distances = self._index.search(
query=_query,
knn=k_neighbors,
complexity=complexity,
beam_width=beam_width,
USE_DEFERRED_FETCH=USE_DEFERRED_FETCH,
skip_search_reorder=skip_search_reorder,
recompute_beighbor_embeddings=recompute_beighbor_embeddings,
dedup_node_dis=dedup_node_dis,
prune_ratio=prune_ratio,
batch_recompute=batch_recompute,
global_pruning=global_pruning,
)
return QueryResponse(identifiers=neighbors, distances=distances)
def batch_search(
    self,
    queries: VectorLikeBatch,
    k_neighbors: int,
    complexity: int,
    num_threads: int,
    beam_width: int = 2,
    USE_DEFERRED_FETCH: bool = False,
    skip_search_reorder: bool = False,
    recompute_beighbor_embeddings: bool = False,
    dedup_node_dis: bool = False,
    prune_ratio: float = 0,
    batch_recompute: bool = False,
    global_pruning: bool = False,
) -> QueryResponseBatch:
    """
    Searches the index by a batch of query vectors.

    This search is parallelized and far more efficient than searching for each vector individually.

    ### Parameters
    - **queries**: 2d numpy array, with column dimensionality matching the index and row dimensionality being the
      number of queries intended to search for in parallel. Dtype must match dtype of the index.
    - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely
      will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0.
    - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size
      increases accuracy at the cost of latency. Must be at least k_neighbors in size; smaller values are raised
      to ``k_neighbors`` with a warning.
    - **num_threads**: Number of threads to use when searching this index. (>= 0), 0 = num_threads in system
    - **beam_width**: The beamwidth to be used for search. This is the maximum number of IO requests each query
      will issue per iteration of search code. Larger beamwidth will result in fewer IO round-trips per query,
      but might result in slightly higher total number of IO requests to SSD per query. For the highest query
      throughput with a fixed SSD IOps rating, use W=1. For best latency, use W=4,8 or higher complexity search.
      Specifying 0 will optimize the beamwidth depending on the number of threads performing search, but will
      involve some tuning overhead.
    - **USE_DEFERRED_FETCH**: Forwarded verbatim to the native index's batch_search.
    - **skip_search_reorder**: Whether to skip search reorder for diskann search.
    - **recompute_beighbor_embeddings**: Whether to recompute the neighbor embeddings. (The misspelled name is
      part of the public keyword interface and is kept for backward compatibility.)
    - **dedup_node_dis**: Whether to dedup node distances.
    - **prune_ratio**: Forwarded to the native index. NOTE(review): semantics defined by the native binding —
      confirm there before relying on it.
    - **batch_recompute**: Whether to batch recompute.
    - **global_pruning**: Whether to apply global pruning.
    """
    # Cast/validate the batch before handing it to the native extension.
    _queries = _castable_dtype_or_raise(queries, expected=self._vector_dtype)
    _assert_2d(_queries, "queries")
    _assert_is_positive_uint32(k_neighbors, "k_neighbors")
    _assert_is_positive_uint32(complexity, "complexity")
    _assert_is_nonnegative_uint32(num_threads, "num_threads")
    _assert_is_positive_uint32(beam_width, "beam_width")

    if k_neighbors > complexity:
        warnings.warn(
            f"{k_neighbors=} asked for, but {complexity=} was smaller. Increasing {complexity} to {k_neighbors}"
        )
        complexity = k_neighbors

    num_queries, dim = _queries.shape
    # BUG FIX: removed a leftover debug `print` of the flag values that wrote to
    # stdout on every batch search.
    neighbors, distances = self._index.batch_search(
        queries=_queries,
        num_queries=num_queries,
        knn=k_neighbors,
        complexity=complexity,
        beam_width=beam_width,
        num_threads=num_threads,
        USE_DEFERRED_FETCH=USE_DEFERRED_FETCH,
        skip_search_reorder=skip_search_reorder,
        recompute_beighbor_embeddings=recompute_beighbor_embeddings,
        dedup_node_dis=dedup_node_dis,
        prune_ratio=prune_ratio,
        batch_recompute=batch_recompute,
        global_pruning=global_pruning,
    )
    return QueryResponseBatch(identifiers=neighbors, distances=distances)

View File

@@ -0,0 +1,262 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import json
import os
import warnings
from typing import Optional
import numpy as np
from . import (
DistanceMetric,
QueryResponse,
QueryResponseBatch,
VectorDType,
VectorLike,
VectorLikeBatch,
)
from . import _diskannpy as _native_dap
from ._common import (
_assert,
_assert_is_nonnegative_uint32,
_assert_is_positive_uint32,
_castable_dtype_or_raise,
_ensure_index_metadata,
_valid_index_prefix,
_valid_metric,
)
__ALL__ = ["StaticMemoryIndex"]
class StaticMemoryIndex:
    """
    A StaticMemoryIndex is an immutable in-memory DiskANN index.

    Wraps one of the native ``StaticMemory{Float,Int8,UInt8}Index`` pybind11 classes,
    selected from the index's vector dtype.
    """

    def __init__(
        self,
        index_directory: str,
        num_threads: int,
        initial_search_complexity: int,
        index_prefix: str = "ann",
        distance_metric: Optional[DistanceMetric] = None,
        vector_dtype: Optional[VectorDType] = None,
        dimensions: Optional[int] = None,
        enable_filters: bool = False,
    ):
        """
        ### Parameters
        - **index_directory**: The directory containing the index files. This directory must contain the following
          files:
            - `{index_prefix}.data`
            - `{index_prefix}`

          It may also include the following optional files:
            - `{index_prefix}_vectors.bin`: Optional. `diskannpy` builder functions may create this file in the
              `index_directory` if the index was created from a numpy array
            - `{index_prefix}_metadata.bin`: Optional. `diskannpy` builder functions create this file to store metadata
              about the index, such as vector dtype, distance metric, number of vectors and vector dimensionality.
              If an index is built from the `diskann` cli tools, this file will not exist.
        - **num_threads**: Number of threads to use when searching this index. (>= 0), 0 = num_threads in system
        - **initial_search_complexity**: Should be set to the most common `complexity` expected to be used during the
          life of this `diskannpy.StaticMemoryIndex` object. The working scratch memory allocated is based off of
          `initial_search_complexity` * `search_threads`. Note that it may be resized if a `search` or `batch_search`
          operation requests a space larger than can be accommodated by these values.
        - **index_prefix**: The prefix of the index files. Defaults to "ann".
        - **distance_metric**: A `str`, strictly one of {"l2", "mips", "cosine"}. `l2` and `cosine` are supported for all 3
          vector dtypes, but `mips` is only available for single precision floats. Default is `None`. **This
          value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it does not exist,
          you are required to provide it.
        - **vector_dtype**: The vector dtype this index has been built with. **This value is only used if a
          `{index_prefix}_metadata.bin` file does not exist.** If it does not exist, you are required to provide it.
        - **dimensions**: The vector dimensionality of this index. All new vectors inserted must be the same
          dimensionality. **This value is only used if a `{index_prefix}_metadata.bin` file does not exist.** If it
          does not exist, you are required to provide it.
        - **enable_filters**: Indexes built with filters can also be used for filtered search.
        """
        index_prefix_path = _valid_index_prefix(index_directory, index_prefix)

        # external(str) -> internal(np.uint32) filter label map; populated only when
        # enable_filters is True
        self._labels_map = {}
        # per-label metadata loaded from `{prefix}_label_metadata.json`; used to cap
        # k_neighbors in filtered searches
        self._labels_metadata = {}
        if enable_filters:
            try:
                with open(f"{index_prefix_path}_labels_map.txt", "r") as labels_map_if:
                    for line in labels_map_if:
                        (key, val) = line.split("\t")
                        self._labels_map[key] = int(val)
                with open(
                    f"{index_prefix_path}_label_metadata.json", "r"
                ) as labels_metadata_if:
                    self._labels_metadata = json.load(labels_metadata_if)
            except Exception as e:
                # exceptions are basically presumed to be either file not found or
                # file not formatted correctly.
                # BUG FIX: this previously raised `RuntimeException`, which is not a
                # Python builtin and would itself fail with NameError.
                raise RuntimeError(
                    "Filter labels file was unable to be processed."
                ) from e

        vector_dtype, metric, num_points, dims = _ensure_index_metadata(
            index_prefix_path,
            vector_dtype,
            distance_metric,
            1,  # it doesn't matter because we don't need it in this context anyway
            dimensions,
        )
        dap_metric = _valid_metric(metric)

        _assert_is_nonnegative_uint32(num_threads, "num_threads")
        _assert_is_positive_uint32(
            initial_search_complexity, "initial_search_complexity"
        )

        self._vector_dtype = vector_dtype
        self._dimensions = dims

        # Pick the native class matching the vector dtype (float is the fallback).
        if vector_dtype == np.uint8:
            _index = _native_dap.StaticMemoryUInt8Index
        elif vector_dtype == np.int8:
            _index = _native_dap.StaticMemoryInt8Index
        else:
            _index = _native_dap.StaticMemoryFloatIndex
        self._index = _index(
            distance_metric=dap_metric,
            num_points=num_points,
            dimensions=dims,
            index_path=index_prefix_path,
            num_threads=num_threads,
            initial_search_complexity=initial_search_complexity,
        )

    def search(
        self,
        query: VectorLike,
        k_neighbors: int,
        complexity: int,
        filter_label: str = "",
        USE_DEFERRED_FETCH: bool = False,
        skip_search_reorder: bool = False,
        recompute_beighbor_embeddings: bool = False,
        dedup_node_dis: bool = False,
        prune_ratio: float = 0,
        batch_recompute: bool = False,
        global_pruning: bool = False,
    ) -> QueryResponse:
        """
        Searches the index by a single query vector.

        ### Parameters
        - **query**: 1d numpy array of the same dimensionality and dtype of the index.
        - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely
          will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0.
        - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size
          increases accuracy at the cost of latency. Must be at least k_neighbors in size.
        - **filter_label**: If non-empty, restricts the search to points carrying this label. Requires the index to
          have been constructed with ``enable_filters=True``. NOTE: the extra search knobs below are only forwarded
          on the unfiltered path; the filtered native call does not accept them.
        """
        if filter_label != "":
            if len(self._labels_map) == 0:
                raise ValueError(
                    f"A filter label of {filter_label} was provided, but this class was not initialized with filters "
                    "enabled, e.g. StaticMemoryIndex(..., enable_filters=True)"
                )
            if filter_label not in self._labels_map:
                raise ValueError(
                    f"A filter label of {filter_label} was provided, but the external(str)->internal(np.uint32) labels map "
                    f"does not include that label."
                )
            # Cannot return more neighbors than there are points carrying the label.
            k_neighbors = min(k_neighbors, self._labels_metadata[filter_label])
        _query = _castable_dtype_or_raise(query, expected=self._vector_dtype)
        _assert(len(_query.shape) == 1, "query vector must be 1-d")
        _assert(
            _query.shape[0] == self._dimensions,
            f"query vector must have the same dimensionality as the index; index dimensionality: {self._dimensions}, "
            f"query dimensionality: {_query.shape[0]}",
        )
        _assert_is_positive_uint32(k_neighbors, "k_neighbors")
        _assert_is_nonnegative_uint32(complexity, "complexity")

        if k_neighbors > complexity:
            warnings.warn(
                f"k_neighbors={k_neighbors} asked for, but list_size={complexity} was smaller. Increasing {complexity} to {k_neighbors}"
            )
            complexity = k_neighbors

        if filter_label == "":
            neighbors, distances = self._index.search(
                query=_query,
                knn=k_neighbors,
                complexity=complexity,
                USE_DEFERRED_FETCH=USE_DEFERRED_FETCH,
                skip_search_reorder=skip_search_reorder,
                recompute_beighbor_embeddings=recompute_beighbor_embeddings,
                dedup_node_dis=dedup_node_dis,
                prune_ratio=prune_ratio,
                batch_recompute=batch_recompute,
                global_pruning=global_pruning,
            )
        else:
            filter_id = self._labels_map[filter_label]
            # BUG FIX: previously passed the raw, uncast `query` here instead of the
            # validated `_query`, bypassing the dtype cast performed above.
            neighbors, distances = self._index.search_with_filter(
                query=_query, knn=k_neighbors, complexity=complexity, filter=filter_id
            )
        return QueryResponse(identifiers=neighbors, distances=distances)

    def batch_search(
        self,
        queries: VectorLikeBatch,
        k_neighbors: int,
        complexity: int,
        num_threads: int,
        USE_DEFERRED_FETCH: bool = False,
        skip_search_reorder: bool = False,
        recompute_beighbor_embeddings: bool = False,
        dedup_node_dis: bool = False,
        prune_ratio: float = 0,
        batch_recompute: bool = False,
        global_pruning: bool = False,
    ) -> QueryResponseBatch:
        """
        Searches the index by a batch of query vectors.

        This search is parallelized and far more efficient than searching for each vector individually.

        ### Parameters
        - **queries**: 2d numpy array, with column dimensionality matching the index and row dimensionality being the
          number of queries intended to search for in parallel. Dtype must match dtype of the index.
        - **k_neighbors**: Number of neighbors to be returned. If query vector exists in index, it almost definitely
          will be returned as well, so adjust your ``k_neighbors`` as appropriate. Must be > 0.
        - **complexity**: Size of distance ordered list of candidate neighbors to use while searching. List size
          increases accuracy at the cost of latency. Must be at least k_neighbors in size.
        - **num_threads**: Number of threads to use when searching this index. (>= 0), 0 = num_threads in system
        """
        _queries = _castable_dtype_or_raise(queries, expected=self._vector_dtype)
        _assert(len(_queries.shape) == 2, "queries must must be 2-d np array")
        _assert(
            _queries.shape[1] == self._dimensions,
            f"query vectors must have the same dimensionality as the index; index dimensionality: {self._dimensions}, "
            f"query dimensionality: {_queries.shape[1]}",
        )
        _assert_is_positive_uint32(k_neighbors, "k_neighbors")
        _assert_is_positive_uint32(complexity, "complexity")
        _assert_is_nonnegative_uint32(num_threads, "num_threads")

        if k_neighbors > complexity:
            warnings.warn(
                f"k_neighbors={k_neighbors} asked for, but list_size={complexity} was smaller. Increasing {complexity} to {k_neighbors}"
            )
            complexity = k_neighbors

        num_queries, dim = _queries.shape
        neighbors, distances = self._index.batch_search(
            queries=_queries,
            num_queries=num_queries,
            knn=k_neighbors,
            complexity=complexity,
            num_threads=num_threads,
            USE_DEFERRED_FETCH=USE_DEFERRED_FETCH,
            skip_search_reorder=skip_search_reorder,
            recompute_beighbor_embeddings=recompute_beighbor_embeddings,
            dedup_node_dis=dedup_node_dis,
            prune_ratio=prune_ratio,
            batch_recompute=batch_recompute,
            global_pruning=global_pruning,
        )
        return QueryResponseBatch(identifiers=neighbors, distances=distances)

View File

@@ -0,0 +1,136 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#include "builder.h"
#include "common.h"
#include "disk_utils.h"
#include "index.h"
#include "parameters.h"
namespace diskannpy
{
// Builds an SSD-resident DiskANN index by delegating to diskann::build_disk_index.
// Build knobs are serialized into a single space-separated parameter string, in
// the positional order the native builder expects:
//   "<graph_degree> <complexity> <final_index_ram_limit> <indexing_ram_budget> <num_threads> [pq_disk_bytes]"
// pq_disk_bytes is appended only when > 0; a non-empty codebook_prefix is appended
// after it and also passed as the trailing argument of the native call.
template <typename DT>
void build_disk_index(const diskann::Metric metric, const std::string &data_file_path,
                      const std::string &index_prefix_path, const uint32_t complexity, const uint32_t graph_degree,
                      const double final_index_ram_limit, const double indexing_ram_budget, const uint32_t num_threads,
                      const uint32_t pq_disk_bytes, const std::string &codebook_prefix)
{
    std::string params = std::to_string(graph_degree) + " " + std::to_string(complexity) + " " +
                         std::to_string(final_index_ram_limit) + " " + std::to_string(indexing_ram_budget) + " " +
                         std::to_string(num_threads);
    if (pq_disk_bytes > 0)
        params = params + " " + std::to_string(pq_disk_bytes);
    if (!codebook_prefix.empty())
        params = params + " " + codebook_prefix;
    diskann::build_disk_index<DT>(data_file_path.c_str(), index_prefix_path.c_str(), params.c_str(), metric, false,
                                  codebook_prefix);
}
// Explicit instantiations for the three vector dtypes exposed to Python.
template void build_disk_index<float>(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t,
                                      double, double, uint32_t, uint32_t, const std::string &);
template void build_disk_index<uint8_t>(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t,
                                        double, double, uint32_t, uint32_t, const std::string &);
template void build_disk_index<int8_t>(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t,
                                       double, double, uint32_t, uint32_t, const std::string &);
// Converts a string-labelled filter file into the integer-labelled form the index
// consumes, writing `<prefix>_label_formatted.txt` (returned) and the
// string->int mapping `<prefix>_labels_map.txt` (read back by the Python wrappers).
// If a universal label is supplied, it is registered on the index as label id 0
// — presumably convert_labels_string_to_int always maps the universal label to 0;
// TODO(review): confirm that convention, since the id is hard-coded here.
template <typename T, typename TagT, typename LabelT>
std::string prepare_filtered_label_map(diskann::Index<T, TagT, LabelT> &index, const std::string &index_output_path,
                                       const std::string &filter_labels_file, const std::string &universal_label)
{
    std::string labels_file_to_use = index_output_path + "_label_formatted.txt";
    std::string mem_labels_int_map_file = index_output_path + "_labels_map.txt";
    convert_labels_string_to_int(filter_labels_file, labels_file_to_use, mem_labels_int_map_file, universal_label);
    if (!universal_label.empty())
    {
        uint32_t unv_label_as_num = 0;
        index.set_universal_label(unv_label_as_num);
    }
    return labels_file_to_use;
}
// Explicit instantiations for the three vector dtypes exposed to Python.
template std::string prepare_filtered_label_map<float>(diskann::Index<float, uint32_t, uint32_t> &, const std::string &,
                                                       const std::string &, const std::string &);
template std::string prepare_filtered_label_map<int8_t>(diskann::Index<int8_t, uint32_t, uint32_t> &,
                                                        const std::string &, const std::string &, const std::string &);
template std::string prepare_filtered_label_map<uint8_t>(diskann::Index<uint8_t, uint32_t, uint32_t> &,
                                                         const std::string &, const std::string &, const std::string &);
// Builds an in-memory DiskANN index from a binary vector file and saves it to
// index_output_path. Supports four build modes: {tagged, untagged} x {filtered,
// unfiltered}. When use_tags is set, a `<index_output_path>.tags` file must
// already exist and supply one tag per vector.
template <typename T, typename TagT, typename LabelT>
void build_memory_index(const diskann::Metric metric, const std::string &vector_bin_path,
                        const std::string &index_output_path, const uint32_t graph_degree, const uint32_t complexity,
                        const float alpha, const uint32_t num_threads, const bool use_pq_build,
                        const size_t num_pq_bytes, const bool use_opq, const bool use_tags,
                        const std::string &filter_labels_file, const std::string &universal_label,
                        const uint32_t filter_complexity)
{
    diskann::IndexWriteParameters index_build_params = diskann::IndexWriteParametersBuilder(complexity, graph_degree)
                                                           .with_filter_list_size(filter_complexity)
                                                           .with_alpha(alpha)
                                                           .with_saturate_graph(false)
                                                           .with_num_threads(num_threads)
                                                           .build();
    // Search params reuse the build-time list size as the initial search complexity.
    diskann::IndexSearchParams index_search_params =
        diskann::IndexSearchParams(index_build_params.search_list_size, num_threads);
    size_t data_num, data_dim;
    diskann::get_bin_metadata(vector_bin_path, data_num, data_dim);
    diskann::Index<T, TagT, LabelT> index(metric, data_dim, data_num,
                                          std::make_shared<diskann::IndexWriteParameters>(index_build_params),
                                          std::make_shared<diskann::IndexSearchParams>(index_search_params), 0,
                                          use_tags, use_tags, false, use_pq_build, num_pq_bytes, use_opq);
    if (use_tags)
    {
        const std::string tags_file = index_output_path + ".tags";
        if (!file_exists(tags_file))
        {
            throw std::runtime_error("tags file not found at expected path: " + tags_file);
        }
        // NOTE(review): tags_data is populated by load_bin and copied into `tags`,
        // but never freed here — possible leak; confirm load_bin's ownership
        // semantics (data_num is also reused as the expected tag count).
        TagT *tags_data;
        size_t tag_dims = 1;
        diskann::load_bin(tags_file, tags_data, data_num, tag_dims);
        std::vector<TagT> tags(tags_data, tags_data + data_num);
        if (filter_labels_file.empty())
        {
            index.build(vector_bin_path.c_str(), data_num, tags);
        }
        else
        {
            auto labels_file = prepare_filtered_label_map<T, TagT, LabelT>(index, index_output_path, filter_labels_file,
                                                                           universal_label);
            index.build_filtered_index(vector_bin_path.c_str(), labels_file, data_num, tags);
        }
    }
    else
    {
        if (filter_labels_file.empty())
        {
            index.build(vector_bin_path.c_str(), data_num);
        }
        else
        {
            auto labels_file = prepare_filtered_label_map<T, TagT, LabelT>(index, index_output_path, filter_labels_file,
                                                                           universal_label);
            index.build_filtered_index(vector_bin_path.c_str(), labels_file, data_num);
        }
    }
    index.save(index_output_path.c_str());
}
// Explicit instantiations for the three vector dtypes exposed to Python.
template void build_memory_index<float>(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t,
                                        float, uint32_t, bool, size_t, bool, bool, const std::string &,
                                        const std::string &, uint32_t);
template void build_memory_index<int8_t>(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t,
                                         float, uint32_t, bool, size_t, bool, bool, const std::string &,
                                         const std::string &, uint32_t);
template void build_memory_index<uint8_t>(diskann::Metric, const std::string &, const std::string &, uint32_t, uint32_t,
                                          float, uint32_t, bool, size_t, bool, bool, const std::string &,
                                          const std::string &, uint32_t);
} // namespace diskannpy

View File

@@ -0,0 +1,71 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
"""
# Parameter Defaults
These parameter defaults are re-exported from the C++ extension module, and used to keep the pythonic wrapper in sync with the C++.
"""
from ._diskannpy import defaults as _defaults
ALPHA = _defaults.ALPHA
"""
Note that, as ALPHA is a `float32` (single precision float) in C++, when converted into Python it becomes a
`float64` (double precision float). The actual value is 1.2f. The alpha parameter (>=1) is used to control the nature
and number of points that are added to the graph. A higher alpha value (e.g., 1.4) will result in fewer hops (and IOs)
to convergence, but probably more distance comparisons compared to a lower alpha value.
"""
NUM_THREADS = _defaults.NUM_THREADS
""" Number of threads to use. `0` will use all available detected logical processors """
MAX_OCCLUSION_SIZE = _defaults.MAX_OCCLUSION_SIZE
"""
The maximum number of points that can be occluded by a single point. This is used to prevent a single point from
dominating the graph structure. If a point has more than `max_occlusion_size` neighbors closer to it than the current
point, it will not be added to the graph. This is a tradeoff between index build time and search quality.
"""
FILTER_COMPLEXITY = _defaults.FILTER_COMPLEXITY
"""
Complexity (a.k.a. `L`) references the size of the list we store candidate approximate neighbors in while doing a
filtered search. This value must be larger than `k_neighbors`, and larger values tend toward higher recall in the
resultant ANN search at the cost of more time.
"""
NUM_FROZEN_POINTS_STATIC = _defaults.NUM_FROZEN_POINTS_STATIC
""" Number of points frozen by default in a StaticMemoryIndex """
NUM_FROZEN_POINTS_DYNAMIC = _defaults.NUM_FROZEN_POINTS_DYNAMIC
""" Number of points frozen by default in a DynamicMemoryIndex """
SATURATE_GRAPH = _defaults.SATURATE_GRAPH
""" Whether to saturate the graph or not. Default is `True` """
GRAPH_DEGREE = _defaults.GRAPH_DEGREE
"""
Graph degree (a.k.a. `R`) is the maximum degree allowed for a node in the index's graph structure. This degree will be
pruned throughout the course of the index build, but it will never grow beyond this value. Higher R values require
longer index build times, but may result in an index showing excellent recall and latency characteristics.
"""
COMPLEXITY = _defaults.COMPLEXITY
"""
Complexity (a.k.a `L`) references the size of the list we store candidate approximate neighbors in while doing build
or search tasks. It's used during index build as part of the index optimization processes. It's used in index search
classes both to help mitigate poor latencies during cold start, as well as on subsequent queries to conduct the search.
Large values will likely increase latency but also may improve recall, and tuning these values for your particular
index is certainly a reasonable choice.
"""
PQ_DISK_BYTES = _defaults.PQ_DISK_BYTES
"""
Use `0` to store uncompressed data on SSD. This allows the index to asymptote to 100% recall. If your vectors are
too large to store in SSD, this parameter provides the option to compress the vectors using PQ for storing on SSD.
This will trade off recall. You would also want this to be greater than the number of bytes used for the PQ
compressed data stored in-memory. Default is `0`.
"""
USE_PQ_BUILD = _defaults.USE_PQ_BUILD
"""
Whether to use product quantization in the index building process. Product quantization is an approximation
technique that can vastly speed up vector computations and comparisons in a spatial neighborhood, but it is still an
approximation technique. It should be preferred when index creation times take longer than you can afford for your
use case.
"""
NUM_PQ_BYTES = _defaults.NUM_PQ_BYTES
"""
The number of product quantization bytes to use. More bytes requires more resources in both memory and time, but is
like to result in better approximations.
"""
USE_OPQ = _defaults.USE_OPQ
""" Whether to use Optimized Product Quantization or not. """

View File

@@ -0,0 +1,167 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#include "parameters.h"
#include "dynamic_memory_index.h"
#include "pybind11/numpy.h"
namespace diskannpy
{
// Assembles the native IndexWriteParameters for a dynamic index from the knobs
// exposed to Python, via the fluent builder.
diskann::IndexWriteParameters dynamic_index_write_parameters(const uint32_t complexity, const uint32_t graph_degree,
                                                             const bool saturate_graph,
                                                             const uint32_t max_occlusion_size, const float alpha,
                                                             const uint32_t num_threads,
                                                             const uint32_t filter_complexity)
{
    return diskann::IndexWriteParametersBuilder(complexity, graph_degree)
        .with_saturate_graph(saturate_graph)
        .with_max_occlusion_size(max_occlusion_size)
        .with_alpha(alpha)
        .with_num_threads(num_threads)
        .with_filter_list_size(filter_complexity)
        .build();
}
// Constructs the underlying diskann::Index for a DynamicMemoryIndex. A value of
// 0 for initial_search_threads means "one per logical processor".
template <class DT>
diskann::Index<DT, DynamicIdType, filterT> dynamic_index_builder(
    const diskann::Metric m, const diskann::IndexWriteParameters &write_params, const size_t dimensions,
    const size_t max_vectors, const uint32_t initial_search_complexity, const uint32_t initial_search_threads,
    const bool concurrent_consolidation, const uint32_t num_frozen_points)
{
    const uint32_t _initial_search_threads = initial_search_threads != 0 ? initial_search_threads : omp_get_num_procs();
    auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, _initial_search_threads);
    return diskann::Index<DT, DynamicIdType, filterT>(
        m, dimensions, max_vectors,
        std::make_shared<diskann::IndexWriteParameters>(write_params),     // index write params
        std::make_shared<diskann::IndexSearchParams>(index_search_params), // index_search_params
        num_frozen_points,                                                 // frozen_points
        true,                                                              // dynamic_index
        true,                                                              // enable_tags
        concurrent_consolidation,
        false, // pq_dist_build
        0,     // num_pq_chunks
        false); // use_opq = false
}
// Constructor. NOTE: the member-initializer order matters here —
// _initial_search_complexity and _write_parameters are computed first and then
// consumed by dynamic_index_builder when initializing _index; they must be
// declared in that order in the class. An initial_search_complexity of 0 falls
// back to the build complexity.
template <class DT>
DynamicMemoryIndex<DT>::DynamicMemoryIndex(const diskann::Metric m, const size_t dimensions, const size_t max_vectors,
                                           const uint32_t complexity, const uint32_t graph_degree,
                                           const bool saturate_graph, const uint32_t max_occlusion_size,
                                           const float alpha, const uint32_t num_threads,
                                           const uint32_t filter_complexity, const uint32_t num_frozen_points,
                                           const uint32_t initial_search_complexity,
                                           const uint32_t initial_search_threads, const bool concurrent_consolidation)
    : _initial_search_complexity(initial_search_complexity != 0 ? initial_search_complexity : complexity),
      _write_parameters(dynamic_index_write_parameters(complexity, graph_degree, saturate_graph, max_occlusion_size,
                                                       alpha, num_threads, filter_complexity)),
      _index(dynamic_index_builder<DT>(m, _write_parameters, dimensions, max_vectors, _initial_search_complexity,
                                       initial_search_threads, concurrent_consolidation, num_frozen_points))
{
}
// Loads a previously saved dynamic index from index_path. Dynamic indices are
// tagged, so the companion "<index_path>.tags" file must exist; otherwise we
// fail fast rather than let the native load produce an unusable index.
template <class DT> void DynamicMemoryIndex<DT>::load(const std::string &index_path)
{
    const std::string tag_path = index_path + ".tags";
    if (!file_exists(tag_path))
        throw std::runtime_error("tags file not found at expected path: " + tag_path);
    _index.load(index_path.c_str(), _write_parameters.num_threads, _initial_search_complexity);
}
// Inserts a single vector under the given id, returning the native index's
// insert status code.
template <class DT>
int DynamicMemoryIndex<DT>::insert(const py::array_t<DT, py::array::c_style | py::array::forcecast> &vector,
                                   const DynamicIdType id)
{
    const int status = _index.insert_point(vector.data(), id);
    return status;
}
// Inserts num_inserts (vector, id) pairs in parallel with OpenMP; returns a
// per-row array of the native insert status codes. num_threads == 0 means one
// thread per logical processor. NOTE: omp_set_num_threads changes process-wide
// OpenMP state as a side effect.
template <class DT>
py::array_t<int> DynamicMemoryIndex<DT>::batch_insert(
    py::array_t<DT, py::array::c_style | py::array::forcecast> &vectors,
    py::array_t<DynamicIdType, py::array::c_style | py::array::forcecast> &ids, const int32_t num_inserts,
    const int num_threads)
{
    if (num_threads == 0)
        omp_set_num_threads(omp_get_num_procs());
    else
        omp_set_num_threads(num_threads);
    py::array_t<int> insert_retvals(num_inserts);

#pragma omp parallel for schedule(dynamic, 1) default(none) shared(num_inserts, insert_retvals, vectors, ids)
    for (int32_t i = 0; i < num_inserts; i++)
    {
        insert_retvals.mutable_data()[i] = _index.insert_point(vectors.data(i), *(ids.data(i)));
    }

    return insert_retvals;
}
// Tombstones the point with the given id. The point is only physically removed
// when consolidate_delete() is called; returns the native lazy_delete status.
template <class DT> int DynamicMemoryIndex<DT>::mark_deleted(const DynamicIdType id)
{
    return _index.lazy_delete(id);
}
// Persists the index to save_path, optionally compacting first. Rejects an
// empty path up front instead of passing it to the native layer.
template <class DT> void DynamicMemoryIndex<DT>::save(const std::string &save_path, const bool compact_before_save)
{
    if (save_path.empty())
        throw std::runtime_error("A save_path must be provided");
    _index.save(save_path.c_str(), compact_before_save);
}
// Single-query k-NN search. Allocates knn-sized id/distance buffers, delegates
// to the tag-based native search, and returns the pair (ids, distances).
template <class DT>
NeighborsAndDistances<DynamicIdType> DynamicMemoryIndex<DT>::search(
    py::array_t<DT, py::array::c_style | py::array::forcecast> &query, const uint64_t knn, const uint64_t complexity)
{
    py::array_t<DynamicIdType> neighbor_ids(knn);
    py::array_t<float> neighbor_dists(knn);
    // No raw vector data is requested back from the index.
    std::vector<DT *> no_vectors;
    _index.search_with_tags(query.data(), knn, complexity, neighbor_ids.mutable_data(), neighbor_dists.mutable_data(),
                            no_vectors);
    return std::make_pair(neighbor_ids, neighbor_dists);
}
// Batched k-NN search: one row of ids/distances per query, searched in parallel
// with OpenMP. num_threads == 0 means one thread per logical processor.
// NOTE: omp_set_num_threads changes process-wide OpenMP state as a side effect.
template <class DT>
NeighborsAndDistances<DynamicIdType> DynamicMemoryIndex<DT>::batch_search(
    py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, const uint64_t num_queries, const uint64_t knn,
    const uint64_t complexity, const uint32_t num_threads)
{
    // (num_queries x knn) output buffers.
    py::array_t<DynamicIdType> ids({num_queries, knn});
    py::array_t<float> dists({num_queries, knn});
    // No raw vector data requested back from the index.
    std::vector<DT *> empty_vector;

    if (num_threads == 0)
        omp_set_num_threads(omp_get_num_procs());
    else
        omp_set_num_threads(static_cast<int32_t>(num_threads));

#pragma omp parallel for schedule(dynamic, 1) default(none)                                                            \
    shared(num_queries, queries, knn, complexity, ids, dists, empty_vector)
    for (int64_t i = 0; i < (int64_t)num_queries; i++)
    {
        _index.search_with_tags(queries.data(i), knn, complexity, ids.mutable_data(i), dists.mutable_data(i),
                                empty_vector);
    }

    return std::make_pair(ids, dists);
}
// Physically removes all points previously tombstoned via mark_deleted().
template <class DT> void DynamicMemoryIndex<DT>::consolidate_delete()
{
    _index.consolidate_deletes(_write_parameters);
}

// Current number of points held by the underlying index.
template <class DT> size_t DynamicMemoryIndex<DT>::num_points()
{
    return _index.get_num_points();
}

// Explicit instantiations for the three vector dtypes exposed to Python.
template class DynamicMemoryIndex<float>;
template class DynamicMemoryIndex<uint8_t>;
template class DynamicMemoryIndex<int8_t>;
}; // namespace diskannpy

View File

@@ -0,0 +1,142 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#include <string>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "defaults.h"
#include "distance.h"
#include "builder.h"
#include "dynamic_memory_index.h"
#include "static_disk_index.h"
#include "static_memory_index.h"
PYBIND11_MAKE_OPAQUE(std::vector<uint32_t>);
PYBIND11_MAKE_OPAQUE(std::vector<float>);
PYBIND11_MAKE_OPAQUE(std::vector<int8_t>);
PYBIND11_MAKE_OPAQUE(std::vector<uint8_t>);
namespace py = pybind11;
using namespace pybind11::literals;
// Per-dtype bundle of the Python-visible names under which the builder
// functions and index classes are registered on the extension module.
struct Variant
{
    std::string disk_builder_name;        // e.g. "build_disk_float_index"
    std::string memory_builder_name;      // e.g. "build_memory_float_index"
    std::string dynamic_memory_index_name; // e.g. "DynamicMemoryFloatIndex"
    std::string static_memory_index_name;  // e.g. "StaticMemoryFloatIndex"
    std::string static_disk_index_name;    // e.g. "StaticDiskFloatIndex"
};

// One Variant per supported vector dtype (float32, uint8, int8).
const Variant FloatVariant{"build_disk_float_index", "build_memory_float_index", "DynamicMemoryFloatIndex",
                           "StaticMemoryFloatIndex", "StaticDiskFloatIndex"};

const Variant UInt8Variant{"build_disk_uint8_index", "build_memory_uint8_index", "DynamicMemoryUInt8Index",
                           "StaticMemoryUInt8Index", "StaticDiskUInt8Index"};

const Variant Int8Variant{"build_disk_int8_index", "build_memory_int8_index", "DynamicMemoryInt8Index",
                          "StaticMemoryInt8Index", "StaticDiskInt8Index"};
// Registers every binding for one element type T under the Python names carried by
// `variant`: the disk/memory builder functions and the three index classes.
template <typename T> inline void add_variant(py::module_ &m, const Variant &variant)
{
    m.def(variant.disk_builder_name.c_str(), &diskannpy::build_disk_index<T>, "distance_metric"_a, "data_file_path"_a,
          "index_prefix_path"_a, "complexity"_a, "graph_degree"_a, "final_index_ram_limit"_a, "indexing_ram_budget"_a,
          "num_threads"_a, "pq_disk_bytes"_a, "codebook_prefix"_a = "");
    m.def(variant.memory_builder_name.c_str(), &diskannpy::build_memory_index<T>, "distance_metric"_a,
          "data_file_path"_a, "index_output_path"_a, "graph_degree"_a, "complexity"_a, "alpha"_a, "num_threads"_a,
          "use_pq_build"_a, "num_pq_bytes"_a, "use_opq"_a, "use_tags"_a = false, "filter_labels_file"_a = "",
          "universal_label"_a = "", "filter_complexity"_a = 0);
    py::class_<diskannpy::StaticMemoryIndex<T>>(m, variant.static_memory_index_name.c_str())
        .def(py::init<const diskann::Metric, const std::string &, const size_t, const size_t, const uint32_t,
                      const uint32_t>(),
             "distance_metric"_a, "index_path"_a, "num_points"_a, "dimensions"_a, "num_threads"_a,
             "initial_search_complexity"_a)
        .def("search", &diskannpy::StaticMemoryIndex<T>::search, "query"_a, "knn"_a, "complexity"_a)
        .def("search_with_filter", &diskannpy::StaticMemoryIndex<T>::search_with_filter, "query"_a, "knn"_a,
             "complexity"_a, "filter"_a)
        .def("batch_search", &diskannpy::StaticMemoryIndex<T>::batch_search, "queries"_a, "num_queries"_a, "knn"_a,
             "complexity"_a, "num_threads"_a);
    py::class_<diskannpy::DynamicMemoryIndex<T>>(m, variant.dynamic_memory_index_name.c_str())
        .def(py::init<const diskann::Metric, const size_t, const size_t, const uint32_t, const uint32_t, const bool,
                      const uint32_t, const float, const uint32_t, const uint32_t, const uint32_t, const uint32_t,
                      const uint32_t, const bool>(),
             "distance_metric"_a, "dimensions"_a, "max_vectors"_a, "complexity"_a, "graph_degree"_a,
             "saturate_graph"_a = diskann::defaults::SATURATE_GRAPH,
             "max_occlusion_size"_a = diskann::defaults::MAX_OCCLUSION_SIZE, "alpha"_a = diskann::defaults::ALPHA,
             "num_threads"_a = diskann::defaults::NUM_THREADS,
             "filter_complexity"_a = diskann::defaults::FILTER_LIST_SIZE,
             "num_frozen_points"_a = diskann::defaults::NUM_FROZEN_POINTS_DYNAMIC, "initial_search_complexity"_a = 0,
             "search_threads"_a = 0, "concurrent_consolidation"_a = true)
        .def("search", &diskannpy::DynamicMemoryIndex<T>::search, "query"_a, "knn"_a, "complexity"_a)
        .def("load", &diskannpy::DynamicMemoryIndex<T>::load, "index_path"_a)
        .def("batch_search", &diskannpy::DynamicMemoryIndex<T>::batch_search, "queries"_a, "num_queries"_a, "knn"_a,
             "complexity"_a, "num_threads"_a)
        .def("batch_insert", &diskannpy::DynamicMemoryIndex<T>::batch_insert, "vectors"_a, "ids"_a, "num_inserts"_a,
             "num_threads"_a)
        .def("save", &diskannpy::DynamicMemoryIndex<T>::save, "save_path"_a = "", "compact_before_save"_a = false)
        .def("insert", &diskannpy::DynamicMemoryIndex<T>::insert, "vector"_a, "id"_a)
        .def("mark_deleted", &diskannpy::DynamicMemoryIndex<T>::mark_deleted, "id"_a)
        .def("consolidate_delete", &diskannpy::DynamicMemoryIndex<T>::consolidate_delete)
        .def("num_points", &diskannpy::DynamicMemoryIndex<T>::num_points);
    py::class_<diskannpy::StaticDiskIndex<T>>(m, variant.static_disk_index_name.c_str())
        .def(py::init<const diskann::Metric, const std::string &, const uint32_t, const size_t, const uint32_t,
                      const std::string &, const std::string &>(),
             "distance_metric"_a, "index_path_prefix"_a, "num_threads"_a, "num_nodes_to_cache"_a,
             // FIX: pybind11 rejects a required argument after defaulted ones; give
             // partition_prefix an empty-string default (backward compatible, matches pq_prefix).
             "cache_mechanism"_a = 1, "pq_prefix"_a = "", "partition_prefix"_a = "")
        .def("cache_bfs_levels", &diskannpy::StaticDiskIndex<T>::cache_bfs_levels, "num_nodes_to_cache"_a)
        // NOTE(review): "recompute_beighbor_embeddings" is a typo, but it is the keyword
        // name exposed to Python; renaming would break callers, so it is kept as-is.
        .def("search", &diskannpy::StaticDiskIndex<T>::search, "query"_a, "knn"_a, "complexity"_a, "beam_width"_a,
             "USE_DEFERRED_FETCH"_a = false, "skip_search_reorder"_a = false, "recompute_beighbor_embeddings"_a = false,
             "dedup_node_dis"_a = false, "prune_ratio"_a = 0, "batch_recompute"_a = false, "global_pruning"_a = false)
        .def("batch_search", &diskannpy::StaticDiskIndex<T>::batch_search, "queries"_a, "num_queries"_a, "knn"_a,
             "complexity"_a, "beam_width"_a, "num_threads"_a, "USE_DEFERRED_FETCH"_a = false,
             "skip_search_reorder"_a = false, "recompute_beighbor_embeddings"_a = false, "dedup_node_dis"_a = false,
             "prune_ratio"_a = 0, "batch_recompute"_a = false, "global_pruning"_a = false);
}
PYBIND11_MODULE(_diskannpy, m)
{
    m.doc() = "DiskANN Python Bindings";
#ifdef VERSION_INFO
    m.attr("__version__") = VERSION_INFO;
#else
    m.attr("__version__") = "dev";
#endif
    // Register the distance-metric enum shared by every index variant.
    py::enum_<diskann::Metric>(m, "Metric")
        .value("L2", diskann::Metric::L2)
        .value("INNER_PRODUCT", diskann::Metric::INNER_PRODUCT)
        .value("COSINE", diskann::Metric::COSINE)
        .export_values();
    // Re-export the library's built-in defaults so Python callers can introspect them.
    py::module_ defaults_module = m.def_submodule(
        "defaults",
        "A collection of the default values used for common diskann operations. `GRAPH_DEGREE` and `COMPLEXITY` are not"
        " set as defaults, but some semi-reasonable default values are selected for your convenience. We urge you to "
        "investigate their meaning and adjust them for your use cases.");
    defaults_module.attr("ALPHA") = diskann::defaults::ALPHA;
    defaults_module.attr("NUM_THREADS") = diskann::defaults::NUM_THREADS;
    defaults_module.attr("MAX_OCCLUSION_SIZE") = diskann::defaults::MAX_OCCLUSION_SIZE;
    defaults_module.attr("FILTER_COMPLEXITY") = diskann::defaults::FILTER_LIST_SIZE;
    defaults_module.attr("NUM_FROZEN_POINTS_STATIC") = diskann::defaults::NUM_FROZEN_POINTS_STATIC;
    defaults_module.attr("NUM_FROZEN_POINTS_DYNAMIC") = diskann::defaults::NUM_FROZEN_POINTS_DYNAMIC;
    defaults_module.attr("SATURATE_GRAPH") = diskann::defaults::SATURATE_GRAPH;
    defaults_module.attr("GRAPH_DEGREE") = diskann::defaults::MAX_DEGREE;
    defaults_module.attr("COMPLEXITY") = diskann::defaults::BUILD_LIST_SIZE;
    defaults_module.attr("PQ_DISK_BYTES") = (uint32_t)0;
    defaults_module.attr("USE_PQ_BUILD") = false;
    defaults_module.attr("NUM_PQ_BYTES") = (uint32_t)0;
    defaults_module.attr("USE_OPQ") = false;
    // Bind builders and index classes for each supported element type.
    add_variant<float>(m, FloatVariant);
    add_variant<uint8_t>(m, UInt8Variant);
    add_variant<int8_t>(m, Int8Variant);
}

View File

View File

@@ -0,0 +1,123 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#include "static_disk_index.h"
#include "pybind11/numpy.h"
namespace diskannpy
{
// Loads a disk-backed index from `index_path_prefix` and optionally warms the node cache.
// cache_mechanism == 1 warms from sample queries in "<prefix>_sample_data.bin" (skipped if
// the file is absent); == 2 caches nodes by BFS level; any other value skips caching.
// num_threads == 0 means "use all cores". Throws std::runtime_error if load fails.
template <typename DT>
StaticDiskIndex<DT>::StaticDiskIndex(const diskann::Metric metric, const std::string &index_path_prefix,
                                     const uint32_t num_threads, const size_t num_nodes_to_cache,
                                     const uint32_t cache_mechanism, const std::string &pq_prefix,
                                     const std::string &partition_prefix)
    : _reader(std::make_shared<PlatformSpecificAlignedFileReader>()),
      _graph_reader(std::make_shared<PlatformSpecificAlignedFileReader>()), _index(_reader, _graph_reader, metric)
{
    // NOTE(review): raw std::cout prints look like leftover debugging; consider the
    // project's logging facility instead.
    std::cout << "Before index load" << std::endl;
    std::cout << "After index load" << std::endl;
    const uint32_t _num_threads = num_threads != 0 ? num_threads : omp_get_num_procs();
    int load_success =
        _index.load(_num_threads, index_path_prefix.c_str(), pq_prefix.c_str(), partition_prefix.c_str());
    if (load_success != 0)
    {
        throw std::runtime_error("index load failed, " + index_path_prefix);
    }
    if (cache_mechanism == 1)
    {
        // Sample-based warmup; silently a no-op when the sample file does not exist
        // (see cache_sample_paths).
        std::string sample_file = index_path_prefix + std::string("_sample_data.bin");
        cache_sample_paths(num_nodes_to_cache, sample_file, _num_threads);
    }
    else if (cache_mechanism == 2)
    {
        cache_bfs_levels(num_nodes_to_cache);
    }
}
// Selects up to `num_nodes_to_cache` nodes by breadth-first traversal from the index's
// entry point(s) and pins them in the in-memory node cache.
template <typename DT> void StaticDiskIndex<DT>::cache_bfs_levels(const size_t num_nodes_to_cache)
{
    std::vector<uint32_t> node_list;
    _index.cache_bfs_levels(num_nodes_to_cache, node_list);
    _index.load_cache_list(node_list);
}
// Warms the node cache by replaying sample queries from `warmup_query_file` and caching
// the most frequently visited nodes. No-op (by design) when the file is missing.
template <typename DT>
void StaticDiskIndex<DT>::cache_sample_paths(const size_t num_nodes_to_cache, const std::string &warmup_query_file,
                                             const uint32_t num_threads)
{
    if (!file_exists(warmup_query_file))
    {
        return;
    }
    std::vector<uint32_t> node_list;
    // NOTE(review): 15 and 4 are hard-coded search parameters passed to the sample-query
    // cache generator (presumably search list size and beam width) -- confirm against
    // generate_cache_list_from_sample_queries and consider naming them.
    _index.generate_cache_list_from_sample_queries(warmup_query_file, 15, 4, num_nodes_to_cache, num_threads,
                                                   node_list);
    _index.load_cache_list(node_list);
}
// Single-query k-NN search. Returns a pair of 1-D numpy arrays of length `knn`:
// neighbor ids and their distances. The extra boolean/float knobs are forwarded
// verbatim to cached_beam_search.
template <typename DT>
NeighborsAndDistances<StaticIdType> StaticDiskIndex<DT>::search(
    py::array_t<DT, py::array::c_style | py::array::forcecast> &query, const uint64_t knn, const uint64_t complexity,
    const uint64_t beam_width, const bool USE_DEFERRED_FETCH, const bool skip_search_reorder,
    const bool recompute_beighbor_embeddings, const bool dedup_node_dis, const float prune_ratio,
    const bool batch_recompute, const bool global_pruning)
{
    py::array_t<StaticIdType> ids(knn);
    py::array_t<float> dists(knn);
    // cached_beam_search emits 64-bit ids; they are narrowed to the python-facing
    // id type below. (Removed an unused std::vector<uint32_t> scratch buffer.)
    std::vector<uint64_t> u64_ids(knn);
    diskann::QueryStats stats;
    _index.cached_beam_search(query.data(), knn, complexity, u64_ids.data(), dists.mutable_data(), beam_width, false,
                              &stats, USE_DEFERRED_FETCH, skip_search_reorder, recompute_beighbor_embeddings,
                              dedup_node_dis, prune_ratio, batch_recompute, global_pruning);
    auto r = ids.mutable_unchecked<1>();
    for (uint64_t i = 0; i < knn; ++i)
        r(i) = static_cast<StaticIdType>(u64_ids[i]);
    return std::make_pair(ids, dists);
}
// Parallel k-NN over `num_queries` query rows. Returns (ids, dists), each shaped
// (num_queries, knn). Each OMP iteration writes a disjoint row, so no locking is needed.
template <typename DT>
NeighborsAndDistances<StaticIdType> StaticDiskIndex<DT>::batch_search(
    py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, const uint64_t num_queries, const uint64_t knn,
    const uint64_t complexity, const uint64_t beam_width, const uint32_t num_threads, const bool USE_DEFERRED_FETCH,
    const bool skip_search_reorder, const bool recompute_beighbor_embeddings, const bool dedup_node_dis,
    const float prune_ratio, const bool batch_recompute, const bool global_pruning)
{
    py::array_t<StaticIdType> ids({num_queries, knn});
    py::array_t<float> dists({num_queries, knn});
    // FIX: treat num_threads == 0 as "use all cores", matching
    // StaticMemoryIndex::batch_search; omp_set_num_threads(0) is invalid per the
    // OpenMP spec.
    const uint32_t _num_threads = num_threads != 0 ? num_threads : omp_get_num_procs();
    omp_set_num_threads(static_cast<int32_t>(_num_threads));
    std::vector<uint64_t> u64_ids(knn * num_queries);
#pragma omp parallel for schedule(dynamic, 1) default(none) \
    shared(num_queries, queries, knn, complexity, u64_ids, dists, beam_width, USE_DEFERRED_FETCH, skip_search_reorder, \
               recompute_beighbor_embeddings, dedup_node_dis, prune_ratio, batch_recompute, global_pruning)
    for (int64_t i = 0; i < (int64_t)num_queries; i++)
    {
        _index.cached_beam_search(queries.data(i), knn, complexity, u64_ids.data() + i * knn, dists.mutable_data(i),
                                  beam_width, false, nullptr, USE_DEFERRED_FETCH, skip_search_reorder,
                                  recompute_beighbor_embeddings, dedup_node_dis, prune_ratio, batch_recompute,
                                  global_pruning);
    }
    // Narrow 64-bit internal ids to the 32-bit python-facing id type.
    auto r = ids.mutable_unchecked();
    for (uint64_t i = 0; i < num_queries; ++i)
        for (uint64_t j = 0; j < knn; ++j)
            r(i, j) = (uint32_t)u64_ids[i * knn + j];
    return std::make_pair(ids, dists);
}
template class StaticDiskIndex<float>;
template class StaticDiskIndex<uint8_t>;
template class StaticDiskIndex<int8_t>;
} // namespace diskannpy

View File

@@ -0,0 +1,91 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
#include "static_memory_index.h"
#include "pybind11/numpy.h"
namespace diskannpy
{
// Constructs the underlying diskann::Index for a read-only in-memory index: no write
// params, tags, PQ, or dynamic features. Throws if initial_search_complexity is zero,
// since it sizes the search scratch space.
// NOTE(review): the declared return type spells diskann::Index<DT, StaticIdType, filterT>
// while the return expression constructs diskann::Index<DT>; presumably the template's
// default arguments are StaticIdType/filterT -- confirm against the Index declaration.
template <class DT>
diskann::Index<DT, StaticIdType, filterT> static_index_builder(const diskann::Metric m, const size_t num_points,
                                                               const size_t dimensions,
                                                               const uint32_t initial_search_complexity)
{
    if (initial_search_complexity == 0)
    {
        throw std::runtime_error("initial_search_complexity must be a positive uint32_t");
    }
    auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, omp_get_num_procs());
    return diskann::Index<DT>(m, dimensions, num_points,
                              nullptr, // index write params
                              std::make_shared<diskann::IndexSearchParams>(index_search_params), // index search params
                              0,      // num frozen points
                              false,  // not a dynamic_index
                              false,  // no enable_tags/ids
                              false,  // no concurrent_consolidate,
                              false,  // pq_dist_build
                              0,      // num_pq_chunks
                              false); // use_opq = false
}
// Builds an empty static index shell (see static_index_builder) and loads the graph and
// vectors from files named by `index_prefix`. num_threads == 0 means "use all cores".
template <class DT>
StaticMemoryIndex<DT>::StaticMemoryIndex(const diskann::Metric m, const std::string &index_prefix,
                                         const size_t num_points, const size_t dimensions, const uint32_t num_threads,
                                         const uint32_t initial_search_complexity)
    : _index(static_index_builder<DT>(m, num_points, dimensions, initial_search_complexity))
{
    const uint32_t _num_threads = num_threads != 0 ? num_threads : omp_get_num_procs();
    _index.load(index_prefix.c_str(), _num_threads, initial_search_complexity);
}
// Single-query k-NN search; returns (ids, dists), each a 1-D array of length `knn`.
// (Removed an unused std::vector<DT *> local.)
template <typename DT>
NeighborsAndDistances<StaticIdType> StaticMemoryIndex<DT>::search(
    py::array_t<DT, py::array::c_style | py::array::forcecast> &query, const uint64_t knn, const uint64_t complexity)
{
    py::array_t<StaticIdType> ids(knn);
    py::array_t<float> dists(knn);
    _index.search(query.data(), knn, complexity, ids.mutable_data(), dists.mutable_data());
    return std::make_pair(ids, dists);
}
// Single-query k-NN search restricted to points matching `filter`; returns (ids, dists),
// each a 1-D array of length `knn`. (Removed an unused std::vector<DT *> local.)
template <typename DT>
NeighborsAndDistances<StaticIdType> StaticMemoryIndex<DT>::search_with_filter(
    py::array_t<DT, py::array::c_style | py::array::forcecast> &query, const uint64_t knn, const uint64_t complexity,
    const filterT filter)
{
    py::array_t<StaticIdType> ids(knn);
    py::array_t<float> dists(knn);
    _index.search_with_filters(query.data(), filter, knn, complexity, ids.mutable_data(), dists.mutable_data());
    return std::make_pair(ids, dists);
}
// Parallel k-NN over `num_queries` query rows; returns (ids, dists), each shaped
// (num_queries, knn). num_threads == 0 means "use all cores". Each OMP iteration writes
// its own disjoint row, so no locking is needed. (Removed an unused std::vector<DT *>
// local.)
template <typename DT>
NeighborsAndDistances<StaticIdType> StaticMemoryIndex<DT>::batch_search(
    py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, const uint64_t num_queries, const uint64_t knn,
    const uint64_t complexity, const uint32_t num_threads)
{
    const uint32_t _num_threads = num_threads != 0 ? num_threads : omp_get_num_procs();
    py::array_t<StaticIdType> ids({num_queries, knn});
    py::array_t<float> dists({num_queries, knn});
    omp_set_num_threads(static_cast<int32_t>(_num_threads));
#pragma omp parallel for schedule(dynamic, 1) default(none) shared(num_queries, queries, knn, complexity, ids, dists)
    for (int64_t i = 0; i < (int64_t)num_queries; i++)
    {
        _index.search(queries.data(i), knn, complexity, ids.mutable_data(i), dists.mutable_data(i));
    }
    return std::make_pair(ids, dists);
}
template class StaticMemoryIndex<float>;
template class StaticMemoryIndex<uint8_t>;
template class StaticMemoryIndex<int8_t>;
} // namespace diskannpy

View File

@@ -0,0 +1,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
from .build_memory_index import build_random_vectors_and_memory_index
from .create_test_data import random_vectors, vectors_as_temp_file, write_vectors
from .recall import calculate_recall

View File

@@ -0,0 +1,51 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import os
from tempfile import mkdtemp
import diskannpy as dap
import numpy as np
from .create_test_data import random_vectors
def build_random_vectors_and_memory_index(
    dtype, metric, with_tags: bool = False, index_prefix: str = "ann", seed: int = 12345
):
    """Build a small in-memory DiskANN index over random vectors for tests.

    Generates 1000 query vectors and 10000 index vectors (10 dims each) of the
    given dtype, builds the index in a fresh temp directory, and returns a
    fixture tuple: (metric, dtype, query_vectors, index_vectors, index_dir,
    vector_bin_path, tags). When ``with_tags`` is set, tags are a shuffled
    permutation of 1..10000; otherwise the empty string.
    """
    # NOTE(review): queries and index vectors are drawn with the same seed, so
    # their RNG streams start identically -- presumably fine for test fixtures.
    query_vectors: np.ndarray = random_vectors(1000, 10, dtype=dtype, seed=seed)
    index_vectors: np.ndarray = random_vectors(10000, 10, dtype=dtype, seed=seed)
    ann_dir = mkdtemp()
    if with_tags:
        shuffler = np.random.default_rng(seed)
        tags = np.arange(start=1, stop=10001, dtype=np.uint32)
        shuffler.shuffle(tags)
    else:
        tags = ""
    dap.build_memory_index(
        data=index_vectors,
        distance_metric=metric,
        index_directory=ann_dir,
        graph_degree=16,
        complexity=32,
        alpha=1.2,
        num_threads=0,
        use_pq_build=False,
        num_pq_bytes=8,
        use_opq=False,
        filter_complexity=32,
        tags=tags,
        index_prefix=index_prefix,
    )
    return (
        metric,
        dtype,
        query_vectors,
        index_vectors,
        ann_dir,
        os.path.join(ann_dir, "vectors.bin"),
        tags,
    )

View File

@@ -0,0 +1,40 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
from contextlib import contextmanager
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import BinaryIO
import numpy as np
def random_vectors(rows: int, dimensions: int, dtype, seed: int = 12345) -> np.ndarray:
    """Return a deterministic (rows, dimensions) array of random values.

    Supported dtypes: np.float32 (uniform [0, 1)), np.uint8 (0..255), and
    np.int8 (-128..127). Any other dtype raises RuntimeError. The same seed
    always yields the same array.
    """
    generator = np.random.default_rng(seed)
    if dtype == np.float32:
        return generator.random((rows, dimensions), dtype=dtype)
    if dtype == np.uint8:
        # integers(): low inclusive, high exclusive
        return generator.integers(low=0, high=256, size=(rows, dimensions), dtype=dtype)
    if dtype == np.int8:
        # integers(): low inclusive, high exclusive
        return generator.integers(low=-128, high=128, size=(rows, dimensions), dtype=dtype)
    raise RuntimeError("Only np.float32, np.int8, and np.uint8 are supported")
def write_vectors(file_handler: BinaryIO, vectors: np.ndarray):
    """Serialize `vectors` in DiskANN binary layout to an open binary handle.

    Layout: the array's shape as int32 values (rows, dims for a 2-D array),
    immediately followed by the raw vector bytes.
    """
    shape_header = np.array(vectors.shape, dtype=np.int32)
    _ = file_handler.write(shape_header.tobytes())
    _ = file_handler.write(vectors.tobytes())
@contextmanager
def vectors_as_temp_file(vectors: np.ndarray) -> str:
    """Context manager yielding the path of a temp file holding `vectors`.

    The file is written via write_vectors, closed before the path is yielded,
    and unlinked after the with-block exits normally.
    """
    handle = NamedTemporaryFile(mode="wb", delete=False)
    write_vectors(handle, vectors)
    handle.close()
    # NOTE(review): if the with-block raises, cleanup below is skipped and the
    # temp file leaks -- presumably acceptable for test fixtures.
    yield handle.name
    Path(handle.name).unlink()

View File

@@ -0,0 +1,24 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
import numpy as np
def calculate_recall(
    result_set_indices: np.ndarray, truth_set_indices: np.ndarray, recall_at: int = 5
) -> float:
    """Compute recall@k between approximate and exact nearest-neighbor results.

    Rows of ``result_set_indices`` (ANN results) and ``truth_set_indices``
    (ground truth) correspond by index; the first ``recall_at`` columns of each
    row are compared as sets.

    :param result_set_indices: 2-D array of approximate neighbor indices.
    :param truth_set_indices: 2-D array of ground-truth neighbor indices.
    :param recall_at: number of leading neighbors per row to compare.
    :return: fraction of true neighbors recovered, in [0, 1].
    """
    num_rows = result_set_indices.shape[0]
    hits = 0
    for row in range(num_rows):
        approx = set(result_set_indices[row][0:recall_at])
        exact = set(truth_set_indices[row][0:recall_at])
        hits += len(approx & exact)
    return hits / (num_rows * recall_at)