Initial commit
This commit is contained in:
27
packages/leann-backend-diskann/third_party/DiskANN/python/include/builder.h
vendored
Normal file
27
packages/leann-backend-diskann/third_party/DiskANN/python/include/builder.h
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "common.h"
|
||||
#include "distance.h"
|
||||
|
||||
namespace diskannpy
|
||||
{
|
||||
template <typename DT>
|
||||
void build_disk_index(diskann::Metric metric, const std::string &data_file_path, const std::string &index_prefix_path,
|
||||
uint32_t complexity, uint32_t graph_degree, double final_index_ram_limit,
|
||||
double indexing_ram_budget, uint32_t num_threads, uint32_t pq_disk_bytes,
|
||||
const std::string &codebook_prefix);
|
||||
|
||||
template <typename DT, typename TagT = DynamicIdType, typename LabelT = filterT>
|
||||
void build_memory_index(diskann::Metric metric, const std::string &vector_bin_path,
|
||||
const std::string &index_output_path, uint32_t graph_degree, uint32_t complexity, float alpha,
|
||||
uint32_t num_threads, bool use_pq_build, size_t num_pq_bytes, bool use_opq,
|
||||
bool use_tags = false, const std::string &filter_labels_file = "",
|
||||
const std::string &universal_label = "", uint32_t filter_complexity = 0);
|
||||
|
||||
} // namespace diskannpy
|
||||
24
packages/leann-backend-diskann/third_party/DiskANN/python/include/common.h
vendored
Normal file
24
packages/leann-backend-diskann/third_party/DiskANN/python/include/common.h
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <utility>
|
||||
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/numpy.h>
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
namespace diskannpy
|
||||
{
|
||||
|
||||
// Fixed-width integer aliases shared by the binding headers. They are written as
// alias-declarations so they read the same way as the NeighborsAndDistances alias
// template declared further down in this header.

/// Label type used for filtered search (e.g. the `filter` argument of
/// StaticMemoryIndex::search_with_filter).
using filterT = uint32_t;

/// Id type reported by the static index wrappers.
using StaticIdType = uint32_t;

/// Id type accepted and reported by the dynamic index wrapper.
using DynamicIdType = uint32_t;
|
||||
|
||||
/// Pair of NumPy arrays used as the return type of the search methods:
/// `first` is an array of `IdType`, `second` is an array of `float`
/// (ids and distances respectively, going by the alias name).
template <typename IdType>
using NeighborsAndDistances = std::pair<py::array_t<IdType>, py::array_t<float>>;
|
||||
|
||||
}; // namespace diskannpy
|
||||
53
packages/leann-backend-diskann/third_party/DiskANN/python/include/dynamic_memory_index.h
vendored
Normal file
53
packages/leann-backend-diskann/third_party/DiskANN/python/include/dynamic_memory_index.h
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/numpy.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "index.h"
|
||||
#include "parameters.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
namespace diskannpy
|
||||
{
|
||||
|
||||
template <typename DT>
|
||||
class DynamicMemoryIndex
|
||||
{
|
||||
public:
|
||||
DynamicMemoryIndex(diskann::Metric m, size_t dimensions, size_t max_vectors, uint32_t complexity,
|
||||
uint32_t graph_degree, bool saturate_graph, uint32_t max_occlusion_size, float alpha,
|
||||
uint32_t num_threads, uint32_t filter_complexity, uint32_t num_frozen_points,
|
||||
uint32_t initial_search_complexity, uint32_t initial_search_threads,
|
||||
bool concurrent_consolidation);
|
||||
|
||||
void load(const std::string &index_path);
|
||||
int insert(const py::array_t<DT, py::array::c_style | py::array::forcecast> &vector, DynamicIdType id);
|
||||
py::array_t<int> batch_insert(py::array_t<DT, py::array::c_style | py::array::forcecast> &vectors,
|
||||
py::array_t<DynamicIdType, py::array::c_style | py::array::forcecast> &ids, int32_t num_inserts,
|
||||
int num_threads = 0);
|
||||
int mark_deleted(DynamicIdType id);
|
||||
void save(const std::string &save_path, bool compact_before_save = false);
|
||||
NeighborsAndDistances<DynamicIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query, uint64_t knn,
|
||||
uint64_t complexity);
|
||||
NeighborsAndDistances<DynamicIdType> batch_search(py::array_t<DT, py::array::c_style | py::array::forcecast> &queries,
|
||||
uint64_t num_queries, uint64_t knn, uint64_t complexity,
|
||||
uint32_t num_threads);
|
||||
void consolidate_delete();
|
||||
size_t num_points();
|
||||
|
||||
|
||||
private:
|
||||
const uint32_t _initial_search_complexity;
|
||||
const diskann::IndexWriteParameters _write_parameters;
|
||||
diskann::Index<DT, DynamicIdType, filterT> _index;
|
||||
};
|
||||
|
||||
}; // namespace diskannpy
|
||||
65
packages/leann-backend-diskann/third_party/DiskANN/python/include/static_disk_index.h
vendored
Normal file
65
packages/leann-backend-diskann/third_party/DiskANN/python/include/static_disk_index.h
vendored
Normal file
@@ -0,0 +1,65 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
#include <string>
|
||||
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/numpy.h>
|
||||
|
||||
#ifdef _WINDOWS
|
||||
#include "windows_aligned_file_reader.h"
|
||||
#elif __APPLE__
|
||||
#include "apple_aligned_file_reader.h"
|
||||
#else
|
||||
#include "linux_aligned_file_reader.h"
|
||||
#endif
|
||||
|
||||
#include "common.h"
|
||||
#include "pq_flash_index.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
namespace diskannpy
|
||||
{
|
||||
|
||||
#ifdef _WINDOWS
|
||||
typedef WindowsAlignedFileReader PlatformSpecificAlignedFileReader;
|
||||
#elif __APPLE__
|
||||
typedef AppleAlignedFileReader PlatformSpecificAlignedFileReader;
|
||||
#else
|
||||
typedef LinuxAlignedFileReader PlatformSpecificAlignedFileReader;
|
||||
#endif
|
||||
|
||||
template <typename DT> class StaticDiskIndex
|
||||
{
|
||||
public:
|
||||
StaticDiskIndex(diskann::Metric metric, const std::string &index_path_prefix, uint32_t num_threads,
|
||||
size_t num_nodes_to_cache, uint32_t cache_mechanism, const std::string &pq_prefix,
|
||||
const std::string &partition_prefix);
|
||||
|
||||
void cache_bfs_levels(size_t num_nodes_to_cache);
|
||||
|
||||
void cache_sample_paths(size_t num_nodes_to_cache, const std::string &warmup_query_file, uint32_t num_threads);
|
||||
|
||||
NeighborsAndDistances<StaticIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query,
|
||||
uint64_t knn, uint64_t complexity, uint64_t beam_width,
|
||||
bool USE_DEFERRED_FETCH = false, bool skip_search_reorder = false,
|
||||
bool recompute_beighbor_embeddings = false, bool dedup_node_dis = false,
|
||||
float prune_ratio = 0, bool batch_recompute = false,
|
||||
bool global_pruning = false);
|
||||
|
||||
NeighborsAndDistances<StaticIdType> batch_search(
|
||||
py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, uint64_t num_queries, uint64_t knn,
|
||||
uint64_t complexity, uint64_t beam_width, uint32_t num_threads, bool USE_DEFERRED_FETCH = false,
|
||||
bool skip_search_reorder = false, bool recompute_beighbor_embeddings = false, bool dedup_node_dis = false,
|
||||
float prune_ratio = 0, bool batch_recompute = false, bool global_pruning = false);
|
||||
|
||||
private:
|
||||
std::shared_ptr<AlignedFileReader> _reader;
|
||||
std::shared_ptr<AlignedFileReader> _graph_reader;
|
||||
diskann::PQFlashIndex<DT> _index;
|
||||
};
|
||||
} // namespace diskannpy
|
||||
40
packages/leann-backend-diskann/third_party/DiskANN/python/include/static_memory_index.h
vendored
Normal file
40
packages/leann-backend-diskann/third_party/DiskANN/python/include/static_memory_index.h
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/numpy.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "index.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
namespace diskannpy
|
||||
{
|
||||
|
||||
template <typename DT> class StaticMemoryIndex
|
||||
{
|
||||
public:
|
||||
StaticMemoryIndex(diskann::Metric m, const std::string &index_prefix, size_t num_points, size_t dimensions,
|
||||
uint32_t num_threads, uint32_t initial_search_complexity);
|
||||
|
||||
NeighborsAndDistances<StaticIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query,
|
||||
uint64_t knn, uint64_t complexity);
|
||||
|
||||
NeighborsAndDistances<StaticIdType> search_with_filter(
|
||||
py::array_t<DT, py::array::c_style | py::array::forcecast> &query, uint64_t knn, uint64_t complexity,
|
||||
filterT filter);
|
||||
|
||||
NeighborsAndDistances<StaticIdType> batch_search(
|
||||
py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, uint64_t num_queries, uint64_t knn,
|
||||
uint64_t complexity, uint32_t num_threads);
|
||||
|
||||
private:
|
||||
diskann::Index<DT, StaticIdType, filterT> _index;
|
||||
};
|
||||
} // namespace diskannpy
|
||||
Reference in New Issue
Block a user