Initial commit

This commit is contained in:
yichuan520030910320
2025-06-30 09:05:05 +00:00
commit 46f6cc100b
1231 changed files with 278432 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# @lint-ignore-every LINEWRAP
# Perf-test sub-project: builds one small utility library plus one executable
# per benchmark source listed in FAISS_PERF_TEST_SRC below.
project(faiss_perf_tests)
# Set before Google Benchmark is fetched. The name suggests it disables that
# project's own test suite -- TODO confirm against the benchmark CMake options.
set(BENCHMARK_ENABLE_TESTING OFF)
include(FetchContent)
# NOTE(review): GIT_TAG tracks the moving `main` branch, so the revision that
# gets fetched can change from one configure to the next. Consider pinning a
# tag/commit once one that provides benchmark::benchmark is confirmed.
FetchContent_Declare(googlebenchmark
GIT_REPOSITORY https://github.com/google/benchmark.git
GIT_TAG main) # need main for benchmark::benchmark
FetchContent_MakeAvailable(
googlebenchmark)
# NOTE(review): Threads is required here but Threads::Threads is not linked
# anywhere in this file; presumably link_to_faiss_lib() needs it -- verify.
find_package(Threads REQUIRED)
find_package(OpenMP REQUIRED)
find_package(gflags REQUIRED)
# Helper library shared by the benchmark executables (built from utils.cpp).
add_library(faiss_perf_tests_utils
utils.cpp
)
# Put the directory two levels above this project on the include path so that
# `#include <faiss/perf_tests/utils.h>` (or any other faiss/... header) resolves.
target_include_directories(faiss_perf_tests_utils PRIVATE
${PROJECT_SOURCE_DIR}/../..)
# link_to_faiss_lib() comes from the included module; presumably it links the
# given target against the Faiss library -- see ../cmake/link_to_faiss_lib.cmake.
include(../cmake/link_to_faiss_lib.cmake)
link_to_faiss_lib(faiss_perf_tests_utils)
# One benchmark executable is generated per entry in this list.
set(FAISS_PERF_TEST_SRC
bench_no_multithreading_rcq_search.cpp
bench_scalar_quantizer_accuracy.cpp
bench_scalar_quantizer_decode.cpp
bench_scalar_quantizer_distance.cpp
bench_scalar_quantizer_encode.cpp
)
foreach(bench ${FAISS_PERF_TEST_SRC})
# Executable name = source file name without directory or extension.
get_filename_component(bench_exec ${bench} NAME_WE)
add_executable(${bench_exec} ${bench})
link_to_faiss_lib(${bench_exec})
# Every benchmark links the shared utils, OpenMP, Google Benchmark and gflags.
target_link_libraries(${bench_exec} PRIVATE faiss_perf_tests_utils OpenMP::OpenMP_CXX benchmark::benchmark gflags)
# Same include root as the utils library, so that
# `#include <faiss/perf_tests/utils.h>` (or any other faiss/... header) resolves.
target_include_directories(${bench_exec} PRIVATE
${PROJECT_SOURCE_DIR}/../..)
endforeach()

View File

@@ -0,0 +1,204 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import resource
import time
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Dict, Generator, List, Optional
import faiss # @manual=//faiss/python:pyfaiss
import numpy as np
from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib
Dataset,
SyntheticDataset,
)
# Microseconds per second: multiplies second-valued timings to produce the
# integer `*_time_us` counters.
US_IN_S = 1_000_000
@dataclass
class PerfCounters:
    """Timings, in seconds, collected for one measured region.

    All three fields start at 0.0 and are overwritten by `timed_execution`
    once the measured region has finished.
    """

    # Elapsed wall-clock time.
    wall_time_s: float = 0.0
    # CPU time spent in user mode.
    user_time_s: float = 0.0
    # CPU time spent in system (kernel) mode.
    system_time_s: float = 0.0
@contextmanager
def timed_execution() -> Generator[PerfCounters, None, None]:
    """Context manager that measures the time spent inside its `with` body.

    Yields:
        A `PerfCounters` instance. Its fields are 0.0 while the body runs and
        are filled in when the body exits:
        - wall_time_s: difference of `time.perf_counter()` readings;
        - user_time_s / system_time_s: difference of the `ru_utime` /
          `ru_stime` fields reported by `resource.getrusage(RUSAGE_SELF)`.

    The counters are filled in a `finally` clause, so they are also populated
    when the body raises; the exception then propagates unchanged.
    """
    pcounters = PerfCounters()
    wall_time_start = time.perf_counter()
    rusage_start = resource.getrusage(resource.RUSAGE_SELF)
    try:
        yield pcounters
    finally:
        # Read both clocks first so the bookkeeping below is not measured.
        wall_time_end = time.perf_counter()
        rusage_end = resource.getrusage(resource.RUSAGE_SELF)
        pcounters.wall_time_s = wall_time_end - wall_time_start
        pcounters.user_time_s = rusage_end.ru_utime - rusage_start.ru_utime
        pcounters.system_time_s = rusage_end.ru_stime - rusage_start.ru_stime
def is_perf_counter(key: str) -> bool:
    """Return True when `key` names a timing counter.

    Timing counters are recognised purely by their `_time_us` name suffix.
    """
    timing_suffix = "_time_us"
    return key.endswith(timing_suffix)
def accumulate_perf_counter(
    phase: str,
    t: PerfCounters,
    counters: Dict[str, int]
) -> None:
    """Store the timings of one phase into `counters`, in microseconds.

    Args:
        phase: prefix for the generated keys (e.g. "add" or "search").
        t: timings in seconds for that phase.
        counters: dictionary updated in place. The keys
            `{phase}_wall_time_us`, `{phase}_user_time_us` and
            `{phase}_system_time_us` are set; an existing value under the
            same key is overwritten, not summed.

    Each value is truncated to a whole number of microseconds by `int()`.
    """
    counters[f"{phase}_wall_time_us"] = int(t.wall_time_s * US_IN_S)
    counters[f"{phase}_user_time_us"] = int(t.user_time_s * US_IN_S)
    # The system time is measured as well; report it instead of dropping it.
    counters[f"{phase}_system_time_us"] = int(t.system_time_s * US_IN_S)
def run_on_dataset(
    ds: Dataset,
    M: int,
    num_threads: int,
    num_add_iterations: int,
    num_search_iterations: int,
    efSearch: int = 16,
    efConstruction: int = 40,
    search_bounded_queue: bool = True,
) -> Dict[str, int]:
    """Time the add and search phases of an `IndexHNSWFlat` on `ds`.

    Args:
        ds: dataset providing the database and query vectors.
        M: second constructor argument of `faiss.IndexHNSWFlat`.
        num_threads: value passed to `faiss.omp_set_num_threads`.
        num_add_iterations: how many times `index.add` is called with the
            full database (on the same index) inside the timed add region.
        num_search_iterations: how many times the full query set is searched
            inside the timed search region.
        efSearch: assigned to `index.hnsw.efSearch` before searching.
        efConstruction: assigned to `index.hnsw.efConstruction` before adding.
        search_bounded_queue: assigned to `index.hnsw.search_bounded_queue`.

    Returns:
        A dictionary holding the timing counters of both phases (written by
        `accumulate_perf_counter`) together with the run parameters
        (nb, nq, d, M, efSearch, efConstruction and the iteration counts).
    """
    queries = ds.get_queries()
    database = ds.get_database()
    nb, _db_dim = database.shape
    # The dimension used for the index is the one of the query matrix.
    nq, d = queries.shape
    # Number of neighbours requested for every query.
    k = 10

    # pyre-ignore[16]: Module `faiss` has no attribute `omp_set_num_threads`.
    faiss.omp_set_num_threads(num_threads)

    hnsw_index = faiss.IndexHNSWFlat(d, M)
    hnsw_index.hnsw.efConstruction = efConstruction

    # --- add phase -------------------------------------------------------
    with timed_execution() as add_timing:
        for _ in range(num_add_iterations):
            hnsw_index.add(database)

    counters = {}
    accumulate_perf_counter("add", add_timing, counters)
    counters.update(nb=nb, num_add_iterations=num_add_iterations)

    # --- search phase ----------------------------------------------------
    hnsw_index.hnsw.efSearch = efSearch
    hnsw_index.hnsw.search_bounded_queue = search_bounded_queue
    with timed_execution() as search_timing:
        for _ in range(num_search_iterations):
            # Results are not inspected; only the elapsed time matters.
            _distances, _labels = hnsw_index.search(queries, k)

    accumulate_perf_counter("search", search_timing, counters)
    counters.update(
        {
            "nq": nq,
            "efSearch": efSearch,
            "efConstruction": efConstruction,
            "M": M,
            "d": d,
            "num_search_iterations": num_search_iterations,
        }
    )
    return counters
def run(
    d: int,
    nb: int,
    nq: int,
    M: int,
    num_threads: int,
    num_add_iterations: int = 1,
    num_search_iterations: int = 1,
    efSearch: int = 16,
    efConstruction: int = 40,
    search_bounded_queue: bool = True,
) -> Dict[str, int]:
    """Run the HNSW benchmark on a freshly generated synthetic dataset.

    A `SyntheticDataset` with `nb` database vectors, `nq` queries, dimension
    `d`, no training vectors, the "L2" metric and the fixed seed 1338 is
    created; every other argument is forwarded to `run_on_dataset`, whose
    counter dictionary is returned unchanged.
    """
    synthetic = SyntheticDataset(
        d=d, nb=nb, nt=0, nq=nq, metric="L2", seed=1338
    )
    forwarded = {
        "M": M,
        "num_add_iterations": num_add_iterations,
        "num_search_iterations": num_search_iterations,
        "num_threads": num_threads,
        "efSearch": efSearch,
        "efConstruction": efConstruction,
        "search_bounded_queue": search_bounded_queue,
    }
    return run_on_dataset(synthetic, **forwarded)
def _accumulate_counters(
element: Dict[str, int], accu: Optional[Dict[str, List[int]]] = None
) -> Dict[str, List[int]]:
if accu is None:
accu = {key: [value] for key, value in element.items()}
return accu
else:
assert accu.keys() <= element.keys(), (
"Accu keys must be a subset of element keys: "
f"{accu.keys()} not a subset of {element.keys()}"
)
for key in accu.keys():
accu[key].append(element[key])
return accu
def main():
    """Command-line entry point of the HNSW benchmark.

    Parses the options, optionally performs one warm-up run, then executes
    `--num-repetitions` measured runs and prints, for every timing counter,
    the mean and standard deviation over the repetitions.
    """
    parser = argparse.ArgumentParser(description="Benchmark HNSW")

    def add_int(flag: str, default: int) -> None:
        # All numeric options share the same shape: an int with a default.
        parser.add_argument(flag, type=int, default=default)

    add_int("--M", 32)
    add_int("--num-threads", 5)
    add_int("--warm-up-iterations", 0)
    add_int("--num-search-iterations", 1)
    add_int("--num-add-iterations", 1)
    add_int("--num-repetitions", 1)
    add_int("--ef-search", 16)
    add_int("--ef-construction", 40)
    parser.add_argument("--search-bounded-queue", action="store_true")
    add_int("--nb", 5000)
    add_int("--nq", 500)
    add_int("--d", 128)
    args = parser.parse_args()

    # Arguments shared by the warm-up run and the measured runs.
    common = {
        "d": args.d,
        "nb": args.nb,
        "nq": args.nq,
        "M": args.M,
        "num_threads": args.num_threads,
        "efSearch": args.ef_search,
        "efConstruction": args.ef_construction,
        "search_bounded_queue": args.search_bounded_queue,
    }

    if args.warm_up_iterations > 0:
        print(f"Warming up for {args.warm_up_iterations} iterations...")
        # The warm-up result is discarded on purpose.
        run(
            num_search_iterations=args.warm_up_iterations,
            num_add_iterations=args.warm_up_iterations,
            **common,
        )

    print(
        f"Running benchmark with dataset(nb={args.nb}, nq={args.nq}, "
        f"d={args.d}), M={args.M}, num_threads={args.num_threads}, "
        f"efSearch={args.ef_search}, efConstruction={args.ef_construction}"
    )

    result = None
    for _ in range(args.num_repetitions):
        repetition = run(
            num_search_iterations=args.num_search_iterations,
            num_add_iterations=args.num_add_iterations,
            **common,
        )
        result = _accumulate_counters(repetition, result)
    assert result is not None

    for name, samples in result.items():
        if not is_perf_counter(name):
            continue
        print(
            "%s t=%.3f us (± %.4f)" %
            (name, np.mean(samples), np.std(samples))
        )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,65 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gflags/gflags.h>
#include <benchmark/benchmark.h>
#include <faiss/IndexAdditiveQuantizer.h> // @manual=//faiss:faiss_no_multithreading
#include <faiss/utils/random.h> // @manual=//faiss:faiss_no_multithreading
using namespace faiss;
// Command-line flags (gflags); each is read once in main() and forwarded to
// the benchmark.
// Number of benchmark iterations requested via ->Iterations().
DEFINE_uint32(iterations, 20, "iterations");
// Number of results requested per query in rq.search().
DEFINE_uint32(nprobe, 1, "nprobe");
// Number of query vectors passed to each rq.search() call.
DEFINE_uint32(batch_size, 1, "batch_size");
// Copied into SearchParametersResidualCoarseQuantizer::beam_factor.
DEFINE_double(beam_factor, 4.0, "beam factor");
/**
 * Benchmark ResidualCoarseQuantizer::search().
 *
 * A quantizer over 512-dimensional vectors is trained once on random data;
 * the timed loop then repeatedly searches one batch of random queries.
 *
 * @param state        Google Benchmark state driving the timed loop
 * @param batch_size   number of query vectors per search() call
 * @param nprobe       number of results requested per query
 * @param beam_factor  value stored in the search parameters' beam_factor
 */
static void bench_search(
        benchmark::State& state,
        int batch_size,
        int nprobe,
        float beam_factor) {
    // Fixed problem size: dimension 512, 2 << 15 (= 65536) training vectors.
    int dim = 512;
    int num_train = 2 << 15;

    std::vector<float> train_vecs(dim * num_train);
    float_rand(train_vecs.data(), dim * num_train, 12345);

    ResidualCoarseQuantizer quantizer(dim, {16, 8});
    quantizer.verbose = false;
    quantizer.train(num_train, train_vecs.data());

    // Queries are drawn with the same seed as the training vectors.
    std::vector<float> queries(dim * batch_size);
    float_rand(queries.data(), dim * batch_size, 12345);

    // One (distance, index) slot per query and per requested result.
    std::vector<float> out_distances(nprobe * batch_size);
    std::vector<int64_t> out_indices(nprobe * batch_size);

    SearchParametersResidualCoarseQuantizer search_params;
    search_params.beam_factor = beam_factor;

    // Only the search itself is timed.
    for (auto _ : state) {
        quantizer.search(
                batch_size,
                queries.data(),
                nprobe,
                out_distances.data(),
                out_indices.data(),
                &search_params);
    }
}
/**
 * Entry point: Google Benchmark consumes its own arguments first, then
 * gflags parses the remaining ones, and a single "search" benchmark is
 * registered with the values of the flags and executed.
 */
int main(int argc, char** argv) {
    benchmark::Initialize(&argc, argv);
    gflags::AllowCommandLineReparsing();
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    // Snapshot the flags into the types expected by bench_search().
    int batch_size = static_cast<int>(FLAGS_batch_size);
    int nprobe = static_cast<int>(FLAGS_nprobe);
    float beam_factor = static_cast<float>(FLAGS_beam_factor);
    int iterations = static_cast<int>(FLAGS_iterations);

    auto* registered = benchmark::RegisterBenchmark(
            "search", bench_search, batch_size, nprobe, beam_factor);
    registered->Iterations(iterations);

    benchmark::RunSpecifiedBenchmarks();
    benchmark::Shutdown();
}

View File

@@ -0,0 +1,87 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/perf_tests/utils.h>
#include <gflags/gflags.h>
#include <cstdio>
#include <map>
#include <benchmark/benchmark.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/random.h>
using namespace faiss;
// Command-line flags (gflags), read once in main().
// Dimension of the random vectors.
DEFINE_uint32(d, 128, "dimension");
// Number of random vectors (the help text used to say "dimension").
DEFINE_uint32(n, 2000, "number of vectors");
// Number of benchmark iterations requested via ->Iterations().
DEFINE_uint32(iterations, 20, "iterations");
/**
 * Report accuracy figures of a ScalarQuantizer as benchmark counters.
 *
 * The quantizer is trained on n random d-dimensional vectors. The timed loop
 * performs the round trip encode -> decode -> encode again; afterwards the
 * following counters are published:
 *  - code_size:             sq.code_size
 *  - sql2_recons_error:     fvec_L2sqr over all n * d components of the
 *                           original vs. decoded data, divided by n
 *  - ndiff_for_idempotence: number of code bytes that differ between the
 *                           first and the second encoding
 *  - code_size_two:         total size in bytes of the code buffer
 *
 * @param state  Google Benchmark state driving the timed loop
 * @param type   scalar quantizer type under test
 * @param d      vector dimension
 * @param n      number of vectors
 */
static void bench_reconstruction_error(
        benchmark::State& state,
        ScalarQuantizer::QuantizerType type,
        int d,
        int n) {
    std::vector<float> x(d * n);
    float_rand(x.data(), d * n, 12345);

    ScalarQuantizer sq(d, type);
    sq.train(n, x.data());

    size_t code_size = sq.code_size;
    state.counters["code_size"] = sq.code_size;

    // Buffers are allocated once, outside the timed region.
    std::vector<uint8_t> codes(code_size * n);
    std::vector<float> x2(d * n);
    std::vector<uint8_t> codes2(code_size * n);

    // Google Benchmark requires every benchmark function to drive the state
    // loop; returning without doing so trips its "Benchmark returned before
    // State::KeepRunning() returned false!" check. The round trip is
    // deterministic, so repeating it does not change the counters below.
    for (auto _ : state) {
        // encode
        sq.compute_codes(x.data(), codes.data(), n);
        // decode
        sq.decode(codes.data(), x2.data(), n);
        // encode again, to check that encoding decoded data is idempotent
        sq.compute_codes(x2.data(), codes2.data(), n);
    }

    state.counters["sql2_recons_error"] =
            fvec_L2sqr(x.data(), x2.data(), n * d) / n;

    size_t ndiff = 0;
    for (size_t i = 0; i < codes.size(); i++) {
        if (codes[i] != codes2[i]) {
            ndiff++;
        }
    }
    state.counters["ndiff_for_idempotence"] = ndiff;
    state.counters["code_size_two"] = codes.size();
}
/**
 * Entry point: Google Benchmark consumes its own arguments first, then
 * gflags parses the remaining ones, and one accuracy benchmark is registered
 * per quantizer type returned by perf_tests::sq_types().
 */
int main(int argc, char** argv) {
    benchmark::Initialize(&argc, argv);
    gflags::AllowCommandLineReparsing();
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    int iterations = static_cast<int>(FLAGS_iterations);
    int d = static_cast<int>(FLAGS_d);
    int n = static_cast<int>(FLAGS_n);

    // name -> quantizer type; the benchmark is named after the map key.
    auto quantizers = ::perf_tests::sq_types();
    for (auto& entry : quantizers) {
        auto* registered = benchmark::RegisterBenchmark(
                entry.first.c_str(),
                bench_reconstruction_error,
                entry.second,
                d,
                n);
        registered->Iterations(iterations);
    }

    benchmark::RunSpecifiedBenchmarks();
    benchmark::Shutdown();
}

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/perf_tests/utils.h>
#include <gflags/gflags.h>
#include <omp.h>
#include <cstdio>
#include <map>
#include <benchmark/benchmark.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/utils/random.h>
using namespace faiss;
// Command-line flags (gflags), read once in main().
// Dimension of the random vectors.
DEFINE_uint32(d, 128, "dimension");
// Number of random vectors (the help text used to say "dimension").
DEFINE_uint32(n, 2000, "number of vectors");
// Number of benchmark iterations requested via ->Iterations().
DEFINE_uint32(iterations, 20, "iterations");
/**
 * Benchmark ScalarQuantizer::decode().
 *
 * n random d-dimensional vectors are generated, used to train the quantizer
 * and encoded once; the timed loop decodes all n codes per iteration. The
 * "code_size" counter reports sq.code_size.
 *
 * @param state  Google Benchmark state driving the timed loop
 * @param type   scalar quantizer type under test
 * @param d      vector dimension
 * @param n      number of vectors
 */
static void bench_decode(
        benchmark::State& state,
        ScalarQuantizer::QuantizerType type,
        int d,
        int n) {
    std::vector<float> input(d * n);
    float_rand(input.data(), d * n, 12345);

    ScalarQuantizer quantizer(d, type);
    // From here on OpenMP is limited to a single thread.
    omp_set_num_threads(1);
    quantizer.train(n, input.data());

    size_t bytes_per_code = quantizer.code_size;
    state.counters["code_size"] = quantizer.code_size;

    // Encoding is setup work and stays outside the timed region.
    std::vector<uint8_t> encoded(bytes_per_code * n);
    quantizer.compute_codes(input.data(), encoded.data(), n);

    std::vector<float> decoded(d * n);
    for (auto _ : state) {
        quantizer.decode(encoded.data(), decoded.data(), n);
    }
}
/**
 * Entry point: Google Benchmark consumes its own arguments first, then
 * gflags parses the remaining ones, and one decode benchmark is registered
 * per quantizer type returned by perf_tests::sq_types().
 */
int main(int argc, char** argv) {
    benchmark::Initialize(&argc, argv);
    gflags::AllowCommandLineReparsing();
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    int iterations = static_cast<int>(FLAGS_iterations);
    int d = static_cast<int>(FLAGS_d);
    int n = static_cast<int>(FLAGS_n);

    // name -> quantizer type; the benchmark is named after the map key.
    auto quantizers = ::perf_tests::sq_types();
    for (auto& entry : quantizers) {
        auto* registered = benchmark::RegisterBenchmark(
                entry.first.c_str(), bench_decode, entry.second, d, n);
        registered->Iterations(iterations);
    }

    benchmark::RunSpecifiedBenchmarks();
    benchmark::Shutdown();
}

View File

@@ -0,0 +1,78 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gflags/gflags.h>
#include <omp.h>
#include <cstdio>
#include <map>
#include <benchmark/benchmark.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/perf_tests/utils.h>
#include <faiss/utils/random.h>
using namespace faiss;
// Command-line flags (gflags), read once in main().
// Dimension of the random vectors.
DEFINE_uint32(d, 128, "dimension");
// Number of random vectors (the help text used to say "dimension").
DEFINE_uint32(n, 2000, "number of vectors");
// Number of benchmark iterations requested via ->Iterations().
DEFINE_uint32(iterations, 20, "iterations");
/**
 * Benchmark the distance computer of a ScalarQuantizer.
 *
 * n random d-dimensional vectors are generated, used to train the quantizer
 * and encoded once. Each timed iteration uses every vector as a query and
 * evaluates its distance to every stored code, i.e. n * n evaluations. The
 * "code_size" counter reports sq.code_size.
 *
 * @param state  Google Benchmark state driving the timed loop
 * @param type   scalar quantizer type under test
 * @param d      vector dimension
 * @param n      number of vectors
 */
static void bench_distance(
        benchmark::State& state,
        ScalarQuantizer::QuantizerType type,
        int d,
        int n) {
    std::vector<float> vectors(d * n);
    float_rand(vectors.data(), d * n, 12345);

    ScalarQuantizer quantizer(d, type);
    // From here on OpenMP is limited to a single thread.
    omp_set_num_threads(1);
    quantizer.train(n, vectors.data());

    size_t bytes_per_code = quantizer.code_size;
    state.counters["code_size"] = quantizer.code_size;

    // Encoding is setup work and stays outside the timed region.
    std::vector<uint8_t> encoded(bytes_per_code * n);
    quantizer.compute_codes(vectors.data(), encoded.data(), n);

    // Point the distance computer at the encoded database.
    std::unique_ptr<ScalarQuantizer::SQDistanceComputer> computer(
            quantizer.get_distance_computer());
    computer->codes = encoded.data();
    computer->code_size = quantizer.code_size;

    for (auto _ : state) {
        float total = 0;
        for (int q = 0; q < n; q++) {
            computer->set_query(&vectors[q * d]);
            for (int c = 0; c < n; c++) {
                // Keep the accumulation observable so it is not optimized out.
                benchmark::DoNotOptimize(total += (*computer)(c));
            }
        }
    }
}
/**
 * Entry point: Google Benchmark consumes its own arguments first, then
 * gflags parses the remaining ones, and one distance benchmark is registered
 * per quantizer type returned by perf_tests::sq_types().
 */
int main(int argc, char** argv) {
    benchmark::Initialize(&argc, argv);
    gflags::AllowCommandLineReparsing();
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    int iterations = static_cast<int>(FLAGS_iterations);
    int d = static_cast<int>(FLAGS_d);
    int n = static_cast<int>(FLAGS_n);

    // name -> quantizer type; the benchmark is named after the map key.
    auto quantizers = ::perf_tests::sq_types();
    for (auto& entry : quantizers) {
        auto* registered = benchmark::RegisterBenchmark(
                entry.first.c_str(), bench_distance, entry.second, d, n);
        registered->Iterations(iterations);
    }

    benchmark::RunSpecifiedBenchmarks();
    benchmark::Shutdown();
}

View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <gflags/gflags.h>
#include <omp.h>
#include <cstdio>
#include <map>
#include <benchmark/benchmark.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/perf_tests/utils.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/random.h>
using namespace faiss;
// Command-line flags (gflags), read once in main().
// Dimension of the random vectors.
DEFINE_uint32(d, 128, "dimension");
// Number of random vectors (the help text used to say "dimension").
DEFINE_uint32(n, 2000, "number of vectors");
// Number of benchmark iterations requested via ->Iterations().
DEFINE_uint32(iterations, 20, "iterations");
/**
 * Benchmark ScalarQuantizer::compute_codes().
 *
 * n random d-dimensional vectors are generated and used to train the
 * quantizer; the timed loop encodes all n vectors per iteration. The
 * "code_size" counter reports sq.code_size.
 *
 * @param state  Google Benchmark state driving the timed loop
 * @param type   scalar quantizer type under test
 * @param d      vector dimension
 * @param n      number of vectors
 */
static void bench_encode(
        benchmark::State& state,
        ScalarQuantizer::QuantizerType type,
        int d,
        int n) {
    std::vector<float> input(d * n);
    float_rand(input.data(), d * n, 12345);

    ScalarQuantizer quantizer(d, type);
    // From here on OpenMP is limited to a single thread.
    omp_set_num_threads(1);

    // The code size is read right after construction, before training.
    size_t bytes_per_code = quantizer.code_size;
    quantizer.train(n, input.data());
    state.counters["code_size"] = quantizer.code_size;

    std::vector<uint8_t> encoded(bytes_per_code * n);
    for (auto _ : state) {
        quantizer.compute_codes(input.data(), encoded.data(), n);
    }
}
/**
 * Entry point: Google Benchmark consumes its own arguments first, then
 * gflags parses the remaining ones, and one encode benchmark is registered
 * per quantizer type returned by perf_tests::sq_types().
 */
int main(int argc, char** argv) {
    benchmark::Initialize(&argc, argv);
    gflags::AllowCommandLineReparsing();
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    int iterations = static_cast<int>(FLAGS_iterations);
    int d = static_cast<int>(FLAGS_d);
    int n = static_cast<int>(FLAGS_n);

    // name -> quantizer type; the benchmark is named after the map key.
    auto quantizers = ::perf_tests::sq_types();
    for (auto& entry : quantizers) {
        auto* registered = benchmark::RegisterBenchmark(
                entry.first.c_str(), bench_encode, entry.second, d, n);
        registered->Iterations(iterations);
    }

    benchmark::RunSpecifiedBenchmarks();
    benchmark::Shutdown();
}

View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/perf_tests/utils.h>
namespace faiss::perf_tests {
std::map<std::string, faiss::ScalarQuantizer::QuantizerType> sq_types() {
static std::map<std::string, faiss::ScalarQuantizer::QuantizerType>
sq_types = {
{"QT_8bit", faiss::ScalarQuantizer::QT_8bit},
{"QT_4bit", faiss::ScalarQuantizer::QT_4bit},
{"QT_8bit_uniform",
faiss::ScalarQuantizer::QT_8bit_uniform},
{"QT_4bit_uniform",
faiss::ScalarQuantizer::QT_4bit_uniform},
{"QT_fp16", faiss::ScalarQuantizer::QT_fp16},
{"QT_8bit_direct", faiss::ScalarQuantizer::QT_8bit_direct},
{"QT_6bit", faiss::ScalarQuantizer::QT_6bit},
{"QT_bf16", faiss::ScalarQuantizer::QT_bf16},
{"QT_8bit_direct_signed",
faiss::ScalarQuantizer::QT_8bit_direct_signed}};
return sq_types;
}
} // namespace faiss::perf_tests

View File

@@ -0,0 +1,16 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/impl/ScalarQuantizer.h>
#include <map>
namespace faiss::perf_tests {
/// Returns a map from a quantizer type's display name (e.g. "QT_8bit") to the
/// corresponding ScalarQuantizer::QuantizerType. The map is returned by value.
// NOTE(review): std::string is used here but <string> is not included
// directly; it presumably arrives through the headers above -- confirm.
std::map<std::string, faiss::ScalarQuantizer::QuantizerType> sq_types();
} // namespace faiss::perf_tests